LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
From: Morten Rasmussen <morten.rasmussen@arm.com>
To: peterz@infradead.org, mingo@redhat.com
Cc: valentin.schneider@arm.com, dietmar.eggemann@arm.com,
	vincent.guittot@linaro.org, gaku.inami.xh@renesas.com,
	linux-kernel@vger.kernel.org,
	Morten Rasmussen <morten.rasmussen@arm.com>
Subject: [PATCHv2 2/7] sched/fair: Add group_misfit_task load-balance type
Date: Thu, 15 Mar 2018 14:46:59 +0000	[thread overview]
Message-ID: <1521125224-15434-3-git-send-email-morten.rasmussen@arm.com> (raw)
In-Reply-To: <1521125224-15434-1-git-send-email-morten.rasmussen@arm.com>

To maximize throughput in systems with asymmetric cpu capacities (e.g.
ARM big.LITTLE) load-balancing has to consider task and cpu utilization
as well as per-cpu compute capacity when load-balancing in addition to
the current average load based load-balancing policy. Tasks with high
utilization that are scheduled on a lower capacity cpu need to be
identified and migrated to a higher capacity cpu if possible to maximize
throughput.

To implement this additional policy an additional group_type
(load-balance scenario) is added: group_misfit_task. This represents
scenarios where a sched_group has one or more tasks that are not
suitable for its per-cpu capacity. group_misfit_task is only considered
if the system is not overloaded or imbalanced (group_imbalanced or
group_overloaded).

Identifying misfit tasks requires the rq lock to be held. To avoid
taking remote rq locks to examine source sched_groups for misfit tasks,
each cpu is responsible for tracking misfit tasks themselves and update
the rq->misfit_task flag. This means checking task utilization when
tasks are scheduled and on sched_tick.

cc: Ingo Molnar <mingo@redhat.com>
cc: Peter Zijlstra <peterz@infradead.org>

Signed-off-by: Morten Rasmussen <morten.rasmussen@arm.com>
---
 kernel/sched/fair.c  | 57 +++++++++++++++++++++++++++++++++++++++++++---------
 kernel/sched/sched.h |  2 ++
 2 files changed, 50 insertions(+), 9 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index e4ce572113a9..1e06c722bc2e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -697,6 +697,7 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se)
 
 static int select_idle_sibling(struct task_struct *p, int prev_cpu, int cpu);
 static unsigned long task_h_load(struct task_struct *p);
+static unsigned long capacity_of(int cpu);
 
 /* Give new sched_entity start runnable values to heavy its load in infant time */
 void init_entity_runnable_average(struct sched_entity *se)
@@ -1407,7 +1408,6 @@ bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
 static unsigned long weighted_cpuload(struct rq *rq);
 static unsigned long source_load(int cpu, int type);
 static unsigned long target_load(int cpu, int type);
-static unsigned long capacity_of(int cpu);
 
 /* Cached statistics for all CPUs within a node */
 struct numa_stats {
@@ -3873,6 +3873,30 @@ static inline unsigned long cfs_rq_load_avg(struct cfs_rq *cfs_rq)
 
 static int idle_balance(struct rq *this_rq, struct rq_flags *rf);
 
+static inline unsigned long task_util(struct task_struct *p);
+static inline int task_fits_capacity(struct task_struct *p, long capacity)
+{
+	return capacity * 1024 > task_util(p) * capacity_margin;
+}
+
+static inline void update_misfit_status(struct task_struct *p, struct rq *rq)
+{
+	if (!static_branch_unlikely(&sched_asym_cpucapacity))
+		return;
+
+	if (!p) {
+		rq->misfit_task_load = 0;
+		return;
+	}
+
+	if (task_fits_capacity(p, capacity_of(cpu_of(rq)))) {
+		rq->misfit_task_load = 0;
+		return;
+	}
+
+	rq->misfit_task_load = task_h_load(p);
+}
+
 #else /* CONFIG_SMP */
 
 static inline int
@@ -3902,6 +3926,8 @@ static inline int idle_balance(struct rq *rq, struct rq_flags *rf)
 	return 0;
 }
 
+static inline void update_misfit_status(struct task_struct *p, struct rq *rq) {}
+
 #endif /* CONFIG_SMP */
 
 static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se)
@@ -5835,7 +5861,6 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p,
 	return target;
 }
 
-static inline unsigned long task_util(struct task_struct *p);
 static unsigned long cpu_util_wake(int cpu, struct task_struct *p);
 
 static unsigned long capacity_spare_wake(int cpu, struct task_struct *p)
@@ -6398,7 +6423,7 @@ static int wake_cap(struct task_struct *p, int cpu, int prev_cpu)
 	/* Bring task utilization in sync with prev_cpu */
 	sync_entity_load_avg(&p->se);
 
-	return min_cap * 1024 < task_util(p) * capacity_margin;
+	return !task_fits_capacity(p, min_cap);
 }
 
 /*
@@ -6829,9 +6854,12 @@ done: __maybe_unused;
 	if (hrtick_enabled(rq))
 		hrtick_start_fair(rq, p);
 
+	update_misfit_status(p, rq);
+
 	return p;
 
 idle:
+	update_misfit_status(NULL, rq);
 	new_tasks = idle_balance(rq, rf);
 
 	/*
@@ -7037,6 +7065,13 @@ static unsigned long __read_mostly max_load_balance_interval = HZ/10;
 
 enum fbq_type { regular, remote, all };
 
+enum group_type {
+	group_other = 0,
+	group_misfit_task,
+	group_imbalanced,
+	group_overloaded,
+};
+
 #define LBF_ALL_PINNED	0x01
 #define LBF_NEED_BREAK	0x02
 #define LBF_DST_PINNED  0x04
@@ -7578,12 +7613,6 @@ static unsigned long task_h_load(struct task_struct *p)
 
 /********** Helpers for find_busiest_group ************************/
 
-enum group_type {
-	group_other = 0,
-	group_imbalanced,
-	group_overloaded,
-};
-
 /*
  * sg_lb_stats - stats of a sched_group required for load_balancing
  */
@@ -7599,6 +7628,7 @@ struct sg_lb_stats {
 	unsigned int group_weight;
 	enum group_type group_type;
 	int group_no_capacity;
+	int group_misfit_task_load; /* A cpu has a task too big for its capacity */
 #ifdef CONFIG_NUMA_BALANCING
 	unsigned int nr_numa_running;
 	unsigned int nr_preferred_running;
@@ -7898,6 +7928,9 @@ group_type group_classify(struct sched_group *group,
 	if (sg_imbalanced(group))
 		return group_imbalanced;
 
+	if (sgs->group_misfit_task_load)
+		return group_misfit_task;
+
 	return group_other;
 }
 
@@ -7972,6 +8005,10 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 		 */
 		if (!nr_running && idle_cpu(i))
 			sgs->idle_cpus++;
+
+		if (env->sd->flags & SD_ASYM_CPUCAPACITY &&
+		    !sgs->group_misfit_task_load && rq->misfit_task_load)
+			sgs->group_misfit_task_load = rq->misfit_task_load;
 	}
 
 	/* Adjust by relative CPU capacity of the group */
@@ -9753,6 +9790,8 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
 
 	if (static_branch_unlikely(&sched_numa_balancing))
 		task_tick_numa(rq, curr);
+
+	update_misfit_status(curr, rq);
 }
 
 /*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 017f62069e0b..22177dfc1f04 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -815,6 +815,8 @@ struct rq {
 
 	unsigned char		idle_balance;
 
+	unsigned int		misfit_task_load;
+
 	/* For active balancing */
 	int			active_balance;
 	int			push_cpu;
-- 
2.7.4

  parent reply	other threads:[~2018-03-15 14:49 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-03-15 14:46 [PATCHv2 0/7] sched/fair: Migrate 'misfit' tasks on asymmetric capacity systems Morten Rasmussen
2018-03-15 14:46 ` [PATCHv2 1/7] sched: Add static_key for asymmetric cpu capacity optimizations Morten Rasmussen
2018-04-27 14:04   ` [PATCHv2,1/7] " Jiada Wang
2018-04-29 19:09     ` Valentin Schneider
2018-03-15 14:46 ` Morten Rasmussen [this message]
2018-04-11 10:45   ` [PATCHv2 2/7] sched/fair: Add group_misfit_task load-balance type Peter Zijlstra
2018-03-15 14:47 ` [PATCHv2 3/7] sched/fair: Consider misfit tasks when load-balancing Morten Rasmussen
2018-04-11 10:46   ` Peter Zijlstra
2018-04-11 10:53   ` Peter Zijlstra
2018-03-15 14:47 ` [PATCHv2 4/7] sched/fair: Kick nohz balance if rq->misfit_task Morten Rasmussen
2018-03-15 14:47 ` [PATCHv2 5/7] sched: Change root_domain->overload type to int Morten Rasmussen
2018-03-15 14:47 ` [PATCHv2 6/7] sched: Wrap rq->rd->overload accesses with READ/WRITE_ONCE Morten Rasmussen
2018-03-15 14:47 ` [PATCHv2 7/7] sched/fair: Set sd->overload when misfit Morten Rasmussen
2018-03-20  5:30 ` [PATCHv2 0/7] sched/fair: Migrate 'misfit' tasks on asymmetric capacity systems Gaku Inami
2018-03-20  9:17   ` Morten Rasmussen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1521125224-15434-3-git-send-email-morten.rasmussen@arm.com \
    --to=morten.rasmussen@arm.com \
    --cc=dietmar.eggemann@arm.com \
    --cc=gaku.inami.xh@renesas.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=valentin.schneider@arm.com \
    --cc=vincent.guittot@linaro.org \
    --subject='Re: [PATCHv2 2/7] sched/fair: Add group_misfit_task load-balance type' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).