LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: mingo@kernel.org, linux-kernel@vger.kernel.org
Cc: subhra.mazumdar@oracle.com, steven.sistare@oracle.com,
	dhaval.giani@oracle.com, rohit.k.jain@oracle.com,
	umgwanakikbuti@gmail.com, matt@codeblueprint.co.uk,
	riel@surriel.com, peterz@infradead.org
Subject: [RFC 07/11] sched/fair: Fold the select_idle_sibling() scans
Date: Wed, 30 May 2018 16:22:43 +0200	[thread overview]
Message-ID: <20180530143106.239069559@infradead.org> (raw)
In-Reply-To: <20180530142236.667774973@infradead.org>

[-- Attachment #1: peterz-sis-again-6.patch --]
[-- Type: text/plain, Size: 3857 bytes --]

Currently select_idle_sibling() does 3 separate scans:

 - searches for an idle core
 - searches for an idle cpu
 - searches for an idle thread

The core scan is gated by there actually having been an idle core; it
has a worst case issue where we'll always scan the entire LLC to
establish there is no idle core left anymore before gating.

The cpu scan is done proportional to the remaining average idle time.

And since the cpu scan might not actually see our own sibling threads
(if they're enumerated far away in the CPU space), check if there's an
idle sibling thread.

Rohit suggested we could maybe do all 3 in a single proportional
search.

This uses the new SMT topology bits previously introduced to do the
core/smt iteration. And relies on the select_idle_cpu()'s change to nr
cores.

Basically we iterate @nr cores and select the first idle thread of the
core with the fewest busy threads that fits in the task
affinity mask.

ORIG

1:        0.559639567 seconds time elapsed    ( +-  1.44% )
2:        0.630091207 seconds time elapsed    ( +-  2.93% )
5:        2.329768398 seconds time elapsed    ( +-  1.21% )
10:       3.920248646 seconds time elapsed    ( +-  2.39% )
20:       6.501776759 seconds time elapsed    ( +-  1.02% )
40:      10.482109619 seconds time elapsed    ( +-  2.16% )

FOLD

1:        0.568188455 seconds time elapsed    ( +-  0.40% )
2:        0.643264625 seconds time elapsed    ( +-  1.27% )
5:        2.385378263 seconds time elapsed    ( +-  1.12% )
10:       3.808555491 seconds time elapsed    ( +-  1.46% )
20:       6.431994272 seconds time elapsed    ( +-  1.21% )
40:       9.423539507 seconds time elapsed    ( +-  2.07% )

Suggested-by: Rohit Jain <rohit.k.jain@oracle.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 kernel/sched/fair.c     |   48 ++++++++++++++++++++++++++++++++++++++++++++++++
 kernel/sched/features.h |    1 +
 2 files changed, 49 insertions(+)

--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6350,6 +6350,41 @@ static int select_idle_smt(struct task_s
 	return -1;
 }
 
+static int __select_idle_core(struct task_struct *p, struct sched_domain *sd,
+			      int target, int nr, int *ploops)
+{
+	int best_busy = INT_MAX, best_cpu = -1;
+	int core, cpu;
+
+	for_each_cpu_wrap(core, sched_domain_cores(sd), target) {
+		int first_idle = -1;
+		int busy = 0;
+
+		if ((*ploops)++ >= nr)
+			break;
+
+		for (cpu = core; cpu < nr_cpumask_bits; cpu = cpumask_next(cpu, cpu_smt_mask(core))) {
+			if (!available_idle_cpu(cpu))
+				busy++;
+			else if (first_idle < 0 && cpumask_test_cpu(cpu, &p->cpus_allowed))
+				first_idle = cpu;
+		}
+
+		if (first_idle < 0)
+			continue;
+
+		if (!busy)
+			return first_idle;
+
+		if (busy < best_busy) {
+			best_busy = busy;
+			best_cpu = first_idle;
+		}
+	}
+
+	return best_cpu;
+}
+
 #else /* CONFIG_SCHED_SMT */
 
 #define sched_smt_weight	1
@@ -6441,6 +6476,11 @@ static int select_idle_cpu(struct task_s
 
 	time = local_clock();
 
+#ifdef CONFIG_SCHED_SMT
+	if (sched_feat(SIS_FOLD) && static_branch_likely(&sched_smt_present))
+		cpu = __select_idle_core(p, sd, target, nr, &loops);
+	else
+#endif
 	cpu = __select_idle_cpu(p, sd, target, nr * sched_smt_weight, &loops);
 
 	time = local_clock() - time;
@@ -6503,6 +6543,14 @@ static int select_idle_sibling(struct ta
 	if (!sd)
 		return target;
 
+	if (sched_feat(SIS_FOLD)) {
+		i = select_idle_cpu(p, sd, target);
+		if ((unsigned)i < nr_cpumask_bits)
+			target = i;
+
+		return target;
+	}
+
 	i = select_idle_core(p, sd, target);
 	if ((unsigned)i < nr_cpumask_bits)
 		return i;
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -60,6 +60,7 @@ SCHED_FEAT(SIS_PROP, true)
 
 SCHED_FEAT(SIS_AGE, true)
 SCHED_FEAT(SIS_ONCE, true)
+SCHED_FEAT(SIS_FOLD, false)
 
 /*
  * Issue a WARN when we do multiple update_rq_clock() calls

  parent reply	other threads:[~2018-05-30 14:37 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-05-30 14:22 [RFC 00/11] select_idle_sibling rework Peter Zijlstra
2018-05-30 14:22 ` [RFC 01/11] sched/fair: Fix select_idle_cpu()s cost accounting Peter Zijlstra
2018-05-30 14:22 ` [RFC 02/11] sched/fair: Age the average idle time Peter Zijlstra
2018-05-30 14:22 ` [RFC 03/11] sched/fair: Only use time once Peter Zijlstra
2018-05-30 14:22 ` [RFC 04/11] sched/topology: Introduce sched_domain_cores() Peter Zijlstra
2018-05-30 14:22 ` [RFC 05/11] sched/fair: Re-arrange select_idle_cpu() Peter Zijlstra
2018-05-30 14:22 ` [RFC 06/11] sched/fair: Make select_idle_cpu() proportional to cores Peter Zijlstra
2018-05-30 14:22 ` Peter Zijlstra [this message]
2018-05-30 14:22 ` [RFC 08/11] sched/fair: Optimize SIS_FOLD Peter Zijlstra
2018-05-30 14:22 ` [RFC 09/11] sched/fair: Remove SIS_AVG_PROP Peter Zijlstra
2018-05-30 14:22 ` [RFC 10/11] sched/fair: Remove SIS_AGE/SIS_ONCE Peter Zijlstra
2018-05-30 14:22 ` [RFC 11/11] sched/fair: Remove SIS_FOLD Peter Zijlstra
2018-06-19 22:06 ` [RFC 00/11] select_idle_sibling rework Matt Fleming
2018-06-20 22:20   ` Steven Sistare

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180530143106.239069559@infradead.org \
    --to=peterz@infradead.org \
    --cc=dhaval.giani@oracle.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=matt@codeblueprint.co.uk \
    --cc=mingo@kernel.org \
    --cc=riel@surriel.com \
    --cc=rohit.k.jain@oracle.com \
    --cc=steven.sistare@oracle.com \
    --cc=subhra.mazumdar@oracle.com \
    --cc=umgwanakikbuti@gmail.com \
    --subject='Re: [RFC 07/11] sched/fair: Fold the select_idle_sibling() scans' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).