LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [PATCH] sched/core: An optimization of pick_next_task() not sure
@ 2021-08-16 15:44 Tao Zhou
  2021-08-16 18:52 ` Peter Zijlstra
  0 siblings, 1 reply; 5+ messages in thread
From: Tao Zhou @ 2021-08-16 15:44 UTC (permalink / raw)
  To: linux-kernel
  Cc: peterz, tglx, joel, chris.hyser, joshdon, mingo, vincent.guittot,
	valentin.schneider, mgorman, tao.zhou

When we find a new candidate max, wipe the stale state and start over.
Goto again: and use the new max to loop to pick the task.

Here we first want to get the max of the core and use this new
max to loop once to pick the task on each thread.

Not sure this is an optimization and just stop here a little
and move on..

Compiled.
---
 kernel/sched/core.c | 52 +++++++++++++++++----------------------------
 1 file changed, 20 insertions(+), 32 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 20ffcc044134..bddcd328df96 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5403,7 +5403,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 	const struct sched_class *class;
 	const struct cpumask *smt_mask;
 	bool fi_before = false;
-	int i, j, cpu, occ = 0;
+	int i, cpu, occ = 0;
 	bool need_sync;
 
 	if (!sched_core_enabled(rq))
@@ -5508,11 +5508,27 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 	 * order.
 	 */
 	for_each_class(class) {
-again:
+		struct rq *rq_i;
+		struct task_struct *p;
+
 		for_each_cpu_wrap(i, smt_mask, cpu) {
-			struct rq *rq_i = cpu_rq(i);
-			struct task_struct *p;
+			rq_i = cpu_rq(i);
+			p = pick_task(rq_i, class, max, fi_before);
+			/*
+			 * If this new candidate is of higher priority than the
+			 * previous; and they're incompatible; pick_task makes
+			 * sure that p's priority is more than max if it doesn't
+			 * match max's cookie. Update max.
+			 *
+			 * NOTE: this is a linear max-filter and is thus bounded
+			 * in execution time.
+			 */
+			if (!max || !cookie_match(max, p))
+				max = p;
+		}
 
+		for_each_cpu_wrap(i, smt_mask, cpu) {
+			rq_i = cpu_rq(i);
 			if (rq_i->core_pick)
 				continue;
 
@@ -5536,34 +5552,6 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 					rq->core->core_forceidle_seq++;
 			}
 
-			/*
-			 * If this new candidate is of higher priority than the
-			 * previous; and they're incompatible; we need to wipe
-			 * the slate and start over. pick_task makes sure that
-			 * p's priority is more than max if it doesn't match
-			 * max's cookie.
-			 *
-			 * NOTE: this is a linear max-filter and is thus bounded
-			 * in execution time.
-			 */
-			if (!max || !cookie_match(max, p)) {
-				struct task_struct *old_max = max;
-
-				rq->core->core_cookie = p->core_cookie;
-				max = p;
-
-				if (old_max) {
-					rq->core->core_forceidle = false;
-					for_each_cpu(j, smt_mask) {
-						if (j == i)
-							continue;
-
-						cpu_rq(j)->core_pick = NULL;
-					}
-					occ = 1;
-					goto again;
-				}
-			}
 		}
 	}
 
-- 
2.31.1


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] sched/core: An optimization of pick_next_task() not sure
  2021-08-16 15:44 [PATCH] sched/core: An optimization of pick_next_task() not sure Tao Zhou
@ 2021-08-16 18:52 ` Peter Zijlstra
  2021-08-16 19:02   ` Josh Don
  2021-08-17 16:45   ` Tao Zhou
  0 siblings, 2 replies; 5+ messages in thread
From: Peter Zijlstra @ 2021-08-16 18:52 UTC (permalink / raw)
  To: Tao Zhou
  Cc: linux-kernel, tglx, joel, chris.hyser, joshdon, mingo,
	vincent.guittot, valentin.schneider, mgorman

On Mon, Aug 16, 2021 at 11:44:01PM +0800, Tao Zhou wrote:
> When find a new candidate max, wipe the stale and start over.
> Goto again: and use the new max to loop to pick the the task.
> 
> Here first want to get the max of the core and use this new
> max to loop once to pick the task on each thread.
> 
> Not sure this is an optimization and just stop here a little
> and move on..
> 

Did you find this retry was an issue on your workload? Or was this from
reading the source?

> ---
>  kernel/sched/core.c | 52 +++++++++++++++++----------------------------
>  1 file changed, 20 insertions(+), 32 deletions(-)
> 
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 20ffcc044134..bddcd328df96 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -5403,7 +5403,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
>  	const struct sched_class *class;
>  	const struct cpumask *smt_mask;
>  	bool fi_before = false;
> -	int i, j, cpu, occ = 0;
> +	int i, cpu, occ = 0;
>  	bool need_sync;
>  
>  	if (!sched_core_enabled(rq))
> @@ -5508,11 +5508,27 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
>  	 * order.
>  	 */
>  	for_each_class(class) {
> -again:
> +		struct rq *rq_i;
> +		struct task_struct *p;
> +
>  		for_each_cpu_wrap(i, smt_mask, cpu) {
> -			struct rq *rq_i = cpu_rq(i);
> -			struct task_struct *p;
> +			rq_i = cpu_rq(i);
> +			p = pick_task(rq_i, class, max, fi_before);
> +			/*
> +			 * If this new candidate is of higher priority than the
> +			 * previous; and they're incompatible; pick_task makes
> +			 * sure that p's priority is more than max if it doesn't
> +			 * match max's cookie. Update max.
> +			 *
> +			 * NOTE: this is a linear max-filter and is thus bounded
> +			 * in execution time.
> +			 */
> +			if (!max || !cookie_match(max, p))
> +				max = p;
> +		}
>  
> +		for_each_cpu_wrap(i, smt_mask, cpu) {
> +			rq_i = cpu_rq(i);
>  			if (rq_i->core_pick)
>  				continue;
>  

This now calls pick_task() twice for each CPU, which seems unfortunate;
perhaps add rq->core_temp storage to cache that result. Also, since the
first iteration is now explicitly about the max filter, perhaps we
should move that part of pick_task() into the loop and simplify things
further?

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] sched/core: An optimization of pick_next_task() not sure
  2021-08-16 18:52 ` Peter Zijlstra
@ 2021-08-16 19:02   ` Josh Don
  2021-08-17 16:45   ` Tao Zhou
  1 sibling, 0 replies; 5+ messages in thread
From: Josh Don @ 2021-08-16 19:02 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Tao Zhou, linux-kernel, Thomas Gleixner, Joel Fernandes,
	Hyser,Chris, Ingo Molnar, Vincent Guittot, Valentin Schneider,
	Mel Gorman

On Mon, Aug 16, 2021 at 11:53 AM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Mon, Aug 16, 2021 at 11:44:01PM +0800, Tao Zhou wrote:
> > When find a new candidate max, wipe the stale and start over.
> > Goto again: and use the new max to loop to pick the the task.
> >
> > Here first want to get the max of the core and use this new
> > max to loop once to pick the task on each thread.
> >
> > Not sure this is an optimization and just stop here a little
> > and move on..
> >
>
> Did you find this retry was an issue on your workload? Or was this from
> reading the source?

I would be surprised if this made a noticeable difference on SMT2, but
I could certainly see how this would help with larger SMT
configurations where we have more opportunities to invalidate the
other siblings.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] sched/core: An optimization of pick_next_task() not sure
  2021-08-16 18:52 ` Peter Zijlstra
  2021-08-16 19:02   ` Josh Don
@ 2021-08-17 16:45   ` Tao Zhou
  2021-08-18 19:23     ` Tao Zhou
  1 sibling, 1 reply; 5+ messages in thread
From: Tao Zhou @ 2021-08-17 16:45 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: linux-kernel, tglx, joel, chris.hyser, joshdon, mingo,
	vincent.guittot, valentin.schneider, mgorman, taozhou

Hi Peter,

On Mon, Aug 16, 2021 at 08:52:39PM +0200, Peter Zijlstra wrote:

> On Mon, Aug 16, 2021 at 11:44:01PM +0800, Tao Zhou wrote:
> > When find a new candidate max, wipe the stale and start over.
> > Goto again: and use the new max to loop to pick the the task.
> > 
> > Here first want to get the max of the core and use this new
> > max to loop once to pick the task on each thread.
> > 
> > Not sure this is an optimization and just stop here a little
> > and move on..
> > 
> 
> Did you find this retry was an issue on your workload? Or was this from
> reading the source?

Thank you for your reply. Sorry for my late reply.
This was from reading the source..

> 
> > ---
> >  kernel/sched/core.c | 52 +++++++++++++++++----------------------------
> >  1 file changed, 20 insertions(+), 32 deletions(-)
> > 
> > diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> > index 20ffcc044134..bddcd328df96 100644
> > --- a/kernel/sched/core.c
> > +++ b/kernel/sched/core.c
> > @@ -5403,7 +5403,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
> >  	const struct sched_class *class;
> >  	const struct cpumask *smt_mask;
> >  	bool fi_before = false;
> > -	int i, j, cpu, occ = 0;
> > +	int i, cpu, occ = 0;
> >  	bool need_sync;
> >  
> >  	if (!sched_core_enabled(rq))
> > @@ -5508,11 +5508,27 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
> >  	 * order.
> >  	 */
> >  	for_each_class(class) {
> > -again:
> > +		struct rq *rq_i;
> > +		struct task_struct *p;
> > +
> >  		for_each_cpu_wrap(i, smt_mask, cpu) {
> > -			struct rq *rq_i = cpu_rq(i);
> > -			struct task_struct *p;
> > +			rq_i = cpu_rq(i);
> > +			p = pick_task(rq_i, class, max, fi_before);
> > +			/*
> > +			 * If this new candidate is of higher priority than the
> > +			 * previous; and they're incompatible; pick_task makes
> > +			 * sure that p's priority is more than max if it doesn't
> > +			 * match max's cookie. Update max.
> > +			 *
> > +			 * NOTE: this is a linear max-filter and is thus bounded
> > +			 * in execution time.
> > +			 */
> > +			if (!max || !cookie_match(max, p))
> > +				max = p;
> > +		}
> >  
> > +		for_each_cpu_wrap(i, smt_mask, cpu) {
> > +			rq_i = cpu_rq(i);
> >  			if (rq_i->core_pick)
> >  				continue;
> >  
> 
> This now calls pick_task() twice for each CPU, which seems unfortunate;
> perhaps add q->core_temp storage to cache that result. Also, since the
> first iteration is now explicitly about the max filter, perhaps we
> shouuld move that part of pick_task() into the loop and simplify things
> further?

Here is my ugly patch below..
Not compiled..


From b3de16fb6f3e6cd2a8a9f7a579e80df74fb2d865 Mon Sep 17 00:00:00 2001
From: Tao Zhou <tao.zhou@linux.dev>
Date: Wed, 18 Aug 2021 00:07:38 +0800
Subject: [PATCH] optimize pick_next_task()

---
 kernel/sched/core.c  | 71 +++++++++++++++++++++++++++++++++-----------
 kernel/sched/sched.h |  1 +
 2 files changed, 54 insertions(+), 18 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 20ffcc044134..c2a403bacf99 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5380,18 +5380,32 @@ pick_task(struct rq *rq, const struct sched_class *class, struct task_struct *ma
 	if (cookie_equals(class_pick, cookie))
 		return class_pick;
 
-	cookie_pick = sched_core_find(rq, cookie);
+	return class_pick;
+}
 
-	/*
-	 * If class > max && class > cookie, it is the highest priority task on
-	 * the core (so far) and it must be selected, otherwise we must go with
-	 * the cookie pick in order to satisfy the constraint.
-	 */
-	if (prio_less(cookie_pick, class_pick, in_fi) &&
-	    (!max || prio_less(max, class_pick, in_fi)))
-		return class_pick;
+static task_struct *
+filter_max_prio(struct rq *rq, struct task_struct *class_pick,
+				struct task_struct **cookie_pick, struct task_struct *max,
+				bool in_fi)
+{
+	unsigned long cookie = rq->core->core_cookie;
 
-	return cookie_pick;
+	*cookie_pick = NULL;
+	if (cookie && !cookie_equals(class_pick, cookie)) {
+		*cookie_pick = sched_core_find(rq, cookie);
+		/*
+		 * If class > max && class > cookie, it is the
+		 * highest priority task on the core (so far)
+		 * and it must be selected, otherwise we must
+		 * go with the cookie pick in order to satisfy
+		 * the constraint.
+		 */
+		if (prio_less(cookie_pick, class_pick, in_fi) &&
+		    (!max || prio_less(max, class_pick, in_fi)))
+			return class_pick;
+	}
+
+	return NULL;
 }
 
 extern void task_vruntime_update(struct rq *rq, struct task_struct *p, bool in_fi);
@@ -5508,24 +5522,44 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 	 * order.
 	 */
 	for_each_class(class) {
-again:
+		struct task_struct *class_pick, *cookie_pick;
+		struct rq *rq_i;
+
+		for_each_cpu_wrap(i, smt_mask, cpu) {
+			rq_i = cpu_rq(i);
+			class_pick = pick_task(rq_i, class, max, fi_before);
+			rq_i->core_temp = class_pick;
+			/*
+			 * This sibling doesn't yet have a suitable task to
+			 * run.
+			 */
+			if (!class_pick)
+				continue;
+
+			if (filter_max_prio(rq_i, class_pick, &cookie_pick, max, fi_before))
+				max = class_pick;
+		}
+
 		for_each_cpu_wrap(i, smt_mask, cpu) {
-			struct rq *rq_i = cpu_rq(i);
 			struct task_struct *p;
+			rq_i = cpu_rq(i);
 
 			if (rq_i->core_pick)
 				continue;
 
 			/*
-			 * If this sibling doesn't yet have a suitable task to
-			 * run; ask for the most eligible task, given the
-			 * highest priority task already selected for this
-			 * core.
+			 * This sibling doesn't yet have a suitable task to
+			 * run.
 			 */
-			p = pick_task(rq_i, class, max, fi_before);
-			if (!p)
+			if (!rq_i->core_temp)
 				continue;
 
+			p = class_pick = rq_i->core_temp;
+			if (!filter_max_prio(rq_i, class_pick, &cookie_pick, max, fi_before)) {
+				if (cookie_pick)
+					p = cookie_pick;
+			}
+
 			if (!is_task_rq_idle(p))
 				occ++;
 
@@ -9024,6 +9058,7 @@ void __init sched_init(void)
 #ifdef CONFIG_SCHED_CORE
 		rq->core = NULL;
 		rq->core_pick = NULL;
+		rq->core_temp = NULL;
 		rq->core_enabled = 0;
 		rq->core_tree = RB_ROOT;
 		rq->core_forceidle = false;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 14a41a243f7b..2b21a3846b8e 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1089,6 +1089,7 @@ struct rq {
 	/* per rq */
 	struct rq		*core;
 	struct task_struct	*core_pick;
+	struct task_struct	*core_temp;
 	unsigned int		core_enabled;
 	unsigned int		core_sched_seq;
 	struct rb_root		core_tree;
-- 
2.31.1


Thanks,
Tao

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] sched/core: An optimization of pick_next_task() not sure
  2021-08-17 16:45   ` Tao Zhou
@ 2021-08-18 19:23     ` Tao Zhou
  0 siblings, 0 replies; 5+ messages in thread
From: Tao Zhou @ 2021-08-18 19:23 UTC (permalink / raw)
  To: tao.zhou
  Cc: linux-kernel, peterz, tglx, joel, chris.hyser, joshdon, mingo,
	vincent.guittot, valentin.schneider, mgorman

On Wed, Aug 18, 2021 at 12:45:17AM +0800, Tao Zhou wrote:

> Hi Peter,
> 
> On Mon, Aug 16, 2021 at 08:52:39PM +0200, Peter Zijlstra wrote:
> 
> > On Mon, Aug 16, 2021 at 11:44:01PM +0800, Tao Zhou wrote:
> > > When find a new candidate max, wipe the stale and start over.
> > > Goto again: and use the new max to loop to pick the the task.
> > > 
> > > Here first want to get the max of the core and use this new
> > > max to loop once to pick the task on each thread.
> > > 
> > > Not sure this is an optimization and just stop here a little
> > > and move on..
> > > 
> > 
> > Did you find this retry was an issue on your workload? Or was this from
> > reading the source?
> 
> Thank you for your reply. Sorry for my late reply.
> This was from reading the source..
> 
> > 
> > > ---
> > >  kernel/sched/core.c | 52 +++++++++++++++++----------------------------
> > >  1 file changed, 20 insertions(+), 32 deletions(-)
> > > 
> > > diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> > > index 20ffcc044134..bddcd328df96 100644
> > > --- a/kernel/sched/core.c
> > > +++ b/kernel/sched/core.c
> > > @@ -5403,7 +5403,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
> > >  	const struct sched_class *class;
> > >  	const struct cpumask *smt_mask;
> > >  	bool fi_before = false;
> > > -	int i, j, cpu, occ = 0;
> > > +	int i, cpu, occ = 0;
> > >  	bool need_sync;
> > >  
> > >  	if (!sched_core_enabled(rq))
> > > @@ -5508,11 +5508,27 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
> > >  	 * order.
> > >  	 */
> > >  	for_each_class(class) {
> > > -again:
> > > +		struct rq *rq_i;
> > > +		struct task_struct *p;
> > > +
> > >  		for_each_cpu_wrap(i, smt_mask, cpu) {
> > > -			struct rq *rq_i = cpu_rq(i);
> > > -			struct task_struct *p;
> > > +			rq_i = cpu_rq(i);
> > > +			p = pick_task(rq_i, class, max, fi_before);
> > > +			/*
> > > +			 * If this new candidate is of higher priority than the
> > > +			 * previous; and they're incompatible; pick_task makes
> > > +			 * sure that p's priority is more than max if it doesn't
> > > +			 * match max's cookie. Update max.
> > > +			 *
> > > +			 * NOTE: this is a linear max-filter and is thus bounded
> > > +			 * in execution time.
> > > +			 */
> > > +			if (!max || !cookie_match(max, p))
> > > +				max = p;
> > > +		}
> > >  
> > > +		for_each_cpu_wrap(i, smt_mask, cpu) {
> > > +			rq_i = cpu_rq(i);
> > >  			if (rq_i->core_pick)
> > >  				continue;
> > >  
> > 
> > This now calls pick_task() twice for each CPU, which seems unfortunate;
> > perhaps add q->core_temp storage to cache that result. Also, since the
> > first iteration is now explicitly about the max filter, perhaps we
> > shouuld move that part of pick_task() into the loop and simplify things
> > further?
> 
> Here is my ugly patch below..
> Not compiled..
> 
> 
> >From b3de16fb6f3e6cd2a8a9f7a579e80df74fb2d865 Mon Sep 17 00:00:00 2001
> From: Tao Zhou <tao.zhou@linux.dev>
> Date: Wed, 18 Aug 2021 00:07:38 +0800
> Subject: [PATCH] optimize pick_next_task()
> 
> ---
>  kernel/sched/core.c  | 71 +++++++++++++++++++++++++++++++++-----------
>  kernel/sched/sched.h |  1 +
>  2 files changed, 54 insertions(+), 18 deletions(-)
> 
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 20ffcc044134..c2a403bacf99 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -5380,18 +5380,32 @@ pick_task(struct rq *rq, const struct sched_class *class, struct task_struct *ma
>  	if (cookie_equals(class_pick, cookie))
>  		return class_pick;
>  
> -	cookie_pick = sched_core_find(rq, cookie);
> +	return class_pick;
> +}
>  
> -	/*
> -	 * If class > max && class > cookie, it is the highest priority task on
> -	 * the core (so far) and it must be selected, otherwise we must go with
> -	 * the cookie pick in order to satisfy the constraint.
> -	 */
> -	if (prio_less(cookie_pick, class_pick, in_fi) &&
> -	    (!max || prio_less(max, class_pick, in_fi)))
> -		return class_pick;
> +static task_struct *
> +filter_max_prio(struct rq *rq, struct task_struct *class_pick,
> +				struct task_struct **cookie_pick, struct task_struct *max,
> +				bool in_fi)
> +{
> +	unsigned long cookie = rq->core->core_cookie;
>  
> -	return cookie_pick;
> +	*cookie_pick = NULL;
> +	if (cookie && !cookie_equals(class_pick, cookie)) {
> +		*cookie_pick = sched_core_find(rq, cookie);
> +		/*
> +		 * If class > max && class > cookie, it is the
> +		 * highest priority task on the core (so far)
> +		 * and it must be selected, otherwise we must
> +		 * go with the cookie pick in order to satisfy
> +		 * the constraint.
> +		 */
> +		if (prio_less(cookie_pick, class_pick, in_fi) &&
> +		    (!max || prio_less(max, class_pick, in_fi)))
> +			return class_pick;
> +	}
> +
> +	return NULL;
>  }
>  
>  extern void task_vruntime_update(struct rq *rq, struct task_struct *p, bool in_fi);
> @@ -5508,24 +5522,44 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
>  	 * order.
>  	 */
>  	for_each_class(class) {
> -again:
> +		struct task_struct *class_pick, *cookie_pick;
> +		struct rq *rq_i;
> +
> +		for_each_cpu_wrap(i, smt_mask, cpu) {
> +			rq_i = cpu_rq(i);
> +			class_pick = pick_task(rq_i, class, max, fi_before);
> +			rq_i->core_temp = class_pick;
> +			/*
> +			 * This sibling doesn't yet have a suitable task to
> +			 * run.
> +			 */
> +			if (!class_pick)
> +				continue;
> +
> +			if (filter_max_prio(rq_i, class_pick, &cookie_pick, max, fi_before))
> +				max = class_pick;
> +		}
> +
>  		for_each_cpu_wrap(i, smt_mask, cpu) {
> -			struct rq *rq_i = cpu_rq(i);
>  			struct task_struct *p;
> +			rq_i = cpu_rq(i);
>  
>  			if (rq_i->core_pick)
>  				continue;
>  
>  			/*
> -			 * If this sibling doesn't yet have a suitable task to
> -			 * run; ask for the most eligible task, given the
> -			 * highest priority task already selected for this
> -			 * core.
> +			 * This sibling doesn't yet have a suitable task to
> +			 * run.
>  			 */
> -			p = pick_task(rq_i, class, max, fi_before);
> -			if (!p)
> +			if (!rq_i->core_temp)
>  				continue;
>  
> +			p = class_pick = rq_i->core_temp;
> +			if (!filter_max_prio(rq_i, class_pick, &cookie_pick, max, fi_before)) {
> +				if (cookie_pick)
> +					p = cookie_pick;
> +			}
> +
>  			if (!is_task_rq_idle(p))
>  				occ++;
>  
> @@ -9024,6 +9058,7 @@ void __init sched_init(void)
>  #ifdef CONFIG_SCHED_CORE
>  		rq->core = NULL;
>  		rq->core_pick = NULL;
> +		rq->core_temp = NULL;
>  		rq->core_enabled = 0;
>  		rq->core_tree = RB_ROOT;
>  		rq->core_forceidle = false;
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index 14a41a243f7b..2b21a3846b8e 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -1089,6 +1089,7 @@ struct rq {
>  	/* per rq */
>  	struct rq		*core;
>  	struct task_struct	*core_pick;
> +	struct task_struct	*core_temp;
>  	unsigned int		core_enabled;
>  	unsigned int		core_sched_seq;
>  	struct rb_root		core_tree;
> -- 
> 2.31.1
> 
> 
> Thanks,
> Tao


Based on the above suggestion and the source. Here is another try.
Compiled.


From d8847ff57366c894a9d456bfe25a2bdb1b5f7759 Mon Sep 17 00:00:00 2001
From: Tao Zhou <tao.zhou@linux.dev>
Date: Thu, 19 Aug 2021 03:17:27 +0800
Subject: [PATCH] sched/core: Optimize pick_next_task().

---
 kernel/sched/core.c  | 113 ++++++++++++++++++++++---------------------
 kernel/sched/sched.h |   1 +
 2 files changed, 58 insertions(+), 56 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 20ffcc044134..212647ed2598 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5355,7 +5355,7 @@ static inline bool cookie_match(struct task_struct *a, struct task_struct *b)
 static struct task_struct *
 pick_task(struct rq *rq, const struct sched_class *class, struct task_struct *max, bool in_fi)
 {
-	struct task_struct *class_pick, *cookie_pick;
+	struct task_struct *class_pick;
 	unsigned long cookie = rq->core->core_cookie;
 
 	class_pick = class->pick_task(rq);
@@ -5370,30 +5370,40 @@ pick_task(struct rq *rq, const struct sched_class *class, struct task_struct *ma
 		if (max && class_pick->core_cookie &&
 		    prio_less(class_pick, max, in_fi))
 			return idle_sched_class.pick_task(rq);
-
-		return class_pick;
 	}
 
-	/*
-	 * If class_pick is idle or matches cookie, return early.
-	 */
-	if (cookie_equals(class_pick, cookie))
-		return class_pick;
+	return class_pick;
+}
 
-	cookie_pick = sched_core_find(rq, cookie);
+static struct task_struct *
+filter_max_prio(struct rq *rq, struct task_struct *class_pick, struct task_struct *max, bool in_fi)
+{
+	unsigned long cookie = rq->core->core_cookie;
+	struct task_struct *cookie_pick = NULL;
 
-	/*
-	 * If class > max && class > cookie, it is the highest priority task on
-	 * the core (so far) and it must be selected, otherwise we must go with
-	 * the cookie pick in order to satisfy the constraint.
-	 */
-	if (prio_less(cookie_pick, class_pick, in_fi) &&
-	    (!max || prio_less(max, class_pick, in_fi)))
-		return class_pick;
+	if (cookie) {
+		if (!cookie_equals(class_pick, cookie)) {
+			cookie_pick = sched_core_find(rq, cookie);
+			/*
+			 * If class > max && class > cookie, it is the
+			 * highest priority task on the core (so far)
+			 * and it must be selected, otherwise we must
+			 * go with the cookie pick in order to satisfy
+			 * the constraint.
+			 */
+			if (prio_less(cookie_pick, class_pick, in_fi) &&
+			    (!max || prio_less(max, class_pick, in_fi)))
+				return class_pick;
+			if (!rq->core_temp)
+				swap(rq->core_temp, cookie_pick);
+		} else if (prio_less(max, class_pick, in_fi))
+			return class_pick;
+	}
 
-	return cookie_pick;
+	return NULL;
 }
 
+
 extern void task_vruntime_update(struct rq *rq, struct task_struct *p, bool in_fi);
 
 static struct task_struct *
@@ -5403,7 +5413,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 	const struct sched_class *class;
 	const struct cpumask *smt_mask;
 	bool fi_before = false;
-	int i, j, cpu, occ = 0;
+	int i, cpu, occ = 0;
 	bool need_sync;
 
 	if (!sched_core_enabled(rq))
@@ -5508,24 +5518,43 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 	 * order.
 	 */
 	for_each_class(class) {
-again:
+		struct task_struct *class_pick;
+		struct rq *rq_i;
+
+		for_each_cpu_wrap(i, smt_mask, cpu) {
+			rq_i = cpu_rq(i);
+			class_pick = pick_task(rq_i, class, max, fi_before);
+			rq_i->core_temp = class_pick;
+			/*
+			 * This sibling doesn't yet have a suitable task to run.
+			 */
+			if (!class_pick)
+				continue;
+
+			if (filter_max_prio(rq_i, class_pick, max, fi_before))
+				max = class_pick;
+		}
+
 		for_each_cpu_wrap(i, smt_mask, cpu) {
-			struct rq *rq_i = cpu_rq(i);
 			struct task_struct *p;
+			rq_i = cpu_rq(i);
 
 			if (rq_i->core_pick)
 				continue;
 
 			/*
-			 * If this sibling doesn't yet have a suitable task to
-			 * run; ask for the most eligible task, given the
-			 * highest priority task already selected for this
-			 * core.
+			 * This sibling doesn't yet have a suitable task to run.
 			 */
-			p = pick_task(rq_i, class, max, fi_before);
-			if (!p)
+			if (!rq_i->core_temp)
 				continue;
 
+			p = rq_i->core_temp;
+			class_pick = NULL;
+			swap(rq_i->core_temp, class_pick);
+			if (!filter_max_prio(rq_i, class_pick, max, fi_before))
+				if (rq_i->core_temp)
+					p = rq_i->core_temp;
+
 			if (!is_task_rq_idle(p))
 				occ++;
 
@@ -5535,35 +5564,6 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 				if (!fi_before)
 					rq->core->core_forceidle_seq++;
 			}
-
-			/*
-			 * If this new candidate is of higher priority than the
-			 * previous; and they're incompatible; we need to wipe
-			 * the slate and start over. pick_task makes sure that
-			 * p's priority is more than max if it doesn't match
-			 * max's cookie.
-			 *
-			 * NOTE: this is a linear max-filter and is thus bounded
-			 * in execution time.
-			 */
-			if (!max || !cookie_match(max, p)) {
-				struct task_struct *old_max = max;
-
-				rq->core->core_cookie = p->core_cookie;
-				max = p;
-
-				if (old_max) {
-					rq->core->core_forceidle = false;
-					for_each_cpu(j, smt_mask) {
-						if (j == i)
-							continue;
-
-						cpu_rq(j)->core_pick = NULL;
-					}
-					occ = 1;
-					goto again;
-				}
-			}
 		}
 	}
 
@@ -9024,6 +9024,7 @@ void __init sched_init(void)
 #ifdef CONFIG_SCHED_CORE
 		rq->core = NULL;
 		rq->core_pick = NULL;
+		rq->core_temp = NULL;
 		rq->core_enabled = 0;
 		rq->core_tree = RB_ROOT;
 		rq->core_forceidle = false;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 14a41a243f7b..2b21a3846b8e 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1089,6 +1089,7 @@ struct rq {
 	/* per rq */
 	struct rq		*core;
 	struct task_struct	*core_pick;
+	struct task_struct	*core_temp;
 	unsigned int		core_enabled;
 	unsigned int		core_sched_seq;
 	struct rb_root		core_tree;
-- 
2.31.1


Thanks,
Tao

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2021-08-18 19:23 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-16 15:44 [PATCH] sched/core: An optimization of pick_next_task() not sure Tao Zhou
2021-08-16 18:52 ` Peter Zijlstra
2021-08-16 19:02   ` Josh Don
2021-08-17 16:45   ` Tao Zhou
2021-08-18 19:23     ` Tao Zhou

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).