LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: LKML <linux-kernel@vger.kernel.org>, Ingo Molnar <mingo@elte.hu>
Cc: Dmitry Adamushko <dmitry.adamushko@gmail.com>,
	Mike Galbraith <efault@gmx.de>,
	Dhaval Giani <dhaval@linux.vnet.ibm.com>,
	Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>
Subject: [RFC/PATCH 14/17] sched: fair: avg_vruntime
Date: Sun, 09 Mar 2008 18:09:04 +0100	[thread overview]
Message-ID: <20080309170929.145446000@chello.nl> (raw)
In-Reply-To: <20080309170850.256853000@chello.nl>

[-- Attachment #1: sched-avg-vruntime.patch --]
[-- Type: text/plain, Size: 4946 bytes --]

In order to implement EEVDF we need to be able to test eligibility.
This requires knowing the current virtual time. We use a property of
fair schedulers to determine this in a numerically stable way, namely
the sum of all lags is 0. Therefore the average of all virtual times
is the position of lag=0.

We can't just take the average of vruntime - as it will use the full range
of its u64 and will wrap around. Instead we'll use the average of
(vruntime - min_vruntime)

\Sum_{i}^{n} 1/n (v_{i} - v) = 1/n ((\Sum_{i}^{n} v_{i}) - n v)

By factoring out the 1/n (never storing that) we avoid rounding, which
would bring an accumulating error.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 kernel/sched.c       |    5 +++
 kernel/sched_debug.c |    5 +++
 kernel/sched_fair.c  |   76 ++++++++++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 83 insertions(+), 3 deletions(-)

Index: linux-2.6-2/kernel/sched.c
===================================================================
--- linux-2.6-2.orig/kernel/sched.c
+++ linux-2.6-2/kernel/sched.c
@@ -387,6 +387,11 @@ struct cfs_root_rq {
 	struct rb_root tasks_timeline;
 	struct rb_node *rb_leftmost;
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	s64 avg_vruntime;
+	unsigned long nr_queued;
+#endif
+
 #ifdef CONFIG_SCHEDSTATS
 	unsigned long nr_spread_over;
 	u64 exec_clock;
Index: linux-2.6-2/kernel/sched_fair.c
===================================================================
--- linux-2.6-2.orig/kernel/sched_fair.c
+++ linux-2.6-2/kernel/sched_fair.c
@@ -273,6 +273,67 @@ static void __dequeue_timeline(struct cf
 	rb_erase(&se->run_node, &cfs_r_rq->tasks_timeline);
 }
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
+static inline
+void avg_vruntime_add(struct cfs_root_rq *cfs_r_rq, struct sched_entity *se)
+{
+	s64 key = entity_key(cfs_r_rq, se);
+	cfs_r_rq->avg_vruntime += key;
+	cfs_r_rq->nr_queued++;
+}
+
+static inline
+void avg_vruntime_sub(struct cfs_root_rq *cfs_r_rq, struct sched_entity *se)
+{
+	s64 key = entity_key(cfs_r_rq, se);
+	cfs_r_rq->avg_vruntime -= key;
+	cfs_r_rq->nr_queued--;
+}
+
+static inline
+void avg_vruntime_update(struct cfs_root_rq *cfs_r_rq, s64 delta)
+{
+	cfs_r_rq->avg_vruntime -= cfs_r_rq->nr_queued * delta;
+}
+
+static inline
+s64 avg_vruntime(struct cfs_root_rq *cfs_r_rq)
+{
+	s64 avg = cfs_r_rq->avg_vruntime;
+	int sign = 0;
+
+	if (avg < 0) {
+		sign = 1;
+		avg = -avg;
+	}
+
+	if (cfs_r_rq->nr_queued)
+		do_div(avg, cfs_r_rq->nr_queued);
+
+	if (sign)
+		avg = -avg;
+
+	return avg;
+}
+
+#else /* CONFIG_FAIR_GROUP_SCHED */
+
+static inline
+void avg_vruntime_add(struct cfs_root_rq *cfs_rq, struct sched_entity *se)
+{
+}
+
+static inline
+void avg_vruntime_sub(struct cfs_root_rq *cfs_rq, struct sched_entity *se)
+{
+}
+
+static inline
+void avg_vruntime_update(struct cfs_root_rq *cfs_rq, s64 delta)
+{
+}
+#endif /* CONFIG_FAIR_GROUP_SCHED */
+
 /*
  * Enqueue an entity into the rb-tree:
  */
@@ -286,6 +347,7 @@ static void __enqueue_entity(struct cfs_
 		return;
 
 	__enqueue_timeline(&rq_of(cfs_rq)->cfs_root, se);
+	avg_vruntime_add(&rq_of(cfs_rq)->cfs_root, se);
 }
 
 static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
@@ -297,6 +359,7 @@ static void __dequeue_entity(struct cfs_
 		return;
 
 	__dequeue_timeline(&rq_of(cfs_rq)->cfs_root, se);
+	avg_vruntime_sub(&rq_of(cfs_rq)->cfs_root, se);
 }
 
 static inline struct rb_node *first_fair(struct cfs_root_rq *cfs_r_rq)
@@ -464,6 +527,7 @@ static inline
 void __update_root(struct cfs_root_rq *cfs_r_rq, struct sched_entity *curr)
 {
 	u64 vruntime;
+	s64 delta;
 
 	/*
 	 * maintain cfs_r_rq->min_vruntime to be a monotonic increasing
@@ -475,8 +539,14 @@ void __update_root(struct cfs_root_rq *c
 	} else
 		vruntime = curr->vruntime;
 
-	cfs_r_rq->min_vruntime =
-		max_vruntime(cfs_r_rq->min_vruntime, vruntime);
+	/*
+	 * open coded max_vruntime() to allow updating avg_vruntime
+	 */
+	delta = (s64)(vruntime - cfs_r_rq->min_vruntime);
+	if (delta > 0) {
+		avg_vruntime_update(cfs_r_rq, delta);
+		cfs_r_rq->min_vruntime = vruntime;
+	}
 }
 
 /*
@@ -939,7 +1009,7 @@ static void yield_task_fair(struct rq *r
 	/*
 	 * Are we the only task in the tree?
 	 */
-	if (unlikely(rq->load.weight == curr->se.load.weight))
+	if (unlikely(!rq->cfs_root.nr_queued))
 		return;
 
 	if (likely(!sysctl_sched_compat_yield) && curr->policy != SCHED_BATCH) {
Index: linux-2.6-2/kernel/sched_debug.c
===================================================================
--- linux-2.6-2.orig/kernel/sched_debug.c
+++ linux-2.6-2/kernel/sched_debug.c
@@ -136,6 +136,11 @@ void print_cfs_root(struct seq_file *m, 
 	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "spread0",
 			SPLIT_NS(spread0));
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "avg_vruntime",
+			SPLIT_NS(avg_vruntime(cfs_r_rq)));
+#endif
+
 #ifdef CONFIG_SCHEDSTATS
 	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "exec_clock",
 			SPLIT_NS(cfs_r_rq->exec_clock));

--


  parent reply	other threads:[~2008-03-09 17:17 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-03-09 17:08 [RFC/PATCH 00/17] Current sched patch stack Peter Zijlstra
2008-03-09 17:08 ` [RFC/PATCH 01/17] sched: mix tasks and groups Peter Zijlstra
2008-04-01 12:12   ` Srivatsa Vaddagiri
2008-04-01 12:05     ` Peter Zijlstra
2008-03-09 17:08 ` [RFC/PATCH 02/17] sched: allow the CFS group scheduler to have multiple levels Peter Zijlstra
2008-03-12 14:47   ` Dhaval Giani
2008-03-09 17:08 ` [RFC/PATCH 03/17] sched: rt-group: full hierarchy support for the rt groups Peter Zijlstra
2008-03-12  8:25   ` Dhaval Giani
     [not found]     ` <661de9470803120129l66497656o948c13c6c0c26766@mail.gmail.com>
2008-03-12  9:29       ` Peter Zijlstra
2008-03-12 13:34         ` Balbir Singh
2008-03-12 13:39           ` Dhaval Giani
2008-03-12 14:08         ` Balbir Singh
2008-03-09 17:08 ` [RFC/PATCH 04/17] sched: fair-group: SMP-nice for group scheduling Peter Zijlstra
2008-03-09 17:08 ` [RFC/PATCH 05/17] sched: rt-group: optimize dequeue_rt_stack Peter Zijlstra
2008-03-09 17:08 ` [RFC/PATCH 06/17] sched: fair-group scheduling vs latency Peter Zijlstra
2008-03-09 17:08 ` [RFC/PATCH 07/17] sched: fair-group: de-couple load-balancing from the rb-trees Peter Zijlstra
2008-03-09 17:08 ` [RFC/PATCH 08/17] sched: fair-group: single RQ approach Peter Zijlstra
2008-03-09 17:08 ` [RFC/PATCH 09/17] sched: fair: remove moved_group() Peter Zijlstra
2008-03-09 17:09 ` [RFC/PATCH 10/17] sched: remove sysctl_sched_batch_wakeup_granularity Peter Zijlstra
2008-03-09 17:09 ` [RFC/PATCH 11/17] sched: fair: optimize sched_slice() Peter Zijlstra
2008-03-09 17:09 ` [RFC/PATCH 12/17] sched: fair: cfs_root_rq Peter Zijlstra
2008-03-09 17:09 ` [RFC/PATCH 13/17] sched: fair: calc_delta_weight() Peter Zijlstra
2008-03-09 17:09 ` Peter Zijlstra [this message]
2008-03-09 17:09 ` [RFC/PATCH 15/17] sched: fair-group: EEVDF scheduling Peter Zijlstra
2008-03-09 17:09 ` [RFC/PATCH 16/17] sched: fair: bound tardiness Peter Zijlstra
2008-03-09 17:09 ` [RFC/PATCH 17/17] sched: fair: optimize the elegebility test Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080309170929.145446000@chello.nl \
    --to=a.p.zijlstra@chello.nl \
    --cc=dhaval@linux.vnet.ibm.com \
    --cc=dmitry.adamushko@gmail.com \
    --cc=efault@gmx.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=vatsa@linux.vnet.ibm.com \
    --subject='Re: [RFC/PATCH 14/17] sched: fair: avg_vruntime' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).