LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [PATCH 00/13] Balance RT tasks v5
@ 2007-10-23 16:50 Gregory Haskins
  2007-10-23 16:50 ` [PATCH 01/13] RT: push-rt Gregory Haskins
                   ` (12 more replies)
  0 siblings, 13 replies; 16+ messages in thread
From: Gregory Haskins @ 2007-10-23 16:50 UTC (permalink / raw)
  To: linux-rt-users
  Cc: linux-kernel, Gregory Haskins, Steven Rostedt, Dmitry Adamushko,
	Peter Zijlstra, Ingo Molnar, Darren Hart

This is version 5 of the patch series against 23-rt1.

There have been numerous fixes/tweaks since v4, though we still are based on
the global rto_cpumask logic instead of Steve/Ingo's cpuset logic.  Otherwise,
it's in pretty good shape.

Without the series applied, the following test will fail:

ftp://ftp.novell.com/dev/ghaskins/preempt-test-latest.tar.bz2

After it is applied, it will pass.

NOTE: it appears that the series also introduces wake-latency spikes that
are not present in the baseline code, so this is still "RFC" quality.
However, the baseline scheduler also violates priority order, so its hard to
determine if the numbers translate apples to apples.  These issues are still
under investigation, but I am sharing the series now so that Steven Rostedt
and Darren Hart can have access to my current tree.  The issues appear to be
caused by some other strange scheduling decisions (such as running the idle
thread while we are busy).  TBD 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH 01/13] RT: push-rt
  2007-10-23 16:50 [PATCH 00/13] Balance RT tasks v5 Gregory Haskins
@ 2007-10-23 16:50 ` Gregory Haskins
  2007-10-23 16:50 ` [PATCH 02/13] RT: Condense the next-task search into one function Gregory Haskins
                   ` (11 subsequent siblings)
  12 siblings, 0 replies; 16+ messages in thread
From: Gregory Haskins @ 2007-10-23 16:50 UTC (permalink / raw)
  To: linux-rt-users
  Cc: linux-kernel, Gregory Haskins, Steven Rostedt, Dmitry Adamushko,
	Peter Zijlstra, Ingo Molnar, Darren Hart

From: Steven Rostedt <rostedt@goodmis.org>

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---

 kernel/sched.c    |  141 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 kernel/sched_rt.c |   44 +++++++++++++++++
 2 files changed, 178 insertions(+), 7 deletions(-)

diff --git a/kernel/sched.c b/kernel/sched.c
index 3e75c62..0dabf89 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -304,6 +304,7 @@ struct rq {
 #ifdef CONFIG_PREEMPT_RT
 	unsigned long rt_nr_running;
 	unsigned long rt_nr_uninterruptible;
+	int curr_prio;
 #endif
 
 	unsigned long switch_timestamp;
@@ -1484,6 +1485,123 @@ next_in_queue:
 
 static int double_lock_balance(struct rq *this_rq, struct rq *busiest);
 
+/* Only try this algorithm three times */
+#define RT_PUSH_MAX_TRIES 3
+
+/* Will lock the rq it finds */
+static struct rq *find_lock_lowest_rq(cpumask_t *cpu_mask,
+				      struct task_struct *task,
+				      struct rq *this_rq)
+{
+	struct rq *lowest_rq = NULL;
+	int dst_cpu = -1;
+	int cpu;
+	int tries;
+
+	for (tries = 0; tries < RT_PUSH_MAX_TRIES; tries++) {
+		/*
+		 * Scan each rq for the lowest prio.
+		 */
+		for_each_cpu_mask(cpu, *cpu_mask) {
+			struct rq *rq = &per_cpu(runqueues, cpu);
+
+			if (cpu == smp_processor_id())
+				continue;
+
+			/* We look for lowest RT prio or non-rt CPU */
+			if (rq->curr_prio >= MAX_RT_PRIO) {
+				lowest_rq = rq;
+				dst_cpu = cpu;
+				break;
+			}
+
+			/* no locking for now */
+			if (rq->curr_prio > task->prio &&
+			    (!lowest_rq || rq->curr_prio < lowest_rq->curr_prio)) {
+				lowest_rq = rq;
+				dst_cpu = cpu;
+			}
+		}
+
+		if (!lowest_rq)
+			break;
+
+		/* if the prio of this runqueue changed, try again */
+		if (double_lock_balance(this_rq, lowest_rq)) {
+			/*
+			 * We had to unlock the run queue. In
+			 * the mean time, task could have
+			 * migrated already or had its affinity changed.
+			 */
+			if (unlikely(task_rq(task) != this_rq ||
+				     !cpu_isset(dst_cpu, task->cpus_allowed))) {
+				spin_unlock(&lowest_rq->lock);
+				lowest_rq = NULL;
+				break;
+			}
+
+		}
+
+		/* If this rq is still suitable use it. */
+		if (lowest_rq->curr_prio > task->prio)
+			break;
+
+		/* try again */
+		spin_unlock(&lowest_rq->lock);
+		lowest_rq = NULL;
+	}
+
+	return lowest_rq;
+}
+
+/*
+ * If the current CPU has more than one RT task, see if the non
+ * running task can migrate over to a CPU that is running a task
+ * of lesser priority.
+ */
+static int push_rt_task(struct rq *this_rq)
+{
+	struct task_struct *next_task;
+	struct rq *lowest_rq;
+	int dst_cpu;
+	int ret = 0;
+	cpumask_t cpu_mask;
+
+	assert_spin_locked(&this_rq->lock);
+
+	next_task = rt_next_highest_task(this_rq);
+	if (!next_task)
+		return 0;
+
+	cpus_and(cpu_mask, cpu_online_map, next_task->cpus_allowed);
+
+	/* We might release this_rq lock */
+	get_task_struct(next_task);
+
+	/* find_lock_lowest_rq locks the rq if found */
+	lowest_rq = find_lock_lowest_rq(&cpu_mask, next_task, this_rq);
+	if (!lowest_rq)
+		goto out;
+
+	dst_cpu = lowest_rq->cpu;
+
+	assert_spin_locked(&lowest_rq->lock);
+
+	deactivate_task(this_rq, next_task, 0);
+	set_task_cpu(next_task, dst_cpu);
+	activate_task(lowest_rq, next_task, 0);
+
+	resched_task(lowest_rq->curr);
+
+	spin_unlock(&lowest_rq->lock);
+
+	ret = 1;
+out:
+	put_task_struct(next_task);
+
+	return ret;
+}
+
 /*
  * Pull RT tasks from other CPUs in the RT-overload
  * case. Interrupts are disabled, local rq is locked.
@@ -2202,19 +2320,28 @@ static inline void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	 * be dropped twice.
 	 *		Manfred Spraul <manfred@colorfullife.com>
 	 */
+	prev_state = prev->state;
+	_finish_arch_switch(prev);
+#if defined(CONFIG_PREEMPT_RT) && defined(CONFIG_SMP)
+	rq->curr_prio = current->prio;
+#endif
+	finish_lock_switch(rq, prev);
 #if defined(CONFIG_PREEMPT_RT) && defined(CONFIG_SMP)
 	/*
 	 * If we pushed an RT task off the runqueue,
-	 * then kick other CPUs, they might run it:
+	 * then kick other CPUs, they might run it.
+	 * Note we may release the rq lock, and since
+	 * the lock was owned by prev, we need to release it
+	 * first via finish_lock_switch and then reaquire it.
 	 */
-	if (unlikely(rt_task(current) && rq->rt_nr_running > 1)) {
-		schedstat_inc(rq, rto_schedule);
-		smp_send_reschedule_allbutself_cpumask(current->cpus_allowed);
+	if (unlikely(rt_task(current))) {
+		spin_lock(&rq->lock);
+		/* push_rt_task will return true if it moved an RT */
+		while (push_rt_task(rq))
+			;
+		spin_unlock(&rq->lock);
 	}
 #endif
-	prev_state = prev->state;
-	_finish_arch_switch(prev);
-	finish_lock_switch(rq, prev);
 	fire_sched_in_preempt_notifiers(current);
 	trace_stop_sched_switched(current);
 	/*
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 369827b..8d59e62 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -96,6 +96,50 @@ static struct task_struct *pick_next_task_rt(struct rq *rq)
 	return next;
 }
 
+#ifdef CONFIG_PREEMPT_RT
+static struct task_struct *rt_next_highest_task(struct rq *rq)
+{
+	struct rt_prio_array *array = &rq->rt.active;
+	struct task_struct *next;
+	struct list_head *queue;
+	int idx;
+
+	if (likely (rq->rt_nr_running < 2))
+		return NULL;
+
+	idx = sched_find_first_bit(array->bitmap);
+	if (idx >= MAX_RT_PRIO) {
+		WARN_ON(1); /* rt_nr_running is bad */
+		return NULL;
+	}
+
+	queue = array->queue + idx;
+	next = list_entry(queue->next, struct task_struct, run_list);
+	if (unlikely(next != current))
+		return next;
+
+	if (queue->next->next != queue) {
+		/* same prio task */
+		next = list_entry(queue->next->next, struct task_struct, run_list);
+		goto out;
+	}
+
+	/* slower, but more flexible */
+	idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx+1);
+	if (idx >= MAX_RT_PRIO) {
+		WARN_ON(1); /* rt_nr_running was 2 and above! */
+		return NULL;
+	}
+
+	queue = array->queue + idx;
+	next = list_entry(queue->next, struct task_struct, run_list);
+
+ out:
+	return next;
+
+}
+#endif /* CONFIG_PREEMPT_RT */
+
 static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
 {
 	update_curr_rt(rq);


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 02/13] RT: Condense the next-task search into one function
  2007-10-23 16:50 [PATCH 00/13] Balance RT tasks v5 Gregory Haskins
  2007-10-23 16:50 ` [PATCH 01/13] RT: push-rt Gregory Haskins
@ 2007-10-23 16:50 ` Gregory Haskins
  2007-10-23 16:50 ` [PATCH 03/13] RT: Add a per-cpu rt_overload indication Gregory Haskins
                   ` (10 subsequent siblings)
  12 siblings, 0 replies; 16+ messages in thread
From: Gregory Haskins @ 2007-10-23 16:50 UTC (permalink / raw)
  To: linux-rt-users
  Cc: linux-kernel, Gregory Haskins, Steven Rostedt, Dmitry Adamushko,
	Peter Zijlstra, Ingo Molnar, Darren Hart

We inadvertently added a redundant function, so clean it up

Signed-off-by: Gregory Haskins <ghaskins@novell.com>
---

 kernel/sched.c    |    9 +++++----
 kernel/sched_rt.c |   44 --------------------------------------------
 2 files changed, 5 insertions(+), 48 deletions(-)

diff --git a/kernel/sched.c b/kernel/sched.c
index 0dabf89..daeb8ed 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1471,9 +1471,10 @@ next_in_queue:
 	 * Return the highest-prio non-running RT task (if task
 	 * may run on this CPU):
 	 */
-	if (!task_running(src_rq, tmp) &&
-				cpu_isset(this_cpu, tmp->cpus_allowed))
-		return tmp;
+	if (!task_running(src_rq, tmp)) {
+		if ((this_cpu == -1) || cpu_isset(this_cpu, tmp->cpus_allowed))
+			return tmp;
+	}
 
 	curr = curr->next;
 	if (curr != head)
@@ -1569,7 +1570,7 @@ static int push_rt_task(struct rq *this_rq)
 
 	assert_spin_locked(&this_rq->lock);
 
-	next_task = rt_next_highest_task(this_rq);
+	next_task = pick_rt_task(this_rq, -1);
 	if (!next_task)
 		return 0;
 
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 8d59e62..369827b 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -96,50 +96,6 @@ static struct task_struct *pick_next_task_rt(struct rq *rq)
 	return next;
 }
 
-#ifdef CONFIG_PREEMPT_RT
-static struct task_struct *rt_next_highest_task(struct rq *rq)
-{
-	struct rt_prio_array *array = &rq->rt.active;
-	struct task_struct *next;
-	struct list_head *queue;
-	int idx;
-
-	if (likely (rq->rt_nr_running < 2))
-		return NULL;
-
-	idx = sched_find_first_bit(array->bitmap);
-	if (idx >= MAX_RT_PRIO) {
-		WARN_ON(1); /* rt_nr_running is bad */
-		return NULL;
-	}
-
-	queue = array->queue + idx;
-	next = list_entry(queue->next, struct task_struct, run_list);
-	if (unlikely(next != current))
-		return next;
-
-	if (queue->next->next != queue) {
-		/* same prio task */
-		next = list_entry(queue->next->next, struct task_struct, run_list);
-		goto out;
-	}
-
-	/* slower, but more flexible */
-	idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx+1);
-	if (idx >= MAX_RT_PRIO) {
-		WARN_ON(1); /* rt_nr_running was 2 and above! */
-		return NULL;
-	}
-
-	queue = array->queue + idx;
-	next = list_entry(queue->next, struct task_struct, run_list);
-
- out:
-	return next;
-
-}
-#endif /* CONFIG_PREEMPT_RT */
-
 static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
 {
 	update_curr_rt(rq);


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 03/13] RT: Add a per-cpu rt_overload indication
  2007-10-23 16:50 [PATCH 00/13] Balance RT tasks v5 Gregory Haskins
  2007-10-23 16:50 ` [PATCH 01/13] RT: push-rt Gregory Haskins
  2007-10-23 16:50 ` [PATCH 02/13] RT: Condense the next-task search into one function Gregory Haskins
@ 2007-10-23 16:50 ` Gregory Haskins
  2007-10-23 16:50 ` [PATCH 04/13] RT: Wrap the RQ notion of priority to make it conditional Gregory Haskins
                   ` (9 subsequent siblings)
  12 siblings, 0 replies; 16+ messages in thread
From: Gregory Haskins @ 2007-10-23 16:50 UTC (permalink / raw)
  To: linux-rt-users
  Cc: linux-kernel, Gregory Haskins, Steven Rostedt, Dmitry Adamushko,
	Peter Zijlstra, Ingo Molnar, Darren Hart

The system currently evaluates all online CPUs whenever one or more enters
an rt_overload condition.  This suffers from scalability limitations as
the # of online CPUs increases.  So we introduce a cpumask to track
exactly which CPUs need RT balancing.

Signed-off-by: Gregory Haskins <ghaskins@novell.com>
CC: Peter W. Morreale <pmorreale@novell.com>
---

 kernel/sched.c |   12 +++++++++---
 1 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/kernel/sched.c b/kernel/sched.c
index daeb8ed..e22eec7 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -632,6 +632,7 @@ static inline struct rq *this_rq_lock(void)
 
 #if defined(CONFIG_PREEMPT_RT) && defined(CONFIG_SMP)
 static __cacheline_aligned_in_smp atomic_t rt_overload;
+static cpumask_t rto_cpus;
 #endif
 
 static inline void inc_rt_tasks(struct task_struct *p, struct rq *rq)
@@ -640,8 +641,11 @@ static inline void inc_rt_tasks(struct task_struct *p, struct rq *rq)
 	if (rt_task(p)) {
 		rq->rt_nr_running++;
 # ifdef CONFIG_SMP
-		if (rq->rt_nr_running == 2)
+		if (rq->rt_nr_running == 2) {
+			cpu_set(rq->cpu, rto_cpus);
+			smp_wmb();
 			atomic_inc(&rt_overload);
+		}
 # endif
 	}
 #endif
@@ -654,8 +658,10 @@ static inline void dec_rt_tasks(struct task_struct *p, struct rq *rq)
 		WARN_ON(!rq->rt_nr_running);
 		rq->rt_nr_running--;
 # ifdef CONFIG_SMP
-		if (rq->rt_nr_running == 1)
+		if (rq->rt_nr_running == 1) {
 			atomic_dec(&rt_overload);
+			cpu_clear(rq->cpu, rto_cpus);
+		}
 # endif
 	}
 #endif
@@ -1622,7 +1628,7 @@ static void balance_rt_tasks(struct rq *this_rq, int this_cpu)
 	 */
 	next = pick_next_task(this_rq, this_rq->curr);
 
-	for_each_online_cpu(cpu) {
+	for_each_cpu_mask(cpu, rto_cpus) {
 		if (cpu == this_cpu)
 			continue;
 		src_rq = cpu_rq(cpu);


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 04/13] RT: Wrap the RQ notion of priority to make it conditional
  2007-10-23 16:50 [PATCH 00/13] Balance RT tasks v5 Gregory Haskins
                   ` (2 preceding siblings ...)
  2007-10-23 16:50 ` [PATCH 03/13] RT: Add a per-cpu rt_overload indication Gregory Haskins
@ 2007-10-23 16:50 ` Gregory Haskins
  2007-10-23 16:50 ` [PATCH 05/13] RT: Initialize the priority value Gregory Haskins
                   ` (8 subsequent siblings)
  12 siblings, 0 replies; 16+ messages in thread
From: Gregory Haskins @ 2007-10-23 16:50 UTC (permalink / raw)
  To: linux-rt-users
  Cc: linux-kernel, Gregory Haskins, Steven Rostedt, Dmitry Adamushko,
	Peter Zijlstra, Ingo Molnar, Darren Hart

A little cleanup to avoid #ifdef proliferation later in the series

Signed-off-by: Gregory Haskins <ghaskins@novell.com>
---

 kernel/sched.c |   16 +++++++++++++---
 1 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/kernel/sched.c b/kernel/sched.c
index e22eec7..dfd0b92 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -365,6 +365,16 @@ struct rq {
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
 static DEFINE_MUTEX(sched_hotcpu_mutex);
 
+#if defined(CONFIG_PREEMPT_RT) && defined(CONFIG_SMP)
+static inline void set_rq_prio(struct rq *rq, int prio)
+{
+	rq->curr_prio = prio;
+}
+
+#else
+#define set_rq_prio(rq, prio) do { } while(0)
+#endif
+
 static inline void check_preempt_curr(struct rq *rq, struct task_struct *p)
 {
 	rq->curr->sched_class->check_preempt_curr(rq, p);
@@ -2329,9 +2339,9 @@ static inline void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	 */
 	prev_state = prev->state;
 	_finish_arch_switch(prev);
-#if defined(CONFIG_PREEMPT_RT) && defined(CONFIG_SMP)
-	rq->curr_prio = current->prio;
-#endif
+
+	set_rq_prio(rq, current->prio);
+
 	finish_lock_switch(rq, prev);
 #if defined(CONFIG_PREEMPT_RT) && defined(CONFIG_SMP)
 	/*


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 05/13] RT: Initialize the priority value
  2007-10-23 16:50 [PATCH 00/13] Balance RT tasks v5 Gregory Haskins
                   ` (3 preceding siblings ...)
  2007-10-23 16:50 ` [PATCH 04/13] RT: Wrap the RQ notion of priority to make it conditional Gregory Haskins
@ 2007-10-23 16:50 ` Gregory Haskins
  2007-10-23 16:50 ` [PATCH 06/13] RT: Maintain the highest RQ priority Gregory Haskins
                   ` (7 subsequent siblings)
  12 siblings, 0 replies; 16+ messages in thread
From: Gregory Haskins @ 2007-10-23 16:50 UTC (permalink / raw)
  To: linux-rt-users
  Cc: linux-kernel, Gregory Haskins, Steven Rostedt, Dmitry Adamushko,
	Peter Zijlstra, Ingo Molnar, Darren Hart

We should init the base value of the current RQ priority to "IDLE"

Signed-off-by: Gregory Haskins <ghaskins@novell.com>
---

 kernel/sched.c |    2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/kernel/sched.c b/kernel/sched.c
index dfd0b92..7c4fba8 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -7386,6 +7386,8 @@ void __init sched_init(void)
 		highest_cpu = i;
 		/* delimiter for bitsearch: */
 		__set_bit(MAX_RT_PRIO, array->bitmap);
+
+		set_rq_prio(rq, MAX_PRIO);
 	}
 
 	set_load_weight(&init_task);


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 06/13] RT: Maintain the highest RQ priority
  2007-10-23 16:50 [PATCH 00/13] Balance RT tasks v5 Gregory Haskins
                   ` (4 preceding siblings ...)
  2007-10-23 16:50 ` [PATCH 05/13] RT: Initialize the priority value Gregory Haskins
@ 2007-10-23 16:50 ` Gregory Haskins
  2007-10-23 16:50 ` [PATCH 07/13] RT: Clean up some of the push-rt logic Gregory Haskins
                   ` (6 subsequent siblings)
  12 siblings, 0 replies; 16+ messages in thread
From: Gregory Haskins @ 2007-10-23 16:50 UTC (permalink / raw)
  To: linux-rt-users
  Cc: linux-kernel, Gregory Haskins, Steven Rostedt, Dmitry Adamushko,
	Peter Zijlstra, Ingo Molnar, Darren Hart

This is an implementation of Steve's idea where we should update the RQ
concept of priority to show the highest-task, even if that task is not (yet)
running.  This prevents us from pushing multiple tasks to the RQ before it
gets a chance to reschedule.

Signed-off-by: Gregory Haskins <ghaskins@novell.com>
---

 kernel/sched.c |   34 +++++++++++++++++++++++++---------
 1 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/kernel/sched.c b/kernel/sched.c
index 7c4fba8..c17e2e4 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -304,7 +304,7 @@ struct rq {
 #ifdef CONFIG_PREEMPT_RT
 	unsigned long rt_nr_running;
 	unsigned long rt_nr_uninterruptible;
-	int curr_prio;
+	int highest_prio;
 #endif
 
 	unsigned long switch_timestamp;
@@ -368,11 +368,20 @@ static DEFINE_MUTEX(sched_hotcpu_mutex);
 #if defined(CONFIG_PREEMPT_RT) && defined(CONFIG_SMP)
 static inline void set_rq_prio(struct rq *rq, int prio)
 {
-	rq->curr_prio = prio;
+	rq->highest_prio = prio;
+}
+
+static inline void update_rq_prio(struct rq *rq)
+{
+	struct rt_prio_array *array = &rq->rt.active;
+	int                   prio  = sched_find_first_bit(array->bitmap);
+
+	set_rq_prio(rq, prio);
 }
 
 #else
 #define set_rq_prio(rq, prio) do { } while(0)
+#define update_rq_prio(rq)    do { } while(0)
 #endif
 
 static inline void check_preempt_curr(struct rq *rq, struct task_struct *p)
@@ -1023,12 +1032,14 @@ static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup)
 	sched_info_queued(p);
 	p->sched_class->enqueue_task(rq, p, wakeup);
 	p->se.on_rq = 1;
+	update_rq_prio(rq);
 }
 
 static void dequeue_task(struct rq *rq, struct task_struct *p, int sleep)
 {
 	p->sched_class->dequeue_task(rq, p, sleep);
 	p->se.on_rq = 0;
+	update_rq_prio(rq);
 }
 
 /*
@@ -1526,15 +1537,15 @@ static struct rq *find_lock_lowest_rq(cpumask_t *cpu_mask,
 				continue;
 
 			/* We look for lowest RT prio or non-rt CPU */
-			if (rq->curr_prio >= MAX_RT_PRIO) {
+			if (rq->highest_prio >= MAX_RT_PRIO) {
 				lowest_rq = rq;
 				dst_cpu = cpu;
 				break;
 			}
 
 			/* no locking for now */
-			if (rq->curr_prio > task->prio &&
-			    (!lowest_rq || rq->curr_prio < lowest_rq->curr_prio)) {
+			if (rq->highest_prio > task->prio &&
+			    (!lowest_rq || rq->highest_prio < lowest_rq->highest_prio)) {
 				lowest_rq = rq;
 				dst_cpu = cpu;
 			}
@@ -1560,7 +1571,7 @@ static struct rq *find_lock_lowest_rq(cpumask_t *cpu_mask,
 		}
 
 		/* If this rq is still suitable use it. */
-		if (lowest_rq->curr_prio > task->prio)
+		if (lowest_rq->highest_prio > task->prio)
 			break;
 
 		/* try again */
@@ -2339,10 +2350,8 @@ static inline void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	 */
 	prev_state = prev->state;
 	_finish_arch_switch(prev);
-
-	set_rq_prio(rq, current->prio);
-
 	finish_lock_switch(rq, prev);
+
 #if defined(CONFIG_PREEMPT_RT) && defined(CONFIG_SMP)
 	/*
 	 * If we pushed an RT task off the runqueue,
@@ -4647,6 +4656,9 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
 	prev_resched = _need_resched();
 
 	if (on_rq) {
+		/*
+		 * Note: RQ priority gets updated in the enqueue/dequeue logic
+		 */
 		enqueue_task(rq, p, 0);
 		/*
 		 * Reschedule if we are currently running on this runqueue and
@@ -4713,6 +4725,10 @@ void set_user_nice(struct task_struct *p, long nice)
 		 */
 		if (delta < 0 || (delta > 0 && task_running(rq, p)))
 			resched_task(rq->curr);
+
+		/*
+		 * Note: RQ priority gets updated in the enqueue/dequeue logic
+		 */
 	}
 out_unlock:
 	task_rq_unlock(rq, &flags);


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 07/13] RT: Clean up some of the push-rt logic
  2007-10-23 16:50 [PATCH 00/13] Balance RT tasks v5 Gregory Haskins
                   ` (5 preceding siblings ...)
  2007-10-23 16:50 ` [PATCH 06/13] RT: Maintain the highest RQ priority Gregory Haskins
@ 2007-10-23 16:50 ` Gregory Haskins
  2007-10-23 16:51 ` [PATCH 08/13] RT: Add support for low-priority wake-up to push_rt feature Gregory Haskins
                   ` (5 subsequent siblings)
  12 siblings, 0 replies; 16+ messages in thread
From: Gregory Haskins @ 2007-10-23 16:50 UTC (permalink / raw)
  To: linux-rt-users
  Cc: linux-kernel, Gregory Haskins, Steven Rostedt, Dmitry Adamushko,
	Peter Zijlstra, Ingo Molnar, Darren Hart

Get rid of the superfluous dst_cpu, and move the cpu_mask inside the search
function.

Signed-off-by: Gregory Haskins <ghaskins@novell.com>
---

 kernel/sched.c |   18 +++++++-----------
 1 files changed, 7 insertions(+), 11 deletions(-)

diff --git a/kernel/sched.c b/kernel/sched.c
index c17e2e4..0600062 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1517,20 +1517,21 @@ static int double_lock_balance(struct rq *this_rq, struct rq *busiest);
 #define RT_PUSH_MAX_TRIES 3
 
 /* Will lock the rq it finds */
-static struct rq *find_lock_lowest_rq(cpumask_t *cpu_mask,
-				      struct task_struct *task,
+static struct rq *find_lock_lowest_rq(struct task_struct *task,
 				      struct rq *this_rq)
 {
 	struct rq *lowest_rq = NULL;
-	int dst_cpu = -1;
 	int cpu;
 	int tries;
+	cpumask_t cpu_mask;
+
+	cpus_and(cpu_mask, cpu_online_map, task->cpus_allowed);
 
 	for (tries = 0; tries < RT_PUSH_MAX_TRIES; tries++) {
 		/*
 		 * Scan each rq for the lowest prio.
 		 */
-		for_each_cpu_mask(cpu, *cpu_mask) {
+		for_each_cpu_mask(cpu, cpu_mask) {
 			struct rq *rq = &per_cpu(runqueues, cpu);
 
 			if (cpu == smp_processor_id())
@@ -1539,7 +1540,6 @@ static struct rq *find_lock_lowest_rq(cpumask_t *cpu_mask,
 			/* We look for lowest RT prio or non-rt CPU */
 			if (rq->highest_prio >= MAX_RT_PRIO) {
 				lowest_rq = rq;
-				dst_cpu = cpu;
 				break;
 			}
 
@@ -1547,7 +1547,6 @@ static struct rq *find_lock_lowest_rq(cpumask_t *cpu_mask,
 			if (rq->highest_prio > task->prio &&
 			    (!lowest_rq || rq->highest_prio < lowest_rq->highest_prio)) {
 				lowest_rq = rq;
-				dst_cpu = cpu;
 			}
 		}
 
@@ -1562,7 +1561,7 @@ static struct rq *find_lock_lowest_rq(cpumask_t *cpu_mask,
 			 * migrated already or had its affinity changed.
 			 */
 			if (unlikely(task_rq(task) != this_rq ||
-				     !cpu_isset(dst_cpu, task->cpus_allowed))) {
+				     !cpu_isset(lowest_rq->cpu, task->cpus_allowed))) {
 				spin_unlock(&lowest_rq->lock);
 				lowest_rq = NULL;
 				break;
@@ -1593,7 +1592,6 @@ static int push_rt_task(struct rq *this_rq)
 	struct rq *lowest_rq;
 	int dst_cpu;
 	int ret = 0;
-	cpumask_t cpu_mask;
 
 	assert_spin_locked(&this_rq->lock);
 
@@ -1601,13 +1599,11 @@ static int push_rt_task(struct rq *this_rq)
 	if (!next_task)
 		return 0;
 
-	cpus_and(cpu_mask, cpu_online_map, next_task->cpus_allowed);
-
 	/* We might release this_rq lock */
 	get_task_struct(next_task);
 
 	/* find_lock_lowest_rq locks the rq if found */
-	lowest_rq = find_lock_lowest_rq(&cpu_mask, next_task, this_rq);
+	lowest_rq = find_lock_lowest_rq(next_task, this_rq);
 	if (!lowest_rq)
 		goto out;
 


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 08/13] RT: Add support for low-priority wake-up to push_rt feature
  2007-10-23 16:50 [PATCH 00/13] Balance RT tasks v5 Gregory Haskins
                   ` (6 preceding siblings ...)
  2007-10-23 16:50 ` [PATCH 07/13] RT: Clean up some of the push-rt logic Gregory Haskins
@ 2007-10-23 16:51 ` Gregory Haskins
  2007-10-23 16:51 ` [PATCH 09/13] RT: Only dirty a cacheline if the priority is actually changing Gregory Haskins
                   ` (4 subsequent siblings)
  12 siblings, 0 replies; 16+ messages in thread
From: Gregory Haskins @ 2007-10-23 16:51 UTC (permalink / raw)
  To: linux-rt-users
  Cc: linux-kernel, Gregory Haskins, Steven Rostedt, Dmitry Adamushko,
	Peter Zijlstra, Ingo Molnar, Darren Hart

There are three events that require consideration for redistributing RT
tasks:

1) When one or more higher-priority tasks preempts a lower-one from a
   RQ
2) When a lower-priority task is woken up on a RQ
3) When a RQ downgrades its current priority

Steve Rostedt's push_rt patch addresses (1).  It hooks in right after
a new task has been switched-in.  If this was the result of an RT
preemption, or if more than one task was awoken at the same time, we
can try to push some of those other tasks away.

This patch addresses (2).  When we wake up a task, we check to see
if it would preempt the current task on the queue.  If it will not, we
attempt to find a better suited CPU (e.g. one running something lower
priority than the task being woken) and try to activate the task there.

Finally, we have (3).  In theory, we only need to balance_rt_tasks() if
the following conditions are met:
   1) One or more CPUs are in overload, AND
   2) We are about to switch to a task that lowers our priority.

(3) will be addressed in a later patch.

Signed-off-by: Gregory Haskins <ghaskins@novell.com>
---

 kernel/sched.c |  109 ++++++++++++++++++++++++++++++++------------------------
 1 files changed, 62 insertions(+), 47 deletions(-)

diff --git a/kernel/sched.c b/kernel/sched.c
index 0600062..e536142 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1626,6 +1626,13 @@ out:
 	return ret;
 }
 
+/* Push all tasks that we can to other CPUs */
+static void push_rt_tasks(struct rq *this_rq)
+{
+	while (push_rt_task(this_rq))
+		;
+}
+
 /*
  * Pull RT tasks from other CPUs in the RT-overload
  * case. Interrupts are disabled, local rq is locked.
@@ -1986,6 +1993,46 @@ out_set_cpu:
 		this_cpu = smp_processor_id();
 		cpu = task_cpu(p);
 	}
+	
+#if defined(CONFIG_PREEMPT_RT)
+       /*
+        * If a newly woken up RT task will not run immediately on its affined
+        * RQ, try to find another CPU it can preempt:
+        */
+	if (rt_task(p) && (p->prio > rq->highest_prio)) {
+		struct rq *lowest_rq = find_lock_lowest_rq(p, rq);
+
+		if (lowest_rq) {
+			/*
+			 * We may have dropped this_rq->lock, so check to be
+			 * sure we are still eligible to wake up this task
+			 * somewhere...
+			 *
+			 * Basically the task could already be running on this
+			 * RQ, or it could have already migrated away to a
+			 * different RQ. 
+			 */
+			if (!task_running(rq, p) && (task_rq(p) == rq)) {
+				set_task_cpu(p, lowest_rq->cpu);
+				spin_unlock(&rq->lock);
+
+				/*
+				 * The new lock was already acquired in
+				 * find_lowest
+				 */ 
+				rq  = lowest_rq;
+				cpu = task_cpu(p);
+			} else
+				spin_unlock(&lowest_rq->lock);
+		}
+
+		old_state = p->state;
+		if (!(old_state & state))
+			goto out;
+		if (p->se.on_rq)
+			goto out_running;
+	}
+#endif /* defined(CONFIG_PREEMPT_RT) */
 
 out_activate:
 #endif /* CONFIG_SMP */
@@ -1995,51 +2042,20 @@ out_activate:
 	trace_start_sched_wakeup(p, rq);
 
 	/*
-	 * If a newly woken up RT task cannot preempt the
-	 * current (RT) task (on a target runqueue) then try
-	 * to find another CPU it can preempt:
+	 * Sync wakeups (i.e. those types of wakeups where the waker
+	 * has indicated that it will leave the CPU in short order)
+	 * don't trigger a preemption, if the woken up task will run on
+	 * this cpu. (in this case the 'I will reschedule' promise of
+	 * the waker guarantees that the freshly woken up task is going
+	 * to be considered on this CPU.)
 	 */
-	if (rt_task(p) && !TASK_PREEMPTS_CURR(p, rq)) {
-		struct rq *this_rq = cpu_rq(this_cpu);
-		/*
-		 * Special-case: the task on this CPU can be
-		 * preempted. In that case there's no need to
-		 * trigger reschedules on other CPUs, we can
-		 * mark the current task for reschedule.
-		 *
-		 * (Note that it's safe to access this_rq without
-		 * extra locking in this particular case, because
-		 * we are on the current CPU.)
-		 */
-		if (TASK_PREEMPTS_CURR(p, this_rq))
-			set_tsk_need_resched(this_rq->curr);
-		else
-			/*
-			 * Neither the intended target runqueue
-			 * nor the current CPU can take this task.
-			 * Trigger a reschedule on all other CPUs
-			 * nevertheless, maybe one of them can take
-			 * this task:
-			 */
-			smp_send_reschedule_allbutself_cpumask(p->cpus_allowed);
-
-		schedstat_inc(this_rq, rto_wakeup);
-	} else {
-		/*
-		 * Sync wakeups (i.e. those types of wakeups where the waker
-		 * has indicated that it will leave the CPU in short order)
-		 * don't trigger a preemption, if the woken up task will run on
-		 * this cpu. (in this case the 'I will reschedule' promise of
-		 * the waker guarantees that the freshly woken up task is going
-		 * to be considered on this CPU.)
-		 */
-		if (!sync || cpu != this_cpu)
-			check_preempt_curr(rq, p);
-		else {
-			if (TASK_PREEMPTS_CURR(p, rq))
-				set_tsk_need_resched_delayed(rq->curr);
-		}
+	if (!sync || cpu != this_cpu)
+		check_preempt_curr(rq, p);
+	else {
+		if (TASK_PREEMPTS_CURR(p, rq))
+			set_tsk_need_resched_delayed(rq->curr);
 	}
+
 	if (rq->curr && p && rq && _need_resched())
 		trace_special_pid(p->pid, PRIO(p), PRIO(rq->curr));
 
@@ -2356,13 +2372,12 @@ static inline void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	 * the lock was owned by prev, we need to release it
 	 * first via finish_lock_switch and then reaquire it.
 	 */
-	if (unlikely(rt_task(current))) {
+	if (unlikely(rq->rt_nr_running > 1)) {
 		spin_lock(&rq->lock);
-		/* push_rt_task will return true if it moved an RT */
-		while (push_rt_task(rq))
-			;
+		push_rt_tasks(rq);
 		spin_unlock(&rq->lock);
 	}
+
 #endif
 	fire_sched_in_preempt_notifiers(current);
 	trace_stop_sched_switched(current);


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 09/13] RT: Only dirty a cacheline if the priority is actually changing
  2007-10-23 16:50 [PATCH 00/13] Balance RT tasks v5 Gregory Haskins
                   ` (7 preceding siblings ...)
  2007-10-23 16:51 ` [PATCH 08/13] RT: Add support for low-priority wake-up to push_rt feature Gregory Haskins
@ 2007-10-23 16:51 ` Gregory Haskins
  2007-10-23 16:51 ` [PATCH 10/13] RT: Fixes for push-rt patch Gregory Haskins
                   ` (3 subsequent siblings)
  12 siblings, 0 replies; 16+ messages in thread
From: Gregory Haskins @ 2007-10-23 16:51 UTC (permalink / raw)
  To: linux-rt-users
  Cc: linux-kernel, Gregory Haskins, Steven Rostedt, Dmitry Adamushko,
	Peter Zijlstra, Ingo Molnar, Darren Hart

We can avoid dirtying a rq related cacheline with a simple check, so why not.

Signed-off-by: Gregory Haskins <ghaskins@novell.com>
---

 kernel/sched.c |    3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/kernel/sched.c b/kernel/sched.c
index e536142..1058a1f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -376,7 +376,8 @@ static inline void update_rq_prio(struct rq *rq)
 	struct rt_prio_array *array = &rq->rt.active;
 	int                   prio  = sched_find_first_bit(array->bitmap);
 
-	set_rq_prio(rq, prio);
+	if (rq->highest_prio != prio)
+		set_rq_prio(rq, prio);
 }
 
 #else


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 10/13] RT: Fixes for push-rt patch
  2007-10-23 16:50 [PATCH 00/13] Balance RT tasks v5 Gregory Haskins
                   ` (8 preceding siblings ...)
  2007-10-23 16:51 ` [PATCH 09/13] RT: Only dirty a cacheline if the priority is actually changing Gregory Haskins
@ 2007-10-23 16:51 ` Gregory Haskins
  2007-10-23 16:51 ` [PATCH 11/13] RT: Condense NORMAL and IDLE priorities Gregory Haskins
                   ` (2 subsequent siblings)
  12 siblings, 0 replies; 16+ messages in thread
From: Gregory Haskins @ 2007-10-23 16:51 UTC (permalink / raw)
  To: linux-rt-users
  Cc: linux-kernel, Gregory Haskins, Steven Rostedt, Dmitry Adamushko,
	Peter Zijlstra, Ingo Molnar, Darren Hart

From: Steven Rostedt <rostedt@goodmis.org>

Steve found these errors in the original patch

Signed-off-by: Gregory Haskins <ghaskins@novell.com>
---

 kernel/sched.c    |   15 ++++++++-
 kernel/sched_rt.c |   90 +----------------------------------------------------
 2 files changed, 15 insertions(+), 90 deletions(-)

diff --git a/kernel/sched.c b/kernel/sched.c
index 1058a1f..a1f1d92 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1535,7 +1535,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task,
 		for_each_cpu_mask(cpu, cpu_mask) {
 			struct rq *rq = &per_cpu(runqueues, cpu);
 
-			if (cpu == smp_processor_id())
+			if (cpu == this_rq->cpu)
 				continue;
 
 			/* We look for lowest RT prio or non-rt CPU */
@@ -1561,7 +1561,8 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task,
 			 * the mean time, task could have
 			 * migrated already or had its affinity changed.
 			 */
-			if (unlikely(task_rq(task) != this_rq ||
+			if (unlikely(task_running(this_rq, task) ||
+				     task_rq(task) != this_rq ||
 				     !cpu_isset(lowest_rq->cpu, task->cpus_allowed))) {
 				spin_unlock(&lowest_rq->lock);
 				lowest_rq = NULL;
@@ -2380,6 +2381,7 @@ static inline void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	}
 
 #endif
+
 	fire_sched_in_preempt_notifiers(current);
 	trace_stop_sched_switched(current);
 	/*
@@ -4102,6 +4104,15 @@ asmlinkage void __sched __schedule(void)
 		context_switch(rq, prev, next); /* unlocks the rq */
 		__preempt_enable_no_resched();
 	} else {
+#if defined(CONFIG_PREEMPT_RT) && defined(CONFIG_SMP)
+		/*
+		 * If we hit the condition where we do not need to actually
+		 * reschedule, we need to check if there are any tasks that
+		 * should be pushed away
+		 */
+		if (unlikely(rq->rt_nr_running > 1))
+			push_rt_tasks(rq);
+#endif
 		__preempt_enable_no_resched();
 		spin_unlock(&rq->lock);
 		trace_stop_sched_switched(next);
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 369827b..9b677c1 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -102,100 +102,14 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
 	p->se.exec_start = 0;
 }
 
-/*
- * Load-balancing iterator. Note: while the runqueue stays locked
- * during the whole iteration, the current task might be
- * dequeued so the iterator has to be dequeue-safe. Here we
- * achieve that by always pre-iterating before returning
- * the current task:
- */
-static struct task_struct *load_balance_start_rt(void *arg)
-{
-	struct rq *rq = arg;
-	struct rt_prio_array *array = &rq->rt.active;
-	struct list_head *head, *curr;
-	struct task_struct *p;
-	int idx;
-
-	idx = sched_find_first_bit(array->bitmap);
-	if (idx >= MAX_RT_PRIO)
-		return NULL;
-
-	head = array->queue + idx;
-	curr = head->prev;
-
-	p = list_entry(curr, struct task_struct, run_list);
-
-	curr = curr->prev;
-
-	rq->rt.rt_load_balance_idx = idx;
-	rq->rt.rt_load_balance_head = head;
-	rq->rt.rt_load_balance_curr = curr;
-
-	return p;
-}
-
-static struct task_struct *load_balance_next_rt(void *arg)
-{
-	struct rq *rq = arg;
-	struct rt_prio_array *array = &rq->rt.active;
-	struct list_head *head, *curr;
-	struct task_struct *p;
-	int idx;
-
-	idx = rq->rt.rt_load_balance_idx;
-	head = rq->rt.rt_load_balance_head;
-	curr = rq->rt.rt_load_balance_curr;
-
-	/*
-	 * If we arrived back to the head again then
-	 * iterate to the next queue (if any):
-	 */
-	if (unlikely(head == curr)) {
-		int next_idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx+1);
-
-		if (next_idx >= MAX_RT_PRIO)
-			return NULL;
-
-		idx = next_idx;
-		head = array->queue + idx;
-		curr = head->prev;
-
-		rq->rt.rt_load_balance_idx = idx;
-		rq->rt.rt_load_balance_head = head;
-	}
-
-	p = list_entry(curr, struct task_struct, run_list);
-
-	curr = curr->prev;
-
-	rq->rt.rt_load_balance_curr = curr;
-
-	return p;
-}
-
 static unsigned long
 load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
 			unsigned long max_nr_move, unsigned long max_load_move,
 			struct sched_domain *sd, enum cpu_idle_type idle,
 			int *all_pinned, int *this_best_prio)
 {
-	int nr_moved;
-	struct rq_iterator rt_rq_iterator;
-	unsigned long load_moved;
-
-	rt_rq_iterator.start = load_balance_start_rt;
-	rt_rq_iterator.next = load_balance_next_rt;
-	/* pass 'busiest' rq argument into
-	 * load_balance_[start|next]_rt iterators
-	 */
-	rt_rq_iterator.arg = busiest;
-
-	nr_moved = balance_tasks(this_rq, this_cpu, busiest, max_nr_move,
-			max_load_move, sd, idle, all_pinned, &load_moved,
-			this_best_prio, &rt_rq_iterator);
-
-	return load_moved;
+	/* No passive migration for RT */
+	return 0;
 }
 
 static void task_tick_rt(struct rq *rq, struct task_struct *p)


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 11/13] RT: Condense NORMAL and IDLE priorities
  2007-10-23 16:50 [PATCH 00/13] Balance RT tasks v5 Gregory Haskins
                   ` (9 preceding siblings ...)
  2007-10-23 16:51 ` [PATCH 10/13] RT: Fixes for push-rt patch Gregory Haskins
@ 2007-10-23 16:51 ` Gregory Haskins
  2007-10-23 16:51 ` [PATCH 12/13] RT: CPU priority management Gregory Haskins
  2007-10-23 16:51 ` [PATCH 13/13] RT: Cache cpus_allowed weight for optimizing migration Gregory Haskins
  12 siblings, 0 replies; 16+ messages in thread
From: Gregory Haskins @ 2007-10-23 16:51 UTC (permalink / raw)
  To: linux-rt-users
  Cc: linux-kernel, Gregory Haskins, Steven Rostedt, Dmitry Adamushko,
	Peter Zijlstra, Ingo Molnar, Darren Hart

We only need to track if the CPU is in a non-RT state, as opposed to its
priority within the non-RT state.  So simplify setting in the effort of
reducing cache-thrash.

Signed-off-by: Gregory Haskins <ghaskins@novell.com>
---

 kernel/sched.c |    8 ++++++++
 1 files changed, 8 insertions(+), 0 deletions(-)

diff --git a/kernel/sched.c b/kernel/sched.c
index a1f1d92..4abe738 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -371,11 +371,19 @@ static inline void set_rq_prio(struct rq *rq, int prio)
 	rq->highest_prio = prio;
 }
 
+/*
+ * We dont care what the exact normal priority is.  We only care about
+ * RT-priority, vs non-RT (normal or idle).  So flatten the priority if its a
+ * non-RT variety. This will reduce cache-thrashing on the rq->highest_prio.
+ */
 static inline void update_rq_prio(struct rq *rq)
 {
 	struct rt_prio_array *array = &rq->rt.active;
 	int                   prio  = sched_find_first_bit(array->bitmap);
 
+	if ((prio != MAX_PRIO) && (prio > MAX_RT_PRIO))
+		prio = MAX_RT_PRIO;
+
 	if (rq->highest_prio != prio)
 		set_rq_prio(rq, prio);
 }


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 12/13] RT: CPU priority management
  2007-10-23 16:50 [PATCH 00/13] Balance RT tasks v5 Gregory Haskins
                   ` (10 preceding siblings ...)
  2007-10-23 16:51 ` [PATCH 11/13] RT: Condense NORMAL and IDLE priorities Gregory Haskins
@ 2007-10-23 16:51 ` Gregory Haskins
  2007-10-23 16:51 ` [PATCH 13/13] RT: Cache cpus_allowed weight for optimizing migration Gregory Haskins
  12 siblings, 0 replies; 16+ messages in thread
From: Gregory Haskins @ 2007-10-23 16:51 UTC (permalink / raw)
  To: linux-rt-users
  Cc: linux-kernel, Gregory Haskins, Steven Rostedt, Dmitry Adamushko,
	Peter Zijlstra, Ingo Molnar, Darren Hart

This code tracks the priority of each CPU so that global migration
  decisions are easy to calculate.  Each CPU can be in a state as follows:

                 (INVALID), IDLE, NORMAL, RT1, ... RT99

  going from the lowest priority to the highest.  CPUs in the INVALID state
  are not eligible for routing.  The system maintains this state with
  a 2 dimensional bitmap (the first for priority class, the second for cpus
  in that class).  Therefore a typical application without affinity
  restrictions can find a suitable CPU with O(1) complexity (e.g. two bit
  searches).  For tasks with affinity restrictions, the algorithm has a
  worst case complexity of O(min(102, NR_CPUS)), though the scenario that
  yields the worst case search is fairly contrived.

Signed-off-by: Gregory Haskins <ghaskins@novell.com>
---

 kernel/Makefile       |    2 
 kernel/sched.c        |   37 +++------
 kernel/sched_cpupri.c |  200 +++++++++++++++++++++++++++++++++++++++++++++++++
 kernel/sched_cpupri.h |   10 ++
 4 files changed, 222 insertions(+), 27 deletions(-)

diff --git a/kernel/Makefile b/kernel/Makefile
index e4e2acf..d9d1351 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -9,7 +9,7 @@ obj-y     = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
 	    rcupdate.o extable.o params.o posix-timers.o \
 	    kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o \
 	    hrtimer.o rwsem.o latency.o nsproxy.o srcu.o die_notifier.o \
-	    utsname.o
+	    utsname.o sched_cpupri.o
 
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
 obj-y += time/
diff --git a/kernel/sched.c b/kernel/sched.c
index 4abe738..bdb6be0 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -67,6 +67,8 @@
 
 #include <asm/tlb.h>
 
+#include "sched_cpupri.h"
+
 /*
  * Scheduler clock - returns current time in nanosec units.
  * This is default implementation.
@@ -384,8 +386,10 @@ static inline void update_rq_prio(struct rq *rq)
 	if ((prio != MAX_PRIO) && (prio > MAX_RT_PRIO))
 		prio = MAX_RT_PRIO;
 
-	if (rq->highest_prio != prio)
+	if (rq->highest_prio != prio) {
+		cpupri_set(rq->cpu, prio);
 		set_rq_prio(rq, prio);
+	}
 }
 
 #else
@@ -1532,36 +1536,15 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task,
 	struct rq *lowest_rq = NULL;
 	int cpu;
 	int tries;
-	cpumask_t cpu_mask;
-
-	cpus_and(cpu_mask, cpu_online_map, task->cpus_allowed);
 
 	for (tries = 0; tries < RT_PUSH_MAX_TRIES; tries++) {
-		/*
-		 * Scan each rq for the lowest prio.
-		 */
-		for_each_cpu_mask(cpu, cpu_mask) {
-			struct rq *rq = &per_cpu(runqueues, cpu);
-
-			if (cpu == this_rq->cpu)
-				continue;
+		cpu = cpupri_find(this_rq->cpu, task);
 
-			/* We look for lowest RT prio or non-rt CPU */
-			if (rq->highest_prio >= MAX_RT_PRIO) {
-				lowest_rq = rq;
-				break;
-			}
-
-			/* no locking for now */
-			if (rq->highest_prio > task->prio &&
-			    (!lowest_rq || rq->highest_prio < lowest_rq->highest_prio)) {
-				lowest_rq = rq;
-			}
-		}
-
-		if (!lowest_rq)
+		if (cpu == this_rq->cpu)
 			break;
 
+		lowest_rq = cpu_rq(cpu);
+
 		/* if the prio of this runqueue changed, try again */
 		if (double_lock_balance(this_rq, lowest_rq)) {
 			/*
@@ -7395,6 +7378,8 @@ void __init sched_init(void)
 	fair_sched_class.next = &idle_sched_class;
 	idle_sched_class.next = NULL;
 
+	cpupri_init();
+
 	for_each_possible_cpu(i) {
 		struct rt_prio_array *array;
 		struct rq *rq;
diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c
new file mode 100644
index 0000000..5cb51ae
--- /dev/null
+++ b/kernel/sched_cpupri.c
@@ -0,0 +1,200 @@
+/*
+ *  kernel/sched_cpupri.c
+ *
+ *  CPU priority management
+ *
+ *  Copyright (C) 2007 Novell
+ *
+ *  Author: Gregory Haskins <ghaskins@novell.com>
+ *
+ *  This code tracks the priority of each CPU so that global migration
+ *  decisions are easy to calculate.  Each CPU can be in a state as follows:
+ *
+ *                 (INVALID), IDLE, NORMAL, RT1, ... RT99
+ *
+ *  going from the lowest priority to the highest.  CPUs in the INVALID state
+ *  are not eligible for routing.  The system maintains this state with
+ *  a 2 dimensional bitmap (the first for priority class, the second for cpus
+ *  in that class).  Therefore a typical application without affinity
+ *  restrictions can find a suitable CPU with O(1) complexity (e.g. two bit
+ *  searches).  For tasks with affinity restrictions, the algorithm has a
+ *  worst case complexity of O(min(102, NR_CPUS)), though the scenario that
+ *  yields the worst case search is fairly contrived.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; version 2
+ *  of the License.
+ */
+
+#include <asm/idle.h>
+
+#include "sched_cpupri.h"
+
+#define CPUPRI_NR_PRIORITIES 2+MAX_RT_PRIO
+
+#define CPUPRI_INVALID -2
+#define CPUPRI_IDLE    -1
+#define CPUPRI_NORMAL   0
+/* values 1-99 are RT priorities */
+
+struct cpu_priority {
+	raw_spinlock_t lock;
+	cpumask_t      pri_to_cpu[CPUPRI_NR_PRIORITIES];
+	long           pri_active[CPUPRI_NR_PRIORITIES/BITS_PER_LONG];
+	int            cpu_to_pri[NR_CPUS];
+};
+
+static __cacheline_aligned_in_smp struct cpu_priority cpu_priority;
+
+/* Convert between a 140 based task->prio, and our 102 based cpupri */
+static int convert_prio(int prio)
+{
+	int cpupri;
+	
+	if (prio == MAX_PRIO)
+		cpupri = CPUPRI_IDLE;
+	else if (prio >= MAX_RT_PRIO)
+		cpupri = CPUPRI_NORMAL;
+	else
+		cpupri = MAX_RT_PRIO - prio;
+    
+	return cpupri;
+}
+
+#define for_each_cpupri_active(array, idx)                   \
+  for( idx = find_first_bit(array, CPUPRI_NR_PRIORITIES);    \
+       idx < CPUPRI_NR_PRIORITIES;                           \
+       idx = find_next_bit(array, CPUPRI_NR_PRIORITIES, idx+1))
+
+/**
+ * cpupri_find - find the best (lowest-pri) CPU in the system
+ * @cpu: The recommended/default CPU
+ * @task_pri: The priority of the task being scheduled (IDLE-RT99)
+ * @p: The task being scheduled
+ *
+ * Note: This function returns the recommended CPU as calculated during the
+ * current invokation.  By the time the call returns, the CPUs may have in
+ * fact changed priorities any number of times.  While not ideal, it is not
+ * an issue of correctness since the normal rebalancer logic will correct
+ * any discrepancies created by racing against the uncertainty of the current
+ * priority configuration.
+ *
+ * Returns: (int)cpu - The recommended cpu to accept the task
+ */
+int cpupri_find(int def_cpu, struct task_struct *p)
+{
+	int                  idx      = 0;
+	struct cpu_priority *cp       = &cpu_priority;
+	int                  this_cpu = smp_processor_id();
+	int                  cpu      = def_cpu;
+	int                  task_pri = convert_prio(p->prio);
+
+	for_each_cpupri_active(cp->pri_active, idx) {
+		cpumask_t mask;
+		int       lowest_pri = idx-1;
+		
+		if (lowest_pri >= task_pri)
+			break;
+		
+		cpus_and(mask, p->cpus_allowed, cp->pri_to_cpu[idx]);
+		
+		if (!cpus_empty(mask)) {
+			/*
+			 * We select a CPU in the following priority:
+			 *
+			 *       def_cpu, this_cpu, first_cpu
+			 *
+			 * for efficiency.  def_cpu preserves cache
+			 * affinity, and this_cpu is cheaper to preempt
+			 * (note that sometimes they are the same).
+			 * Finally, we will take whatever is available
+			 * if the first two don't pan out.
+			 */
+			if (cpu_isset(def_cpu, mask))
+				break;
+			
+			if (cpu_isset(this_cpu, mask)) {
+				cpu = this_cpu;
+				break;
+			}
+			
+			cpu = first_cpu(mask);
+			break;
+		}
+	}
+
+	return cpu;
+}
+
+/**
+ * cpupri_set - update the cpu priority setting
+ * @cpu: The target cpu
+ * @pri: The priority (INVALID-RT99) to assign to this CPU
+ *
+ * Note: Assumes cpu_rq(cpu)->lock is locked
+ *
+ * Returns: (void)
+ */
+void cpupri_set(int cpu, int newpri)
+{
+	struct cpu_priority *cp      = &cpu_priority;
+	int                 *currpri = &cp->cpu_to_pri[cpu];
+	int                  oldpri  = *currpri;
+	unsigned long        flags;
+
+	newpri = convert_prio(newpri);
+
+	WARN_ON(oldpri == newpri);
+
+	spin_lock_irqsave(&cp->lock, flags);
+	
+	/*
+	 * If the cpu was currently mapped to a different value, we
+	 * first need to unmap the old value
+	 */
+	if (likely(oldpri != CPUPRI_INVALID)) {
+		int        idx  = oldpri+1;
+		cpumask_t *mask = &cp->pri_to_cpu[idx];
+		
+		cpu_clear(cpu, *mask);
+		if (cpus_empty(*mask))
+			__clear_bit(idx, cp->pri_active);
+	}
+	
+	if (likely(newpri != CPUPRI_INVALID)) {
+		int        idx  = newpri+1;
+		cpumask_t *mask = &cp->pri_to_cpu[idx];
+		
+		cpu_set(cpu, *mask);
+		__set_bit(idx, cp->pri_active);
+	}
+
+	spin_unlock_irqrestore(&cp->lock, flags);
+	
+	*currpri = newpri;
+}
+
+/**
+ * cpupri_init - initialize the cpupri subsystem
+ *
+ * This must be called during the scheduler initialization before the 
+ * other methods may be used.
+ *
+ * Returns: (void)
+ */
+void cpupri_init(void)
+{
+	struct cpu_priority *cp = &cpu_priority;
+	int i;
+
+	memset(cp, 0, sizeof(*cp));
+
+	spin_lock_init(&cp->lock);
+
+	for_each_possible_cpu(i) {
+		cp->cpu_to_pri[i] = CPUPRI_INVALID;
+	}
+}
+
+
diff --git a/kernel/sched_cpupri.h b/kernel/sched_cpupri.h
new file mode 100644
index 0000000..a58a4e8
--- /dev/null
+++ b/kernel/sched_cpupri.h
@@ -0,0 +1,10 @@
+#ifndef _LINUX_CPUPRI_H
+#define _LINUX_CPUPRI_H
+
+#include <linux/sched.h>
+
+int  cpupri_find(int cpu, struct task_struct *p);
+void cpupri_set(int cpu, int pri);
+void cpupri_init(void);
+
+#endif /* _LINUX_CPUPRI_H */


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 13/13] RT: Cache cpus_allowed weight for optimizing migration
  2007-10-23 16:50 [PATCH 00/13] Balance RT tasks v5 Gregory Haskins
                   ` (11 preceding siblings ...)
  2007-10-23 16:51 ` [PATCH 12/13] RT: CPU priority management Gregory Haskins
@ 2007-10-23 16:51 ` Gregory Haskins
  2007-10-24  0:19   ` Ingo Oeser
  12 siblings, 1 reply; 16+ messages in thread
From: Gregory Haskins @ 2007-10-23 16:51 UTC (permalink / raw)
  To: linux-rt-users
  Cc: linux-kernel, Gregory Haskins, Steven Rostedt, Dmitry Adamushko,
	Peter Zijlstra, Ingo Molnar, Darren Hart

Some RT tasks (particularly kthreads) are bound to one specific CPU.
It is fairly common for one or more bound tasks to get queued up at the
same time.  Consider, for instance, softirq_timer and softirq_sched.  A
timer goes off in an ISR which schedules softirq_thread to run at RT50.
Then during the handling of the timer, the system determines that it's
time to smp-rebalance the system so it schedules softirq_sched to run
from within the softirq_timer kthread context. So we are in a situation
where we have two RT50 tasks queued, and the system will go into
rt-overload condition to request other CPUs for help.

The problem is that these tasks cannot ever be pulled away since they
are already running on their one and only valid RQ.  However, the other
CPUs cannot determine that the tasks are unpullable without going
through expensive checks/locking.  Therefore the helping CPUS
experience unecessary overhead/latencies regardless as they
ineffectively try to process the overload condition.

This patch tries to optimize the situation by utilizing the hamming
weight of the task->cpus_allowed mask.  A weight of 1 indicates that
the task cannot be migrated, which may be utilized by the overload
handling code to eliminate uncessary rebalance attempts.  We also
introduce a per-rq variable to count the number of migratable tasks
that are currently running.  We only go into overload if we have more
than one rt task, AND at least one of them is migratable. 

Calculating the weight is probably relatively expensive, so it is only
done when the cpus_allowed mask is updated (which should be relatively
infrequent, especially compared to scheduling frequency) and cached in
the task_struct.


Signed-off-by: Gregory Haskins <ghaskins@novell.com>
---

 include/linux/sched.h |    1 
 kernel/fork.c         |    1 
 kernel/sched.c        |  121 +++++++++++++++++++++++++++++++++++++------------
 3 files changed, 94 insertions(+), 29 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7a3829f..b657c13 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1144,6 +1144,7 @@ struct task_struct {
 
 	unsigned int policy;
 	cpumask_t cpus_allowed;
+	int nr_cpus_allowed;
 	unsigned int time_slice;
 
 #ifdef CONFIG_PREEMPT_RCU
diff --git a/kernel/fork.c b/kernel/fork.c
index 5f11f23..f808e18 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1257,6 +1257,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	 */
 	preempt_disable();
 	p->cpus_allowed = current->cpus_allowed;
+	p->nr_cpus_allowed = current->nr_cpus_allowed;
 	if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) ||
 			!cpu_online(task_cpu(p))))
 		set_task_cpu(p, smp_processor_id());
diff --git a/kernel/sched.c b/kernel/sched.c
index bdb6be0..9120b41 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -305,6 +305,7 @@ struct rq {
 
 #ifdef CONFIG_PREEMPT_RT
 	unsigned long rt_nr_running;
+	unsigned long rt_nr_migratory;
 	unsigned long rt_nr_uninterruptible;
 	int highest_prio;
 #endif
@@ -665,20 +666,43 @@ static inline struct rq *this_rq_lock(void)
 #if defined(CONFIG_PREEMPT_RT) && defined(CONFIG_SMP)
 static __cacheline_aligned_in_smp atomic_t rt_overload;
 static cpumask_t rto_cpus;
-#endif
+
+static void inc_rt_migration(struct rq *rq)
+{
+	rq->rt_nr_migratory++;
+
+	if ((rq->rt_nr_running > 1) && !cpu_isset(rq->cpu, rto_cpus)) {
+		cpu_set(rq->cpu, rto_cpus);
+		smp_wmb();
+		atomic_inc(&rt_overload);
+	}
+}
+
+static void dec_rt_migration(struct rq *rq)
+{
+	WARN_ON(!rq->rt_nr_migratory);
+	rq->rt_nr_migratory--;
+
+	if (((rq->rt_nr_running <= 1) || !rq->rt_nr_migratory)
+	    && cpu_isset(rq->cpu, rto_cpus)) {
+		atomic_dec(&rt_overload);
+		cpu_clear(rq->cpu, rto_cpus);
+	}
+}
+
+#else
+#define inc_rt_migration(rq) do { } while(0)
+#define dec_rt_migration(rq) do { } while(0)
+#endif /* defined(CONFIG_PREEMPT_RT) && defined(CONFIG_SMP) */
+
 
 static inline void inc_rt_tasks(struct task_struct *p, struct rq *rq)
 {
 #ifdef CONFIG_PREEMPT_RT
 	if (rt_task(p)) {
 		rq->rt_nr_running++;
-# ifdef CONFIG_SMP
-		if (rq->rt_nr_running == 2) {
-			cpu_set(rq->cpu, rto_cpus);
-			smp_wmb();
-			atomic_inc(&rt_overload);
-		}
-# endif
+		if (p->nr_cpus_allowed > 1)
+			inc_rt_migration(rq);
 	}
 #endif
 }
@@ -689,12 +713,8 @@ static inline void dec_rt_tasks(struct task_struct *p, struct rq *rq)
 	if (rt_task(p)) {
 		WARN_ON(!rq->rt_nr_running);
 		rq->rt_nr_running--;
-# ifdef CONFIG_SMP
-		if (rq->rt_nr_running == 1) {
-			atomic_dec(&rt_overload);
-			cpu_clear(rq->cpu, rto_cpus);
-		}
-# endif
+		if (p->nr_cpus_allowed > 1)
+			dec_rt_migration(rq);
 	}
 #endif
 }
@@ -1526,6 +1546,27 @@ next_in_queue:
 
 static int double_lock_balance(struct rq *this_rq, struct rq *busiest);
 
+static int lock_migration_target(struct task_struct *p, struct rq *target_rq)
+{
+	struct rq *this_rq = task_rq(p);
+
+	if (double_lock_balance(this_rq, target_rq)) {
+		/*
+		 * We had to unlock the run queue. In
+		 * the mean time, task could have
+		 * migrated already or had its affinity changed.
+		 */
+		if (unlikely(task_running(this_rq, p) ||
+			     task_rq(p) != this_rq ||
+			     !cpu_isset(target_rq->cpu, p->cpus_allowed))) {
+			spin_unlock(&target_rq->lock);
+			return 0;
+		}
+	}	
+	
+	return 1;
+}
+
 /* Only try this algorithm three times */
 #define RT_PUSH_MAX_TRIES 3
 
@@ -1537,6 +1578,28 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task,
 	int cpu;
 	int tries;
 
+	/*
+	 * We can optimize if the hamming weight of the cpus_allowed mask
+	 * is 1 because the task has nowhere to go but one CPU.  So don't
+	 * waste the time trying to find the lowest RQ in this case.
+	 */
+	if (task->nr_cpus_allowed == 1) {
+		/* If the task is already on the RQ, we are done */
+		if (cpu_isset(this_rq->cpu, task->cpus_allowed))
+			return NULL;
+
+		cpu = first_cpu(task->cpus_allowed);
+
+		lowest_rq = cpu_rq(cpu);
+		BUG_ON(this_rq == lowest_rq);
+
+		/* Otherwise, we can simply grab the new RQ */
+		if (lock_migration_target(task, lowest_rq))
+			return lowest_rq;
+		else
+			return NULL;
+	}
+
 	for (tries = 0; tries < RT_PUSH_MAX_TRIES; tries++) {
 		cpu = cpupri_find(this_rq->cpu, task);
 
@@ -1546,21 +1609,8 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task,
 		lowest_rq = cpu_rq(cpu);
 
 		/* if the prio of this runqueue changed, try again */
-		if (double_lock_balance(this_rq, lowest_rq)) {
-			/*
-			 * We had to unlock the run queue. In
-			 * the mean time, task could have
-			 * migrated already or had its affinity changed.
-			 */
-			if (unlikely(task_running(this_rq, task) ||
-				     task_rq(task) != this_rq ||
-				     !cpu_isset(lowest_rq->cpu, task->cpus_allowed))) {
-				spin_unlock(&lowest_rq->lock);
-				lowest_rq = NULL;
-				break;
-			}
-
-		}
+		if (!lock_migration_target(task, lowest_rq))
+			return NULL;
 
 		/* If this rq is still suitable use it. */
 		if (lowest_rq->highest_prio > task->prio)
@@ -5794,6 +5844,7 @@ int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
 	unsigned long flags;
 	struct rq *rq;
 	int ret = 0;
+	int weight = cpus_weight(new_mask);
 
 	rq = task_rq_lock(p, &flags);
 	if (!cpus_intersects(new_mask, cpu_online_map)) {
@@ -5801,7 +5852,19 @@ int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
 		goto out;
 	}
 
+#ifdef CONFIG_SMP
+	if (rt_task(p) && task_running(rq,p)) {
+		if (weight != p->nr_cpus_allowed) {
+			if ((p->nr_cpus_allowed <= 1) && (weight > 1))
+				inc_rt_migration(rq);
+			else if((p->nr_cpus_allowed > 1) && (weight <= 1))
+				dec_rt_migration(rq);
+		}
+	}
+#endif
 	p->cpus_allowed = new_mask;
+	p->nr_cpus_allowed = weight;
+
 	/* Can the task run on the task's current CPU? If so, we're done */
 	if (cpu_isset(task_cpu(p), new_mask))
 		goto out;


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH 13/13] RT: Cache cpus_allowed weight for optimizing migration
  2007-10-23 16:51 ` [PATCH 13/13] RT: Cache cpus_allowed weight for optimizing migration Gregory Haskins
@ 2007-10-24  0:19   ` Ingo Oeser
  2007-10-24  3:20     ` Gregory Haskins
  0 siblings, 1 reply; 16+ messages in thread
From: Ingo Oeser @ 2007-10-24  0:19 UTC (permalink / raw)
  To: Gregory Haskins
  Cc: linux-rt-users, linux-kernel, Steven Rostedt, Dmitry Adamushko,
	Peter Zijlstra, Ingo Molnar, Darren Hart

Hi Gregory,

On Tuesday 23 October 2007, Gregory Haskins wrote:
> Calculating the weight is probably relatively expensive, so it is only
> done when the cpus_allowed mask is updated (which should be relatively
> infrequent, especially compared to scheduling frequency) and cached in
> the task_struct.

Why not make it a task flag, since according to your code, you are only 
interested whether this is <= 1 or > 1. Since !(x <= 1) <=> (x > 1)
for any given unsigned integer x, the required data structure is
a "boolean" or a flag.


Best Regards

Ingo Oeser

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 13/13] RT: Cache cpus_allowed weight for optimizing migration
  2007-10-24  0:19   ` Ingo Oeser
@ 2007-10-24  3:20     ` Gregory Haskins
  0 siblings, 0 replies; 16+ messages in thread
From: Gregory Haskins @ 2007-10-24  3:20 UTC (permalink / raw)
  To: Ingo Oeser
  Cc: linux-rt-users, linux-kernel, Steven Rostedt, Dmitry Adamushko,
	Peter Zijlstra, Ingo Molnar, Darren Hart

[-- Attachment #1: Type: text/plain, Size: 852 bytes --]

On Wed, 2007-10-24 at 02:19 +0200, Ingo Oeser wrote:

> Why not make it a task flag, since according to your code, you are only 
> interested whether this is <= 1 or > 1. Since !(x <= 1) <=> (x > 1)
> for any given unsigned integer x, the required data structure is
> a "boolean" or a flag.

Hi Ingo,
  You are correct that the data is in fact interpreted as a boolean.  I
also had considered using a more boolean-like notation at one point.
However, I then figured I went through the expense of computing it, I
might as well store the actual value as an integer in case it can be
used in another way.  But to be honest, I cannot really think of any
other potential uses, so perhaps we would be best to follow your
suggestion.  It could always be changed if such a need ever arises.

Thank you for the feedback!

Regards,
-Greg



[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2007-10-24  3:23 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-10-23 16:50 [PATCH 00/13] Balance RT tasks v5 Gregory Haskins
2007-10-23 16:50 ` [PATCH 01/13] RT: push-rt Gregory Haskins
2007-10-23 16:50 ` [PATCH 02/13] RT: Condense the next-task search into one function Gregory Haskins
2007-10-23 16:50 ` [PATCH 03/13] RT: Add a per-cpu rt_overload indication Gregory Haskins
2007-10-23 16:50 ` [PATCH 04/13] RT: Wrap the RQ notion of priority to make it conditional Gregory Haskins
2007-10-23 16:50 ` [PATCH 05/13] RT: Initialize the priority value Gregory Haskins
2007-10-23 16:50 ` [PATCH 06/13] RT: Maintain the highest RQ priority Gregory Haskins
2007-10-23 16:50 ` [PATCH 07/13] RT: Clean up some of the push-rt logic Gregory Haskins
2007-10-23 16:51 ` [PATCH 08/13] RT: Add support for low-priority wake-up to push_rt feature Gregory Haskins
2007-10-23 16:51 ` [PATCH 09/13] RT: Only dirty a cacheline if the priority is actually changing Gregory Haskins
2007-10-23 16:51 ` [PATCH 10/13] RT: Fixes for push-rt patch Gregory Haskins
2007-10-23 16:51 ` [PATCH 11/13] RT: Condense NORMAL and IDLE priorities Gregory Haskins
2007-10-23 16:51 ` [PATCH 12/13] RT: CPU priority management Gregory Haskins
2007-10-23 16:51 ` [PATCH 13/13] RT: Cache cpus_allowed weight for optimizing migration Gregory Haskins
2007-10-24  0:19   ` Ingo Oeser
2007-10-24  3:20     ` Gregory Haskins

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).