LKML Archive on lore.kernel.org
* [PATCH 00/11] another rt group sched update
@ 2008-01-06 16:11 Peter Zijlstra
2008-01-06 16:11 ` [PATCH 01/11] sched: rt throttling vs no_hz Peter Zijlstra
` (12 more replies)
0 siblings, 13 replies; 31+ messages in thread
From: Peter Zijlstra @ 2008-01-06 16:11 UTC (permalink / raw)
To: LKML
Cc: Ingo Molnar, Balbir Singh, dmitry.adamushko, Srivatsa Vaddagiri,
Steven Rostedt, Gregory Haskins, Peter Zijlstra, Thomas Gleixner
This time compile-tested on all 16 combinations of:
CONFIG_SMP
CONFIG_FAIR_GROUP_SCHED
CONFIG_HIGH_RES_TIMERS
CONFIG_NO_HZ
Ran some, but not all, of the combinations.
--
* [PATCH 01/11] sched: rt throttling vs no_hz
2008-01-06 16:11 [PATCH 00/11] another rt group sched update Peter Zijlstra
@ 2008-01-06 16:11 ` Peter Zijlstra
2008-01-06 16:11 ` [PATCH 02/11] sched: load_balance_monitor rename Peter Zijlstra
` (11 subsequent siblings)
12 siblings, 0 replies; 31+ messages in thread
From: Peter Zijlstra @ 2008-01-06 16:11 UTC (permalink / raw)
To: LKML
Cc: Ingo Molnar, Balbir Singh, dmitry.adamushko, Srivatsa Vaddagiri,
Steven Rostedt, Gregory Haskins, Peter Zijlstra, Thomas Gleixner
[-- Attachment #1: sched-rt-group-update.patch --]
[-- Type: text/plain, Size: 4621 bytes --]
We need to teach no_hz about rt throttling, because the throttling is tick-driven.
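Purely illustrative, not part of the patch: a userspace sketch of what rt_needs_cpu()
computes and how the nohz path below uses it to cap how long the tick may be stopped.
HZ, the sample clock values and the demo_ name are made up.

#include <stdio.h>
#include <stdint.h>

#define HZ           1000
#define NSEC_PER_SEC 1000000000ULL

/* like rt_needs_cpu(): jiffies until the rt period expires, 0 when not throttled */
static unsigned long demo_rt_needs_cpu(int rt_throttled, uint64_t clock,
                                       uint64_t rt_period_expire)
{
        uint64_t delta;

        if (!rt_throttled)
                return 0;
        if (clock > rt_period_expire)
                return 1;

        delta = rt_period_expire - clock;
        delta /= NSEC_PER_SEC / HZ;             /* nanoseconds -> jiffies */

        return (unsigned long)delta;
}

int main(void)
{
        /* next regular timer is 100 jiffies away, but the throttled rq's
         * period ends 7.5ms from now: only stop the tick for 7 jiffies */
        unsigned long delta_jiffies = 100;
        unsigned long rt_jiffies = demo_rt_needs_cpu(1, 1000000000ULL, 1007500000ULL);

        if (rt_jiffies && rt_jiffies < delta_jiffies)
                delta_jiffies = rt_jiffies;

        printf("tick may be stopped for %lu jiffies\n", delta_jiffies);
        return 0;
}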
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
include/linux/sched.h | 2 ++
kernel/sched.c | 23 ++++++++++++++++++++++-
kernel/sched_rt.c | 30 ++++++++++++++++--------------
kernel/time/tick-sched.c | 5 +++++
4 files changed, 45 insertions(+), 15 deletions(-)
Index: linux-2.6/include/linux/sched.h
===================================================================
--- linux-2.6.orig/include/linux/sched.h
+++ linux-2.6/include/linux/sched.h
@@ -230,6 +230,8 @@ static inline int select_nohz_load_balan
}
#endif
+extern unsigned long rt_needs_cpu(int cpu);
+
/*
* Only dump TASK_* tasks. (0 for all tasks)
*/
Index: linux-2.6/kernel/sched.c
===================================================================
--- linux-2.6.orig/kernel/sched.c
+++ linux-2.6/kernel/sched.c
@@ -442,6 +442,7 @@ struct rq {
struct cfs_rq cfs;
struct rt_rq rt;
u64 rt_period_expire;
+ int rt_throttled;
#ifdef CONFIG_FAIR_GROUP_SCHED
/* list of leaf cfs_rq on this cpu: */
@@ -594,6 +595,23 @@ static void update_rq_clock(struct rq *r
#define task_rq(p) cpu_rq(task_cpu(p))
#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
+unsigned long rt_needs_cpu(int cpu)
+{
+ struct rq *rq = cpu_rq(cpu);
+ u64 delta;
+
+ if (!rq->rt_throttled)
+ return 0;
+
+ if (rq->clock > rq->rt_period_expire)
+ return 1;
+
+ delta = rq->rt_period_expire - rq->clock;
+ do_div(delta, NSEC_PER_SEC / HZ);
+
+ return (unsigned long)delta;
+}
+
/*
* Tunables that become constants when CONFIG_SCHED_DEBUG is off:
*/
@@ -7099,9 +7117,11 @@ static void init_rt_rq(struct rt_rq *rt_
/* delimiter for bitsearch: */
__set_bit(MAX_RT_PRIO, array->bitmap);
+#if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED
+ rt_rq->highest_prio = MAX_RT_PRIO;
+#endif
#ifdef CONFIG_SMP
rt_rq->rt_nr_migratory = 0;
- rt_rq->highest_prio = MAX_RT_PRIO;
rt_rq->overloaded = 0;
#endif
@@ -7186,6 +7206,7 @@ void __init sched_init(void)
list_add(&init_task_group.list, &task_groups);
#endif
rq->rt_period_expire = 0;
+ rq->rt_throttled = 0;
for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
rq->cpu_load[j] = 0;
Index: linux-2.6/kernel/sched_rt.c
===================================================================
--- linux-2.6.orig/kernel/sched_rt.c
+++ linux-2.6/kernel/sched_rt.c
@@ -175,7 +175,11 @@ static int sched_rt_ratio_exceeded(struc
ratio = (period * rt_ratio) >> SCHED_RT_FRAC_SHIFT;
if (rt_rq->rt_time > ratio) {
+ struct rq *rq = rq_of_rt_rq(rt_rq);
+
+ rq->rt_throttled = 1;
rt_rq->rt_throttled = 1;
+
sched_rt_ratio_dequeue(rt_rq);
return 1;
}
@@ -183,18 +187,6 @@ static int sched_rt_ratio_exceeded(struc
return 0;
}
-static void __update_sched_rt_period(struct rt_rq *rt_rq, u64 period)
-{
- unsigned long rt_ratio = sched_rt_ratio(rt_rq);
- u64 ratio = (period * rt_ratio) >> SCHED_RT_FRAC_SHIFT;
-
- rt_rq->rt_time -= min(rt_rq->rt_time, ratio);
- if (rt_rq->rt_throttled) {
- rt_rq->rt_throttled = 0;
- sched_rt_ratio_enqueue(rt_rq);
- }
-}
-
static void update_sched_rt_period(struct rq *rq)
{
struct rt_rq *rt_rq;
@@ -204,8 +196,18 @@ static void update_sched_rt_period(struc
period = (u64)sysctl_sched_rt_period * NSEC_PER_MSEC;
rq->rt_period_expire += period;
- for_each_leaf_rt_rq(rt_rq, rq)
- __update_sched_rt_period(rt_rq, period);
+ for_each_leaf_rt_rq(rt_rq, rq) {
+ unsigned long rt_ratio = sched_rt_ratio(rt_rq);
+ u64 ratio = (period * rt_ratio) >> SCHED_RT_FRAC_SHIFT;
+
+ rt_rq->rt_time -= min(rt_rq->rt_time, ratio);
+ if (rt_rq->rt_throttled) {
+ rt_rq->rt_throttled = 0;
+ sched_rt_ratio_enqueue(rt_rq);
+ }
+ }
+
+ rq->rt_throttled = 0;
}
}
Index: linux-2.6/kernel/time/tick-sched.c
===================================================================
--- linux-2.6.orig/kernel/time/tick-sched.c
+++ linux-2.6/kernel/time/tick-sched.c
@@ -153,6 +153,7 @@ void tick_nohz_update_jiffies(void)
void tick_nohz_stop_sched_tick(void)
{
unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags;
+ unsigned long rt_jiffies;
struct tick_sched *ts;
ktime_t last_update, expires, now, delta;
struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
@@ -216,6 +217,10 @@ void tick_nohz_stop_sched_tick(void)
next_jiffies = get_next_timer_interrupt(last_jiffies);
delta_jiffies = next_jiffies - last_jiffies;
+ rt_jiffies = rt_needs_cpu(cpu);
+ if (rt_jiffies && rt_jiffies < delta_jiffies)
+ delta_jiffies = rt_jiffies;
+
if (rcu_needs_cpu(cpu))
delta_jiffies = 1;
/*
--
* [PATCH 02/11] sched: load_balance_monitor rename
2008-01-06 16:11 [PATCH 00/11] another rt group sched update Peter Zijlstra
2008-01-06 16:11 ` [PATCH 01/11] sched: rt throttling vs no_hz Peter Zijlstra
@ 2008-01-06 16:11 ` Peter Zijlstra
2008-01-06 16:11 ` [PATCH 03/11] hrtimer: clean up cpu->base locking tricks Peter Zijlstra
` (10 subsequent siblings)
12 siblings, 0 replies; 31+ messages in thread
From: Peter Zijlstra @ 2008-01-06 16:11 UTC (permalink / raw)
To: LKML
Cc: Ingo Molnar, Balbir Singh, dmitry.adamushko, Srivatsa Vaddagiri,
Steven Rostedt, Gregory Haskins, Peter Zijlstra, Thomas Gleixner
[-- Attachment #1: sched-group-fixes.patch --]
[-- Type: text/plain, Size: 830 bytes --]
Don't start the load_balance_monitor when there is only a single CPU.
Also rename the kthread, because its current name is longer than TASK_COMM_LEN.
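For reference, TASK_COMM_LEN is 16 bytes (15 characters plus the terminating NUL),
so the old name would get truncated in task->comm. A trivial userspace check, with
the kernel constant copied by hand:

#include <stdio.h>
#include <string.h>

#define TASK_COMM_LEN 16        /* copied from include/linux/sched.h */

int main(void)
{
        const char *names[] = { "load_balance_monitor", "group_balance" };
        int i;

        for (i = 0; i < 2; i++)
                printf("%-20s  len=%zu  %s\n", names[i], strlen(names[i]),
                       strlen(names[i]) < TASK_COMM_LEN ? "fits" : "truncated");

        return 0;
}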
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
kernel/sched.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
Index: linux-2.6/kernel/sched.c
===================================================================
--- linux-2.6.orig/kernel/sched.c
+++ linux-2.6/kernel/sched.c
@@ -7070,8 +7070,11 @@ void __init sched_init_smp(void)
sched_init_granularity();
#ifdef CONFIG_FAIR_GROUP_SCHED
+ if (nr_cpu_ids == 1)
+ return;
+
lb_monitor_task = kthread_create(load_balance_monitor, NULL,
- "load_balance_monitor");
+ "group_balance");
if (!IS_ERR(lb_monitor_task)) {
lb_monitor_task->flags |= PF_NOFREEZE;
wake_up_process(lb_monitor_task);
--
* [PATCH 03/11] hrtimer: clean up cpu->base locking tricks
2008-01-06 16:11 [PATCH 00/11] another rt group sched update Peter Zijlstra
2008-01-06 16:11 ` [PATCH 01/11] sched: rt throttling vs no_hz Peter Zijlstra
2008-01-06 16:11 ` [PATCH 02/11] sched: load_balance_monitor rename Peter Zijlstra
@ 2008-01-06 16:11 ` Peter Zijlstra
2008-01-06 16:11 ` [PATCH 04/11] hrtimer: fixup the HRTIMER_CB_IRQSAFE_NO_SOFTIRQ fallback Peter Zijlstra
` (9 subsequent siblings)
12 siblings, 0 replies; 31+ messages in thread
From: Peter Zijlstra @ 2008-01-06 16:11 UTC (permalink / raw)
To: LKML
Cc: Ingo Molnar, Balbir Singh, dmitry.adamushko, Srivatsa Vaddagiri,
Steven Rostedt, Gregory Haskins, Peter Zijlstra, Thomas Gleixner
[-- Attachment #1: hrtimer-unlocked-callback.patch --]
[-- Type: text/plain, Size: 2786 bytes --]
In order to more easily allow the scheduler to use timers, clean up
the locking a bit.
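The inversion being avoided is plain ABBA ordering: the expiry code holds
cpu_base->lock while a scheduler callback wants rq->lock, whereas the scheduler
side of this series ends up arming timers (taking cpu_base->lock) with rq->lock
held. A minimal pthread sketch of the fix, with illustrative stand-ins for the
two locks:

#include <pthread.h>
#include <stdio.h>

/* illustrative stand-ins for cpu_base->lock and rq->lock */
static pthread_mutex_t base_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t rq_lock   = PTHREAD_MUTEX_INITIALIZER;

/* a callback that, like a scheduler timer, needs rq_lock */
static void timer_callback(void)
{
        pthread_mutex_lock(&rq_lock);
        printf("callback ran with rq_lock held\n");
        pthread_mutex_unlock(&rq_lock);
}

static void expire_timers(void)
{
        pthread_mutex_lock(&base_lock);
        /* ... walk the timer queue under base_lock ... */

        /*
         * Drop base_lock around the callback, as the patch does for
         * HRTIMER_CB_IRQSAFE_NO_SOFTIRQ timers.  Calling it with base_lock
         * held would create a base_lock -> rq_lock dependency, the opposite
         * order of code that arms timers while holding rq_lock.
         */
        pthread_mutex_unlock(&base_lock);
        timer_callback();
        pthread_mutex_lock(&base_lock);

        /* ... requeue / reprogram under base_lock ... */
        pthread_mutex_unlock(&base_lock);
}

int main(void)
{
        expire_timers();
        return 0;
}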
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
kernel/hrtimer.c | 109 +++++++++++++++++++++++++++++++++++++++++++----
kernel/time/tick-sched.c | 8 ---
2 files changed, 102 insertions(+), 15 deletions(-)
Index: linux-2.6/kernel/hrtimer.c
===================================================================
--- linux-2.6.orig/kernel/hrtimer.c
+++ linux-2.6/kernel/hrtimer.c
@@ -1063,7 +1063,9 @@ void hrtimer_interrupt(struct clock_even
basenow = ktime_add(now, base->offset);
while ((node = base->first)) {
+ enum hrtimer_restart (*fn)(struct hrtimer *);
struct hrtimer *timer;
+ int restart;
timer = rb_entry(node, struct hrtimer, node);
@@ -1091,13 +1093,29 @@ void hrtimer_interrupt(struct clock_even
HRTIMER_STATE_CALLBACK, 0);
timer_stats_account_hrtimer(timer);
+ fn = timer->function;
+ if (timer->cb_mode == HRTIMER_CB_IRQSAFE_NO_SOFTIRQ) {
+ /*
+ * Used for scheduler timers, avoid lock
+ * inversion with rq->lock and tasklist_lock.
+ *
+ * These timers are required to deal with
+ * enqueue expiry themselves and are not
+ * allowed to migrate.
+ */
+ spin_unlock(&cpu_base->lock);
+ restart = fn(timer);
+ spin_lock(&cpu_base->lock);
+ } else
+ restart = fn(timer);
+
/*
* Note: We clear the CALLBACK bit after
* enqueue_hrtimer to avoid reprogramming of
* the event hardware. This happens at the end
* of this function anyway.
*/
- if (timer->function(timer) != HRTIMER_NORESTART) {
+ if (restart != HRTIMER_NORESTART) {
BUG_ON(timer->state != HRTIMER_STATE_CALLBACK);
enqueue_hrtimer(timer, base, 0);
}
Index: linux-2.6/kernel/time/tick-sched.c
===================================================================
--- linux-2.6.orig/kernel/time/tick-sched.c
+++ linux-2.6/kernel/time/tick-sched.c
@@ -514,7 +514,6 @@ static enum hrtimer_restart tick_sched_t
{
struct tick_sched *ts =
container_of(timer, struct tick_sched, sched_timer);
- struct hrtimer_cpu_base *base = timer->base->cpu_base;
struct pt_regs *regs = get_irq_regs();
ktime_t now = ktime_get();
int cpu = smp_processor_id();
@@ -552,15 +551,8 @@ static enum hrtimer_restart tick_sched_t
touch_softlockup_watchdog();
ts->idle_jiffies++;
}
- /*
- * update_process_times() might take tasklist_lock, hence
- * drop the base lock. sched-tick hrtimers are per-CPU and
- * never accessible by userspace APIs, so this is safe to do.
- */
- spin_unlock(&base->lock);
update_process_times(user_mode(regs));
profile_tick(CPU_PROFILING);
- spin_lock(&base->lock);
}
/* Do not restart, when we are in the idle loop */
--
* [PATCH 04/11] hrtimer: fixup the HRTIMER_CB_IRQSAFE_NO_SOFTIRQ fallback
2008-01-06 16:11 [PATCH 00/11] another rt group sched update Peter Zijlstra
` (2 preceding siblings ...)
2008-01-06 16:11 ` [PATCH 03/11] hrtimer: clean up cpu->base locking tricks Peter Zijlstra
@ 2008-01-06 16:11 ` Peter Zijlstra
2008-01-07 11:56 ` Peter Zijlstra
2008-01-06 16:11 ` [PATCH 05/11] hrtimer: unlock hrtimer_wakeup Peter Zijlstra
` (8 subsequent siblings)
12 siblings, 1 reply; 31+ messages in thread
From: Peter Zijlstra @ 2008-01-06 16:11 UTC (permalink / raw)
To: LKML
Cc: Ingo Molnar, Balbir Singh, dmitry.adamushko, Srivatsa Vaddagiri,
Steven Rostedt, Gregory Haskins, Peter Zijlstra, Thomas Gleixner
[-- Attachment #1: hrtimer-fallback.patch --]
[-- Type: text/plain, Size: 11137 bytes --]
Currently all highres=off timers are run from softirq context, but
HRTIMER_CB_IRQSAFE_NO_SOFTIRQ timers expect to run from irq context.
Fix this up by splitting the expiry path, similar to the highres=on case.
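The resulting fallback mirrors the highres=on path: HRTIMER_CB_SOFTIRQ timers are
moved to the per-cpu cb_pending list and run from the softirq, everything else
(notably the IRQSAFE_NO_SOFTIRQ scheduler timers) runs directly from the
hardirq-time queue walk. A toy dispatcher showing just that decision; every type
and name in it is invented for the sketch:

#include <stdio.h>

enum cb_mode { CB_SOFTIRQ, CB_IRQSAFE_NO_SOFTIRQ };

struct toy_timer {
        const char   *name;
        enum cb_mode  cb_mode;
        void        (*fn)(struct toy_timer *);
};

static void hello(struct toy_timer *t)
{
        printf("  callback for %s\n", t->name);
}

/* "hardirq" side: defer CB_SOFTIRQ timers, run everything else directly */
static void run_queue(struct toy_timer **q, int n,
                      struct toy_timer **pending, int *npending)
{
        int i;

        for (i = 0; i < n; i++) {
                if (q[i]->cb_mode == CB_SOFTIRQ) {
                        printf("hardirq: deferring %s to cb_pending\n", q[i]->name);
                        pending[(*npending)++] = q[i];
                        continue;
                }
                printf("hardirq: running %s directly\n", q[i]->name);
                q[i]->fn(q[i]);
        }
}

/* "softirq" side: drain the pending list */
static void run_pending(struct toy_timer **pending, int npending)
{
        int i;

        for (i = 0; i < npending; i++) {
                printf("softirq: running %s\n", pending[i]->name);
                pending[i]->fn(pending[i]);
        }
}

int main(void)
{
        struct toy_timer sched_timer = { "sched_period", CB_IRQSAFE_NO_SOFTIRQ, hello };
        struct toy_timer sleeper     = { "nanosleep",    CB_SOFTIRQ,            hello };
        struct toy_timer *queue[]    = { &sched_timer, &sleeper };
        struct toy_timer *pending[2];
        int npending = 0;

        run_queue(queue, 2, pending, &npending);
        run_pending(pending, npending);
        return 0;
}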
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
include/linux/hrtimer.h | 5 -
kernel/hrtimer.c | 232 +++++++++++++++++++++++++-----------------------
kernel/timer.c | 3
3 files changed, 125 insertions(+), 115 deletions(-)
Index: linux-2.6/kernel/hrtimer.c
===================================================================
--- linux-2.6.orig/kernel/hrtimer.c
+++ linux-2.6/kernel/hrtimer.c
@@ -622,6 +622,11 @@ static inline int hrtimer_cb_pending(str
static inline void hrtimer_remove_cb_pending(struct hrtimer *timer) { }
static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { }
+static inline int hrtimer_reprogram(struct hrtimer *timer,
+ struct hrtimer_clock_base *base)
+{
+ return 0;
+}
#endif /* CONFIG_HIGH_RES_TIMERS */
@@ -1030,6 +1035,85 @@ int hrtimer_get_res(const clockid_t whic
}
EXPORT_SYMBOL_GPL(hrtimer_get_res);
+static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base)
+{
+ spin_lock_irq(&cpu_base->lock);
+
+ while (!list_empty(&cpu_base->cb_pending)) {
+ enum hrtimer_restart (*fn)(struct hrtimer *);
+ struct hrtimer *timer;
+ int restart;
+
+ timer = list_entry(cpu_base->cb_pending.next,
+ struct hrtimer, cb_entry);
+
+ timer_stats_account_hrtimer(timer);
+
+ fn = timer->function;
+ __remove_hrtimer(timer, timer->base, HRTIMER_STATE_CALLBACK, 0);
+ spin_unlock_irq(&cpu_base->lock);
+
+ restart = fn(timer);
+
+ spin_lock_irq(&cpu_base->lock);
+
+ timer->state &= ~HRTIMER_STATE_CALLBACK;
+ if (restart == HRTIMER_RESTART) {
+ BUG_ON(hrtimer_active(timer));
+ /*
+ * Enqueue the timer, allow reprogramming of the event
+ * device
+ */
+ enqueue_hrtimer(timer, timer->base, 1);
+ } else if (hrtimer_active(timer)) {
+ /*
+ * If the timer was rearmed on another CPU, reprogram
+ * the event device.
+ */
+ if (timer->base->first == &timer->node)
+ hrtimer_reprogram(timer, timer->base);
+ }
+ }
+ spin_unlock_irq(&cpu_base->lock);
+}
+
+static void __run_hrtimer(struct hrtimer *timer)
+{
+ struct hrtimer_clock_base *base = timer->base;
+ struct hrtimer_cpu_base *cpu_base = base->cpu_base;
+ enum hrtimer_restart (*fn)(struct hrtimer *);
+ int restart;
+
+ __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);
+ timer_stats_account_hrtimer(timer);
+
+ fn = timer->function;
+ if (timer->cb_mode == HRTIMER_CB_IRQSAFE_NO_SOFTIRQ) {
+ /*
+ * Used for scheduler timers, avoid lock inversion with
+ * rq->lock and tasklist_lock.
+ *
+ * These timers are required to deal with enqueue expiry
+ * themselves and are not allowed to migrate.
+ */
+ spin_unlock(&cpu_base->lock);
+ restart = fn(timer);
+ spin_lock(&cpu_base->lock);
+ } else
+ restart = fn(timer);
+
+ /*
+ * Note: We clear the CALLBACK bit after enqueue_hrtimer to avoid
+ * reprogramming of the event hardware. This happens at the end of this
+ * function anyway.
+ */
+ if (restart != HRTIMER_NORESTART) {
+ BUG_ON(timer->state != HRTIMER_STATE_CALLBACK);
+ enqueue_hrtimer(timer, base, 0);
+ }
+ timer->state &= ~HRTIMER_STATE_CALLBACK;
+}
+
#ifdef CONFIG_HIGH_RES_TIMERS
/*
@@ -1063,9 +1147,7 @@ void hrtimer_interrupt(struct clock_even
basenow = ktime_add(now, base->offset);
while ((node = base->first)) {
- enum hrtimer_restart (*fn)(struct hrtimer *);
struct hrtimer *timer;
- int restart;
timer = rb_entry(node, struct hrtimer, node);
@@ -1089,37 +1171,7 @@ void hrtimer_interrupt(struct clock_even
continue;
}
- __remove_hrtimer(timer, base,
- HRTIMER_STATE_CALLBACK, 0);
- timer_stats_account_hrtimer(timer);
-
- fn = timer->function;
- if (timer->cb_mode == HRTIMER_CB_IRQSAFE_NO_SOFTIRQ) {
- /*
- * Used for scheduler timers, avoid lock
- * inversion with rq->lock and tasklist_lock.
- *
- * These timers are required to deal with
- * enqueue expiry themselves and are not
- * allowed to migrate.
- */
- spin_unlock(&cpu_base->lock);
- restart = fn(timer);
- spin_lock(&cpu_base->lock);
- } else
- restart = fn(timer);
-
- /*
- * Note: We clear the CALLBACK bit after
- * enqueue_hrtimer to avoid reprogramming of
- * the event hardware. This happens at the end
- * of this function anyway.
- */
- if (restart != HRTIMER_NORESTART) {
- BUG_ON(timer->state != HRTIMER_STATE_CALLBACK);
- enqueue_hrtimer(timer, base, 0);
- }
- timer->state &= ~HRTIMER_STATE_CALLBACK;
+ __run_hrtimer(timer);
}
spin_unlock(&cpu_base->lock);
base++;
@@ -1140,52 +1192,41 @@ void hrtimer_interrupt(struct clock_even
static void run_hrtimer_softirq(struct softirq_action *h)
{
- struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
-
- spin_lock_irq(&cpu_base->lock);
-
- while (!list_empty(&cpu_base->cb_pending)) {
- enum hrtimer_restart (*fn)(struct hrtimer *);
- struct hrtimer *timer;
- int restart;
-
- timer = list_entry(cpu_base->cb_pending.next,
- struct hrtimer, cb_entry);
+ run_hrtimer_pending(&__get_cpu_var(hrtimer_bases));
+}
- timer_stats_account_hrtimer(timer);
+#endif /* CONFIG_HIGH_RES_TIMERS */
- fn = timer->function;
- __remove_hrtimer(timer, timer->base, HRTIMER_STATE_CALLBACK, 0);
- spin_unlock_irq(&cpu_base->lock);
+/*
+ * Called from timer softirq every jiffy, expire hrtimers:
+ *
+ * For HRT its the fall back code to run the softirq in the timer
+ * softirq context in case the hrtimer initialization failed or has
+ * not been done yet.
+ */
+void hrtimer_run_pending(void)
+{
+ struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
- restart = fn(timer);
+ if (hrtimer_hres_active())
+ return;
- spin_lock_irq(&cpu_base->lock);
+ /*
+ * This _is_ ugly: We have to check in the softirq context,
+ * whether we can switch to highres and / or nohz mode. The
+ * clocksource switch happens in the timer interrupt with
+ * xtime_lock held. Notification from there only sets the
+ * check bit in the tick_oneshot code, otherwise we might
+ * deadlock vs. xtime_lock.
+ */
+ if (tick_check_oneshot_change(!hrtimer_is_hres_enabled()))
+ hrtimer_switch_to_hres();
- timer->state &= ~HRTIMER_STATE_CALLBACK;
- if (restart == HRTIMER_RESTART) {
- BUG_ON(hrtimer_active(timer));
- /*
- * Enqueue the timer, allow reprogramming of the event
- * device
- */
- enqueue_hrtimer(timer, timer->base, 1);
- } else if (hrtimer_active(timer)) {
- /*
- * If the timer was rearmed on another CPU, reprogram
- * the event device.
- */
- if (timer->base->first == &timer->node)
- hrtimer_reprogram(timer, timer->base);
- }
- }
- spin_unlock_irq(&cpu_base->lock);
+ run_hrtimer_pending(cpu_base);
}
-#endif /* CONFIG_HIGH_RES_TIMERS */
-
/*
- * Expire the per base hrtimer-queue:
+ * Called from hardirq context every jiffy
*/
static inline void run_hrtimer_queue(struct hrtimer_cpu_base *cpu_base,
int index)
@@ -1199,46 +1240,27 @@ static inline void run_hrtimer_queue(str
if (base->get_softirq_time)
base->softirq_time = base->get_softirq_time();
- spin_lock_irq(&cpu_base->lock);
+ spin_lock(&cpu_base->lock);
while ((node = base->first)) {
struct hrtimer *timer;
- enum hrtimer_restart (*fn)(struct hrtimer *);
- int restart;
timer = rb_entry(node, struct hrtimer, node);
if (base->softirq_time.tv64 <= timer->expires.tv64)
break;
-#ifdef CONFIG_HIGH_RES_TIMERS
- WARN_ON_ONCE(timer->cb_mode == HRTIMER_CB_IRQSAFE_NO_SOFTIRQ);
-#endif
- timer_stats_account_hrtimer(timer);
-
- fn = timer->function;
- __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);
- spin_unlock_irq(&cpu_base->lock);
-
- restart = fn(timer);
-
- spin_lock_irq(&cpu_base->lock);
-
- timer->state &= ~HRTIMER_STATE_CALLBACK;
- if (restart != HRTIMER_NORESTART) {
- BUG_ON(hrtimer_active(timer));
- enqueue_hrtimer(timer, base, 0);
+ if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) {
+ __remove_hrtimer(timer, base, HRTIMER_STATE_PENDING, 0);
+ list_add_tail(&timer->cb_entry,
+ &base->cpu_base->cb_pending);
+ continue;
}
+
+ __run_hrtimer(timer);
}
- spin_unlock_irq(&cpu_base->lock);
+ spin_unlock(&cpu_base->lock);
}
-/*
- * Called from timer softirq every jiffy, expire hrtimers:
- *
- * For HRT its the fall back code to run the softirq in the timer
- * softirq context in case the hrtimer initialization failed or has
- * not been done yet.
- */
void hrtimer_run_queues(void)
{
struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
@@ -1247,18 +1269,6 @@ void hrtimer_run_queues(void)
if (hrtimer_hres_active())
return;
- /*
- * This _is_ ugly: We have to check in the softirq context,
- * whether we can switch to highres and / or nohz mode. The
- * clocksource switch happens in the timer interrupt with
- * xtime_lock held. Notification from there only sets the
- * check bit in the tick_oneshot code, otherwise we might
- * deadlock vs. xtime_lock.
- */
- if (tick_check_oneshot_change(!hrtimer_is_hres_enabled()))
- if (hrtimer_switch_to_hres())
- return;
-
hrtimer_get_softirq_time(cpu_base);
for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++)
Index: linux-2.6/kernel/timer.c
===================================================================
--- linux-2.6.orig/kernel/timer.c
+++ linux-2.6/kernel/timer.c
@@ -896,7 +896,7 @@ static void run_timer_softirq(struct sof
{
tvec_base_t *base = __get_cpu_var(tvec_bases);
- hrtimer_run_queues();
+ hrtimer_run_pending();
if (time_after_eq(jiffies, base->timer_jiffies))
__run_timers(base);
@@ -907,6 +907,7 @@ static void run_timer_softirq(struct sof
*/
void run_local_timers(void)
{
+ hrtimer_run_queues();
raise_softirq(TIMER_SOFTIRQ);
softlockup_tick();
}
Index: linux-2.6/include/linux/hrtimer.h
===================================================================
--- linux-2.6.orig/include/linux/hrtimer.h
+++ linux-2.6/include/linux/hrtimer.h
@@ -115,10 +115,8 @@ struct hrtimer {
enum hrtimer_restart (*function)(struct hrtimer *);
struct hrtimer_clock_base *base;
unsigned long state;
-#ifdef CONFIG_HIGH_RES_TIMERS
enum hrtimer_cb_mode cb_mode;
struct list_head cb_entry;
-#endif
#ifdef CONFIG_TIMER_STATS
void *start_site;
char start_comm[16];
@@ -194,10 +192,10 @@ struct hrtimer_cpu_base {
spinlock_t lock;
struct lock_class_key lock_key;
struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES];
+ struct list_head cb_pending;
#ifdef CONFIG_HIGH_RES_TIMERS
ktime_t expires_next;
int hres_active;
- struct list_head cb_pending;
unsigned long nr_events;
#endif
};
@@ -319,6 +317,7 @@ extern void hrtimer_init_sleeper(struct
/* Soft interrupt function to run the hrtimer queues: */
extern void hrtimer_run_queues(void);
+extern void hrtimer_run_pending(void);
/* Bootup initialization: */
extern void __init hrtimers_init(void);
--
* [PATCH 05/11] hrtimer: unlock hrtimer_wakeup
2008-01-06 16:11 [PATCH 00/11] another rt group sched update Peter Zijlstra
` (3 preceding siblings ...)
2008-01-06 16:11 ` [PATCH 04/11] hrtimer: fixup the HRTIMER_CB_IRQSAFE_NO_SOFTIRQ fallback Peter Zijlstra
@ 2008-01-06 16:11 ` Peter Zijlstra
2008-01-06 16:11 ` [PATCH 06/11] sched: rt-group: reduce rescheduling Peter Zijlstra
` (7 subsequent siblings)
12 siblings, 0 replies; 31+ messages in thread
From: Peter Zijlstra @ 2008-01-06 16:11 UTC (permalink / raw)
To: LKML
Cc: Ingo Molnar, Balbir Singh, dmitry.adamushko, Srivatsa Vaddagiri,
Steven Rostedt, Gregory Haskins, Peter Zijlstra, Thomas Gleixner
[-- Attachment #1: hrtimer-do_nanosleep.patch --]
[-- Type: text/plain, Size: 1116 bytes --]
hrtimer_wakeup creates a
  base->lock
    rq->lock
lock dependency. Avoid this by switching to HRTIMER_CB_IRQSAFE_NO_SOFTIRQ,
which doesn't hold base->lock while running the callback.
This fully untangles hrtimer locks from the scheduler locks, and allows
hrtimer usage in the scheduler proper.
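The do_nanosleep() hunk handles the flip side of this callback mode: such timers
deal with already-expired enqueues themselves, so if hrtimer_active() is false
right after hrtimer_start() no wakeup will arrive and schedule() must be skipped.
The pattern, reduced to a stand-alone sketch with made-up names:

#include <stdio.h>

/* stand-in for hrtimer_start() + hrtimer_active(): returns 1 only if the
 * timer really got armed, i.e. the expiry was still in the future */
static int arm_wakeup(long long now_ns, long long expires_ns)
{
        if (expires_ns <= now_ns)
                return 0;       /* already expired: the callback will never run */
        /* ... arm a real one-shot timer here ... */
        return 1;
}

static void do_nanosleep_like(long long now_ns, long long expires_ns)
{
        int have_task = 1;                      /* stands in for t->task */

        if (!arm_wakeup(now_ns, expires_ns))
                have_task = 0;                  /* if (!hrtimer_active(...)) t->task = NULL; */

        if (have_task)
                printf("would call schedule() and wait for the wakeup\n");
        else
                printf("expiry already passed, don't go to sleep\n");
}

int main(void)
{
        do_nanosleep_like(1000, 1000);          /* deadline == now: already past  */
        do_nanosleep_like(1000, 2000);          /* deadline still in the future   */
        return 0;
}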
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
kernel/hrtimer.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
Index: linux-2.6/kernel/hrtimer.c
===================================================================
--- linux-2.6.orig/kernel/hrtimer.c
+++ linux-2.6/kernel/hrtimer.c
@@ -1296,7 +1296,7 @@ void hrtimer_init_sleeper(struct hrtimer
sl->timer.function = hrtimer_wakeup;
sl->task = task;
#ifdef CONFIG_HIGH_RES_TIMERS
- sl->timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_RESTART;
+ sl->timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
#endif
}
@@ -1307,6 +1307,8 @@ static int __sched do_nanosleep(struct h
do {
set_current_state(TASK_INTERRUPTIBLE);
hrtimer_start(&t->timer, t->timer.expires, mode);
+ if (!hrtimer_active(&t->timer))
+ t->task = NULL;
if (likely(t->task))
schedule();
--
* [PATCH 06/11] sched: rt-group: reduce rescheduling
2008-01-06 16:11 [PATCH 00/11] another rt group sched update Peter Zijlstra
` (4 preceding siblings ...)
2008-01-06 16:11 ` [PATCH 05/11] hrtimer: unlock hrtimer_wakeup Peter Zijlstra
@ 2008-01-06 16:11 ` Peter Zijlstra
2008-01-06 16:11 ` [PATCH 07/11] sched: rt-group: per group period Peter Zijlstra
` (6 subsequent siblings)
12 siblings, 0 replies; 31+ messages in thread
From: Peter Zijlstra @ 2008-01-06 16:11 UTC (permalink / raw)
To: LKML
Cc: Ingo Molnar, Balbir Singh, dmitry.adamushko, Srivatsa Vaddagiri,
Steven Rostedt, Gregory Haskins, Peter Zijlstra, Thomas Gleixner
[-- Attachment #1: sched-rt-group-fix-enqueue.patch --]
[-- Type: text/plain, Size: 748 bytes --]
Only reschedule if the new group has a higher prio task.
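Note the comparison direction: kernel priorities are inverted, a lower number means
a more important task, so highest_prio < curr->prio asks whether the newly runnable
group beats what is currently running. A tiny stand-alone version with arbitrary
values:

#include <stdio.h>

int main(void)
{
        int curr_prio = 40;             /* priority of the task currently running    */
        int group_highest_prio = 10;    /* best task in the just-unthrottled group   */

        /* lower number == more important; same sense as
         * rt_rq->highest_prio < curr->prio in the hunk below */
        if (group_highest_prio < curr_prio)
                printf("resched: the new group has a more important task\n");
        else
                printf("no reschedule needed\n");

        return 0;
}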
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
kernel/sched_rt.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
Index: linux-2.6/kernel/sched_rt.c
===================================================================
--- linux-2.6.orig/kernel/sched_rt.c
+++ linux-2.6/kernel/sched_rt.c
@@ -94,8 +94,11 @@ static void sched_rt_ratio_enqueue(struc
struct sched_rt_entity *rt_se = rt_rq->rt_se;
if (rt_se && !on_rt_rq(rt_se) && rt_rq->rt_nr_running) {
+ struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
+
enqueue_rt_entity(rt_se);
- resched_task(rq_of_rt_rq(rt_rq)->curr);
+ if (rt_rq->highest_prio < curr->prio)
+ resched_task(curr);
}
}
--
* [PATCH 07/11] sched: rt-group: per group period
2008-01-06 16:11 [PATCH 00/11] another rt group sched update Peter Zijlstra
` (5 preceding siblings ...)
2008-01-06 16:11 ` [PATCH 06/11] sched: rt-group: reduce rescheduling Peter Zijlstra
@ 2008-01-06 16:11 ` Peter Zijlstra
2008-01-06 16:11 ` [PATCH 08/11] sched: rt-group: deal with PI Peter Zijlstra
` (5 subsequent siblings)
12 siblings, 0 replies; 31+ messages in thread
From: Peter Zijlstra @ 2008-01-06 16:11 UTC (permalink / raw)
To: LKML
Cc: Ingo Molnar, Balbir Singh, dmitry.adamushko, Srivatsa Vaddagiri,
Steven Rostedt, Gregory Haskins, Peter Zijlstra, Thomas Gleixner
[-- Attachment #1: sched-rt-rq-hrtimer.patch --]
[-- Type: text/plain, Size: 14233 bytes --]
Steven asked for per-group periods in order to get closer to RMA or EDF
scheduling.
Use the fancy new hrtimers to provide a per-group period.
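The timer is kept periodic with the hrtimer_forward() idiom: push the expiry
forward by whole periods until it lies in the future, then re-arm. A stand-alone
sketch of that arithmetic in plain nanoseconds (values arbitrary, 1s is the
default period):

#include <stdio.h>
#include <stdint.h>

/* advance *expires by whole periods until it is strictly after now;
 * return how many periods were skipped, like hrtimer_forward() */
static uint64_t forward_period(uint64_t *expires, uint64_t now, uint64_t period)
{
        uint64_t overruns;

        if (now < *expires)
                return 0;

        overruns = (now - *expires) / period + 1;
        *expires += overruns * period;

        return overruns;
}

int main(void)
{
        uint64_t period  = 1000000000ULL;       /* 1s, the default rt period */
        uint64_t expires = 5 * period;          /* was due at t=5s           */
        uint64_t now     = 7500000000ULL;       /* it is now t=7.5s          */
        uint64_t o       = forward_period(&expires, now, period);

        printf("skipped %llu period(s), next expiry at %llu ns\n",
               (unsigned long long)o, (unsigned long long)expires);
        return 0;
}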
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
include/linux/sched.h | 2
kernel/sched.c | 229 ++++++++++++++++++++++++++++++++++++++++++-----
kernel/sched_rt.c | 61 ++++++------
kernel/sysctl.c | 2
kernel/time/tick-sched.c | 5 -
5 files changed, 237 insertions(+), 62 deletions(-)
Index: linux-2.6/kernel/sched.c
===================================================================
--- linux-2.6.orig/kernel/sched.c
+++ linux-2.6/kernel/sched.c
@@ -177,6 +177,7 @@ struct task_group {
struct rt_rq **rt_rq;
unsigned int rt_ratio;
+ ktime_t rt_period;
/*
* shares assigned to a task group governs how much of cpu bandwidth
@@ -372,6 +373,7 @@ struct rt_rq {
#endif
int rt_throttled;
u64 rt_time;
+ struct hrtimer rt_period_timer;
#ifdef CONFIG_FAIR_GROUP_SCHED
struct rq *rq;
@@ -441,8 +443,6 @@ struct rq {
struct cfs_rq cfs;
struct rt_rq rt;
- u64 rt_period_expire;
- int rt_throttled;
#ifdef CONFIG_FAIR_GROUP_SCHED
/* list of leaf cfs_rq on this cpu: */
@@ -595,23 +595,6 @@ static void update_rq_clock(struct rq *r
#define task_rq(p) cpu_rq(task_cpu(p))
#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
-unsigned long rt_needs_cpu(int cpu)
-{
- struct rq *rq = cpu_rq(cpu);
- u64 delta;
-
- if (!rq->rt_throttled)
- return 0;
-
- if (rq->clock > rq->rt_period_expire)
- return 1;
-
- delta = rq->rt_period_expire - rq->clock;
- do_div(delta, NSEC_PER_SEC / HZ);
-
- return (unsigned long)delta;
-}
-
/*
* Tunables that become constants when CONFIG_SCHED_DEBUG is off:
*/
@@ -652,10 +635,10 @@ const_debug unsigned int sysctl_sched_fe
const_debug unsigned int sysctl_sched_nr_migrate = 32;
/*
- * period over which we measure -rt task cpu usage in ms.
+ * period over which we measure -rt task cpu usage in us.
* default: 1s
*/
-const_debug unsigned int sysctl_sched_rt_period = 1000;
+const_debug unsigned int sysctl_sched_rt_period = 1000000;
#define SCHED_RT_FRAC_SHIFT 16
#define SCHED_RT_FRAC (1UL << SCHED_RT_FRAC_SHIFT)
@@ -1245,6 +1228,12 @@ static unsigned long cpu_avg_load_per_ta
static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
#endif /* CONFIG_SMP */
+static inline ktime_t ns_to_ktime(u64 ns)
+{
+ static const ktime_t ktime_zero = { .tv64 = 0 };
+ return ktime_add_ns(ktime_zero, ns);
+}
+
#include "sched_stats.h"
#include "sched_idletask.c"
#include "sched_fair.c"
@@ -3741,7 +3730,6 @@ void scheduler_tick(void)
rq->tick_timestamp = rq->clock;
update_cpu_load(rq);
curr->sched_class->task_tick(rq, curr, 0);
- update_sched_rt_period(rq);
spin_unlock(&rq->lock);
#ifdef CONFIG_SMP
@@ -5287,6 +5275,158 @@ static inline void sched_init_granularit
sysctl_sched_batch_wakeup_granularity *= factor;
}
+static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer)
+{
+ struct rt_rq *rt_rq =
+ container_of(timer, struct rt_rq, rt_period_timer);
+ struct rq *rq = rq_of_rt_rq(rt_rq);
+ ktime_t now = ktime_get();
+
+ WARN_ON(smp_processor_id() != cpu_of(rq));
+ WARN_ON(!in_irq());
+
+ spin_lock(&rq->lock);
+ update_sched_rt_period(rt_rq);
+ spin_unlock(&rq->lock);
+
+ hrtimer_forward(timer, now, sched_rt_period(rt_rq));
+ return HRTIMER_RESTART;
+}
+
+static void sched_rt_period_start(struct rt_rq *rt_rq)
+{
+ ktime_t period = sched_rt_period(rt_rq);
+
+ WARN_ON(smp_processor_id() != cpu_of(rq_of_rt_rq(rt_rq)));
+
+ for (;;) {
+ ktime_t now = ktime_get();
+ hrtimer_forward(&rt_rq->rt_period_timer, now, period);
+ hrtimer_start(&rt_rq->rt_period_timer,
+ rt_rq->rt_period_timer.expires,
+ HRTIMER_MODE_ABS);
+ if (hrtimer_active(&rt_rq->rt_period_timer))
+ break;
+ }
+}
+
+#if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED
+static void sched_rt_period_stop(struct rt_rq *rt_rq)
+{
+ hrtimer_cancel(&rt_rq->rt_period_timer);
+}
+#endif
+
+static void sched_rt_period_start_cpu(int cpu)
+{
+ struct rq *rq = cpu_rq(cpu);
+ struct rt_rq *rt_rq;
+
+ for_each_leaf_rt_rq(rt_rq, rq)
+ sched_rt_period_start(rt_rq);
+}
+
+#ifdef CONFIG_SMP
+static void sched_rt_period_stop_cpu(int cpu)
+{
+ struct rq *rq = cpu_rq(cpu);
+ struct rt_rq *rt_rq;
+
+ for_each_leaf_rt_rq(rt_rq, rq)
+ sched_rt_period_stop(rt_rq);
+}
+
+static int sched_rt_period_hotplug(struct notifier_block *nfb,
+ unsigned long action, void *hcpu)
+{
+ int cpu = (unsigned long)hcpu;
+
+ switch (action) {
+ case CPU_UP_PREPARE:
+ case CPU_UP_PREPARE_FROZEN:
+ case CPU_DOWN_FAILED:
+ case CPU_DOWN_FAILED_FROZEN:
+ sched_rt_period_start_cpu(cpu);
+ return NOTIFY_OK;
+
+ case CPU_DOWN_PREPARE:
+ case CPU_DOWN_PREPARE_FROZEN:
+ case CPU_UP_CANCELED:
+ case CPU_UP_CANCELED_FROZEN:
+ sched_rt_period_stop_cpu(cpu);
+ return NOTIFY_OK;
+
+ case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
+ case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
+ return NOTIFY_OK;
+
+ default:
+ return NOTIFY_DONE;
+ }
+
+ return NOTIFY_OK;
+}
+
+static void __init __sched_rt_period_init(void *arg)
+{
+ int cpu = smp_processor_id();
+ sched_rt_period_start_cpu(cpu);
+}
+
+static void __init sched_rt_period_init(void)
+{
+ on_each_cpu(__sched_rt_period_init, NULL, 0, 1);
+ hotcpu_notifier(sched_rt_period_hotplug, 0);
+}
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+static void __sched_rt_period_init_tg(void *arg)
+{
+ struct task_group *tg = arg;
+ int cpu = smp_processor_id();
+
+ sched_rt_period_start(tg->rt_rq[cpu]);
+}
+
+static void sched_rt_period_init_tg(struct task_group *tg)
+{
+ on_each_cpu(__sched_rt_period_init_tg, tg, 0, 1);
+}
+
+static void __sched_rt_period_destroy_tg(void *arg)
+{
+ struct task_group *tg = arg;
+ int cpu = smp_processor_id();
+
+ sched_rt_period_stop(tg->rt_rq[cpu]);
+}
+
+static void sched_rt_period_destroy_tg(struct task_group *tg)
+{
+ on_each_cpu(__sched_rt_period_destroy_tg, tg, 0, 1);
+}
+#endif /* CONFIG_FAIR_GROUP_SCHED */
+#else /* CONFIG_SMP */
+static void __init sched_rt_period_init(void)
+{
+ sched_rt_period_start_cpu(0);
+}
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+static void sched_rt_period_init_tg(struct task_group *tg)
+{
+ sched_rt_period_start(tg->rt_rq[0]);
+}
+
+static void sched_rt_period_destroy_tg(struct task_group *tg)
+{
+ sched_rt_period_stop(tg->rt_rq[0]);
+}
+#endif /* CONFIG_FAIR_GROUP_SCHED */
+#endif /* CONFIG_SMP */
+
#ifdef CONFIG_SMP
/*
* This is how migration works:
@@ -7068,6 +7208,7 @@ void __init sched_init_smp(void)
if (set_cpus_allowed(current, non_isolated_cpus) < 0)
BUG();
sched_init_granularity();
+ sched_rt_period_init();
#ifdef CONFIG_FAIR_GROUP_SCHED
if (nr_cpu_ids == 1)
@@ -7088,6 +7229,7 @@ void __init sched_init_smp(void)
void __init sched_init_smp(void)
{
sched_init_granularity();
+ sched_rt_period_init();
}
#endif /* CONFIG_SMP */
@@ -7131,6 +7273,11 @@ static void init_rt_rq(struct rt_rq *rt_
rt_rq->rt_time = 0;
rt_rq->rt_throttled = 0;
+ hrtimer_init(&rt_rq->rt_period_timer,
+ CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ rt_rq->rt_period_timer.function = sched_rt_period_timer;
+ rt_rq->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
+
#ifdef CONFIG_FAIR_GROUP_SCHED
rt_rq->rq = rq;
#endif
@@ -7201,6 +7348,8 @@ void __init sched_init(void)
&per_cpu(init_sched_entity, i), i, 1);
init_task_group.rt_ratio = sysctl_sched_rt_ratio; /* XXX */
+ init_task_group.rt_period =
+ ns_to_ktime(sysctl_sched_rt_period * NSEC_PER_USEC);
INIT_LIST_HEAD(&rq->leaf_rt_rq_list);
init_tg_rt_entry(rq, &init_task_group,
&per_cpu(init_rt_rq, i),
@@ -7208,8 +7357,6 @@ void __init sched_init(void)
list_add(&init_task_group.list, &task_groups);
#endif
- rq->rt_period_expire = 0;
- rq->rt_throttled = 0;
for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
rq->cpu_load[j] = 0;
@@ -7598,6 +7745,7 @@ struct task_group *sched_create_group(vo
tg->shares = NICE_0_LOAD;
tg->rt_ratio = 0; /* XXX */
+ tg->rt_period = ns_to_ktime(sysctl_sched_rt_period * NSEC_PER_USEC);
for_each_possible_cpu(i) {
rq = cpu_rq(i);
@@ -7637,6 +7785,8 @@ struct task_group *sched_create_group(vo
list_add_rcu(&tg->list, &task_groups);
unlock_task_group_list();
+ sched_rt_period_init_tg(tg);
+
return tg;
err:
@@ -7658,6 +7808,8 @@ void sched_destroy_group(struct task_gro
struct rt_rq *rt_rq = NULL;
int i;
+ sched_rt_period_destroy_tg(tg);
+
lock_task_group_list();
for_each_possible_cpu(i) {
cfs_rq = tg->cfs_rq[i];
@@ -7815,6 +7967,19 @@ unsigned long sched_group_rt_ratio(struc
return tg->rt_ratio;
}
+int sched_group_set_rt_period(struct task_group *tg, unsigned long rt_period)
+{
+ tg->rt_period = ns_to_ktime((u64)rt_period * NSEC_PER_USEC);
+ return 0;
+}
+
+unsigned long sched_group_rt_period(struct task_group *tg)
+{
+ u64 ns = ktime_to_ns(tg->rt_period);
+ do_div(ns, NSEC_PER_USEC);
+ return ns;
+}
+
#endif /* CONFIG_FAIR_GROUP_SCHED */
#ifdef CONFIG_FAIR_CGROUP_SCHED
@@ -7903,6 +8068,17 @@ static u64 cpu_rt_ratio_read_uint(struct
return (u64) tg->rt_ratio;
}
+static int cpu_rt_period_write_uint(struct cgroup *cgrp, struct cftype *cftype,
+ u64 rt_period_val)
+{
+ return sched_group_set_rt_period(cgroup_tg(cgrp), rt_period_val);
+}
+
+static u64 cpu_rt_period_read_uint(struct cgroup *cgrp, struct cftype *cft)
+{
+ return (u64) sched_group_rt_period(cgroup_tg(cgrp));
+}
+
static struct cftype cpu_files[] = {
{
.name = "shares",
@@ -7914,6 +8090,11 @@ static struct cftype cpu_files[] = {
.read_uint = cpu_rt_ratio_read_uint,
.write_uint = cpu_rt_ratio_write_uint,
},
+ {
+ .name = "rt_period_us",
+ .read_uint = cpu_rt_period_read_uint,
+ .write_uint = cpu_rt_period_write_uint,
+ },
};
static int cpu_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont)
Index: linux-2.6/include/linux/sched.h
===================================================================
--- linux-2.6.orig/include/linux/sched.h
+++ linux-2.6/include/linux/sched.h
@@ -230,8 +230,6 @@ static inline int select_nohz_load_balan
}
#endif
-extern unsigned long rt_needs_cpu(int cpu);
-
/*
* Only dump TASK_* tasks. (0 for all tasks)
*/
Index: linux-2.6/kernel/sched_rt.c
===================================================================
--- linux-2.6.orig/kernel/sched_rt.c
+++ linux-2.6/kernel/sched_rt.c
@@ -65,6 +65,17 @@ static inline unsigned int sched_rt_rati
return rt_rq->tg->rt_ratio;
}
+static inline ktime_t sched_rt_period(struct rt_rq *rt_rq)
+{
+ BUG_ON(!rt_rq->tg);
+ return rt_rq->tg->rt_period;
+}
+
+static inline u64 sched_rt_period_ns(struct rt_rq *rt_rq)
+{
+ return ktime_to_ns(sched_rt_period(rt_rq));
+}
+
#define for_each_leaf_rt_rq(rt_rq, rq) \
list_for_each_entry(rt_rq, &rq->leaf_rt_rq_list, leaf_rt_rq_list)
@@ -117,6 +128,16 @@ static inline unsigned int sched_rt_rati
return sysctl_sched_rt_ratio;
}
+static inline ktime_t sched_rt_period(struct rt_rq *rt_rq)
+{
+ return ns_to_ktime((u64)sysctl_sched_rt_period * NSEC_PER_USEC);
+}
+
+static inline u64 sched_rt_period_ns(struct rt_rq *rt_rq)
+{
+ return (u64)sysctl_sched_rt_period * NSEC_PER_USEC;
+}
+
#define for_each_leaf_rt_rq(rt_rq, rq) \
for (rt_rq = &rq->rt; rt_rq; rt_rq = NULL)
@@ -174,15 +195,11 @@ static int sched_rt_ratio_exceeded(struc
if (rt_rq->rt_throttled)
return 1;
- period = (u64)sysctl_sched_rt_period * NSEC_PER_MSEC;
+ period = sched_rt_period_ns(rt_rq);
ratio = (period * rt_ratio) >> SCHED_RT_FRAC_SHIFT;
if (rt_rq->rt_time > ratio) {
- struct rq *rq = rq_of_rt_rq(rt_rq);
-
- rq->rt_throttled = 1;
rt_rq->rt_throttled = 1;
-
sched_rt_ratio_dequeue(rt_rq);
return 1;
}
@@ -190,27 +207,16 @@ static int sched_rt_ratio_exceeded(struc
return 0;
}
-static void update_sched_rt_period(struct rq *rq)
+static void update_sched_rt_period(struct rt_rq *rt_rq)
{
- struct rt_rq *rt_rq;
- u64 period;
-
- while (rq->clock > rq->rt_period_expire) {
- period = (u64)sysctl_sched_rt_period * NSEC_PER_MSEC;
- rq->rt_period_expire += period;
-
- for_each_leaf_rt_rq(rt_rq, rq) {
- unsigned long rt_ratio = sched_rt_ratio(rt_rq);
- u64 ratio = (period * rt_ratio) >> SCHED_RT_FRAC_SHIFT;
-
- rt_rq->rt_time -= min(rt_rq->rt_time, ratio);
- if (rt_rq->rt_throttled) {
- rt_rq->rt_throttled = 0;
- sched_rt_ratio_enqueue(rt_rq);
- }
- }
-
- rq->rt_throttled = 0;
+ u64 period = sched_rt_period_ns(rt_rq);
+ unsigned long rt_ratio = sched_rt_ratio(rt_rq);
+ u64 ratio = (period * rt_ratio) >> SCHED_RT_FRAC_SHIFT;
+
+ rt_rq->rt_time -= min(rt_rq->rt_time, ratio);
+ if (rt_rq->rt_throttled) {
+ rt_rq->rt_throttled = 0;
+ sched_rt_ratio_enqueue(rt_rq);
}
}
@@ -238,11 +244,6 @@ static void update_curr_rt(struct rq *rq
cpuacct_charge(curr, delta_exec);
rt_rq->rt_time += delta_exec;
- /*
- * might make it a tad more accurate:
- *
- * update_sched_rt_period(rq);
- */
if (sched_rt_ratio_exceeded(rt_rq))
resched_task(curr);
}
Index: linux-2.6/kernel/sysctl.c
===================================================================
--- linux-2.6.orig/kernel/sysctl.c
+++ linux-2.6/kernel/sysctl.c
@@ -311,7 +311,7 @@ static struct ctl_table kern_table[] = {
},
{
.ctl_name = CTL_UNNUMBERED,
- .procname = "sched_rt_period_ms",
+ .procname = "sched_rt_period_us",
.data = &sysctl_sched_rt_period,
.maxlen = sizeof(unsigned int),
.mode = 0644,
Index: linux-2.6/kernel/time/tick-sched.c
===================================================================
--- linux-2.6.orig/kernel/time/tick-sched.c
+++ linux-2.6/kernel/time/tick-sched.c
@@ -153,7 +153,6 @@ void tick_nohz_update_jiffies(void)
void tick_nohz_stop_sched_tick(void)
{
unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags;
- unsigned long rt_jiffies;
struct tick_sched *ts;
ktime_t last_update, expires, now, delta;
struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
@@ -217,10 +216,6 @@ void tick_nohz_stop_sched_tick(void)
next_jiffies = get_next_timer_interrupt(last_jiffies);
delta_jiffies = next_jiffies - last_jiffies;
- rt_jiffies = rt_needs_cpu(cpu);
- if (rt_jiffies && rt_jiffies < delta_jiffies)
- delta_jiffies = rt_jiffies;
-
if (rcu_needs_cpu(cpu))
delta_jiffies = 1;
/*
--
* [PATCH 08/11] sched: rt-group: deal with PI
2008-01-06 16:11 [PATCH 00/11] another rt group sched update Peter Zijlstra
` (6 preceding siblings ...)
2008-01-06 16:11 ` [PATCH 07/11] sched: rt-group: per group period Peter Zijlstra
@ 2008-01-06 16:11 ` Peter Zijlstra
2008-01-06 16:11 ` [PATCH 09/11] sched: rt-group: dynamic period ticks Peter Zijlstra
` (4 subsequent siblings)
12 siblings, 0 replies; 31+ messages in thread
From: Peter Zijlstra @ 2008-01-06 16:11 UTC (permalink / raw)
To: LKML
Cc: Ingo Molnar, Balbir Singh, dmitry.adamushko, Srivatsa Vaddagiri,
Steven Rostedt, Gregory Haskins, Peter Zijlstra, Thomas Gleixner
[-- Attachment #1: sched-rt-group-pi.patch --]
[-- Type: text/plain, Size: 3892 bytes --]
Steven mentioned the fun case where a lock-holding task gets throttled.
Simple fix: allow groups that have boosted tasks to run anyway.
This is of course not quite correct; it needs more tricks.
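The core of it is the new rt_rq_throttled() predicate: a group only counts as
throttled when it is over its ratio *and* has no boosted tasks. Toy version of
the bookkeeping; only the two fields mirror the patch, the rest is invented:

#include <stdio.h>
#include <stdbool.h>

struct toy_rt_rq {
        int rt_throttled;               /* ran over its ratio this period */
        unsigned long rt_nr_boosted;    /* PI-boosted tasks in this group */
};

/* same test as rt_rq_throttled() in the patch */
static bool rt_rq_throttled(struct toy_rt_rq *rt_rq)
{
        return rt_rq->rt_throttled && !rt_rq->rt_nr_boosted;
}

int main(void)
{
        struct toy_rt_rq over_budget = { .rt_throttled = 1, .rt_nr_boosted = 0 };
        struct toy_rt_rq lock_holder = { .rt_throttled = 1, .rt_nr_boosted = 1 };

        printf("over budget, no boosted tasks : %s\n",
               rt_rq_throttled(&over_budget) ? "dequeued" : "keeps running");
        printf("over budget, PI-boosted holder: %s\n",
               rt_rq_throttled(&lock_holder) ? "dequeued" : "keeps running");
        return 0;
}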
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
kernel/sched.c | 3 +++
kernel/sched_rt.c | 48 ++++++++++++++++++++++++++++++++++++++++--------
2 files changed, 43 insertions(+), 8 deletions(-)
Index: linux-2.6/kernel/sched.c
===================================================================
--- linux-2.6.orig/kernel/sched.c
+++ linux-2.6/kernel/sched.c
@@ -376,6 +376,8 @@ struct rt_rq {
struct hrtimer rt_period_timer;
#ifdef CONFIG_FAIR_GROUP_SCHED
+ unsigned long rt_nr_boosted;
+
struct rq *rq;
struct list_head leaf_rt_rq_list;
struct task_group *tg;
@@ -7279,6 +7281,7 @@ static void init_rt_rq(struct rt_rq *rt_
rt_rq->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
#ifdef CONFIG_FAIR_GROUP_SCHED
+ rt_rq->rt_nr_boosted = 0;
rt_rq->rq = rq;
#endif
}
Index: linux-2.6/kernel/sched_rt.c
===================================================================
--- linux-2.6.orig/kernel/sched_rt.c
+++ linux-2.6/kernel/sched_rt.c
@@ -121,6 +121,23 @@ static void sched_rt_ratio_dequeue(struc
dequeue_rt_entity(rt_se);
}
+static inline int rt_rq_throttled(struct rt_rq *rt_rq)
+{
+ return rt_rq->rt_throttled && !rt_rq->rt_nr_boosted;
+}
+
+static int rt_se_boosted(struct sched_rt_entity *rt_se)
+{
+ struct rt_rq *rt_rq = group_rt_rq(rt_se);
+ struct task_struct *p;
+
+ if (rt_rq)
+ return !!rt_rq->rt_nr_boosted;
+
+ p = rt_task_of(rt_se);
+ return p->prio != p->normal_prio;
+}
+
#else
static inline unsigned int sched_rt_ratio(struct rt_rq *rt_rq)
@@ -170,6 +187,10 @@ static inline void sched_rt_ratio_dequeu
{
}
+static inline int rt_rq_throttled(struct rt_rq *rt_rq)
+{
+ return rt_rq->rt_throttled;
+}
#endif
static inline int rt_se_prio(struct sched_rt_entity *rt_se)
@@ -190,21 +211,22 @@ static int sched_rt_ratio_exceeded(struc
u64 period, ratio;
if (rt_ratio == SCHED_RT_FRAC)
- return 0;
+ goto out;
if (rt_rq->rt_throttled)
- return 1;
+ goto out;
period = sched_rt_period_ns(rt_rq);
ratio = (period * rt_ratio) >> SCHED_RT_FRAC_SHIFT;
if (rt_rq->rt_time > ratio) {
rt_rq->rt_throttled = 1;
- sched_rt_ratio_dequeue(rt_rq);
- return 1;
+ if (rt_rq_throttled(rt_rq))
+ sched_rt_ratio_dequeue(rt_rq);
}
- return 0;
+out:
+ return rt_rq_throttled(rt_rq);
}
static void update_sched_rt_period(struct rt_rq *rt_rq)
@@ -265,6 +287,10 @@ void inc_rt_tasks(struct sched_rt_entity
update_rt_migration(rq_of_rt_rq(rt_rq));
#endif
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ if (rt_se_boosted(rt_se))
+ rt_rq->rt_nr_boosted++;
+#endif
}
static inline
@@ -295,6 +321,12 @@ void dec_rt_tasks(struct sched_rt_entity
update_rt_migration(rq_of_rt_rq(rt_rq));
#endif /* CONFIG_SMP */
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ if (rt_se_boosted(rt_se))
+ rt_rq->rt_nr_boosted--;
+
+ WARN_ON(!rt_rq->rt_nr_running && rt_rq->rt_nr_boosted);
+#endif
}
static void enqueue_rt_entity(struct sched_rt_entity *rt_se)
@@ -303,7 +335,7 @@ static void enqueue_rt_entity(struct sch
struct rt_prio_array *array = &rt_rq->active;
struct rt_rq *group_rq = group_rt_rq(rt_se);
- if (group_rq && group_rq->rt_throttled)
+ if (group_rq && rt_rq_throttled(group_rq))
return;
list_add_tail(&rt_se->run_list, array->queue + rt_se_prio(rt_se));
@@ -476,7 +508,7 @@ static struct sched_rt_entity *pick_next
struct list_head *queue;
int idx;
- if (sched_rt_ratio_exceeded(rt_rq))
+ if (rt_rq_throttled(rt_rq))
goto out;
idx = sched_find_first_bit(array->bitmap);
@@ -500,7 +532,7 @@ static struct task_struct *pick_next_tas
if (unlikely(!rt_rq->rt_nr_running))
return NULL;
- if (sched_rt_ratio_exceeded(rt_rq))
+ if (rt_rq_throttled(rt_rq))
return NULL;
do {
--
* [PATCH 09/11] sched: rt-group: dynamic period ticks
2008-01-06 16:11 [PATCH 00/11] another rt group sched update Peter Zijlstra
` (7 preceding siblings ...)
2008-01-06 16:11 ` [PATCH 08/11] sched: rt-group: deal with PI Peter Zijlstra
@ 2008-01-06 16:11 ` Peter Zijlstra
2008-01-06 16:11 ` [PATCH 10/11] sched: rt-group: EDF Peter Zijlstra
` (3 subsequent siblings)
12 siblings, 0 replies; 31+ messages in thread
From: Peter Zijlstra @ 2008-01-06 16:11 UTC (permalink / raw)
To: LKML
Cc: Ingo Molnar, Balbir Singh, dmitry.adamushko, Srivatsa Vaddagiri,
Steven Rostedt, Gregory Haskins, Peter Zijlstra, Thomas Gleixner
[-- Attachment #1: sched-rt-rq-dyn.patch --]
[-- Type: text/plain, Size: 7384 bytes --]
Disable the period updates for inactive groups.
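The period timer now follows the group's runnable count: started when the first
task of a group is enqueued, cancelled when the last one leaves. A minimal sketch
of that 0 -> 1 / 1 -> 0 pattern (the group_rt_rq() detail is dropped and the timer
is reduced to printfs):

#include <stdio.h>

static unsigned int rt_nr_running;

static void period_timer_start(void) { printf("start period timer\n"); }
static void period_timer_stop(void)  { printf("stop period timer\n");  }

/* mirrors inc_rt_tasks()/dec_rt_tasks(): only 0 <-> 1 transitions touch the timer */
static void inc_rt_tasks(void)
{
        if (!rt_nr_running)
                period_timer_start();
        rt_nr_running++;
}

static void dec_rt_tasks(void)
{
        rt_nr_running--;
        if (!rt_nr_running)
                period_timer_stop();
}

int main(void)
{
        inc_rt_tasks();         /* first task: timer starts      */
        inc_rt_tasks();         /* second task: nothing to do    */
        dec_rt_tasks();
        dec_rt_tasks();         /* last task leaves: timer stops */
        return 0;
}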
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
kernel/sched.c | 158 ------------------------------------------------------
kernel/sched_rt.c | 54 ++++++++++++++++++
2 files changed, 53 insertions(+), 159 deletions(-)
Index: linux-2.6/kernel/sched.c
===================================================================
--- linux-2.6.orig/kernel/sched.c
+++ linux-2.6/kernel/sched.c
@@ -5277,158 +5277,6 @@ static inline void sched_init_granularit
sysctl_sched_batch_wakeup_granularity *= factor;
}
-static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer)
-{
- struct rt_rq *rt_rq =
- container_of(timer, struct rt_rq, rt_period_timer);
- struct rq *rq = rq_of_rt_rq(rt_rq);
- ktime_t now = ktime_get();
-
- WARN_ON(smp_processor_id() != cpu_of(rq));
- WARN_ON(!in_irq());
-
- spin_lock(&rq->lock);
- update_sched_rt_period(rt_rq);
- spin_unlock(&rq->lock);
-
- hrtimer_forward(timer, now, sched_rt_period(rt_rq));
- return HRTIMER_RESTART;
-}
-
-static void sched_rt_period_start(struct rt_rq *rt_rq)
-{
- ktime_t period = sched_rt_period(rt_rq);
-
- WARN_ON(smp_processor_id() != cpu_of(rq_of_rt_rq(rt_rq)));
-
- for (;;) {
- ktime_t now = ktime_get();
- hrtimer_forward(&rt_rq->rt_period_timer, now, period);
- hrtimer_start(&rt_rq->rt_period_timer,
- rt_rq->rt_period_timer.expires,
- HRTIMER_MODE_ABS);
- if (hrtimer_active(&rt_rq->rt_period_timer))
- break;
- }
-}
-
-#if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED
-static void sched_rt_period_stop(struct rt_rq *rt_rq)
-{
- hrtimer_cancel(&rt_rq->rt_period_timer);
-}
-#endif
-
-static void sched_rt_period_start_cpu(int cpu)
-{
- struct rq *rq = cpu_rq(cpu);
- struct rt_rq *rt_rq;
-
- for_each_leaf_rt_rq(rt_rq, rq)
- sched_rt_period_start(rt_rq);
-}
-
-#ifdef CONFIG_SMP
-static void sched_rt_period_stop_cpu(int cpu)
-{
- struct rq *rq = cpu_rq(cpu);
- struct rt_rq *rt_rq;
-
- for_each_leaf_rt_rq(rt_rq, rq)
- sched_rt_period_stop(rt_rq);
-}
-
-static int sched_rt_period_hotplug(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
-{
- int cpu = (unsigned long)hcpu;
-
- switch (action) {
- case CPU_UP_PREPARE:
- case CPU_UP_PREPARE_FROZEN:
- case CPU_DOWN_FAILED:
- case CPU_DOWN_FAILED_FROZEN:
- sched_rt_period_start_cpu(cpu);
- return NOTIFY_OK;
-
- case CPU_DOWN_PREPARE:
- case CPU_DOWN_PREPARE_FROZEN:
- case CPU_UP_CANCELED:
- case CPU_UP_CANCELED_FROZEN:
- sched_rt_period_stop_cpu(cpu);
- return NOTIFY_OK;
-
- case CPU_ONLINE:
- case CPU_ONLINE_FROZEN:
- case CPU_DEAD:
- case CPU_DEAD_FROZEN:
- return NOTIFY_OK;
-
- default:
- return NOTIFY_DONE;
- }
-
- return NOTIFY_OK;
-}
-
-static void __init __sched_rt_period_init(void *arg)
-{
- int cpu = smp_processor_id();
- sched_rt_period_start_cpu(cpu);
-}
-
-static void __init sched_rt_period_init(void)
-{
- on_each_cpu(__sched_rt_period_init, NULL, 0, 1);
- hotcpu_notifier(sched_rt_period_hotplug, 0);
-}
-
-#ifdef CONFIG_FAIR_GROUP_SCHED
-static void __sched_rt_period_init_tg(void *arg)
-{
- struct task_group *tg = arg;
- int cpu = smp_processor_id();
-
- sched_rt_period_start(tg->rt_rq[cpu]);
-}
-
-static void sched_rt_period_init_tg(struct task_group *tg)
-{
- on_each_cpu(__sched_rt_period_init_tg, tg, 0, 1);
-}
-
-static void __sched_rt_period_destroy_tg(void *arg)
-{
- struct task_group *tg = arg;
- int cpu = smp_processor_id();
-
- sched_rt_period_stop(tg->rt_rq[cpu]);
-}
-
-static void sched_rt_period_destroy_tg(struct task_group *tg)
-{
- on_each_cpu(__sched_rt_period_destroy_tg, tg, 0, 1);
-}
-#endif /* CONFIG_FAIR_GROUP_SCHED */
-#else /* CONFIG_SMP */
-static void __init sched_rt_period_init(void)
-{
- sched_rt_period_start_cpu(0);
-}
-
-#ifdef CONFIG_FAIR_GROUP_SCHED
-static void sched_rt_period_init_tg(struct task_group *tg)
-{
- sched_rt_period_start(tg->rt_rq[0]);
-}
-
-static void sched_rt_period_destroy_tg(struct task_group *tg)
-{
- sched_rt_period_stop(tg->rt_rq[0]);
-}
-#endif /* CONFIG_FAIR_GROUP_SCHED */
-#endif /* CONFIG_SMP */
-
#ifdef CONFIG_SMP
/*
* This is how migration works:
@@ -7210,7 +7058,6 @@ void __init sched_init_smp(void)
if (set_cpus_allowed(current, non_isolated_cpus) < 0)
BUG();
sched_init_granularity();
- sched_rt_period_init();
#ifdef CONFIG_FAIR_GROUP_SCHED
if (nr_cpu_ids == 1)
@@ -7231,7 +7078,6 @@ void __init sched_init_smp(void)
void __init sched_init_smp(void)
{
sched_init_granularity();
- sched_rt_period_init();
}
#endif /* CONFIG_SMP */
@@ -7788,8 +7634,6 @@ struct task_group *sched_create_group(vo
list_add_rcu(&tg->list, &task_groups);
unlock_task_group_list();
- sched_rt_period_init_tg(tg);
-
return tg;
err:
@@ -7811,8 +7655,6 @@ void sched_destroy_group(struct task_gro
struct rt_rq *rt_rq = NULL;
int i;
- sched_rt_period_destroy_tg(tg);
-
lock_task_group_list();
for_each_possible_cpu(i) {
cfs_rq = tg->cfs_rq[i];
Index: linux-2.6/kernel/sched_rt.c
===================================================================
--- linux-2.6.orig/kernel/sched_rt.c
+++ linux-2.6/kernel/sched_rt.c
@@ -221,8 +221,10 @@ static int sched_rt_ratio_exceeded(struc
if (rt_rq->rt_time > ratio) {
rt_rq->rt_throttled = 1;
- if (rt_rq_throttled(rt_rq))
+ if (rt_rq_throttled(rt_rq)) {
+ WARN_ON(!hrtimer_active(&rt_rq->rt_period_timer));
sched_rt_ratio_dequeue(rt_rq);
+ }
}
out:
@@ -242,6 +244,52 @@ static void update_sched_rt_period(struc
}
}
+static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer)
+{
+ struct rt_rq *rt_rq =
+ container_of(timer, struct rt_rq, rt_period_timer);
+ struct rq *rq = rq_of_rt_rq(rt_rq);
+ ktime_t now = ktime_get();
+
+ WARN_ON(smp_processor_id() != cpu_of(rq));
+ WARN_ON(!in_irq());
+
+ spin_lock(&rq->lock);
+ update_sched_rt_period(rt_rq);
+ spin_unlock(&rq->lock);
+
+ hrtimer_forward(timer, now, sched_rt_period(rt_rq));
+
+ return HRTIMER_RESTART;
+}
+
+static void sched_rt_period_start(struct rt_rq *rt_rq)
+{
+ ktime_t period;
+
+ WARN_ON(smp_processor_id() != cpu_of(rq_of_rt_rq(rt_rq)));
+
+ if (hrtimer_active(&rt_rq->rt_period_timer))
+ return;
+
+ period = sched_rt_period(rt_rq);
+
+ for (;;) {
+ ktime_t now = ktime_get();
+ hrtimer_forward(&rt_rq->rt_period_timer, now, period);
+ hrtimer_start(&rt_rq->rt_period_timer,
+ rt_rq->rt_period_timer.expires,
+ HRTIMER_MODE_ABS);
+ if (hrtimer_active(&rt_rq->rt_period_timer))
+ break;
+ }
+}
+
+static void sched_rt_period_stop(struct rt_rq *rt_rq)
+{
+ hrtimer_cancel(&rt_rq->rt_period_timer);
+}
+
/*
* Update the current task's runtime statistics. Skip current tasks that
* are not in our scheduling class.
@@ -274,6 +322,8 @@ static inline
void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
WARN_ON(!rt_prio(rt_se_prio(rt_se)));
+ if (!rt_rq->rt_nr_running && !group_rt_rq(rt_se))
+ sched_rt_period_start(rt_rq);
rt_rq->rt_nr_running++;
#if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED
if (rt_se_prio(rt_se) < rt_rq->highest_prio)
@@ -299,6 +349,8 @@ void dec_rt_tasks(struct sched_rt_entity
WARN_ON(!rt_prio(rt_se_prio(rt_se)));
WARN_ON(!rt_rq->rt_nr_running);
rt_rq->rt_nr_running--;
+ if (!rt_rq->rt_nr_running && !group_rt_rq(rt_se))
+ sched_rt_period_stop(rt_rq);
#if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED
if (rt_rq->rt_nr_running) {
struct rt_prio_array *array;
--
* [PATCH 10/11] sched: rt-group: EDF
2008-01-06 16:11 [PATCH 00/11] another rt group sched update Peter Zijlstra
` (8 preceding siblings ...)
2008-01-06 16:11 ` [PATCH 09/11] sched: rt-group: dynamic period ticks Peter Zijlstra
@ 2008-01-06 16:11 ` Peter Zijlstra
2008-01-06 16:11 ` [PATCH 11/11] sched: rt-group: interface Peter Zijlstra
` (2 subsequent siblings)
12 siblings, 0 replies; 31+ messages in thread
From: Peter Zijlstra @ 2008-01-06 16:11 UTC (permalink / raw)
To: LKML
Cc: Ingo Molnar, Balbir Singh, dmitry.adamushko, Srivatsa Vaddagiri,
Steven Rostedt, Gregory Haskins, Peter Zijlstra, Thomas Gleixner
[-- Attachment #1: sched-rt-group-edf.patch --]
[-- Type: text/plain, Size: 6548 bytes --]
Use a simple Earliest Deadline First (EDF) implementation to schedule the
realtime groups.
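EDF here means: among the runnable groups, always pick the one whose current
period ends soonest; the patch keeps the groups in an rbtree keyed on the period
timer's expiry and caches the leftmost node. A stand-alone pick over a plain
array, with the rbtree replaced by a linear scan and made-up groups:

#include <stdio.h>
#include <stdint.h>

struct toy_group {
        const char *name;
        uint64_t    deadline;   /* end of its current period, in ns */
        int         runnable;
};

/* earliest-deadline-first: smallest deadline among the runnable groups */
static struct toy_group *edf_pick(struct toy_group *g, int n)
{
        struct toy_group *best = NULL;
        int i;

        for (i = 0; i < n; i++) {
                if (!g[i].runnable)
                        continue;
                if (!best || g[i].deadline < best->deadline)
                        best = &g[i];
        }
        return best;
}

int main(void)
{
        struct toy_group groups[] = {
                { "audio", 2000000,  1 },       /* period ends in 2ms             */
                { "video", 16000000, 1 },       /* period ends in 16ms            */
                { "ctl",   1000000,  0 },       /* earliest, but nothing runnable */
        };
        struct toy_group *next = edf_pick(groups, 3);

        if (next)
                printf("run group '%s' next (deadline %llu ns)\n",
                       next->name, (unsigned long long)next->deadline);
        return 0;
}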
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
include/linux/sched.h | 1
kernel/sched.c | 13 +++++
kernel/sched_rt.c | 115 +++++++++++++++++++++++++++++++++++++++++++++++---
3 files changed, 124 insertions(+), 5 deletions(-)
Index: linux-2.6/include/linux/sched.h
===================================================================
--- linux-2.6.orig/include/linux/sched.h
+++ linux-2.6/include/linux/sched.h
@@ -942,6 +942,7 @@ struct sched_rt_entity {
int nr_cpus_allowed;
#ifdef CONFIG_FAIR_GROUP_SCHED
+ struct rb_node run_node;
struct sched_rt_entity *parent;
/* rq on which this entity is (to be) queued: */
struct rt_rq *rt_rq;
Index: linux-2.6/kernel/sched.c
===================================================================
--- linux-2.6.orig/kernel/sched.c
+++ linux-2.6/kernel/sched.c
@@ -360,6 +360,11 @@ struct cfs_rq {
#endif
};
+enum rt_rq_type {
+ RT_RQ_PRIO,
+ RT_RQ_EDF,
+};
+
/* Real-Time classes' related field in a runqueue: */
struct rt_rq {
struct rt_prio_array active;
@@ -376,6 +381,10 @@ struct rt_rq {
struct hrtimer rt_period_timer;
#ifdef CONFIG_FAIR_GROUP_SCHED
+ enum rt_rq_type rt_rq_type;
+ struct rb_root deadlines;
+ struct rb_node *rb_leftmost;
+
unsigned long rt_nr_boosted;
struct rq *rq;
@@ -7127,6 +7136,9 @@ static void init_rt_rq(struct rt_rq *rt_
rt_rq->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
#ifdef CONFIG_FAIR_GROUP_SCHED
+ rt_rq->rt_rq_type = RT_RQ_PRIO;
+ rt_rq->deadlines = RB_ROOT;
+ rt_rq->rb_leftmost = NULL;
rt_rq->rt_nr_boosted = 0;
rt_rq->rq = rq;
#endif
@@ -7196,6 +7208,7 @@ void __init sched_init(void)
&per_cpu(init_cfs_rq, i),
&per_cpu(init_sched_entity, i), i, 1);
+ rq->rt.rt_rq_type = RT_RQ_EDF;
init_task_group.rt_ratio = sysctl_sched_rt_ratio; /* XXX */
init_task_group.rt_period =
ns_to_ktime(sysctl_sched_rt_period * NSEC_PER_USEC);
Index: linux-2.6/kernel/sched_rt.c
===================================================================
--- linux-2.6.orig/kernel/sched_rt.c
+++ linux-2.6/kernel/sched_rt.c
@@ -138,6 +138,84 @@ static int rt_se_boosted(struct sched_rt
return p->prio != p->normal_prio;
}
+static inline u64 rt_deadline(struct sched_rt_entity *rt_se)
+{
+ struct rt_rq *group_rq = group_rt_rq(rt_se);
+
+ BUG_ON(!group_rq);
+ return ktime_to_ns(group_rq->rt_period_timer.expires);
+}
+
+static void enqueue_rt_deadline(struct sched_rt_entity *rt_se)
+{
+ struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
+ struct rb_node **link;
+ struct rb_node *parent;
+ struct sched_rt_entity *entry;
+ u64 deadline;
+ int leftmost = 1;
+
+ if (rt_rq->rt_rq_type != RT_RQ_EDF)
+ return;
+
+ link = &rt_rq->deadlines.rb_node;
+ parent = NULL;
+ deadline = rt_deadline(rt_se);
+
+ while (*link) {
+ parent = *link;
+ entry = rb_entry(parent, struct sched_rt_entity, run_node);
+
+ if (deadline < rt_deadline(entry)) {
+ link = &parent->rb_left;
+ } else {
+ link = &parent->rb_right;
+ leftmost = 0;
+ }
+ }
+
+ if (leftmost)
+ rt_rq->rb_leftmost = &rt_se->run_node;
+
+ rb_link_node(&rt_se->run_node, parent, link);
+ rb_insert_color(&rt_se->run_node, &rt_rq->deadlines);
+}
+
+static void dequeue_rt_deadline(struct sched_rt_entity *rt_se)
+{
+ struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
+
+ if (rt_rq->rt_rq_type != RT_RQ_EDF)
+ return;
+
+ if (rt_rq->rb_leftmost == &rt_se->run_node)
+ rt_rq->rb_leftmost = rb_next(&rt_se->run_node);
+
+ rb_erase(&rt_se->run_node, &rt_rq->deadlines);
+}
+
+static void requeue_rt_deadline(struct rt_rq *rt_rq)
+{
+ struct sched_rt_entity *rt_se = rt_rq->rt_se;
+
+ BUG_ON(!rt_se);
+ if (on_rt_rq(rt_se)) {
+ dequeue_rt_deadline(rt_se);
+ enqueue_rt_deadline(rt_se);
+ }
+}
+
+static struct sched_rt_entity *next_rt_deadline(struct rt_rq *rt_rq)
+{
+ if (rt_rq->rt_rq_type != RT_RQ_EDF)
+ return NULL;
+
+ if (!rt_rq->rb_leftmost)
+ return NULL;
+
+ return rb_entry(rt_rq->rb_leftmost, struct sched_rt_entity, run_node);
+}
+
#else
static inline unsigned int sched_rt_ratio(struct rt_rq *rt_rq)
@@ -191,6 +269,23 @@ static inline int rt_rq_throttled(struct
{
return rt_rq->rt_throttled;
}
+
+static inline void enqueue_rt_deadline(struct sched_rt_entity *rt_se)
+{
+}
+
+static inline void dequeue_rt_deadline(struct sched_rt_entity *rt_se)
+{
+}
+
+static inline void requeue_rt_deadline(struct rt_rq *rt_rq)
+{
+}
+
+static inline struct sched_rt_entity *next_rt_deadline(struct rt_rq *rt_rq)
+{
+ return NULL;
+}
#endif
static inline int rt_se_prio(struct sched_rt_entity *rt_se)
@@ -254,12 +349,13 @@ static enum hrtimer_restart sched_rt_per
WARN_ON(smp_processor_id() != cpu_of(rq));
WARN_ON(!in_irq());
+ hrtimer_forward(timer, now, sched_rt_period(rt_rq));
+
spin_lock(&rq->lock);
+ requeue_rt_deadline(rt_rq);
update_sched_rt_period(rt_rq);
spin_unlock(&rq->lock);
- hrtimer_forward(timer, now, sched_rt_period(rt_rq));
-
return HRTIMER_RESTART;
}
@@ -283,6 +379,8 @@ static void sched_rt_period_start(struct
if (hrtimer_active(&rt_rq->rt_period_timer))
break;
}
+
+ requeue_rt_deadline(rt_rq);
}
static void sched_rt_period_stop(struct rt_rq *rt_rq)
@@ -393,6 +491,8 @@ static void enqueue_rt_entity(struct sch
list_add_tail(&rt_se->run_list, array->queue + rt_se_prio(rt_se));
__set_bit(rt_se_prio(rt_se), array->bitmap);
+ enqueue_rt_deadline(rt_se);
+
inc_rt_tasks(rt_se, rt_rq);
}
@@ -405,6 +505,8 @@ static void dequeue_rt_entity(struct sch
if (list_empty(array->queue + rt_se_prio(rt_se)))
__clear_bit(rt_se_prio(rt_se), array->bitmap);
+ dequeue_rt_deadline(rt_se);
+
dec_rt_tasks(rt_se, rt_rq);
}
@@ -552,8 +654,7 @@ static void check_preempt_curr_rt(struct
resched_task(rq->curr);
}
-static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
- struct rt_rq *rt_rq)
+static struct sched_rt_entity *pick_next_rt_entity(struct rt_rq *rt_rq)
{
struct rt_prio_array *array = &rt_rq->active;
struct sched_rt_entity *next = NULL;
@@ -563,6 +664,10 @@ static struct sched_rt_entity *pick_next
if (rt_rq_throttled(rt_rq))
goto out;
+ next = next_rt_deadline(rt_rq);
+ if (next)
+ goto out;
+
idx = sched_find_first_bit(array->bitmap);
BUG_ON(idx >= MAX_RT_PRIO);
@@ -588,7 +693,7 @@ static struct task_struct *pick_next_tas
return NULL;
do {
- rt_se = pick_next_rt_entity(rq, rt_rq);
+ rt_se = pick_next_rt_entity(rt_rq);
if (unlikely(!rt_se))
goto retry;
rt_rq = group_rt_rq(rt_se);
--
^ permalink raw reply [flat|nested] 31+ messages in thread
* [PATCH 11/11] sched: rt-group: interface
2008-01-06 16:11 [PATCH 00/11] another rt group sched update Peter Zijlstra
` (9 preceding siblings ...)
2008-01-06 16:11 ` [PATCH 10/11] sched: rt-group: EDF Peter Zijlstra
@ 2008-01-06 16:11 ` Peter Zijlstra
2008-01-07 10:51 ` [PATCH 00/11] another rt group sched update Peter Zijlstra
2008-01-07 11:17 ` [PATCH 00/11] another rt group sched update Ingo Molnar
12 siblings, 0 replies; 31+ messages in thread
From: Peter Zijlstra @ 2008-01-06 16:11 UTC (permalink / raw)
To: LKML
Cc: Ingo Molnar, Balbir Singh, dmitry.adamushko, Srivatsa Vaddagiri,
Steven Rostedt, Gregory Haskins, Peter Zijlstra, Thomas Gleixner
[-- Attachment #1: sched-rt-group-interface.patch --]
[-- Type: text/plain, Size: 11501 bytes --]
Change the rt_ratio interface to rt_runtime_us, to match rt_period_us.
This avoids picking a granularity for the ratio.
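[ Illustration only: a small user-space sketch of the schedulability check
  this interface implies -- each group's runtime/period is summed as a
  fixed-point fraction scaled by 2^16, mirroring the to_ratio() helper
  introduced below. The group numbers are invented. ]

#include <stdio.h>
#include <stdint.h>

/* runtime/period as a fixed-point fraction (scaled by 2^16), like to_ratio() */
static uint64_t to_ratio(uint64_t period_us, uint64_t runtime_us)
{
        return (runtime_us << 16) / period_us;
}

int main(void)
{
        /* global limit: 950ms runtime every 1s (the new defaults) */
        uint64_t global = to_ratio(1000000, 950000);

        /* two hypothetical groups */
        uint64_t total = to_ratio(40000, 32000)         /* graphics: 32ms/40ms */
                       + to_ratio(5000, 150);           /* audio:   150us/5ms  */

        printf("schedulable: %s\n", total < global ? "yes" : "no");
        return 0;
}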
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
include/linux/sched.h | 8 +++
kernel/sched.c | 116 ++++++++++++++++++++++++++++++++++----------------
kernel/sched_rt.c | 42 +++++++-----------
kernel/sysctl.c | 4 -
4 files changed, 106 insertions(+), 64 deletions(-)
Index: linux-2.6/include/linux/sched.h
===================================================================
--- linux-2.6.orig/include/linux/sched.h
+++ linux-2.6/include/linux/sched.h
@@ -1518,7 +1518,7 @@ extern unsigned int sysctl_sched_feature
extern unsigned int sysctl_sched_migration_cost;
extern unsigned int sysctl_sched_nr_migrate;
extern unsigned int sysctl_sched_rt_period;
-extern unsigned int sysctl_sched_rt_ratio;
+extern unsigned int sysctl_sched_rt_runtime;
#if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP)
extern unsigned int sysctl_sched_min_bal_int_shares;
extern unsigned int sysctl_sched_max_bal_int_shares;
@@ -2014,6 +2014,12 @@ extern void sched_destroy_group(struct t
extern void sched_move_task(struct task_struct *tsk);
extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
extern unsigned long sched_group_shares(struct task_group *tg);
+extern int sched_group_set_rt_runtime(struct task_group *tg,
+ unsigned long rt_runtime_us);
+extern unsigned long sched_group_rt_runtime(struct task_group *tg);
+extern int sched_group_set_rt_period(struct task_group *tg,
+ unsigned long rt_runtime_us);
+extern unsigned long sched_group_rt_period(struct task_group *tg);
#endif
Index: linux-2.6/kernel/sched.c
===================================================================
--- linux-2.6.orig/kernel/sched.c
+++ linux-2.6/kernel/sched.c
@@ -176,7 +176,7 @@ struct task_group {
struct sched_rt_entity **rt_se;
struct rt_rq **rt_rq;
- unsigned int rt_ratio;
+ u64 rt_runtime;
ktime_t rt_period;
/*
@@ -646,19 +646,16 @@ const_debug unsigned int sysctl_sched_fe
const_debug unsigned int sysctl_sched_nr_migrate = 32;
/*
- * period over which we measure -rt task cpu usage in us.
+ * period over which we measure rt task cpu usage in us.
* default: 1s
*/
const_debug unsigned int sysctl_sched_rt_period = 1000000;
-#define SCHED_RT_FRAC_SHIFT 16
-#define SCHED_RT_FRAC (1UL << SCHED_RT_FRAC_SHIFT)
-
/*
- * ratio of time -rt tasks may consume.
- * default: 95%
+ * part of the period that we allow rt tasks to run in us.
+ * default: 0.95s
*/
-const_debug unsigned int sysctl_sched_rt_ratio = 62259;
+const_debug unsigned int sysctl_sched_rt_runtime = 950000;
/*
* For kernel-internal use: high-speed (but slightly incorrect) per-cpu
@@ -7209,7 +7206,8 @@ void __init sched_init(void)
&per_cpu(init_sched_entity, i), i, 1);
rq->rt.rt_rq_type = RT_RQ_EDF;
- init_task_group.rt_ratio = sysctl_sched_rt_ratio; /* XXX */
+ init_task_group.rt_runtime =
+ sysctl_sched_rt_runtime * NSEC_PER_USEC;
init_task_group.rt_period =
ns_to_ktime(sysctl_sched_rt_period * NSEC_PER_USEC);
INIT_LIST_HEAD(&rq->leaf_rt_rq_list);
@@ -7606,7 +7604,7 @@ struct task_group *sched_create_group(vo
goto err;
tg->shares = NICE_0_LOAD;
- tg->rt_ratio = 0; /* XXX */
+ tg->rt_runtime = 0; /* XXX */
tg->rt_period = ns_to_ktime(sysctl_sched_rt_period * NSEC_PER_USEC);
for_each_possible_cpu(i) {
@@ -7801,41 +7799,87 @@ unsigned long sched_group_shares(struct
}
/*
- * Ensure the total rt_ratio <= sysctl_sched_rt_ratio
+ * Ensure that the real time constraints are schedulable.
*/
-int sched_group_set_rt_ratio(struct task_group *tg, unsigned long rt_ratio)
+static DEFINE_MUTEX(rt_constraints_mutex);
+
+static unsigned long to_ratio(u64 period, u64 runtime)
+{
+ u64 r = runtime * (1ULL << 16);
+ do_div(r, period);
+ return r;
+}
+
+static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
{
struct task_group *tgi;
unsigned long total = 0;
+ unsigned long global_ratio =
+ to_ratio(sysctl_sched_rt_period, sysctl_sched_rt_runtime);
rcu_read_lock();
- list_for_each_entry_rcu(tgi, &task_groups, list)
- total += tgi->rt_ratio;
+ list_for_each_entry_rcu(tgi, &task_groups, list) {
+ if (tgi == tg)
+ continue;
+
+ total += to_ratio(ktime_to_ns(tgi->rt_period), tgi->rt_runtime);
+ }
rcu_read_unlock();
- if (total + rt_ratio - tg->rt_ratio > sysctl_sched_rt_ratio)
- return -EINVAL;
+ return total + to_ratio(period, runtime) < global_ratio;
+}
- tg->rt_ratio = rt_ratio;
- return 0;
+int sched_group_set_rt_runtime(struct task_group *tg,
+ unsigned long rt_runtime_us)
+{
+ u64 rt_runtime = (u64)rt_runtime_us * NSEC_PER_USEC;
+ int err = 0;
+
+ mutex_lock(&rt_constraints_mutex);
+ if (!__rt_schedulable(tg, ktime_to_ns(tg->rt_period), rt_runtime)) {
+ err = -EINVAL;
+ goto unlock;
+ }
+
+ tg->rt_runtime = rt_runtime;
+ unlock:
+ mutex_unlock(&rt_constraints_mutex);
+
+ return err;
}
-unsigned long sched_group_rt_ratio(struct task_group *tg)
+unsigned long sched_group_rt_runtime(struct task_group *tg)
{
- return tg->rt_ratio;
+ u64 rt_runtime_us = tg->rt_runtime;
+
+ do_div(rt_runtime_us, NSEC_PER_USEC);
+ return rt_runtime_us;
}
-int sched_group_set_rt_period(struct task_group *tg, unsigned long rt_period)
+int sched_group_set_rt_period(struct task_group *tg, unsigned long rt_period_us)
{
- tg->rt_period = ns_to_ktime((u64)rt_period * NSEC_PER_USEC);
- return 0;
+ u64 rt_period = (u64)rt_period_us * NSEC_PER_USEC;
+ int err = 0;
+
+ mutex_lock(&rt_constraints_mutex);
+ if (!__rt_schedulable(tg, rt_period, tg->rt_runtime)) {
+ err = -EINVAL;
+ goto unlock;
+ }
+
+ tg->rt_period = ns_to_ktime(rt_period);
+ unlock:
+ mutex_unlock(&rt_constraints_mutex);
+
+ return err;
}
unsigned long sched_group_rt_period(struct task_group *tg)
{
- u64 ns = ktime_to_ns(tg->rt_period);
- do_div(ns, NSEC_PER_USEC);
- return ns;
+ u64 rt_period_us = ktime_to_ns(tg->rt_period);
+
+ do_div(rt_period_us, NSEC_PER_USEC);
+ return rt_period_us;
}
#endif /* CONFIG_FAIR_GROUP_SCHED */
@@ -7913,17 +7957,15 @@ static u64 cpu_shares_read_uint(struct c
return (u64) tg->shares;
}
-static int cpu_rt_ratio_write_uint(struct cgroup *cgrp, struct cftype *cftype,
- u64 rt_ratio_val)
+static int cpu_rt_runtime_write_uint(struct cgroup *cgrp, struct cftype *cftype,
+ u64 rt_runtime_val)
{
- return sched_group_set_rt_ratio(cgroup_tg(cgrp), rt_ratio_val);
+ return sched_group_set_rt_runtime(cgroup_tg(cgrp), rt_runtime_val);
}
-static u64 cpu_rt_ratio_read_uint(struct cgroup *cgrp, struct cftype *cft)
+static u64 cpu_rt_runtime_read_uint(struct cgroup *cgrp, struct cftype *cft)
{
- struct task_group *tg = cgroup_tg(cgrp);
-
- return (u64) tg->rt_ratio;
+ return sched_group_rt_runtime(cgroup_tg(cgrp));
}
static int cpu_rt_period_write_uint(struct cgroup *cgrp, struct cftype *cftype,
@@ -7934,7 +7976,7 @@ static int cpu_rt_period_write_uint(stru
static u64 cpu_rt_period_read_uint(struct cgroup *cgrp, struct cftype *cft)
{
- return (u64) sched_group_rt_period(cgroup_tg(cgrp));
+ return sched_group_rt_period(cgroup_tg(cgrp));
}
static struct cftype cpu_files[] = {
@@ -7944,9 +7986,9 @@ static struct cftype cpu_files[] = {
.write_uint = cpu_shares_write_uint,
},
{
- .name = "rt_ratio",
- .read_uint = cpu_rt_ratio_read_uint,
- .write_uint = cpu_rt_ratio_write_uint,
+ .name = "rt_runtime_us",
+ .read_uint = cpu_rt_runtime_read_uint,
+ .write_uint = cpu_rt_runtime_write_uint,
},
{
.name = "rt_period_us",
Index: linux-2.6/kernel/sched_rt.c
===================================================================
--- linux-2.6.orig/kernel/sched_rt.c
+++ linux-2.6/kernel/sched_rt.c
@@ -57,12 +57,12 @@ static inline int on_rt_rq(struct sched_
#ifdef CONFIG_FAIR_GROUP_SCHED
-static inline unsigned int sched_rt_ratio(struct rt_rq *rt_rq)
+static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
{
if (!rt_rq->tg)
- return SCHED_RT_FRAC;
+ return 0;
- return rt_rq->tg->rt_ratio;
+ return rt_rq->tg->rt_runtime;
}
static inline ktime_t sched_rt_period(struct rt_rq *rt_rq)
@@ -100,7 +100,7 @@ static inline struct rt_rq *group_rt_rq(
static void enqueue_rt_entity(struct sched_rt_entity *rt_se);
static void dequeue_rt_entity(struct sched_rt_entity *rt_se);
-static void sched_rt_ratio_enqueue(struct rt_rq *rt_rq)
+static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
{
struct sched_rt_entity *rt_se = rt_rq->rt_se;
@@ -113,7 +113,7 @@ static void sched_rt_ratio_enqueue(struc
}
}
-static void sched_rt_ratio_dequeue(struct rt_rq *rt_rq)
+static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
{
struct sched_rt_entity *rt_se = rt_rq->rt_se;
@@ -218,9 +218,9 @@ static struct sched_rt_entity *next_rt_d
#else
-static inline unsigned int sched_rt_ratio(struct rt_rq *rt_rq)
+static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
{
- return sysctl_sched_rt_ratio;
+ return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
}
static inline ktime_t sched_rt_period(struct rt_rq *rt_rq)
@@ -257,11 +257,11 @@ static inline struct rt_rq *group_rt_rq(
return NULL;
}
-static inline void sched_rt_ratio_enqueue(struct rt_rq *rt_rq)
+static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
{
}
-static inline void sched_rt_ratio_dequeue(struct rt_rq *rt_rq)
+static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
{
}
@@ -300,25 +300,21 @@ static inline int rt_se_prio(struct sche
return rt_task_of(rt_se)->prio;
}
-static int sched_rt_ratio_exceeded(struct rt_rq *rt_rq)
+static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
{
- unsigned int rt_ratio = sched_rt_ratio(rt_rq);
- u64 period, ratio;
+ u64 runtime = sched_rt_runtime(rt_rq);
- if (rt_ratio == SCHED_RT_FRAC)
+ if (!runtime)
goto out;
if (rt_rq->rt_throttled)
goto out;
- period = sched_rt_period_ns(rt_rq);
- ratio = (period * rt_ratio) >> SCHED_RT_FRAC_SHIFT;
-
- if (rt_rq->rt_time > ratio) {
+ if (rt_rq->rt_time > runtime) {
rt_rq->rt_throttled = 1;
if (rt_rq_throttled(rt_rq)) {
WARN_ON(!hrtimer_active(&rt_rq->rt_period_timer));
- sched_rt_ratio_dequeue(rt_rq);
+ sched_rt_rq_dequeue(rt_rq);
}
}
@@ -328,14 +324,12 @@ out:
static void update_sched_rt_period(struct rt_rq *rt_rq)
{
- u64 period = sched_rt_period_ns(rt_rq);
- unsigned long rt_ratio = sched_rt_ratio(rt_rq);
- u64 ratio = (period * rt_ratio) >> SCHED_RT_FRAC_SHIFT;
+ u64 runtime = sched_rt_runtime(rt_rq);
- rt_rq->rt_time -= min(rt_rq->rt_time, ratio);
+ rt_rq->rt_time -= min(rt_rq->rt_time, runtime);
if (rt_rq->rt_throttled) {
rt_rq->rt_throttled = 0;
- sched_rt_ratio_enqueue(rt_rq);
+ sched_rt_rq_enqueue(rt_rq);
}
}
@@ -412,7 +406,7 @@ static void update_curr_rt(struct rq *rq
cpuacct_charge(curr, delta_exec);
rt_rq->rt_time += delta_exec;
- if (sched_rt_ratio_exceeded(rt_rq))
+ if (sched_rt_runtime_exceeded(rt_rq))
resched_task(curr);
}
Index: linux-2.6/kernel/sysctl.c
===================================================================
--- linux-2.6.orig/kernel/sysctl.c
+++ linux-2.6/kernel/sysctl.c
@@ -319,8 +319,8 @@ static struct ctl_table kern_table[] = {
},
{
.ctl_name = CTL_UNNUMBERED,
- .procname = "sched_rt_ratio",
- .data = &sysctl_sched_rt_ratio,
+ .procname = "sched_rt_runtime_us",
+ .data = &sysctl_sched_rt_runtime,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec,
--
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH 00/11] another rt group sched update
2008-01-06 16:11 [PATCH 00/11] another rt group sched update Peter Zijlstra
` (10 preceding siblings ...)
2008-01-06 16:11 ` [PATCH 11/11] sched: rt-group: interface Peter Zijlstra
@ 2008-01-07 10:51 ` Peter Zijlstra
2008-01-07 11:24 ` Peter Zijlstra
2008-01-07 12:23 ` Srivatsa Vaddagiri
2008-01-07 11:17 ` [PATCH 00/11] another rt group sched update Ingo Molnar
12 siblings, 2 replies; 31+ messages in thread
From: Peter Zijlstra @ 2008-01-07 10:51 UTC (permalink / raw)
To: LKML
Cc: Ingo Molnar, Balbir Singh, dmitry.adamushko, Srivatsa Vaddagiri,
Steven Rostedt, Gregory Haskins, Thomas Gleixner
[-- Attachment #1: Type: text/plain, Size: 1037 bytes --]
The list of open points and issues for this work:
- review/testing
- handle the PI case better
The only thing I can come up with currently is to basically have two
priority arrays, one for boosted and one for non-boosted tasks, and
normally run the highest of either array, but in the case of a
throttled group, only pick from the boosted array (rough sketch below,
after this list).
Not sure I like that for its space overhead, Steven?
- I occasionally see a weird lockup on iterating the task_groups list
on smp machines. - I failed to see anything wrong, but hey, this
stack of used brown paper bags is steadily growing.
- figure out what to do for UID based group scheduling; the current
implementation makes it impossible for !root users to execute
real time tasks, because it sets rt_runtime_us to 0 and provides
no way to change it.
Srivatsa, what happened to the per uid weight patches? Perhaps we
can extend that interface to allow changing this.
- I guess documentation needs to be written ;-)
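[ Rough, purely illustrative user-space sketch of the two-array idea from
  the PI point above; the types are invented stand-ins (the real
  rt_prio_array is a bitmap plus per-priority task lists). ]

#include <stdio.h>

#define MAX_PRIO 100

struct prio_array {
        int nr_queued[MAX_PRIO];        /* stand-in for the per-prio task lists */
};

struct rt_group {
        struct prio_array boosted;      /* tasks boosted via PI */
        struct prio_array normal;       /* everything else */
        int throttled;
};

/*
 * Return the best (lowest numeric) priority to run, or -1 if none.
 * A throttled group only considers the boosted array, so PI-boosted
 * tasks keep running even while the group is over its budget.
 */
static int pick_prio(struct rt_group *grp)
{
        int prio;

        for (prio = 0; prio < MAX_PRIO; prio++) {
                if (grp->boosted.nr_queued[prio])
                        return prio;
                if (!grp->throttled && grp->normal.nr_queued[prio])
                        return prio;
        }
        return -1;
}

int main(void)
{
        struct rt_group g = { .throttled = 1 };

        g.normal.nr_queued[10] = 1;     /* normal task, ignored while throttled */
        g.boosted.nr_queued[20] = 1;    /* boosted task, still runnable */
        printf("picked prio: %d\n", pick_prio(&g));
        return 0;
}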
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH 00/11] another rt group sched update
2008-01-06 16:11 [PATCH 00/11] another rt group sched update Peter Zijlstra
` (11 preceding siblings ...)
2008-01-07 10:51 ` [PATCH 00/11] another rt group sched update Peter Zijlstra
@ 2008-01-07 11:17 ` Ingo Molnar
12 siblings, 0 replies; 31+ messages in thread
From: Ingo Molnar @ 2008-01-07 11:17 UTC (permalink / raw)
To: Peter Zijlstra
Cc: LKML, Balbir Singh, dmitry.adamushko, Srivatsa Vaddagiri,
Steven Rostedt, Gregory Haskins, Thomas Gleixner
* Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> this time compile tested on all 16 combinations of:
>
> CONFIG_SMP
> CONFIG_FAIR_GROUP_SCHED
> CONFIG_HIGH_RES_TIMERS
> CONFIG_NO_HZ
>
> ran some but not all combinations
thanks, applied. This is a really big step forwards in terms of making
RT task CPU usage more flexible and more manageable.
Ingo
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH 00/11] another rt group sched update
2008-01-07 10:51 ` [PATCH 00/11] another rt group sched update Peter Zijlstra
@ 2008-01-07 11:24 ` Peter Zijlstra
2008-01-07 12:23 ` Srivatsa Vaddagiri
1 sibling, 0 replies; 31+ messages in thread
From: Peter Zijlstra @ 2008-01-07 11:24 UTC (permalink / raw)
To: LKML
Cc: Ingo Molnar, Balbir Singh, dmitry.adamushko, Srivatsa Vaddagiri,
Steven Rostedt, Gregory Haskins, Thomas Gleixner
On Mon, 2008-01-07 at 11:51 +0100, Peter Zijlstra wrote:
> - I occasionally see a weird lockup on iterating the task_groups list
> on smp machines. - I failed to see anything wrong, but hey, this
> stack of used brown paper bags is steadily growing.
D'oh
---
Don't add a task_group to the task_groups list for each cpu; there is
only a single task_group.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
Index: linux-2.6/kernel/sched.c
===================================================================
--- linux-2.6.orig/kernel/sched.c
+++ linux-2.6/kernel/sched.c
@@ -7175,6 +7175,10 @@ void __init sched_init(void)
init_defrootdomain();
#endif
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ list_add(&init_task_group.list, &task_groups);
+#endif
+
for_each_possible_cpu(i) {
struct rq *rq;
@@ -7201,8 +7205,6 @@ void __init sched_init(void)
init_tg_rt_entry(rq, &init_task_group,
&per_cpu(init_rt_rq, i),
&per_cpu(init_sched_rt_entity, i), i, 1);
-
- list_add(&init_task_group.list, &task_groups);
#endif
for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH 04/11] hrtimer: fixup the HRTIMER_CB_IRQSAFE_NO_SOFTIRQ fallback
2008-01-06 16:11 ` [PATCH 04/11] hrtimer: fixup the HRTIMER_CB_IRQSAFE_NO_SOFTIRQ fallback Peter Zijlstra
@ 2008-01-07 11:56 ` Peter Zijlstra
2008-01-08 11:16 ` Ingo Molnar
0 siblings, 1 reply; 31+ messages in thread
From: Peter Zijlstra @ 2008-01-07 11:56 UTC (permalink / raw)
To: LKML
Cc: Ingo Molnar, Balbir Singh, dmitry.adamushko, Srivatsa Vaddagiri,
Steven Rostedt, Gregory Haskins, Thomas Gleixner
[-- Attachment #1: Type: text/plain, Size: 2970 bytes --]
On Sun, 2008-01-06 at 17:11 +0100, Peter Zijlstra wrote:
> plain text document attachment (hrtimer-fallback.patch)
I know I should have boot tested more combinations :-(
Please fold this into the patch.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
kernel/hrtimer.c | 38 ++++++++++++++++++--------------------
1 file changed, 18 insertions(+), 20 deletions(-)
Index: linux-2.6/kernel/hrtimer.c
===================================================================
--- linux-2.6.orig/kernel/hrtimer.c
+++ linux-2.6/kernel/hrtimer.c
@@ -325,6 +325,22 @@ unsigned long ktime_divns(const ktime_t
}
#endif /* BITS_PER_LONG >= 64 */
+/*
+ * Check, whether the timer is on the callback pending list
+ */
+static inline int hrtimer_cb_pending(const struct hrtimer *timer)
+{
+ return timer->state & HRTIMER_STATE_PENDING;
+}
+
+/*
+ * Remove a timer from the callback pending list
+ */
+static inline void hrtimer_remove_cb_pending(struct hrtimer *timer)
+{
+ list_del_init(&timer->cb_entry);
+}
+
/* High resolution timer related functions */
#ifdef CONFIG_HIGH_RES_TIMERS
@@ -494,29 +510,12 @@ void hres_timers_resume(void)
}
/*
- * Check, whether the timer is on the callback pending list
- */
-static inline int hrtimer_cb_pending(const struct hrtimer *timer)
-{
- return timer->state & HRTIMER_STATE_PENDING;
-}
-
-/*
- * Remove a timer from the callback pending list
- */
-static inline void hrtimer_remove_cb_pending(struct hrtimer *timer)
-{
- list_del_init(&timer->cb_entry);
-}
-
-/*
* Initialize the high resolution related parts of cpu_base
*/
static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base)
{
base->expires_next.tv64 = KTIME_MAX;
base->hres_active = 0;
- INIT_LIST_HEAD(&base->cb_pending);
}
/*
@@ -524,7 +523,6 @@ static inline void hrtimer_init_hres(str
*/
static inline void hrtimer_init_timer_hres(struct hrtimer *timer)
{
- INIT_LIST_HEAD(&timer->cb_entry);
}
/*
@@ -618,8 +616,6 @@ static inline int hrtimer_enqueue_reprog
{
return 0;
}
-static inline int hrtimer_cb_pending(struct hrtimer *timer) { return 0; }
-static inline void hrtimer_remove_cb_pending(struct hrtimer *timer) { }
static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { }
static inline int hrtimer_reprogram(struct hrtimer *timer,
@@ -1006,6 +1002,7 @@ void hrtimer_init(struct hrtimer *timer,
clock_id = CLOCK_MONOTONIC;
timer->base = &cpu_base->clock_base[clock_id];
+ INIT_LIST_HEAD(&timer->cb_entry);
hrtimer_init_timer_hres(timer);
#ifdef CONFIG_TIMER_STATS
@@ -1419,6 +1416,7 @@ static void __devinit init_hrtimers_cpu(
for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++)
cpu_base->clock_base[i].cpu_base = cpu_base;
+ INIT_LIST_HEAD(&cpu_base->cb_pending);
hrtimer_init_hres(cpu_base);
}
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH 00/11] another rt group sched update
2008-01-07 12:23 ` Srivatsa Vaddagiri
@ 2008-01-07 12:12 ` Peter Zijlstra
2008-01-07 16:57 ` [PATCH 12/11] sched: rt-group: uid-group interface Peter Zijlstra
1 sibling, 0 replies; 31+ messages in thread
From: Peter Zijlstra @ 2008-01-07 12:12 UTC (permalink / raw)
To: vatsa
Cc: LKML, Ingo Molnar, Balbir Singh, dmitry.adamushko,
Steven Rostedt, Gregory Haskins, Thomas Gleixner
[-- Attachment #1: Type: text/plain, Size: 841 bytes --]
On Mon, 2008-01-07 at 17:53 +0530, Srivatsa Vaddagiri wrote:
> On Mon, Jan 07, 2008 at 11:51:20AM +0100, Peter Zijlstra wrote:
> > - figure out what to do for UID based group scheduling; the current
> > implementation makes it impossible for !root users to execute
> > real time tasks, because it sets rt_runtime_us to 0 and provides
> > no way to change it.
> >
> > Srivatsa, what happened to the per uid weight patches? Perhaps we
> > can extend that interface to allow changing this.
>
> Hi Peter,
> The sysfs interface for tweaking each user's share should be in
> mainline already (sysfs_create_file() in user_kobject_create()). This
> could be extended for your purpose, hopefully in a straightforward
> manner (you never know that with sysfs :(
Ah, I missed that going in. Thanks, I'll give it a go.
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH 00/11] another rt group sched update
2008-01-07 10:51 ` [PATCH 00/11] another rt group sched update Peter Zijlstra
2008-01-07 11:24 ` Peter Zijlstra
@ 2008-01-07 12:23 ` Srivatsa Vaddagiri
2008-01-07 12:12 ` Peter Zijlstra
2008-01-07 16:57 ` [PATCH 12/11] sched: rt-group: uid-group interface Peter Zijlstra
1 sibling, 2 replies; 31+ messages in thread
From: Srivatsa Vaddagiri @ 2008-01-07 12:23 UTC (permalink / raw)
To: Peter Zijlstra
Cc: LKML, Ingo Molnar, Balbir Singh, dmitry.adamushko,
Steven Rostedt, Gregory Haskins, Thomas Gleixner
On Mon, Jan 07, 2008 at 11:51:20AM +0100, Peter Zijlstra wrote:
> - figure out what to do for UID based group scheduling; the current
> implementation makes it impossible for !root users to execute
> real time tasks, because it sets rt_runtime_us to 0 and provides
> no way to change it.
>
> Srivatsa, what happened to the per uid weight patches? Perhaps we
> can extend that interface to allow changing this.
Hi Peter,
The sysfs interface for tweaking each user's share should be in
mainline already (sysfs_create_file() in user_kobject_create()). This
could be extended for your purpose, hopefully in a straightforward
manner (you never know that with sysfs :(
--
Regards,
vatsa
^ permalink raw reply [flat|nested] 31+ messages in thread
* [PATCH 12/11] sched: rt-group: uid-group interface
2008-01-07 12:23 ` Srivatsa Vaddagiri
2008-01-07 12:12 ` Peter Zijlstra
@ 2008-01-07 16:57 ` Peter Zijlstra
2008-01-08 10:33 ` Ingo Molnar
2008-01-08 10:57 ` Dhaval Giani
1 sibling, 2 replies; 31+ messages in thread
From: Peter Zijlstra @ 2008-01-07 16:57 UTC (permalink / raw)
To: vatsa
Cc: LKML, Ingo Molnar, Balbir Singh, dmitry.adamushko,
Steven Rostedt, Gregory Haskins, Thomas Gleixner
Subject: sched: rt-group: add uid-group interface
Extend the /sys/kernel/uids/<uid>/ interface to allow setting
the group's rt_period and rt_runtime.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
include/linux/sched.h | 4 +-
kernel/user.c | 93 +++++++++++++++++++++++++++++++++++++++++++-------
2 files changed, 84 insertions(+), 13 deletions(-)
Index: linux-2.6/include/linux/sched.h
===================================================================
--- linux-2.6.orig/include/linux/sched.h
+++ linux-2.6/include/linux/sched.h
@@ -559,7 +559,9 @@ struct user_struct {
struct task_group *tg;
#ifdef CONFIG_SYSFS
struct kset kset;
- struct subsys_attribute user_attr;
+ struct subsys_attribute share_attr;
+ struct subsys_attribute rt_period_attr;
+ struct subsys_attribute rt_runtime_attr;
struct work_struct work;
#endif
#endif
Index: linux-2.6/kernel/user.c
===================================================================
--- linux-2.6.orig/kernel/user.c
+++ linux-2.6/kernel/user.c
@@ -129,7 +129,7 @@ static inline void uids_mutex_unlock(voi
}
/* return cpu shares held by the user */
-static ssize_t cpu_shares_show(struct kset *kset, char *buffer)
+static ssize_t cpu_share_show(struct kset *kset, char *buffer)
{
struct user_struct *up = container_of(kset, struct user_struct, kset);
@@ -137,8 +137,8 @@ static ssize_t cpu_shares_show(struct ks
}
/* modify cpu shares held by the user */
-static ssize_t cpu_shares_store(struct kset *kset, const char *buffer,
- size_t size)
+static ssize_t cpu_share_store(struct kset *kset, const char *buffer,
+ size_t size)
{
struct user_struct *up = container_of(kset, struct user_struct, kset);
unsigned long shares;
@@ -151,12 +151,67 @@ static ssize_t cpu_shares_store(struct k
return (rc ? rc : size);
}
-static void user_attr_init(struct subsys_attribute *sa, char *name, int mode)
+static ssize_t cpu_rt_period_show(struct kset *kset, char *buffer)
{
- sa->attr.name = name;
- sa->attr.mode = mode;
- sa->show = cpu_shares_show;
- sa->store = cpu_shares_store;
+ struct user_struct *up = container_of(kset, struct user_struct, kset);
+
+ return sprintf(buffer, "%lu\n", sched_group_rt_period(up->tg));
+}
+
+static ssize_t cpu_rt_period_store(struct kset *kset, const char *buffer,
+ size_t size)
+{
+ struct user_struct *up = container_of(kset, struct user_struct, kset);
+ unsigned long rt_period_us;
+ int rc;
+
+ sscanf(buffer, "%lu", &rt_period_us);
+ rc = sched_group_set_rt_period(up->tg, rt_period_us);
+
+ return (rc ?: size);
+}
+
+static ssize_t cpu_rt_runtime_show(struct kset *kset, char *buffer)
+{
+ struct user_struct *up = container_of(kset, struct user_struct, kset);
+
+ return sprintf(buffer, "%lu\n", sched_group_rt_runtime(up->tg));
+}
+
+static ssize_t cpu_rt_runtime_store(struct kset *kset, const char *buffer,
+ size_t size)
+{
+ struct user_struct *up = container_of(kset, struct user_struct, kset);
+ unsigned long rt_runtime_us;
+ int rc;
+
+ sscanf(buffer, "%lu", &rt_runtime_us);
+ rc = sched_group_set_rt_runtime(up->tg, rt_runtime_us);
+
+ return (rc ?: size);
+}
+
+static void user_attr_init(struct user_struct *up)
+{
+ struct subsys_attribute *sa;
+
+ sa = &up->share_attr;
+ sa->attr.name = "cpu_share";
+ sa->attr.mode = 0644;
+ sa->show = cpu_share_show;
+ sa->store = cpu_share_store;
+
+ sa = &up->rt_period_attr;
+ sa->attr.name = "cpu_rt_period_us";
+ sa->attr.mode = 0644;
+ sa->show = cpu_rt_period_show;
+ sa->store = cpu_rt_period_store;
+
+ sa = &up->rt_runtime_attr;
+ sa->attr.name = "cpu_rt_runtime_us";
+ sa->attr.mode = 0644;
+ sa->show = cpu_rt_runtime_show;
+ sa->store = cpu_rt_runtime_store;
}
/* Create "/sys/kernel/uids/<uid>" directory and
@@ -172,15 +227,27 @@ static int user_kobject_create(struct us
kobj->parent = &uids_kobject; /* create under /sys/kernel/uids dir */
kobject_set_name(kobj, "%d", up->uid);
kset_init(kset);
- user_attr_init(&up->user_attr, "cpu_share", 0644);
+ user_attr_init(up);
error = kobject_add(kobj);
if (error)
goto done;
- error = sysfs_create_file(kobj, &up->user_attr.attr);
+ error = sysfs_create_file(kobj, &up->share_attr.attr);
+ if (error)
+ goto error1;
+ error = sysfs_create_file(kobj, &up->rt_period_attr.attr);
if (error)
- kobject_del(kobj);
+ goto error2;
+ error = sysfs_create_file(kobj, &up->rt_runtime_attr.attr);
+ if (error)
+ goto error3;
+
+ if (0) {
+error3: sysfs_remove_file(kobj, &up->rt_period_attr.attr);
+error2: sysfs_remove_file(kobj, &up->share_attr.attr);
+error1: kobject_del(kobj);
+ }
kobject_uevent(kobj, KOBJ_ADD);
@@ -238,7 +305,9 @@ static void remove_user_sysfs_dir(struct
if (!remove_user)
goto done;
- sysfs_remove_file(kobj, &up->user_attr.attr);
+ sysfs_remove_file(kobj, &up->share_attr.attr);
+ sysfs_remove_file(kobj, &up->rt_period_attr.attr);
+ sysfs_remove_file(kobj, &up->rt_runtime_attr.attr);
kobject_uevent(kobj, KOBJ_REMOVE);
kobject_del(kobj);
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH 12/11] sched: rt-group: uid-group interface
2008-01-07 16:57 ` [PATCH 12/11] sched: rt-group: uid-group interface Peter Zijlstra
@ 2008-01-08 10:33 ` Ingo Molnar
2008-01-08 10:57 ` Dhaval Giani
1 sibling, 0 replies; 31+ messages in thread
From: Ingo Molnar @ 2008-01-08 10:33 UTC (permalink / raw)
To: Peter Zijlstra
Cc: vatsa, LKML, Balbir Singh, dmitry.adamushko, Steven Rostedt,
Gregory Haskins, Thomas Gleixner
* Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> Subject: sched: rt-group: add uid-group interface
>
> Extend the /sys/kernel/uids/<uid>/ interface to allow setting the
> group's rt_period and rt_runtime.
thanks, applied.
Ingo
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH 12/11] sched: rt-group: uid-group interface
2008-01-07 16:57 ` [PATCH 12/11] sched: rt-group: uid-group interface Peter Zijlstra
2008-01-08 10:33 ` Ingo Molnar
@ 2008-01-08 10:57 ` Dhaval Giani
2008-01-08 11:02 ` Peter Zijlstra
2008-01-08 23:26 ` Peter Zijlstra
1 sibling, 2 replies; 31+ messages in thread
From: Dhaval Giani @ 2008-01-08 10:57 UTC (permalink / raw)
To: Peter Zijlstra
Cc: vatsa, LKML, Ingo Molnar, Balbir Singh, dmitry.adamushko,
Steven Rostedt, Gregory Haskins, Thomas Gleixner
On Mon, Jan 07, 2008 at 05:57:42PM +0100, Peter Zijlstra wrote:
>
> Subject: sched: rt-group: add uid-group interface
>
> Extend the /sys/kernel/uids/<uid>/ interface to allow setting
> the group's rt_period and rt_runtime.
>
Hi Peter,
Cool stuff! I will try out these patches and try to give you some
feedback.
One request though, could you please add some documentation to
Documentation/ABI/testing/sysfs-kernel-uids?
Thanks,
--
regards,
Dhaval
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH 12/11] sched: rt-group: uid-group interface
2008-01-08 10:57 ` Dhaval Giani
@ 2008-01-08 11:02 ` Peter Zijlstra
2008-01-08 14:31 ` Kay Sievers
2008-01-08 23:26 ` Peter Zijlstra
1 sibling, 1 reply; 31+ messages in thread
From: Peter Zijlstra @ 2008-01-08 11:02 UTC (permalink / raw)
To: Dhaval Giani
Cc: vatsa, LKML, Ingo Molnar, Balbir Singh, dmitry.adamushko,
Steven Rostedt, Gregory Haskins, Thomas Gleixner
[-- Attachment #1: Type: text/plain, Size: 643 bytes --]
On Tue, 2008-01-08 at 16:27 +0530, Dhaval Giani wrote:
> On Mon, Jan 07, 2008 at 05:57:42PM +0100, Peter Zijlstra wrote:
> >
> > Subject: sched: rt-group: add uid-group interface
> >
> > Extend the /sys/kernel/uids/<uid>/ interface to allow setting
> > the group's rt_period and rt_runtime.
> >
>
> Hi Peter,
>
> Cool stuff! I will try out these patches and try to give you some
> feedback.
Thanks, much appreciated!
> One request though, could you please add some documentation to
> Documentation/ABI/testing/sysfs-kernel-uids?
I already have documentation on the todo list, I'll add this file to
that list :-)
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH 04/11] hrtimer: fixup the HRTIMER_CB_IRQSAFE_NO_SOFTIRQ fallback
2008-01-07 11:56 ` Peter Zijlstra
@ 2008-01-08 11:16 ` Ingo Molnar
0 siblings, 0 replies; 31+ messages in thread
From: Ingo Molnar @ 2008-01-08 11:16 UTC (permalink / raw)
To: Peter Zijlstra
Cc: LKML, Balbir Singh, dmitry.adamushko, Srivatsa Vaddagiri,
Steven Rostedt, Gregory Haskins, Thomas Gleixner
* Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> On Sun, 2008-01-06 at 17:11 +0100, Peter Zijlstra wrote:
> > plain text document attachment (hrtimer-fallback.patch)
>
> I know I should have boot tested more combinations :-(
> Please fold this into the patch.
done.
Ingo
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH 12/11] sched: rt-group: uid-group interface
2008-01-08 11:02 ` Peter Zijlstra
@ 2008-01-08 14:31 ` Kay Sievers
2008-01-08 23:35 ` Peter Zijlstra
0 siblings, 1 reply; 31+ messages in thread
From: Kay Sievers @ 2008-01-08 14:31 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Dhaval Giani, vatsa, LKML, Ingo Molnar, Balbir Singh,
dmitry.adamushko, Steven Rostedt, Gregory Haskins,
Thomas Gleixner
On Jan 8, 2008 12:02 PM, Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
>
> On Tue, 2008-01-08 at 16:27 +0530, Dhaval Giani wrote:
> > On Mon, Jan 07, 2008 at 05:57:42PM +0100, Peter Zijlstra wrote:
> > >
> > > Subject: sched: rt-group: add uid-group interface
> > >
> > > Extend the /sys/kernel/uids/<uid>/ interface to allow setting
> > > the group's rt_period and rt_runtime.
> > >
> >
> > Hi Peter,
> >
> > Cool stuff! I will try out these patches and try to give you some
> > feedback.
>
> Thanks, much appreciated!
>
> > One request though, could you please add some documentation to
> > Documentation/ABI/testing/sysfs-kernel-uids?
>
> I already have documentation on the todo list, I'll add this file to
> that list :-)
Care to rebase the patch against -mm, we fixed the mixed-up usage
of ksets and kobjects, and this can not apply anymore:
http://git.kernel.org/?p=linux/kernel/git/gregkh/patches.git;a=blob;f=driver/struct-user_info-sysfs.patch;hb=HEAD
There is also an attribute group now which makes it much easier to add
new files.
Thanks,
Kay
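[ For reference, a minimal sketch of the attribute-group pattern Kay refers
  to, written against the reworked kobject/sysfs API; it is illustrative
  only and not the actual -mm conversion. The show/store bodies are dummies. ]

#include <linux/kobject.h>
#include <linux/sysfs.h>

static unsigned long example_val;       /* the real files would call the
                                         * sched_group_*() helpers instead */

static ssize_t example_show(struct kobject *kobj,
                            struct kobj_attribute *attr, char *buf)
{
        return sprintf(buf, "%lu\n", example_val);
}

static ssize_t example_store(struct kobject *kobj, struct kobj_attribute *attr,
                             const char *buf, size_t count)
{
        sscanf(buf, "%lu", &example_val);
        return count;
}

static struct kobj_attribute example_attr =
        __ATTR(cpu_rt_runtime_us, 0644, example_show, example_store);

static struct attribute *example_attrs[] = {
        &example_attr.attr,
        NULL,                           /* the array must be NULL terminated */
};

static struct attribute_group example_group = {
        .attrs = example_attrs,
};

/*
 * One call creates all the files, one removes them:
 *
 *      error = sysfs_create_group(kobj, &example_group);
 *      ...
 *      sysfs_remove_group(kobj, &example_group);
 */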
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH 12/11] sched: rt-group: uid-group interface
2008-01-08 10:57 ` Dhaval Giani
2008-01-08 11:02 ` Peter Zijlstra
@ 2008-01-08 23:26 ` Peter Zijlstra
1 sibling, 0 replies; 31+ messages in thread
From: Peter Zijlstra @ 2008-01-08 23:26 UTC (permalink / raw)
To: Dhaval Giani
Cc: vatsa, LKML, Ingo Molnar, Balbir Singh, dmitry.adamushko,
Steven Rostedt, Gregory Haskins, Thomas Gleixner
On Tue, 2008-01-08 at 16:27 +0530, Dhaval Giani wrote:
> On Mon, Jan 07, 2008 at 05:57:42PM +0100, Peter Zijlstra wrote:
> >
> > Subject: sched: rt-group: add uid-group interface
> >
> > Extend the /sys/kernel/uids/<uid>/ interface to allow setting
> > the group's rt_period and rt_runtime.
> >
>
> Hi Peter,
>
> Cool stuff! I will try out these patches and try to give you some
> feedback.
>
> One request though, could you please add some documentation to
> Documentation/ABI/testing/sysfs-kernel-uids?
compile tested only attempt at finalizing the interface
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
Index: linux-2.6/include/linux/sched.h
===================================================================
--- linux-2.6.orig/include/linux/sched.h
+++ linux-2.6/include/linux/sched.h
@@ -1519,8 +1519,6 @@ extern unsigned int sysctl_sched_child_r
extern unsigned int sysctl_sched_features;
extern unsigned int sysctl_sched_migration_cost;
extern unsigned int sysctl_sched_nr_migrate;
-extern unsigned int sysctl_sched_rt_period;
-extern unsigned int sysctl_sched_rt_runtime;
#if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP)
extern unsigned int sysctl_sched_min_bal_int_shares;
extern unsigned int sysctl_sched_max_bal_int_shares;
@@ -1530,6 +1528,8 @@ int sched_nr_latency_handler(struct ctl_
struct file *file, void __user *buffer, size_t *length,
loff_t *ppos);
#endif
+extern unsigned int sysctl_sched_rt_period;
+extern int sysctl_sched_rt_runtime;
extern unsigned int sysctl_sched_compat_yield;
@@ -2017,8 +2017,8 @@ extern void sched_move_task(struct task_
extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
extern unsigned long sched_group_shares(struct task_group *tg);
extern int sched_group_set_rt_runtime(struct task_group *tg,
- unsigned long rt_runtime_us);
-extern unsigned long sched_group_rt_runtime(struct task_group *tg);
+ long rt_runtime_us);
+extern long sched_group_rt_runtime(struct task_group *tg);
extern int sched_group_set_rt_period(struct task_group *tg,
unsigned long rt_runtime_us);
extern unsigned long sched_group_rt_period(struct task_group *tg);
Index: linux-2.6/kernel/sched.c
===================================================================
--- linux-2.6.orig/kernel/sched.c
+++ linux-2.6/kernel/sched.c
@@ -649,13 +649,18 @@ const_debug unsigned int sysctl_sched_nr
* period over which we measure rt task cpu usage in us.
* default: 1s
*/
-const_debug unsigned int sysctl_sched_rt_period = 1000000;
+unsigned int sysctl_sched_rt_period = 1000000;
/*
* part of the period that we allow rt tasks to run in us.
* default: 0.95s
*/
-const_debug unsigned int sysctl_sched_rt_runtime = 950000;
+int sysctl_sched_rt_runtime = 950000;
+
+/*
+ * single value that denotes runtime == period, ie unlimited time.
+ */
+#define RUNTIME_INF ((u64)~0ULL)
/*
* For kernel-internal use: high-speed (but slightly incorrect) per-cpu
@@ -7751,7 +7756,7 @@ struct task_group *sched_create_group(vo
goto err;
tg->shares = NICE_0_LOAD;
- tg->rt_runtime = 0; /* XXX */
+ tg->rt_runtime = 0;
tg->rt_period = ns_to_ktime(sysctl_sched_rt_period * NSEC_PER_USEC);
for_each_possible_cpu(i) {
@@ -7956,9 +7961,12 @@ static DEFINE_MUTEX(rt_constraints_mutex
static unsigned long to_ratio(u64 period, u64 runtime)
{
- u64 r = runtime * (1ULL << 16);
- do_div(r, period);
- return r;
+ if (runtime == RUNTIME_INF)
+ return 1ULL << 16;
+
+ runtime *= (1ULL << 16);
+ do_div(runtime, period);
+ return runtime;
}
static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
@@ -7980,12 +7988,15 @@ static int __rt_schedulable(struct task_
return total + to_ratio(period, runtime) < global_ratio;
}
-int sched_group_set_rt_runtime(struct task_group *tg,
- unsigned long rt_runtime_us)
+int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us)
{
- u64 rt_runtime = (u64)rt_runtime_us * NSEC_PER_USEC;
+ u64 rt_runtime;
int err = 0;
+ rt_runtime = (u64)rt_runtime_us * NSEC_PER_USEC;
+ if (rt_runtime_us == -1)
+ rt_runtime = RUNTIME_INF;
+
mutex_lock(&rt_constraints_mutex);
if (!__rt_schedulable(tg, ktime_to_ns(tg->rt_period), rt_runtime)) {
err = -EINVAL;
@@ -7999,10 +8010,14 @@ int sched_group_set_rt_runtime(struct ta
return err;
}
-unsigned long sched_group_rt_runtime(struct task_group *tg)
+long sched_group_rt_runtime(struct task_group *tg)
{
- u64 rt_runtime_us = tg->rt_runtime;
+ u64 rt_runtime_us;
+ if (tg->rt_runtime == RUNTIME_INF)
+ return -1;
+
+ rt_runtime_us = tg->rt_runtime;
do_div(rt_runtime_us, NSEC_PER_USEC);
return rt_runtime_us;
}
@@ -8108,15 +8123,49 @@ static u64 cpu_shares_read_uint(struct c
return (u64) tg->shares;
}
-static int cpu_rt_runtime_write_uint(struct cgroup *cgrp, struct cftype *cftype,
- u64 rt_runtime_val)
-{
- return sched_group_set_rt_runtime(cgroup_tg(cgrp), rt_runtime_val);
+static int cpu_rt_runtime_write(struct cgroup *cgrp, struct cftype *cft,
+ struct file *file,
+ const char __user *userbuf,
+ size_t nbytes, loff_t *unused_ppos)
+{
+ char buffer[64];
+ int retval = 0;
+ s64 val;
+ char *end;
+
+ if (!nbytes)
+ return -EINVAL;
+ if (nbytes >= sizeof(buffer))
+ return -E2BIG;
+ if (copy_from_user(buffer, userbuf, nbytes))
+ return -EFAULT;
+
+ buffer[nbytes] = 0; /* nul-terminate */
+
+ /* strip newline if necessary */
+ if (nbytes && (buffer[nbytes-1] == '\n'))
+ buffer[nbytes-1] = 0;
+ val = simple_strtoll(buffer, &end, 0);
+ if (*end)
+ return -EINVAL;
+
+ /* Pass to subsystem */
+ retval = sched_group_set_rt_runtime(cgroup_tg(cgrp), val);
+ if (!retval)
+ retval = nbytes;
+ return retval;
}
-static u64 cpu_rt_runtime_read_uint(struct cgroup *cgrp, struct cftype *cft)
-{
- return sched_group_rt_runtime(cgroup_tg(cgrp));
+static ssize_t cpu_rt_runtime_read(struct cgroup *cgrp, struct cftype *cft,
+ struct file *file,
+ char __user *buf, size_t nbytes,
+ loff_t *ppos)
+{
+ char tmp[64];
+ long val = sched_group_rt_runtime(cgroup_tg(cgrp));
+ int len = sprintf(tmp, "%ld\n", val);
+
+ return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
}
static int cpu_rt_period_write_uint(struct cgroup *cgrp, struct cftype *cftype,
@@ -8138,8 +8187,8 @@ static struct cftype cpu_files[] = {
},
{
.name = "rt_runtime_us",
- .read_uint = cpu_rt_runtime_read_uint,
- .write_uint = cpu_rt_runtime_write_uint,
+ .read = cpu_rt_runtime_read,
+ .write = cpu_rt_runtime_write,
},
{
.name = "rt_period_us",
Index: linux-2.6/kernel/sched_rt.c
===================================================================
--- linux-2.6.orig/kernel/sched_rt.c
+++ linux-2.6/kernel/sched_rt.c
@@ -60,7 +60,7 @@ static inline int on_rt_rq(struct sched_
static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
{
if (!rt_rq->tg)
- return 0;
+ return RUNTIME_INF;
return rt_rq->tg->rt_runtime;
}
@@ -220,6 +220,9 @@ static struct sched_rt_entity *next_rt_d
static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
{
+ if (sysctl_sched_rt_runtime == -1)
+ return RUNTIME_INF;
+
return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
}
@@ -304,7 +307,7 @@ static int sched_rt_runtime_exceeded(str
{
u64 runtime = sched_rt_runtime(rt_rq);
- if (!runtime)
+ if (runtime == RUNTIME_INF)
goto out;
if (rt_rq->rt_throttled)
Index: linux-2.6/kernel/sysctl.c
===================================================================
--- linux-2.6.orig/kernel/sysctl.c
+++ linux-2.6/kernel/sysctl.c
@@ -309,22 +309,6 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = &proc_dointvec,
},
- {
- .ctl_name = CTL_UNNUMBERED,
- .procname = "sched_rt_period_us",
- .data = &sysctl_sched_rt_period,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = CTL_UNNUMBERED,
- .procname = "sched_rt_runtime_us",
- .data = &sysctl_sched_rt_runtime,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec,
- },
#if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP)
{
.ctl_name = CTL_UNNUMBERED,
@@ -346,6 +330,22 @@ static struct ctl_table kern_table[] = {
#endif
{
.ctl_name = CTL_UNNUMBERED,
+ .procname = "sched_rt_period_us",
+ .data = &sysctl_sched_rt_period,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "sched_rt_runtime_us",
+ .data = &sysctl_sched_rt_runtime,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
.procname = "sched_compat_yield",
.data = &sysctl_sched_compat_yield,
.maxlen = sizeof(unsigned int),
Index: linux-2.6/kernel/user.c
===================================================================
--- linux-2.6.orig/kernel/user.c
+++ linux-2.6/kernel/user.c
@@ -175,17 +175,17 @@ static ssize_t cpu_rt_runtime_show(struc
{
struct user_struct *up = container_of(kset, struct user_struct, kset);
- return sprintf(buffer, "%lu\n", sched_group_rt_runtime(up->tg));
+ return sprintf(buffer, "%ld\n", sched_group_rt_runtime(up->tg));
}
static ssize_t cpu_rt_runtime_store(struct kset *kset, const char *buffer,
size_t size)
{
struct user_struct *up = container_of(kset, struct user_struct, kset);
- unsigned long rt_runtime_us;
+ long rt_runtime_us;
int rc;
- sscanf(buffer, "%lu", &rt_runtime_us);
+ sscanf(buffer, "%ld", &rt_runtime_us);
rc = sched_group_set_rt_runtime(up->tg, rt_runtime_us);
return (rc ?: size);
Index: linux-2.6/Documentation/ABI/testing/sysfs-kernel-uids
===================================================================
--- linux-2.6.orig/Documentation/ABI/testing/sysfs-kernel-uids
+++ linux-2.6/Documentation/ABI/testing/sysfs-kernel-uids
@@ -12,3 +12,14 @@ Description:
B has shares = 2048, User B will get twice the CPU
bandwidth user A will. For more details refer
Documentation/sched-design-CFS.txt
+
+What: /sys/kernel/uids/<uid>/cpu_rt_period_us
+Date: January 2008
+Contact: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Description: See Documentation/sched-rt-group.txt
+
+What: /sys/kernel/uids/<uid>/cpu_rt_runtime_us
+Date: January 2008
+Contact: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Description: See Documentation/sched-rt-group.txt
+
Index: linux-2.6/Documentation/sched-rt-group.txt
===================================================================
--- /dev/null
+++ linux-2.6/Documentation/sched-rt-group.txt
@@ -0,0 +1,69 @@
+
+
+Real-Time group scheduling.
+
+The problem space:
+
+In order to schedule multiple groups of realtime tasks, each group must
+be assigned a fixed portion of the available cpu time. Without a minimum
+guarantee a realtime group can obviously fall short. A fuzzy upper limit
+is of no use since it cannot be relied upon, which leaves us with just
+the single fixed portion.
+
+CPU time is divided by specifying how much time can be spent
+running in a given period. Say a fixed frame rate realtime renderer must
+deliver 25 frames a second, which yields a period of 0.04s. Now say
+it will also have to play some music and respond to input, leaving it
+with around 80% for the graphics. We can then give this group a runtime
+of 0.8 * 0.04s = 0.032s.
+
+This way the graphics group will have a 0.04s period with a 0.032s runtime
+limit.
+
+Now if the audio thread needs to refill the dma buffer every 0.005s, but
+needs only about 3% cpu time to do so, it can do with a runtime of
+0.03 * 0.005s = 0.00015s.
+
+If it so happens that the graphics group runs at a higher priority than
+the audio group, it might be that the audio group will not get CPU time
+in time to meet its deadline, whereas the graphics group would still easily
+make its deadline even if it were delayed for the amount of time the audio
+group needs.
+
+This problem is solved using Earliest Deadline First (EDF) scheduling of the
+realtime groups.
+
+The Interface:
+
+system wide:
+
+/proc/sys/kernel/sched_rt_period_us
+/proc/sys/kernel/sched_rt_runtime_us
+
+CONFIG_FAIR_USER_SCHED
+
+/sys/kernel/uids/<uid>/cpu_rt_period_us
+/sys/kernel/uids/<uid>/cpu_rt_runtime_us
+
+or
+
+CONFIG_FAIR_CGROUP_SCHED
+
+/cgroup/<cgroup>/cpu.rt_period_us
+/cgroup/<cgroup>/cpu.rt_runtime_us
+
+[ time is specified in us because the interface is s32, this gives an
+ operating range of ~35m to 1us ]
+
+The period takes values in [ 1, INT_MAX ], runtime in [ -1, INT_MAX - 1 ].
+
+A runtime of -1 specifies runtime == period, ie. no limit.
+
+New groups get the period from /proc/sys/kernel/sched_rt_period_us and
+a runtime of 0.
+
+Settings are constrained to:
+
+ \Sum_{i} runtime_{i} / period_{i} <= global_runtime / global_period
+
+in order to keep the configuration schedulable.
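[ Illustration only, not part of the patch: translating the example
  requirements from the text above into the microsecond values the
  rt_period_us/rt_runtime_us knobs expect. ]

#include <stdio.h>

int main(void)
{
        /* graphics: 25 frames/s, ~80% of each frame for rendering */
        unsigned long gfx_period_us  = 1000000 / 25;             /* 40000us = 0.04s  */
        unsigned long gfx_runtime_us = gfx_period_us * 80 / 100; /* 32000us = 0.032s */

        /* audio: refill the dma buffer every 5ms, ~3% cpu */
        unsigned long au_period_us  = 5000;                      /* 0.005s */
        unsigned long au_runtime_us = au_period_us * 3 / 100;    /* 150us = 0.00015s */

        printf("graphics: period=%luus runtime=%luus\n",
               gfx_period_us, gfx_runtime_us);
        printf("audio:    period=%luus runtime=%luus\n",
               au_period_us, au_runtime_us);
        return 0;
}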
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH 12/11] sched: rt-group: uid-group interface
2008-01-08 14:31 ` Kay Sievers
@ 2008-01-08 23:35 ` Peter Zijlstra
2008-01-08 23:58 ` Greg KH
0 siblings, 1 reply; 31+ messages in thread
From: Peter Zijlstra @ 2008-01-08 23:35 UTC (permalink / raw)
To: Kay Sievers
Cc: Dhaval Giani, vatsa, LKML, Ingo Molnar, Balbir Singh,
dmitry.adamushko, Steven Rostedt, Gregory Haskins,
Thomas Gleixner, Greg Kroah-Hartman, Andrew Morton
On Tue, 2008-01-08 at 15:31 +0100, Kay Sievers wrote:
> On Jan 8, 2008 12:02 PM, Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> >
> > On Tue, 2008-01-08 at 16:27 +0530, Dhaval Giani wrote:
> > > On Mon, Jan 07, 2008 at 05:57:42PM +0100, Peter Zijlstra wrote:
> > > >
> > > > Subject: sched: rt-group: add uid-group interface
> > > >
> > > > Extend the /sys/kernel/uids/<uid>/ interface to allow setting
> > > > the group's rt_period and rt_runtime.
> > > >
> > >
> > > Hi Peter,
> > >
> > > Cool stuff! I will try out these patches and try to give you some
> > > feedback.
> >
> > Thanks, much appreciated!
> >
> > > One request though, could you please add some documentation to
> > > Documentation/ABI/testing/sysfs-kernel-uids?
> >
> > I already have documentation on the todo list, I'll add this file to
> > that list :-)
>
> Care to rebase the patch against -mm, we fixed the mixed-up usage
> of ksets and kobjects, and this can not apply anymore:
> http://git.kernel.org/?p=linux/kernel/git/gregkh/patches.git;a=blob;f=driver/struct-user_info-sysfs.patch;hb=HEAD
>
> There is also an attribute group now which makes it much easier to add
> new files.
Ingo, Greg,
What would be the easiest way to carry this forward? sched-devel and
greg's tree would intersect at this point and leave poor akpm with the
resulting mess. Should I just make an incremental patch akpm can carry
and push? Or can we base one tree off the other?
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH 12/11] sched: rt-group: uid-group interface
2008-01-08 23:58 ` Greg KH
@ 2008-01-08 23:57 ` Ingo Molnar
2008-01-10 0:05 ` Greg KH
0 siblings, 1 reply; 31+ messages in thread
From: Ingo Molnar @ 2008-01-08 23:57 UTC (permalink / raw)
To: Greg KH
Cc: Peter Zijlstra, Kay Sievers, Dhaval Giani, vatsa, LKML,
Balbir Singh, dmitry.adamushko, Steven Rostedt, Gregory Haskins,
Thomas Gleixner, Andrew Morton
* Greg KH <gregkh@suse.de> wrote:
> On Wed, Jan 09, 2008 at 12:35:32AM +0100, Peter Zijlstra wrote:
> >
> > On Tue, 2008-01-08 at 15:31 +0100, Kay Sievers wrote:
> > > On Jan 8, 2008 12:02 PM, Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> > > >
> > > > On Tue, 2008-01-08 at 16:27 +0530, Dhaval Giani wrote:
> > > > > On Mon, Jan 07, 2008 at 05:57:42PM +0100, Peter Zijlstra wrote:
> > > > > >
> > > > > > Subject: sched: rt-group: add uid-group interface
> > > > > >
> > > > > > Extend the /sys/kernel/uids/<uid>/ interface to allow setting
> > > > > > the group's rt_period and rt_runtime.
> > > > > >
> > > > >
> > > > > Hi Peter,
> > > > >
> > > > > Cool stuff! I will try out these patches and try to give you some
> > > > > feedback.
> > > >
> > > > Thanks, much appreciated!
> > > >
> > > > > One request though, could you please add some documentation to
> > > > > Documentation/ABI/testing/sysfs-kernel-uids?
> > > >
> > > > I already have documentation on the todo list, I'll add this file to
> > > > that list :-)
> > >
> > > Care to rebase the patch against -mm, we fixed the mixed-up usage
> > > of ksets and kobjects, and this can not apply anymore:
> > > http://git.kernel.org/?p=linux/kernel/git/gregkh/patches.git;a=blob;f=driver/struct-user_info-sysfs.patch;hb=HEAD
> > >
> > > There is also an attribute group now which makes it much easier to add
> > > new files.
> >
> > Ingo, Greg,
> >
> > What would be the easiest way to carry this forward? sched-devel and
> > greg's tree would intersect at this point and leave poor akpm with the
> > resulting mess. Should I just make an incremental patch akpm can carry
> > and push? Or can we base one tree off the other?
>
> If it's just a single patch for this, I'd be glad to take it. But by
> looking at the [11/12] above, I doubt this is so...
>
> If it's not that rough (12 patches is not a big deal), I'd be glad to
> take these through my tree, after you fix up Kay's requests above :)
hm, i'd really like to see this tested and go through sched.git. It's
only the few sysfs bits which interfere, right?
Ingo
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH 12/11] sched: rt-group: uid-group interface
2008-01-08 23:35 ` Peter Zijlstra
@ 2008-01-08 23:58 ` Greg KH
2008-01-08 23:57 ` Ingo Molnar
0 siblings, 1 reply; 31+ messages in thread
From: Greg KH @ 2008-01-08 23:58 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Kay Sievers, Dhaval Giani, vatsa, LKML, Ingo Molnar,
Balbir Singh, dmitry.adamushko, Steven Rostedt, Gregory Haskins,
Thomas Gleixner, Andrew Morton
On Wed, Jan 09, 2008 at 12:35:32AM +0100, Peter Zijlstra wrote:
>
> On Tue, 2008-01-08 at 15:31 +0100, Kay Sievers wrote:
> > On Jan 8, 2008 12:02 PM, Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> > >
> > > On Tue, 2008-01-08 at 16:27 +0530, Dhaval Giani wrote:
> > > > On Mon, Jan 07, 2008 at 05:57:42PM +0100, Peter Zijlstra wrote:
> > > > >
> > > > > Subject: sched: rt-group: add uid-group interface
> > > > >
> > > > > Extend the /sys/kernel/uids/<uid>/ interface to allow setting
> > > > > the group's rt_period and rt_runtime.
> > > > >
> > > >
> > > > Hi Peter,
> > > >
> > > > Cool stuff! I will try out these patches and try to give you some
> > > > feedback.
> > >
> > > Thanks, much appreciated!
> > >
> > > > One request though, could you please add some documentation to
> > > > Documentation/ABI/testing/sysfs-kernel-uids?
> > >
> > > I already have documentation on the todo list, I'll add this file to
> > > that list :-)
> >
> > Care to rebase the patch against -mm, we fixed the mixed-up usage
> > of ksets and kobjects, and this can not apply anymore:
> > http://git.kernel.org/?p=linux/kernel/git/gregkh/patches.git;a=blob;f=driver/struct-user_info-sysfs.patch;hb=HEAD
> >
> > There is also an attribute group now which makes it much easier to add
> > new files.
>
> Ingo, Greg,
>
> What would be the easiest way to carry this forward? sched-devel and
> greg's tree would intersect at this point and leave poor akpm with the
> resulting mess. Should I just make an incremental patch akpm can carry
> and push? Or can we base one tree off the other?
If it's just a single patch for this, I'd be glad to take it. But by
looking at the [11/12] above, I doubt this is so...
If it's not that rough (12 patches is not a big deal), I'd be glad to
take these through my tree, after you fix up Kay's requests above :)
thanks,
greg k-h
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH 12/11] sched: rt-group: uid-group interface
2008-01-08 23:57 ` Ingo Molnar
@ 2008-01-10 0:05 ` Greg KH
2008-02-07 4:17 ` Dhaval Giani
0 siblings, 1 reply; 31+ messages in thread
From: Greg KH @ 2008-01-10 0:05 UTC (permalink / raw)
To: Ingo Molnar
Cc: Peter Zijlstra, Kay Sievers, Dhaval Giani, vatsa, LKML,
Balbir Singh, dmitry.adamushko, Steven Rostedt, Gregory Haskins,
Thomas Gleixner, Andrew Morton
On Wed, Jan 09, 2008 at 12:57:50AM +0100, Ingo Molnar wrote:
>
> * Greg KH <gregkh@suse.de> wrote:
>
> > On Wed, Jan 09, 2008 at 12:35:32AM +0100, Peter Zijlstra wrote:
> > >
> > > On Tue, 2008-01-08 at 15:31 +0100, Kay Sievers wrote:
> > > > On Jan 8, 2008 12:02 PM, Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> > > > >
> > > > > On Tue, 2008-01-08 at 16:27 +0530, Dhaval Giani wrote:
> > > > > > On Mon, Jan 07, 2008 at 05:57:42PM +0100, Peter Zijlstra wrote:
> > > > > > >
> > > > > > > Subject: sched: rt-group: add uid-group interface
> > > > > > >
> > > > > > > Extend the /sys/kernel/uids/<uid>/ interface to allow setting
> > > > > > > the group's rt_period and rt_runtime.
> > > > > > >
> > > > > >
> > > > > > Hi Peter,
> > > > > >
> > > > > > Cool stuff! I will try out these patches and try to give you some
> > > > > > feedback.
> > > > >
> > > > > Thanks, much appreciated!
> > > > >
> > > > > > One request though, could you please add some documentation to
> > > > > > Documentation/ABI/testing/sysfs-kernel-uids?
> > > > >
> > > > > I already have documentation on the todo list, I'll add this file to
> > > > > that list :-)
> > > >
> > > > Care to rebase the patch against -mm, we fixed the mixed-up usage
> > > > of ksets and kobjects, and this can not apply anymore:
> > > > http://git.kernel.org/?p=linux/kernel/git/gregkh/patches.git;a=blob;f=driver/struct-user_info-sysfs.patch;hb=HEAD
> > > >
> > > > There is also an attribute group now which makes it much easier to add
> > > > new files.
> > >
> > > Ingo, Greg,
> > >
> > > What would be the easiest way to carry this forward? sched-devel and
> > > greg's tree would intersect at this point and leave poor akpm with the
> > > resulting mess. Should I just make an incremental patch akpm can carry
> > > and push? Or can we base one tree off the other?
> >
> > If it's just a single patch for this, I'd be glad to take it. But by
> > looking at the [11/12] above, I doubt this is so...
> >
> > If it's not that rough (12 patches is not a big deal), I'd be glad to
> > take these through my tree, after you fix up Kay's requests above :)
>
> hm, i'd really like to see this tested and go through sched.git. It's
> only the few sysfs bits which interfere, right?
Yes, that should be it.
So why not put the majority of this through sched.git, then when my
sysfs changes go in at the beginning of the .25 merge cycle, you can
then add the sysfs changes through your tree or anywhere else.
Unless you are relying on the sysfs changes for this whole feature, and
without them it just doesn't make any sense at all?
thanks,
greg k-h
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH 12/11] sched: rt-group: uid-group interface
2008-01-10 0:05 ` Greg KH
@ 2008-02-07 4:17 ` Dhaval Giani
2008-02-07 5:42 ` Greg KH
0 siblings, 1 reply; 31+ messages in thread
From: Dhaval Giani @ 2008-02-07 4:17 UTC (permalink / raw)
To: Greg KH
Cc: Ingo Molnar, Peter Zijlstra, Kay Sievers, vatsa, LKML,
Balbir Singh, dmitry.adamushko, Steven Rostedt, Gregory Haskins,
Thomas Gleixner, Andrew Morton
On Wed, Jan 09, 2008 at 04:05:31PM -0800, Greg KH wrote:
> > > > Ingo, Greg,
> > > >
> > > > What would be the easiest way to carry this forward? sched-devel and
> > > > greg's tree would intersect at this point and leave poor akpm with the
> > > > resulting mess. Should I just make an incremental patch akpm can carry
> > > > and push? Or can we base one tree off the other?
> > >
> > > If it's just a single patch for this, I'd be glad to take it. But by
> > > looking at the [11/12] above, I doubt this is so...
> > >
> > > If it's not that rough (12 patches is not a big deal), I'd be glad to
> > > take these through my tree, after you fix up Kay's requests above :)
> >
> > hm, i'd really like to see this tested and go through sched.git. It's
> > only the few sysfs bits which interfere, right?
>
> Yes, that should be it.
>
> So why not put the majority of this through sched.git, then when my
> sysfs changes go in at the beginning of the .25 merge cycle, you can
> then add the sysfs changes through your tree or anywhere else.
>
Hi,
I was wondering where these changes stand now. I don't see the sysfs
interface for rt-group-sched in mainline.
Thanks,
--
regards,
Dhaval
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH 12/11] sched: rt-group: uid-group interface
2008-02-07 4:17 ` Dhaval Giani
@ 2008-02-07 5:42 ` Greg KH
0 siblings, 0 replies; 31+ messages in thread
From: Greg KH @ 2008-02-07 5:42 UTC (permalink / raw)
To: Dhaval Giani
Cc: Ingo Molnar, Peter Zijlstra, Kay Sievers, vatsa, LKML,
Balbir Singh, dmitry.adamushko, Steven Rostedt, Gregory Haskins,
Thomas Gleixner, Andrew Morton
On Thu, Feb 07, 2008 at 09:47:22AM +0530, Dhaval Giani wrote:
> On Wed, Jan 09, 2008 at 04:05:31PM -0800, Greg KH wrote:
> > > > > Ingo, Greg,
> > > > >
> > > > > What would be the easiest way to carry this forward? sched-devel and
> > > > > greg's tree would intersect at this point and leave poor akpm with the
> > > > > resulting mess. Should I just make an incremental patch akpm can carry
> > > > > and push? Or can we base one tree off the other?
> > > >
> > > > If it's just a single patch for this, I'd be glad to take it. But by
> > > > looking at the [11/12] above, I doubt this is so...
> > > >
> > > > If it's not that rough (12 patches is not a big deal), I'd be glad to
> > > > take these through my tree, after you fix up Kay's requests above :)
> > >
> > > hm, i'd really like to see this tested and go through sched.git. It's
> > > only the few sysfs bits which interfere, right?
> >
> > Yes, that should be it.
> >
> > So why not put the majority of this through sched.git, then when my
> > sysfs changes go in at the beginning of the .25 merge cycle, you can
> > then add the sysfs changes through your tree or anywhere else.
> >
>
> Hi,
>
> I was wondering where these changes stand now. I don't see the sysfs
> interface for rt-group-sched in mainline.
All of the sysfs changes I had are in Linus's tree, so you don't need me
anymore :)
thanks,
greg k-h
^ permalink raw reply [flat|nested] 31+ messages in thread
end of thread, other threads:[~2008-02-07 5:43 UTC | newest]
Thread overview: 31+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-01-06 16:11 [PATCH 00/11] another rt group sched update Peter Zijlstra
2008-01-06 16:11 ` [PATCH 01/11] sched: rt throttling vs no_hz Peter Zijlstra
2008-01-06 16:11 ` [PATCH 02/11] sched: load_balance_monitor rename Peter Zijlstra
2008-01-06 16:11 ` [PATCH 03/11] hrtimer: clean up cpu->base locking tricks Peter Zijlstra
2008-01-06 16:11 ` [PATCH 04/11] hrtimer: fixup the HRTIMER_CB_IRQSAFE_NO_SOFTIRQ fallback Peter Zijlstra
2008-01-07 11:56 ` Peter Zijlstra
2008-01-08 11:16 ` Ingo Molnar
2008-01-06 16:11 ` [PATCH 05/11] hrtimer: unlock hrtimer_wakeup Peter Zijlstra
2008-01-06 16:11 ` [PATCH 06/11] sched: rt-group: reduce rescheduling Peter Zijlstra
2008-01-06 16:11 ` [PATCH 07/11] sched: rt-group: per group period Peter Zijlstra
2008-01-06 16:11 ` [PATCH 08/11] sched: rt-group: deal with PI Peter Zijlstra
2008-01-06 16:11 ` [PATCH 09/11] sched: rt-group: dynamic period ticks Peter Zijlstra
2008-01-06 16:11 ` [PATCH 10/11] sched: rt-group: EDF Peter Zijlstra
2008-01-06 16:11 ` [PATCH 11/11] sched: rt-group: interface Peter Zijlstra
2008-01-07 10:51 ` [PATCH 00/11] another rt group sched update Peter Zijlstra
2008-01-07 11:24 ` Peter Zijlstra
2008-01-07 12:23 ` Srivatsa Vaddagiri
2008-01-07 12:12 ` Peter Zijlstra
2008-01-07 16:57 ` [PATCH 12/11] sched: rt-group: uid-group interface Peter Zijlstra
2008-01-08 10:33 ` Ingo Molnar
2008-01-08 10:57 ` Dhaval Giani
2008-01-08 11:02 ` Peter Zijlstra
2008-01-08 14:31 ` Kay Sievers
2008-01-08 23:35 ` Peter Zijlstra
2008-01-08 23:58 ` Greg KH
2008-01-08 23:57 ` Ingo Molnar
2008-01-10 0:05 ` Greg KH
2008-02-07 4:17 ` Dhaval Giani
2008-02-07 5:42 ` Greg KH
2008-01-08 23:26 ` Peter Zijlstra
2008-01-07 11:17 ` [PATCH 00/11] another rt group sched update Ingo Molnar