LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [RFC PATCH -rt 0/2] RCU priority boosting that survives vicious testing
@ 2007-02-01  1:21 Paul E. McKenney
  2007-02-01  1:24 ` [RFC PATCH -rt 1/2] " Paul E. McKenney
  2007-02-01  1:26 ` [RFC PATCH -rt 2/2] RCU priority boosting additions to rcutorture Paul E. McKenney
  0 siblings, 2 replies; 18+ messages in thread
From: Paul E. McKenney @ 2007-02-01  1:21 UTC (permalink / raw)
  To: linux-kernel
  Cc: mingo, tglx, dipankar, tytso, dvhltc, oleg, twoerner.k, josh,
	billh, nielsen.esben, corbet

Hello!

This is yet another update of the RCU priority-boosting patch
(see http://lkml.org/lkml/2007/1/24/294 for the previous version).
This series contains the RCU-boost patch itself and rcutorture
modifications to test it more thoroughly.  This version incorporates
feedback from Josh Triplett and applies cleanly to 2.6.20-rc6-rt6.

						Thanx, Paul

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [RFC PATCH -rt 1/2] RCU priority boosting that survives vicious testing
  2007-02-01  1:21 [RFC PATCH -rt 0/2] RCU priority boosting that survives vicious testing Paul E. McKenney
@ 2007-02-01  1:24 ` Paul E. McKenney
  2007-02-01  8:23   ` Ingo Molnar
  2007-02-01  1:26 ` [RFC PATCH -rt 2/2] RCU priority boosting additions to rcutorture Paul E. McKenney
  1 sibling, 1 reply; 18+ messages in thread
From: Paul E. McKenney @ 2007-02-01  1:24 UTC (permalink / raw)
  To: linux-kernel
  Cc: mingo, tglx, dipankar, tytso, dvhltc, oleg, twoerner.k, josh,
	billh, nielsen.esben, corbet

Here is the RCU priority-boosting patch.  Pretty close to the
http://lkml.org/lkml/2007/1/24/295 version.  This patch prevents
preempted or blocked low-priority RCU readers from indefinitely
stalling RCU grace periods.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---

 include/linux/init_task.h  |   12 +
 include/linux/rcupdate.h   |   12 +
 include/linux/rcupreempt.h |   20 +
 include/linux/sched.h      |   16 +
 init/main.c                |    1 
 kernel/Kconfig.preempt     |   32 ++
 kernel/fork.c              |    6 
 kernel/rcupreempt.c        |  528 +++++++++++++++++++++++++++++++++++++++++++++
 kernel/rtmutex.c           |    7 
 kernel/sched.c             |    5 
 10 files changed, 636 insertions(+), 3 deletions(-)

diff -urpNa -X dontdiff linux-2.6.20-rc4-rt1/include/linux/init_task.h linux-2.6.20-rc4-rt1-rcub/include/linux/init_task.h
--- linux-2.6.20-rc4-rt1/include/linux/init_task.h	2007-01-09 10:59:54.000000000 -0800
+++ linux-2.6.20-rc4-rt1-rcub/include/linux/init_task.h	2007-01-09 11:01:12.000000000 -0800
@@ -87,6 +87,17 @@ extern struct nsproxy init_nsproxy;
 	.siglock	= __SPIN_LOCK_UNLOCKED(sighand.siglock),	\
 }
 
+#ifdef CONFIG_PREEMPT_RCU_BOOST
+#define INIT_RCU_BOOST_PRIO .rcu_prio	= MAX_PRIO,
+#define INIT_PREEMPT_RCU_BOOST(tsk)					\
+	.rcub_rbdp	= NULL,						\
+	.rcub_state	= RCU_BOOST_IDLE,				\
+	.rcub_entry	= LIST_HEAD_INIT(tsk.rcub_entry),
+#else /* #ifdef CONFIG_PREEMPT_RCU_BOOST */
+#define INIT_RCU_BOOST_PRIO
+#define INIT_PREEMPT_RCU_BOOST(tsk)
+#endif /* #else #ifdef CONFIG_PREEMPT_RCU_BOOST */
+
 extern struct group_info init_groups;
 
 /*
@@ -143,6 +154,7 @@ extern struct group_info init_groups;
 	.pi_lock	= RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock),		\
 	INIT_TRACE_IRQFLAGS						\
 	INIT_LOCKDEP							\
+	INIT_PREEMPT_RCU_BOOST(tsk)					\
 }
 
 
diff -urpNa -X dontdiff linux-2.6.20-rc4-rt1/include/linux/rcupdate.h linux-2.6.20-rc4-rt1-rcub/include/linux/rcupdate.h
--- linux-2.6.20-rc4-rt1/include/linux/rcupdate.h	2007-01-09 10:59:54.000000000 -0800
+++ linux-2.6.20-rc4-rt1-rcub/include/linux/rcupdate.h	2007-01-09 11:01:12.000000000 -0800
@@ -227,6 +227,18 @@ extern void rcu_barrier(void);
 extern void rcu_init(void);
 extern void rcu_advance_callbacks(int cpu, int user);
 extern void rcu_check_callbacks(int cpu, int user);
+#ifdef CONFIG_PREEMPT_RCU_BOOST
+extern void init_rcu_boost_late(void);
+extern void __rcu_preempt_boost(void);
+#define rcu_preempt_boost() \
+	do { \
+		if (unlikely(current->rcu_read_lock_nesting > 0)) \
+			__rcu_preempt_boost(); \
+	} while (0)
+#else /* #ifdef CONFIG_PREEMPT_RCU_BOOST */
+#define init_rcu_boost_late()
+#define rcu_preempt_boost()
+#endif /* #else #ifdef CONFIG_PREEMPT_RCU_BOOST */
 
 #endif /* __KERNEL__ */
 #endif /* __LINUX_RCUPDATE_H */
diff -urpNa -X dontdiff linux-2.6.20-rc4-rt1/include/linux/rcupreempt.h linux-2.6.20-rc4-rt1-rcub/include/linux/rcupreempt.h
--- linux-2.6.20-rc4-rt1/include/linux/rcupreempt.h	2007-01-09 10:59:54.000000000 -0800
+++ linux-2.6.20-rc4-rt1-rcub/include/linux/rcupreempt.h	2007-01-25 16:30:46.000000000 -0800
@@ -42,6 +42,26 @@
 #include <linux/cpumask.h>
 #include <linux/seqlock.h>
 
+#ifdef CONFIG_PREEMPT_RCU_BOOST
+/*
+ * Task state with respect to being RCU-boosted.  This state is changed
+ * by the task itself in response to the following three events:
+ * 1. Preemption (or block on lock) while in RCU read-side critical section.
+ * 2. Outermost rcu_read_unlock() for blocked RCU read-side critical section.
+ *
+ * The RCU-boost task also updates the state when boosting priority.
+ */
+enum rcu_boost_state {
+	RCU_BOOST_IDLE = 0,	   /* Not yet blocked if in RCU read-side. */
+	RCU_BOOST_BLOCKED = 1,	   /* Blocked from RCU read-side. */
+	RCU_BOOSTED = 2,	   /* Boosting complete. */
+	RCU_BOOST_INVALID = 3,	   /* For bogus state sightings. */
+};
+
+#define N_RCU_BOOST_STATE (RCU_BOOST_INVALID + 1)
+
+#endif /* #ifdef CONFIG_PREEMPT_RCU_BOOST */
+
 #define rcu_qsctr_inc(cpu)
 #define rcu_bh_qsctr_inc(cpu)
 #define call_rcu_bh(head, rcu) call_rcu(head, rcu)
diff -urpNa -X dontdiff linux-2.6.20-rc4-rt1/include/linux/sched.h linux-2.6.20-rc4-rt1-rcub/include/linux/sched.h
--- linux-2.6.20-rc4-rt1/include/linux/sched.h	2007-01-09 10:59:54.000000000 -0800
+++ linux-2.6.20-rc4-rt1-rcub/include/linux/sched.h	2007-01-09 11:01:12.000000000 -0800
@@ -699,6 +699,14 @@ struct signal_struct {
 #define is_rt_policy(p)		((p) != SCHED_NORMAL && (p) != SCHED_BATCH)
 #define has_rt_policy(p)	unlikely(is_rt_policy((p)->policy))
 
+#ifdef CONFIG_PREEMPT_RCU_BOOST
+#define set_rcu_prio(p, prio)  ((p)->rcu_prio = (prio))
+#define get_rcu_prio(p) ((p)->rcu_prio)
+#else /* #ifdef CONFIG_PREEMPT_RCU_BOOST */
+#define set_rcu_prio(p, prio)  do { } while (0)
+#define get_rcu_prio(p) MAX_PRIO
+#endif /* #else #ifdef CONFIG_PREEMPT_RCU_BOOST */
+
 /*
  * Some day this will be a full-fledged user tracking system..
  */
@@ -982,6 +990,9 @@ struct task_struct {
 #endif
 	int load_weight;	/* for niceness load balancing purposes */
 	int prio, static_prio, normal_prio;
+#ifdef CONFIG_PREEMPT_RCU_BOOST
+	int rcu_prio;
+#endif /* #ifdef CONFIG_PREEMPT_RCU_BOOST */
 	struct list_head run_list;
 	struct prio_array *array;
 
@@ -1003,6 +1014,11 @@ struct task_struct {
         atomic_t *rcu_flipctr1;
         atomic_t *rcu_flipctr2;
 #endif
+#ifdef CONFIG_PREEMPT_RCU_BOOST
+	struct rcu_boost_dat *rcub_rbdp;
+	enum rcu_boost_state rcub_state;
+	struct list_head rcub_entry;
+#endif /* #ifdef CONFIG_PREEMPT_RCU_BOOST */
 
 #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
 	struct sched_info sched_info;
diff -urpNa -X dontdiff linux-2.6.20-rc4-rt1/init/main.c linux-2.6.20-rc4-rt1-rcub/init/main.c
--- linux-2.6.20-rc4-rt1/init/main.c	2007-01-09 10:59:54.000000000 -0800
+++ linux-2.6.20-rc4-rt1-rcub/init/main.c	2007-01-09 11:01:12.000000000 -0800
@@ -712,6 +712,7 @@ static void __init do_basic_setup(void)
 	init_workqueues();
 	usermodehelper_init();
 	driver_init();
+	init_rcu_boost_late();
 
 #ifdef CONFIG_SYSCTL
 	sysctl_init();
diff -urpNa -X dontdiff linux-2.6.20-rc4-rt1/kernel/Kconfig.preempt linux-2.6.20-rc4-rt1-rcub/kernel/Kconfig.preempt
--- linux-2.6.20-rc4-rt1/kernel/Kconfig.preempt	2007-01-09 10:59:54.000000000 -0800
+++ linux-2.6.20-rc4-rt1-rcub/kernel/Kconfig.preempt	2007-01-09 11:01:12.000000000 -0800
@@ -176,3 +176,35 @@ config RCU_TRACE
 
 	  Say Y here if you want to enable RCU tracing
 	  Say N if you are unsure.
+
+config PREEMPT_RCU_BOOST
+	bool "Enable priority boosting of RCU read-side critical sections"
+	depends on PREEMPT_RCU
+	default n
+	help
+	  This option permits priority boosting of RCU read-side critical
+	  sections that have been preempted in order to prevent indefinite
+	  delay of grace periods in face of runaway non-realtime processes.
+
+	  Say N if you are unsure.
+
+config PREEMPT_RCU_BOOST_STATS
+	bool "Enable RCU priority-boosting statistic printing"
+	depends on PREEMPT_RCU_BOOST
+	default n
+	help
+	  This option enables debug printk()s of RCU boost statistics,
+	  which are normally only used to debug RCU priority boost
+	  implementations.
+
+	  Say N if you are unsure.
+
+config PREEMPT_RCU_BOOST_STATS_INTERVAL
+	int "RCU priority-boosting statistic printing interval (seconds)"
+	depends on PREEMPT_RCU_BOOST_STATS
+	default 100
+	range 10 86400
+	help
+	  This option controls the timing of debug printk()s of RCU boost
+	  statistics, which are normally only used to debug RCU priority
+	  boost implementations.
diff -urpNa -X dontdiff linux-2.6.20-rc4-rt1/kernel/fork.c linux-2.6.20-rc4-rt1-rcub/kernel/fork.c
--- linux-2.6.20-rc4-rt1/kernel/fork.c	2007-01-09 10:59:54.000000000 -0800
+++ linux-2.6.20-rc4-rt1-rcub/kernel/fork.c	2007-01-09 11:01:12.000000000 -0800
@@ -1129,6 +1129,12 @@ static struct task_struct *copy_process(
 	p->softirq_context = 0;
 #endif
 	p->pagefault_disabled = 0;
+#ifdef CONFIG_PREEMPT_RCU_BOOST
+	p->rcu_prio = MAX_PRIO;
+	p->rcub_rbdp = NULL;
+	p->rcub_state = RCU_BOOST_IDLE;
+	INIT_LIST_HEAD(&p->rcub_entry);
+#endif
 #ifdef CONFIG_LOCKDEP
 	p->lockdep_depth = 0; /* no locks held yet */
 	p->curr_chain_key = 0;
diff -urpNa -X dontdiff linux-2.6.20-rc4-rt1/kernel/rcupreempt.c linux-2.6.20-rc4-rt1-rcub/kernel/rcupreempt.c
--- linux-2.6.20-rc4-rt1/kernel/rcupreempt.c	2007-01-09 10:59:54.000000000 -0800
+++ linux-2.6.20-rc4-rt1-rcub/kernel/rcupreempt.c	2007-01-25 11:53:47.000000000 -0800
@@ -49,6 +49,7 @@
 #include <linux/byteorder/swabb.h>
 #include <linux/cpumask.h>
 #include <linux/rcupreempt_trace.h>
+#include <linux/kthread.h>
 
 /*
  * PREEMPT_RCU data structures.
@@ -80,6 +81,526 @@ static struct rcu_ctrlblk rcu_ctrlblk = 
 static DEFINE_PER_CPU(atomic_t [2], rcu_flipctr) =
 	{ ATOMIC_INIT(0), ATOMIC_INIT(0) };
 
+#ifndef CONFIG_PREEMPT_RCU_BOOST
+static inline void init_rcu_boost_early(void) { }
+static inline void rcu_read_unlock_unboost(void) { }
+#else /* #ifndef CONFIG_PREEMPT_RCU_BOOST */
+
+/* Defines possible event indices for ->rbs_stats[] (first index). */
+
+#define RCU_BOOST_DAT_BLOCK	0
+#define RCU_BOOST_DAT_BOOST	1
+#define RCU_BOOST_DAT_UNLOCK	2
+#define N_RCU_BOOST_DAT_EVENTS	3
+
+/* RCU-boost per-CPU array element. */
+
+struct rcu_boost_dat {
+	raw_spinlock_t rbs_mutex;
+	struct list_head rbs_toboost;
+	struct list_head rbs_boosted;
+	unsigned long rbs_blocked;
+	unsigned long rbs_boost_attempt;
+	unsigned long rbs_boost;
+	unsigned long rbs_unlock;
+	unsigned long rbs_unboosted;
+#ifdef CONFIG_PREEMPT_RCU_BOOST_STATS
+	unsigned long rbs_stats[N_RCU_BOOST_DAT_EVENTS][N_RCU_BOOST_STATE];
+#endif /* #ifdef CONFIG_PREEMPT_RCU_BOOST_STATS */
+};
+#define RCU_BOOST_ELEMENTS 4
+
+int rcu_boost_idx = -1; /* invalid value in case someone uses RCU early. */
+DEFINE_PER_CPU(struct rcu_boost_dat, rcu_boost_dat[RCU_BOOST_ELEMENTS]);
+static struct task_struct *rcu_boost_task = NULL;
+
+#ifdef CONFIG_PREEMPT_RCU_BOOST_STATS
+
+/*
+ * Function to increment indicated ->rbs_stats[] element.
+ */
+static inline void rcu_boost_dat_stat(struct rcu_boost_dat *rbdp,
+				      int event,
+				      enum rcu_boost_state oldstate)
+{
+	if (oldstate >= RCU_BOOST_IDLE &&
+	    oldstate <= RCU_BOOSTED) {
+		rbdp->rbs_stats[event][oldstate]++;
+	} else {
+		rbdp->rbs_stats[event][RCU_BOOST_INVALID]++;
+	}
+}
+
+#define rcu_boost_dat_stat_block(rbdp, oldstate) \
+	rcu_boost_dat_stat(rbdp, RCU_BOOST_DAT_BLOCK, oldstate)
+#define rcu_boost_dat_stat_boost(rbdp, oldstate) \
+	rcu_boost_dat_stat(rbdp, RCU_BOOST_DAT_BOOST, oldstate)
+#define rcu_boost_dat_stat_unlock(rbdp, oldstate) \
+	rcu_boost_dat_stat(rbdp, RCU_BOOST_DAT_UNLOCK, oldstate)
+
+/*
+ * Prefix for kprint() strings for periodic statistics messages.
+ */
+static char *rcu_boost_state_event[] = {
+	"block:  ",
+	"boost:  ",
+	"unlock: ",
+};
+
+/*
+ * Indicators for numbers in kprint() strings.  "!" indicates a state-event
+ * pair that should not happen, while "?" indicates a state that should
+ * not happen.
+ */
+static char *rcu_boost_state_error[] = {
+	/*ibBe*/
+	 "   ?",  /* block */
+	 "!  ?",  /* boost */
+	 "?  ?",  /* unlock */
+};
+
+/*
+ * Print out RCU booster task statistics at the specified interval.
+ */
+static void rcu_boost_dat_stat_print(void)
+{
+	/* Three decimal digits per byte plus spacing per number and line. */
+	char buf[N_RCU_BOOST_STATE * (sizeof(long) * 3 + 2) + 2];
+	int cpu;
+	int event;
+	int i;
+	static time_t lastprint = 0;
+	struct rcu_boost_dat *rbdp;
+	int state;
+	struct rcu_boost_dat sum;
+
+	/* Wait a graceful interval between printk spamming. */
+
+	if (xtime.tv_sec - lastprint <
+	    CONFIG_PREEMPT_RCU_BOOST_STATS_INTERVAL)
+		return;
+
+	/* Sum up the state/event-independent counters. */
+
+	sum.rbs_blocked = 0;
+	sum.rbs_boost_attempt = 0;
+	sum.rbs_boost = 0;
+	sum.rbs_unlock = 0;
+	sum.rbs_unboosted = 0;
+	for_each_possible_cpu(cpu)
+		for (i = 0; i < RCU_BOOST_ELEMENTS; i++) {
+			rbdp = per_cpu(rcu_boost_dat, cpu);
+			sum.rbs_blocked += rbdp[i].rbs_blocked;
+			sum.rbs_boost_attempt += rbdp[i].rbs_boost_attempt;
+			sum.rbs_boost += rbdp[i].rbs_boost;
+			sum.rbs_unlock += rbdp[i].rbs_unlock;
+			sum.rbs_unboosted += rbdp[i].rbs_unboosted;
+		}
+
+	/* Sum up the state/event-dependent counters. */
+
+	for (event = 0; event < N_RCU_BOOST_DAT_EVENTS; event++)
+		for (state = 0; state < N_RCU_BOOST_STATE; state++) {
+			sum.rbs_stats[event][state] = 0;
+			for_each_possible_cpu(cpu) {
+				for (i = 0; i < RCU_BOOST_ELEMENTS; i++) {
+					sum.rbs_stats[event][state]
+					    += per_cpu(rcu_boost_dat,
+						       cpu)[i].rbs_stats[event][state];
+				}
+			}
+		}
+
+	/* Print them out! */
+
+	printk(KERN_ALERT
+	       "rcu_boost_dat: idx=%d "
+	       "b=%lu ul=%lu ub=%lu boost: a=%lu b=%lu\n",
+	       rcu_boost_idx,
+	       sum.rbs_blocked, sum.rbs_unlock, sum.rbs_unboosted,
+	       sum.rbs_boost_attempt, sum.rbs_boost);
+	for (event = 0; event < N_RCU_BOOST_DAT_EVENTS; event++) {
+		i = 0;
+		for (state = 0; state < N_RCU_BOOST_STATE; state++) {
+			i += sprintf(&buf[i], " %ld%c",
+				     sum.rbs_stats[event][state],
+				     rcu_boost_state_error[event][state]);
+		}
+		printk(KERN_ALERT "rcu_boost_dat %s %s\n",
+		       rcu_boost_state_event[event], buf);
+	}
+
+	/* Go away and don't come back for awhile. */
+
+	lastprint = xtime.tv_sec;
+}
+
+#else /* #ifdef CONFIG_PREEMPT_RCU_BOOST_STATS */
+
+#define rcu_boost_dat_stat_block(rbdp, oldstate)
+#define rcu_boost_dat_stat_boost(rbdp, oldstate)
+#define rcu_boost_dat_stat_unlock(rbdp, oldstate)
+#define rcu_boost_dat_stat_print()
+
+#endif /* #else #ifdef CONFIG_PREEMPT_RCU_BOOST_STATS */
+
+/*
+ * Initialize RCU-boost state.  This happens early in the boot process,
+ * when the scheduler does not yet exist.  So don't try to use it.
+ */
+static void init_rcu_boost_early(void)
+{
+	struct rcu_boost_dat *rbdp;
+	int cpu;
+	int i;
+
+	for_each_possible_cpu(cpu) {
+		rbdp = per_cpu(rcu_boost_dat, cpu);
+		for (i = 0; i < RCU_BOOST_ELEMENTS; i++) {
+			rbdp[i].rbs_mutex =
+				RAW_SPIN_LOCK_UNLOCKED(rbdp[i].rbs_mutex);
+			INIT_LIST_HEAD(&rbdp[i].rbs_toboost);
+			INIT_LIST_HEAD(&rbdp[i].rbs_boosted);
+			rbdp[i].rbs_blocked = 0;
+			rbdp[i].rbs_boost_attempt = 0;
+			rbdp[i].rbs_boost = 0;
+			rbdp[i].rbs_unlock = 0;
+			rbdp[i].rbs_unboosted = 0;
+#ifdef CONFIG_PREEMPT_RCU_BOOST_STATS
+			{
+				int j, k;
+
+				for (j = 0; j < N_RCU_BOOST_DAT_EVENTS; j++)
+					for (k = 0; k < N_RCU_BOOST_STATE; k++)
+						rbdp[i].rbs_stats[j][k] = 0;
+			}
+#endif /* #ifdef CONFIG_PREEMPT_RCU_BOOST_STATS */
+		}
+		smp_wmb();  /* Make sure readers see above initialization. */
+		rcu_boost_idx = 0;  /* Allow readers to access data. */
+	}
+}
+
+/*
+ * Return the list on which the calling task should add itself, or
+ * NULL if too early during initialization.
+ */
+static inline struct rcu_boost_dat *rcu_rbd_new(void)
+{
+	int cpu = raw_smp_processor_id();  /* locks used, so preemption OK. */
+	int idx = rcu_boost_idx;
+
+	smp_read_barrier_depends(); barrier(); /* rmb() on Alpha for idx. */
+	if (unlikely(idx < 0))
+		return (NULL);
+	return &per_cpu(rcu_boost_dat, cpu)[idx];
+}
+
+/*
+ * Return the list from which to boost target tasks.
+ * May only be invoked by the booster task, so guaranteed to
+ * already be initialized.  Use rcu_boost_dat element least recently
+ * the destination for task blocking in RCU read-side critical sections.
+ */
+static inline struct rcu_boost_dat *rcu_rbd_boosting(int cpu)
+{
+	int idx = (rcu_boost_idx + 1) & (RCU_BOOST_ELEMENTS - 1);
+
+	return &per_cpu(rcu_boost_dat, cpu)[idx];
+}
+
+#define PREEMPT_RCU_BOOSTER_PRIO 49  /* Match curr_irq_prio manually. */
+			             /*  Administrators can always adjust */
+				     /*  via the /proc interface. */
+
+/*
+ * Boost the specified task from an RCU viewpoint.
+ * Boost the target task to a priority just a bit less-favored than
+ * that of the RCU-boost task, but boost to a realtime priority even
+ * if the RCU-boost task is running at a non-realtime priority.
+ * We check the priority of the RCU-boost task each time we boost
+ * in case the sysadm manually changes the priority.
+ */
+static void rcu_boost_prio(struct task_struct *taskp)
+{
+	unsigned long oldirq;
+	int rcuprio;
+
+	spin_lock_irqsave(&current->pi_lock, oldirq);
+	rcuprio = rt_mutex_getprio(current) + 1;
+	if (rcuprio >= MAX_USER_RT_PRIO)
+		rcuprio = MAX_USER_RT_PRIO - 1;
+	spin_unlock_irqrestore(&current->pi_lock, oldirq);
+	spin_lock_irqsave(&taskp->pi_lock, oldirq);
+	if (taskp->rcu_prio != rcuprio) {
+		taskp->rcu_prio = rcuprio;
+		if (taskp->rcu_prio != taskp->prio)
+			rt_mutex_setprio(taskp, taskp->rcu_prio);
+	}
+	spin_unlock_irqrestore(&taskp->pi_lock, oldirq);
+}
+
+/*
+ * Unboost the specified task from an RCU viewpoint.
+ */
+static void rcu_unboost_prio(struct task_struct *taskp)
+{
+	int nprio;
+	unsigned long oldirq;
+
+	spin_lock_irqsave(&taskp->pi_lock, oldirq);
+	taskp->rcu_prio = MAX_PRIO;
+	nprio = rt_mutex_getprio(taskp);
+	if (nprio > taskp->prio)
+		rt_mutex_setprio(taskp, nprio);
+	spin_unlock_irqrestore(&taskp->pi_lock, oldirq);
+}
+
+/*
+ * Boost all of the RCU-reader tasks on the specified list.
+ */
+static void rcu_boost_one_reader_list(struct rcu_boost_dat *rbdp)
+{
+	LIST_HEAD(list);
+	unsigned long oldirq;
+	struct task_struct *taskp;
+
+	/*
+	 * Splice both lists onto a local list.  We will still
+	 * need to hold the lock when manipulating the local list
+	 * because tasks can remove themselves at any time.
+	 * The reason for splicing the rbs_boosted list is that
+	 * our priority may have changed, so reboosting may be
+	 * required.
+	 */
+
+	spin_lock_irqsave(&rbdp->rbs_mutex, oldirq);
+	list_splice_init(&rbdp->rbs_toboost, &list);
+	list_splice_init(&rbdp->rbs_boosted, &list);
+	while (!list_empty(&list)) {
+
+		/*
+		 * Pause for a bit before boosting each task.
+		 * @@@FIXME: reduce/eliminate pausing in case of OOM.
+		 */
+
+		spin_unlock_irqrestore(&rbdp->rbs_mutex, oldirq);
+		schedule_timeout_uninterruptible(1);
+		spin_lock_irqsave(&rbdp->rbs_mutex, oldirq);
+
+		/*
+		 * All tasks might have removed themselves while
+		 * we were waiting.  Recheck list emptiness.
+		 */
+
+		if (list_empty(&list))
+			break;
+
+		/* Remove first task in local list, count the attempt. */
+
+		taskp = list_entry(list.next, typeof(*taskp), rcub_entry);
+		list_del_init(&taskp->rcub_entry);
+		rbdp->rbs_boost_attempt++;
+
+		/* Ignore tasks in unexpected states. */
+
+		if (taskp->rcub_state == RCU_BOOST_IDLE) {
+			list_add_tail(&taskp->rcub_entry, &rbdp->rbs_toboost);
+			rcu_boost_dat_stat_boost(rbdp, taskp->rcub_state);
+			continue;
+		}
+
+		/* Boost the task's priority. */
+
+		rcu_boost_prio(taskp);
+		rbdp->rbs_boost++;
+		rcu_boost_dat_stat_boost(rbdp, taskp->rcub_state);
+		taskp->rcub_state = RCU_BOOSTED;
+		list_add_tail(&taskp->rcub_entry, &rbdp->rbs_boosted);
+	}
+	spin_unlock_irqrestore(&rbdp->rbs_mutex, oldirq);
+}
+
+/*
+ * Priority-boost tasks stuck in RCU read-side critical sections as
+ * needed (presumably rarely).
+ */
+static int rcu_booster(void *arg)
+{
+	int cpu;
+	struct sched_param sp;
+
+	sp.sched_priority = PREEMPT_RCU_BOOSTER_PRIO;
+	sched_setscheduler(current, SCHED_RR, &sp);
+	current->flags |= PF_NOFREEZE;
+
+	do {
+
+		/* Advance the lists of tasks. */
+
+		rcu_boost_idx = (rcu_boost_idx + 1) % RCU_BOOST_ELEMENTS;
+		for_each_possible_cpu(cpu) {
+
+			/*
+			 * Boost all sufficiently aged readers.
+			 * Readers must first be preempted or block
+			 * on a mutex in an RCU read-side critical section,
+			 * then remain in that critical section for
+			 * RCU_BOOST_ELEMENTS-1 time intervals.
+			 * So most of the time we should end up doing
+			 * nothing.
+			 */
+
+			rcu_boost_one_reader_list(rcu_rbd_boosting(cpu));
+
+			/*
+			 * Large SMP systems may need to sleep sometimes
+			 * in this loop.  Or have multiple RCU-boost tasks.
+			 */
+		}
+
+		/*
+		 * Sleep to allow any unstalled RCU read-side critical
+		 * sections to age out of the list.  @@@ FIXME: reduce,
+		 * adjust, or eliminate in case of OOM.
+		 */
+
+		schedule_timeout_uninterruptible(HZ / 100);
+
+		/* Print stats if enough time has passed. */
+
+		rcu_boost_dat_stat_print();
+
+	} while (!kthread_should_stop());
+
+	return 0;
+}
+
+/*
+ * Perform the portions of RCU-boost initialization that require the
+ * scheduler to be up and running.
+ */
+void init_rcu_boost_late(void)
+{
+
+	/* Spawn RCU-boost task. */
+
+	printk(KERN_INFO "Starting RCU priority booster\n");
+	rcu_boost_task = kthread_run(rcu_booster, NULL, "RCU Prio Booster");
+	if (IS_ERR(rcu_boost_task)) {
+		printk(KERN_ALERT
+		       "Unable to create RCU Priority Booster, errno %d\n",
+		       -PTR_ERR(rcu_boost_task));
+
+		/*
+		 * Continue running, but tasks permanently blocked
+		 * in RCU read-side critical sections will be able
+		 * to stall grace-period processing, potentially
+		 * OOMing the machine.
+		 */
+
+		rcu_boost_task = NULL;
+	}
+}
+
+/*
+ * Update task's RCU-boost state to reflect blocking in RCU read-side
+ * critical section, so that the RCU-boost task can find it in case it
+ * later needs its priority boosted.
+ */
+void __rcu_preempt_boost(void)
+{
+	struct rcu_boost_dat *rbdp;
+	unsigned long oldirq;
+
+	/* Identify list to place task on for possible later boosting. */
+
+	local_irq_save(oldirq);
+	rbdp = rcu_rbd_new();
+	if (rbdp == NULL) {
+		local_irq_restore(oldirq);
+		printk(KERN_ALERT
+		       "Preempted RCU read-side critical section too early.\n");
+		return;
+	}
+	spin_lock(&rbdp->rbs_mutex);
+	rbdp->rbs_blocked++;
+
+	/*
+	 * Update state.  We hold the lock and aren't yet on the list,
+	 * so the booster cannot mess with us yet.
+	 */
+
+	rcu_boost_dat_stat_block(rbdp, current->rcub_state);
+	if (current->rcub_state != RCU_BOOST_IDLE) {
+
+		/*
+		 * We have been here before, so just update stats.
+		 * It may seem strange to do all this work just to
+		 * accumulate statistics, but this is such a
+		 * low-probability code path that we shouldn't care.
+		 * If it becomes a problem, it can be fixed.
+		 */
+
+		spin_unlock_irqrestore(&rbdp->rbs_mutex, oldirq);
+		return;
+	}
+	current->rcub_state = RCU_BOOST_BLOCKED;
+
+	/* Now add ourselves to the list so that the booster can find us. */
+
+	list_add_tail(&current->rcub_entry, &rbdp->rbs_toboost);
+	current->rcub_rbdp = rbdp;
+	spin_unlock_irqrestore(&rbdp->rbs_mutex, oldirq);
+}
+
+/*
+ * Do the list-removal and priority-unboosting "heavy lifting" when
+ * required.
+ */
+static void __rcu_read_unlock_unboost(void)
+{
+	unsigned long oldirq;
+	struct rcu_boost_dat *rbdp;
+
+	/* Identify the list structure and acquire the corresponding lock. */
+
+	rbdp = current->rcub_rbdp;
+	spin_lock_irqsave(&rbdp->rbs_mutex, oldirq);
+
+	/* Remove task from the list it was on. */
+
+	list_del_init(&current->rcub_entry);
+	rbdp->rbs_unlock++;
+	current->rcub_rbdp = NULL;
+
+	/* Record stats, unboost if needed, and update state. */
+
+	rcu_boost_dat_stat_unlock(rbdp, current->rcub_state);
+	if (current->rcub_state == RCU_BOOSTED) {
+		rcu_unboost_prio(current);
+		rbdp->rbs_unboosted++;
+	}
+	current->rcub_state = RCU_BOOST_IDLE;
+	spin_unlock_irqrestore(&rbdp->rbs_mutex, oldirq);
+}
+
+/*
+ * Do any state changes and unboosting needed for rcu_read_unlock().
+ * Pass any complex work on to __rcu_read_unlock_unboost().
+ * The vast majority of the time, no work will be needed, as preemption
+ * and blocking within RCU read-side critical sections is comparatively
+ * rare.
+ */
+static inline void rcu_read_unlock_unboost(void)
+{
+
+	if (unlikely(current->rcub_state != RCU_BOOST_IDLE))
+		__rcu_read_unlock_unboost();
+}
+
+#endif /* #else #ifndef CONFIG_PREEMPT_RCU_BOOST */
+
 /*
  * Return the number of RCU batches processed thus far.  Useful
  * for debug and statistics.
@@ -155,6 +676,7 @@ void __rcu_read_unlock(void)
 			atomic_dec(current->rcu_flipctr2);
 			current->rcu_flipctr2 = NULL;
 		}
+		rcu_read_unlock_unboost();
 	}
 
 	local_irq_restore(oldirq);
@@ -345,6 +867,11 @@ int notrace rcu_pending(int cpu)
 		rcu_data.nextlist != NULL);
 }
 
+/*
+ * Initialize RCU.  This is called very early in boot, so is restricted
+ * to very simple operations.  Don't even think about messing with anything
+ * that involves the scheduler, as it doesn't exist yet.
+ */
 void __init __rcu_init(void)
 {
 /*&&&&*/printk("WARNING: experimental RCU implementation.\n");
@@ -356,6 +883,7 @@ void __init __rcu_init(void)
 	rcu_data.waittail = &rcu_data.waitlist;
 	rcu_data.donelist = NULL;
 	rcu_data.donetail = &rcu_data.donelist;
+	init_rcu_boost_early();
 	tasklet_init(&rcu_data.rcu_tasklet, rcu_process_callbacks, 0UL);
 }
 
diff -urpNa -X dontdiff linux-2.6.20-rc4-rt1/kernel/rtmutex.c linux-2.6.20-rc4-rt1-rcub/kernel/rtmutex.c
--- linux-2.6.20-rc4-rt1/kernel/rtmutex.c	2007-01-09 10:59:54.000000000 -0800
+++ linux-2.6.20-rc4-rt1-rcub/kernel/rtmutex.c	2007-01-25 12:35:44.000000000 -0800
@@ -105,11 +105,12 @@ static inline void init_lists(struct rt_
  */
 int rt_mutex_getprio(struct task_struct *task)
 {
+	int prio = min(task->normal_prio, get_rcu_prio(task));
+
 	if (likely(!task_has_pi_waiters(task)))
-		return task->normal_prio;
+		return prio;
 
-	return min(task_top_pi_waiter(task)->pi_list_entry.prio,
-		   task->normal_prio);
+	return min(task_top_pi_waiter(task)->pi_list_entry.prio, prio);
 }
 
 /*
diff -urpNa -X dontdiff linux-2.6.20-rc4-rt1/kernel/sched.c linux-2.6.20-rc4-rt1-rcub/kernel/sched.c
--- linux-2.6.20-rc4-rt1/kernel/sched.c	2007-01-09 10:59:54.000000000 -0800
+++ linux-2.6.20-rc4-rt1-rcub/kernel/sched.c	2007-01-09 11:01:12.000000000 -0800
@@ -1962,6 +1962,7 @@ void fastcall sched_fork(struct task_str
 	 * Make sure we do not leak PI boosting priority to the child:
 	 */
 	p->prio = current->normal_prio;
+	set_rcu_prio(p, MAX_PRIO);
 
 	INIT_LIST_HEAD(&p->run_list);
 	p->array = NULL;
@@ -2044,6 +2045,7 @@ void fastcall wake_up_new_task(struct ta
 			else {
 				p->prio = current->prio;
 				p->normal_prio = current->normal_prio;
+				set_rcu_prio(p, MAX_PRIO);
 				__activate_task_after(p, current, rq);
 			}
 			set_need_resched();
@@ -3986,6 +3988,8 @@ void __sched __schedule(void)
 	}
 	profile_hit(SCHED_PROFILING, __builtin_return_address(0));
 
+	rcu_preempt_boost();
+
 	preempt_disable(); // FIXME: disable irqs here
 	prev = current;
 	release_kernel_lock(prev);
@@ -5725,6 +5729,7 @@ void __cpuinit init_idle(struct task_str
 	idle->sleep_avg = 0;
 	idle->array = NULL;
 	idle->prio = idle->normal_prio = MAX_PRIO;
+	set_rcu_prio(idle, MAX_PRIO);
 	idle->state = TASK_RUNNING;
 	idle->cpus_allowed = cpumask_of_cpu(cpu);
 	set_task_cpu(idle, cpu);

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [RFC PATCH -rt 2/2] RCU priority boosting additions to rcutorture
  2007-02-01  1:21 [RFC PATCH -rt 0/2] RCU priority boosting that survives vicious testing Paul E. McKenney
  2007-02-01  1:24 ` [RFC PATCH -rt 1/2] " Paul E. McKenney
@ 2007-02-01  1:26 ` Paul E. McKenney
  2007-02-01  2:12   ` Nigel Cunningham
  1 sibling, 1 reply; 18+ messages in thread
From: Paul E. McKenney @ 2007-02-01  1:26 UTC (permalink / raw)
  To: linux-kernel
  Cc: mingo, tglx, dipankar, tytso, dvhltc, oleg, twoerner.k, josh,
	billh, nielsen.esben, corbet

This patch adds an optional preemption kernel thread to the rcutorture
tests.  This thread sets itself to a low RT priority and chews up CPU
in 10-second bursts, verifying that grace periods progress during this
10-second interval.  Passes RCU torture testing on a 4-CPU (a pair of
2-CPU dies) 64-bit Xeon system.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---

 rcutorture.c |   90 +++++++++++++++++++++++++++++++++++++++++++++++++----------
 1 files changed, 76 insertions(+), 14 deletions(-)

diff -urpNa -X dontdiff linux-2.6.20-rc6-rt4/kernel/rcutorture.c linux-2.6.20-rc6-rt4-rcubtorture/kernel/rcutorture.c
--- linux-2.6.20-rc6-rt4/kernel/rcutorture.c	2007-01-29 17:02:10.000000000 -0800
+++ linux-2.6.20-rc6-rt4-rcubtorture/kernel/rcutorture.c	2007-01-29 17:58:38.000000000 -0800
@@ -58,6 +58,7 @@ static int stat_interval;	/* Interval be
 static int verbose;		/* Print more debug info. */
 static int test_no_idle_hz;	/* Test RCU's support for tickless idle CPUs. */
 static int shuffle_interval = 5; /* Interval between shuffles (in sec)*/
+static int preempt_torture = 0;	/* Realtime task preempts torture readers. */
 static char *torture_type = "rcu"; /* What RCU implementation to torture. */
 
 module_param(nreaders, int, 0);
@@ -72,6 +73,8 @@ module_param(test_no_idle_hz, bool, 0);
 MODULE_PARM_DESC(test_no_idle_hz, "Test support for tickless idle CPUs");
 module_param(shuffle_interval, int, 0);
 MODULE_PARM_DESC(shuffle_interval, "Number of seconds between shuffles");
+module_param(preempt_torture, bool, 0);
+MODULE_PARM_DESC(preempt_torture, "Enable realtime preemption torture");
 module_param(torture_type, charp, 0);
 MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, srcu)");
 
@@ -194,6 +197,8 @@ struct rcu_torture_ops {
 	int (*completed)(void);
 	void (*deferredfree)(struct rcu_torture *p);
 	void (*sync)(void);
+	long (*preemptstart)(void);
+	void (*preemptend)(void);
 	int (*stats)(char *page);
 	char *name;
 };
@@ -258,16 +263,74 @@ static void rcu_torture_deferred_free(st
 	call_rcu(&p->rtort_rcu, rcu_torture_cb);
 }
 
+static struct task_struct *rcu_preeempt_task;
+static unsigned long rcu_torture_preempt_errors = 0;
+
+static int rcu_torture_preempt(void *arg)
+{
+	int completedstart;
+	int err;
+	time_t gcstart;
+	struct sched_param sp;
+
+	sp.sched_priority = MAX_RT_PRIO - 1;
+	err = sched_setscheduler(current, SCHED_RR, &sp);
+	if (err != 0)
+		printk(KERN_ALERT "rcu_torture_preempt() priority err: %d\n",
+		       err);
+	current->flags |= PF_NOFREEZE;
+
+	do {
+		completedstart = rcu_torture_completed();
+		gcstart = xtime.tv_sec;
+		while ((xtime.tv_sec - gcstart < 10) &&
+		       (rcu_torture_completed() == completedstart))
+			cond_resched();
+		if (rcu_torture_completed() == completedstart)
+			rcu_torture_preempt_errors++;
+		schedule_timeout_interruptible(HZ);
+	} while (!kthread_should_stop());
+	return 0;
+}
+
+static long rcu_preempt_start(void)
+{
+	long retval = 0;
+
+	rcu_preeempt_task = kthread_run(rcu_torture_preempt, NULL,
+					"rcu_torture_preempt");
+	if (IS_ERR(rcu_preeempt_task)) {
+		VERBOSE_PRINTK_ERRSTRING("Failed to create preempter");
+		retval = PTR_ERR(rcu_preeempt_task);
+		rcu_preeempt_task = NULL;
+	}
+	return retval;
+}
+
+static void rcu_preempt_end(void)
+{
+	if (rcu_preeempt_task != NULL) {
+		VERBOSE_PRINTK_STRING("Stopping rcu_preempt task");
+		kthread_stop(rcu_preeempt_task);
+	}
+	rcu_preeempt_task = NULL;
+}
+
+static int rcu_preempt_stats(char *page) {
+	return sprintf(page,
+		       "Preemption stalls: %lu\n", rcu_torture_preempt_errors);
+}
+
 static struct rcu_torture_ops rcu_ops = {
-	.init = NULL,
-	.cleanup = NULL,
 	.readlock = rcu_torture_read_lock,
 	.readdelay = rcu_read_delay,
 	.readunlock = rcu_torture_read_unlock,
 	.completed = rcu_torture_completed,
 	.deferredfree = rcu_torture_deferred_free,
 	.sync = synchronize_rcu,
-	.stats = NULL,
+	.preemptstart = rcu_preempt_start,
+	.preemptend = rcu_preempt_end,
+	.stats = rcu_preempt_stats,
 	.name = "rcu"
 };
 
@@ -299,14 +362,12 @@ static void rcu_sync_torture_init(void)
 
 static struct rcu_torture_ops rcu_sync_ops = {
 	.init = rcu_sync_torture_init,
-	.cleanup = NULL,
 	.readlock = rcu_torture_read_lock,
 	.readdelay = rcu_read_delay,
 	.readunlock = rcu_torture_read_unlock,
 	.completed = rcu_torture_completed,
 	.deferredfree = rcu_sync_torture_deferred_free,
 	.sync = synchronize_rcu,
-	.stats = NULL,
 	.name = "rcu_sync"
 };
 
@@ -362,28 +423,23 @@ static void rcu_bh_torture_synchronize(v
 }
 
 static struct rcu_torture_ops rcu_bh_ops = {
-	.init = NULL,
-	.cleanup = NULL,
 	.readlock = rcu_bh_torture_read_lock,
 	.readdelay = rcu_read_delay,  /* just reuse rcu's version. */
 	.readunlock = rcu_bh_torture_read_unlock,
 	.completed = rcu_bh_torture_completed,
 	.deferredfree = rcu_bh_torture_deferred_free,
 	.sync = rcu_bh_torture_synchronize,
-	.stats = NULL,
 	.name = "rcu_bh"
 };
 
 static struct rcu_torture_ops rcu_bh_sync_ops = {
 	.init = rcu_sync_torture_init,
-	.cleanup = NULL,
 	.readlock = rcu_bh_torture_read_lock,
 	.readdelay = rcu_read_delay,  /* just reuse rcu's version. */
 	.readunlock = rcu_bh_torture_read_unlock,
 	.completed = rcu_bh_torture_completed,
 	.deferredfree = rcu_sync_torture_deferred_free,
 	.sync = rcu_bh_torture_synchronize,
-	.stats = NULL,
 	.name = "rcu_bh_sync"
 };
 
@@ -495,14 +551,12 @@ static void sched_torture_synchronize(vo
 
 static struct rcu_torture_ops sched_ops = {
 	.init = rcu_sync_torture_init,
-	.cleanup = NULL,
 	.readlock = sched_torture_read_lock,
 	.readdelay = rcu_read_delay,  /* just reuse rcu's version. */
 	.readunlock = sched_torture_read_unlock,
 	.completed = sched_torture_completed,
 	.deferredfree = rcu_sync_torture_deferred_free,
 	.sync = sched_torture_synchronize,
-	.stats = NULL,
 	.name = "sched"
 };
 
@@ -801,9 +855,10 @@ rcu_torture_print_module_parms(char *tag
 	printk(KERN_ALERT "%s" TORTURE_FLAG
 		"--- %s: nreaders=%d nfakewriters=%d "
 		"stat_interval=%d verbose=%d test_no_idle_hz=%d "
-		"shuffle_interval = %d\n",
+		"shuffle_interval=%d preempt_torture=%d\n",
 		torture_type, tag, nrealreaders, nfakewriters,
-		stat_interval, verbose, test_no_idle_hz, shuffle_interval);
+		stat_interval, verbose, test_no_idle_hz, shuffle_interval,
+		preempt_torture);
 }
 
 static void
@@ -856,6 +911,8 @@ rcu_torture_cleanup(void)
 		kthread_stop(stats_task);
 	}
 	stats_task = NULL;
+	if (preempt_torture && (cur_ops->preemptend != NULL))
+		cur_ops->preemptend();
 
 	/* Wait for all RCU callbacks to fire.  */
 	rcu_barrier();
@@ -997,6 +1054,11 @@ rcu_torture_init(void)
 			goto unwind;
 		}
 	}
+	if (preempt_torture && (cur_ops->preemptstart != NULL)) {
+		firsterr = cur_ops->preemptstart();
+		if (firsterr != 0)
+			goto unwind;
+	}
 	return 0;
 
 unwind:

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC PATCH -rt 2/2] RCU priority boosting additions to rcutorture
  2007-02-01  1:26 ` [RFC PATCH -rt 2/2] RCU priority boosting additions to rcutorture Paul E. McKenney
@ 2007-02-01  2:12   ` Nigel Cunningham
  2007-02-01  2:31     ` Paul E. McKenney
  0 siblings, 1 reply; 18+ messages in thread
From: Nigel Cunningham @ 2007-02-01  2:12 UTC (permalink / raw)
  To: paulmck
  Cc: linux-kernel, mingo, tglx, dipankar, tytso, dvhltc, oleg,
	twoerner.k, josh, billh, nielsen.esben, corbet

Hi Paul.

On Wed, 2007-01-31 at 17:26 -0800, Paul E. McKenney wrote:
> This patch adds an optional preemption kernel thread to the rcutorture
> tests.  This thread sets itself to a low RT priority and chews up CPU
> in 10-second bursts, verifying that grace periods progress during this
> 10-second interval.  Passes RCU torture testing on a 4-CPU (a pair of
> 2-CPU dies) 64-bit Xeon system.

[...]

> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> +static int rcu_torture_preempt(void *arg)
> +{
> +	int completedstart;
> +	int err;
> +	time_t gcstart;
> +	struct sched_param sp;
> +
> +	sp.sched_priority = MAX_RT_PRIO - 1;
> +	err = sched_setscheduler(current, SCHED_RR, &sp);
> +	if (err != 0)
> +		printk(KERN_ALERT "rcu_torture_preempt() priority err: %d\n",
> +		       err);
> +	current->flags |= PF_NOFREEZE;
> +
> +	do {
> +		completedstart = rcu_torture_completed();
> +		gcstart = xtime.tv_sec;
> +		while ((xtime.tv_sec - gcstart < 10) &&
> +		       (rcu_torture_completed() == completedstart))
> +			cond_resched();
> +		if (rcu_torture_completed() == completedstart)
> +			rcu_torture_preempt_errors++;
> +		schedule_timeout_interruptible(HZ);
> +	} while (!kthread_should_stop());
> +	return 0;
> +}

Does it need to be NOFREEZE? I would think that it should be frozen
during a suspend/hibernate.

Regards,

Nigel


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC PATCH -rt 2/2] RCU priority boosting additions to rcutorture
  2007-02-01  2:12   ` Nigel Cunningham
@ 2007-02-01  2:31     ` Paul E. McKenney
  2007-02-01  2:42       ` Nigel Cunningham
  0 siblings, 1 reply; 18+ messages in thread
From: Paul E. McKenney @ 2007-02-01  2:31 UTC (permalink / raw)
  To: Nigel Cunningham
  Cc: linux-kernel, mingo, tglx, dipankar, tytso, dvhltc, oleg,
	twoerner.k, josh, billh, nielsen.esben, corbet

On Thu, Feb 01, 2007 at 01:12:16PM +1100, Nigel Cunningham wrote:
> Hi Paul.
> 
> On Wed, 2007-01-31 at 17:26 -0800, Paul E. McKenney wrote:
> > This patch adds an optional preemption kernel thread to the rcutorture
> > tests.  This thread sets itself to a low RT priority and chews up CPU
> > in 10-second bursts, verifying that grace periods progress during this
> > 10-second interval.  Passes RCU torture testing on a 4-CPU (a pair of
> > 2-CPU dies) 64-bit Xeon system.
> 
> [...]
> 
> > Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> > +static int rcu_torture_preempt(void *arg)
> > +{
> > +	int completedstart;
> > +	int err;
> > +	time_t gcstart;
> > +	struct sched_param sp;
> > +
> > +	sp.sched_priority = MAX_RT_PRIO - 1;
> > +	err = sched_setscheduler(current, SCHED_RR, &sp);
> > +	if (err != 0)
> > +		printk(KERN_ALERT "rcu_torture_preempt() priority err: %d\n",
> > +		       err);
> > +	current->flags |= PF_NOFREEZE;
> > +
> > +	do {
> > +		completedstart = rcu_torture_completed();
> > +		gcstart = xtime.tv_sec;
> > +		while ((xtime.tv_sec - gcstart < 10) &&
> > +		       (rcu_torture_completed() == completedstart))
> > +			cond_resched();
> > +		if (rcu_torture_completed() == completedstart)
> > +			rcu_torture_preempt_errors++;
> > +		schedule_timeout_interruptible(HZ);
> > +	} while (!kthread_should_stop());
> > +	return 0;
> > +}
> 
> Does it need to be NOFREEZE? I would think that it should be frozen
> during a suspend/hibernate.

Good to hear from you, Nigel!

Should indeed be OK to freeze during suspend/hibernate.  Will my
schedule_timeout_interruptible() be sufficient to allow the freeze
to happen, or do I need to add an explicit try_to_freeze()?

Ah, and I probably need to use the same trick that mtd_blktrans_thread()
does to avoid having all my sleeps killed of by an errant signal:

	spin_lock_irq(&current->sighand->siglock);
	sigfillset(&current->blocked);
	recalc_sigpending();
	spin_unlock_irq(&current->sighand->siglock);

Or is such paranoia unnecessary?

							Thanx, Paul

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC PATCH -rt 2/2] RCU priority boosting additions to rcutorture
  2007-02-01  2:31     ` Paul E. McKenney
@ 2007-02-01  2:42       ` Nigel Cunningham
  2007-02-01  5:46         ` Paul E. McKenney
  0 siblings, 1 reply; 18+ messages in thread
From: Nigel Cunningham @ 2007-02-01  2:42 UTC (permalink / raw)
  To: paulmck
  Cc: linux-kernel, mingo, tglx, dipankar, tytso, dvhltc, oleg,
	twoerner.k, josh, billh, nielsen.esben, corbet

Hi Paul.

On Wed, 2007-01-31 at 18:31 -0800, Paul E. McKenney wrote:
> Good to hear from you, Nigel!

Thanks :)

> Should indeed be OK to freeze during suspend/hibernate.  Will my
> schedule_timeout_interruptible() be sufficient to allow the freeze
> to happen, or do I need to add an explicit try_to_freeze()?

You need a try_to_freeze() - the process has to enter the refrigerator()
function to be counted as frozen.

> Ah, and I probably need to use the same trick that mtd_blktrans_thread()
> does to avoid having all my sleeps killed of by an errant signal:
> 
> 	spin_lock_irq(&current->sighand->siglock);
> 	sigfillset(&current->blocked);
> 	recalc_sigpending();
> 	spin_unlock_irq(&current->sighand->siglock);
> 
> Or is such paranoia unnecessary?

Yeah. try_to_freeze() is a function now, so you can do something if
(try_to_freeze()) goto sleep_again if you so desire.

Regards,

Nigel


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC PATCH -rt 2/2] RCU priority boosting additions to rcutorture
  2007-02-01  2:42       ` Nigel Cunningham
@ 2007-02-01  5:46         ` Paul E. McKenney
  2007-02-01 22:13           ` Nigel Cunningham
  0 siblings, 1 reply; 18+ messages in thread
From: Paul E. McKenney @ 2007-02-01  5:46 UTC (permalink / raw)
  To: Nigel Cunningham
  Cc: linux-kernel, mingo, tglx, dipankar, tytso, dvhltc, oleg,
	twoerner.k, josh, billh, nielsen.esben, corbet

On Thu, Feb 01, 2007 at 01:42:42PM +1100, Nigel Cunningham wrote:
> Hi Paul.
> 
> On Wed, 2007-01-31 at 18:31 -0800, Paul E. McKenney wrote:
> > Good to hear from you, Nigel!
> 
> Thanks :)
> 
> > Should indeed be OK to freeze during suspend/hibernate.  Will my
> > schedule_timeout_interruptible() be sufficient to allow the freeze
> > to happen, or do I need to add an explicit try_to_freeze()?
> 
> You need a try_to_freeze() - the process has to enter the refrigerator()
> function to be counted as frozen.

Even though it explicitly sleeps each time through the loop?  Hmmm...

> > Ah, and I probably need to use the same trick that mtd_blktrans_thread()
> > does to avoid having all my sleeps killed of by an errant signal:
> > 
> > 	spin_lock_irq(&current->sighand->siglock);
> > 	sigfillset(&current->blocked);
> > 	recalc_sigpending();
> > 	spin_unlock_irq(&current->sighand->siglock);
> > 
> > Or is such paranoia unnecessary?
> 
> Yeah. try_to_freeze() is a function now, so you can do something if
> (try_to_freeze()) goto sleep_again if you so desire.

If try_to_freeze() succeeds, do I need to clean up signal state?
It didn't look like it to me, but thought I should ask the expert!

My guess is that I can simply do:

	try_to_freeze();
	schedule_timeout_interruptible(HZ);

The schedule_timeout_interruptible() might return early, but if I
don't care about getting a shorter than expected sleep, I am OK,
right?  Besides, one would have to get a couple of very closely
spaced freeze_processes() calls for this to happen.  ;-)

						Thanx, Paul

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC PATCH -rt 1/2] RCU priority boosting that survives vicious testing
  2007-02-01  1:24 ` [RFC PATCH -rt 1/2] " Paul E. McKenney
@ 2007-02-01  8:23   ` Ingo Molnar
  2007-02-02 15:56     ` Paul E. McKenney
  0 siblings, 1 reply; 18+ messages in thread
From: Ingo Molnar @ 2007-02-01  8:23 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: linux-kernel, tglx, dipankar, tytso, dvhltc, oleg, twoerner.k,
	josh, billh, nielsen.esben, corbet


* Paul E. McKenney <paulmck@linux.vnet.ibm.com> wrote:

> Here is the RCU priority-boosting patch.  Pretty close to the 
> http://lkml.org/lkml/2007/1/24/295 version.  This patch prevents 
> preempted or blocked low-priority RCU readers from indefinitely 
> stalling RCU grace periods.

thanks - i've applied both patches to -rt and it's looking good so far! 
Find a small cosmetic fix below.

	Ingo

Index: linux/kernel/rcupreempt.c
===================================================================
--- linux.orig/kernel/rcupreempt.c
+++ linux/kernel/rcupreempt.c
@@ -489,7 +489,7 @@ void init_rcu_boost_late(void)
 	rcu_boost_task = kthread_run(rcu_booster, NULL, "RCU Prio Booster");
 	if (IS_ERR(rcu_boost_task)) {
 		printk(KERN_ALERT
-		       "Unable to create RCU Priority Booster, errno %d\n",
+		       "Unable to create RCU Priority Booster, errno %ld\n",
 		       -PTR_ERR(rcu_boost_task));
 
 		/*

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC PATCH -rt 2/2] RCU priority boosting additions to rcutorture
  2007-02-01  5:46         ` Paul E. McKenney
@ 2007-02-01 22:13           ` Nigel Cunningham
  0 siblings, 0 replies; 18+ messages in thread
From: Nigel Cunningham @ 2007-02-01 22:13 UTC (permalink / raw)
  To: paulmck
  Cc: linux-kernel, mingo, tglx, dipankar, tytso, dvhltc, oleg,
	twoerner.k, josh, billh, nielsen.esben, corbet

Hi.

On Wed, 2007-01-31 at 21:46 -0800, Paul E. McKenney wrote:
> On Thu, Feb 01, 2007 at 01:42:42PM +1100, Nigel Cunningham wrote:
> > Hi Paul.
> > 
> > On Wed, 2007-01-31 at 18:31 -0800, Paul E. McKenney wrote:
> > > Good to hear from you, Nigel!
> > 
> > Thanks :)
> > 
> > > Should indeed be OK to freeze during suspend/hibernate.  Will my
> > > schedule_timeout_interruptible() be sufficient to allow the freeze
> > > to happen, or do I need to add an explicit try_to_freeze()?
> > 
> > You need a try_to_freeze() - the process has to enter the refrigerator()
> > function to be counted as frozen.
> 
> Even though it explicitly sleeps each time through the loop?  Hmmm...

Yes. Sleeping isn't enough - we have to be sure it won't wake up and
perform work at inappropriate times (we don't know what process X might
do if it did wake; the result could be an inconsistent image). It
therefore needs to enter the refrigerator function so that the freezer
code can ensure it remains inactive until the suspend-to-whatever cycle
is complete.

> > > Ah, and I probably need to use the same trick that mtd_blktrans_thread()
> > > does to avoid having all my sleeps killed of by an errant signal:
> > > 
> > > 	spin_lock_irq(&current->sighand->siglock);
> > > 	sigfillset(&current->blocked);
> > > 	recalc_sigpending();
> > > 	spin_unlock_irq(&current->sighand->siglock);
> > > 
> > > Or is such paranoia unnecessary?
> > 
> > Yeah. try_to_freeze() is a function now, so you can do something if
> > (try_to_freeze()) goto sleep_again if you so desire.
> 
> If try_to_freeze() succeeds, do I need to clean up signal state?
> It didn't look like it to me, but thought I should ask the expert!

No, you don't need to. We have recalc_sigpending() in the refrigerator
function.

> My guess is that I can simply do:
> 
> 	try_to_freeze();
> 	schedule_timeout_interruptible(HZ);
> 
> The schedule_timeout_interruptible() might return early, but if I
> don't care about getting a shorter than expected sleep, I am OK,
> right?  Besides, one would have to get a couple of very closely
> spaced freeze_processes() calls for this to happen.  ;-)

Yes, that looks good to me.

Regards,

Nigel


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC PATCH -rt 1/2] RCU priority boosting that survives vicious testing
  2007-02-01  8:23   ` Ingo Molnar
@ 2007-02-02 15:56     ` Paul E. McKenney
  0 siblings, 0 replies; 18+ messages in thread
From: Paul E. McKenney @ 2007-02-02 15:56 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: linux-kernel, tglx, dipankar, tytso, dvhltc, oleg, twoerner.k,
	josh, billh, nielsen.esben, corbet

On Thu, Feb 01, 2007 at 09:23:33AM +0100, Ingo Molnar wrote:
> 
> * Paul E. McKenney <paulmck@linux.vnet.ibm.com> wrote:
> 
> > Here is the RCU priority-boosting patch.  Pretty close to the 
> > http://lkml.org/lkml/2007/1/24/295 version.  This patch prevents 
> > preempted or blocked low-priority RCU readers from indefinitely 
> > stalling RCU grace periods.
> 
> thanks - i've applied both patches to -rt and it's looking good so far! 
> Find a small cosmetic fix below.

Sorry for the %d noise!  :-/

I will send another patch addressing Nigel's freeze_processes()
review comments.

						Thanx, Paul

> 	Ingo
> 
> Index: linux/kernel/rcupreempt.c
> ===================================================================
> --- linux.orig/kernel/rcupreempt.c
> +++ linux/kernel/rcupreempt.c
> @@ -489,7 +489,7 @@ void init_rcu_boost_late(void)
>  	rcu_boost_task = kthread_run(rcu_booster, NULL, "RCU Prio Booster");
>  	if (IS_ERR(rcu_boost_task)) {
>  		printk(KERN_ALERT
> -		       "Unable to create RCU Priority Booster, errno %d\n",
> +		       "Unable to create RCU Priority Booster, errno %ld\n",
>  		       -PTR_ERR(rcu_boost_task));
> 
>  		/*

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC PATCH -rt 2/2] RCU priority boosting additions to rcutorture
  2007-01-29  2:11           ` Paul E. McKenney
@ 2007-01-29  6:05             ` Josh Triplett
  0 siblings, 0 replies; 18+ messages in thread
From: Josh Triplett @ 2007-01-29  6:05 UTC (permalink / raw)
  To: paulmck
  Cc: linux-kernel, mingo, tglx, dipankar, tytso, dvhltc, oleg,
	twoerner.k, billh, nielsen.esben, corbet

Paul E. McKenney wrote:
> On Thu, Jan 25, 2007 at 11:06:35AM -0800, Josh Triplett wrote:
>> Paul E. McKenney wrote:
>>> On Thu, Jan 25, 2007 at 12:47:04AM -0800, Josh Triplett wrote:
>>>> One major item: this new test feature really needs a new module parameter to
>>>> enable or disable it.
>>> CONFIG_PREEMPT_RCU_BOOST is the parameter -- if not set, then no test.
>>> This parameter is provided by the accompanying RCU-boost patch.
>> It seems useful for rcutorture to use or not use the preempting thread
>> independently of CONFIG_PREEMPT_RCU_BOOST.  That would bring you from two
>> cases to four, and the two new cases both make sense:
>>
>> * CONFIG_PREEMPT_RCU_BOOST=n, but run rcutorture with the preempting thread.
>>   This configuration allows you to demonstrate the need for
>>   CONFIG_PREEMPT_RCU_BOOST, by showing what happens when you need it and don't
>>   have it.
>>
>> * CONFIG_PREEMPT_RCU_BOOST=y, but run rcutorture without the preempting
>>   thread.  This configuration allows you to test with rcutorture while running
>>   a *real* real-time workload rather than the simple preempting thread, or
>>   just test basic RCU functionality.
>>
>> A simple boolean module_param would work here.
> 
> OK, am finally with you.  See below for updated patch.

Looks good to me.

> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

Signed-off-by: Josh Triplett <josh@kernel.org>

- Josh Triplett


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC PATCH -rt 2/2] RCU priority boosting additions to rcutorture
  2007-01-25 19:06         ` Josh Triplett
  2007-01-26  1:52           ` Paul E. McKenney
@ 2007-01-29  2:11           ` Paul E. McKenney
  2007-01-29  6:05             ` Josh Triplett
  1 sibling, 1 reply; 18+ messages in thread
From: Paul E. McKenney @ 2007-01-29  2:11 UTC (permalink / raw)
  To: Josh Triplett
  Cc: linux-kernel, mingo, tglx, dipankar, tytso, dvhltc, oleg,
	twoerner.k, billh, nielsen.esben, corbet

On Thu, Jan 25, 2007 at 11:06:35AM -0800, Josh Triplett wrote:
> Paul E. McKenney wrote:
> > On Thu, Jan 25, 2007 at 12:47:04AM -0800, Josh Triplett wrote:
> >> One major item: this new test feature really needs a new module parameter to
> >> enable or disable it.
> > 
> > CONFIG_PREEMPT_RCU_BOOST is the parameter -- if not set, then no test.
> > This parameter is provided by the accompanying RCU-boost patch.
> 
> It seems useful for rcutorture to use or not use the preempting thread
> independently of CONFIG_PREEMPT_RCU_BOOST.  That would bring you from two
> cases to four, and the two new cases both make sense:
> 
> * CONFIG_PREEMPT_RCU_BOOST=n, but run rcutorture with the preempting thread.
>   This configuration allows you to demonstrate the need for
>   CONFIG_PREEMPT_RCU_BOOST, by showing what happens when you need it and don't
>   have it.
> 
> * CONFIG_PREEMPT_RCU_BOOST=y, but run rcutorture without the preempting
>   thread.  This configuration allows you to test with rcutorture while running
>   a *real* real-time workload rather than the simple preempting thread, or
>   just test basic RCU functionality.
> 
> A simple boolean module_param would work here.

OK, am finally with you.  See below for updated patch.

> At some point, we may want to add the ability to run multiple preempting
> threads, but that doesn't need to happen for this patch.

Or to make it bounce around onto multiple CPUs, I suppose.  Leaving
these out for the moment.  ;-)

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---

 rcutorture.c |   90 +++++++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 76 insertions(+), 14 deletions(-)

diff -urpNa -X dontdiff linux-2.6.20-rc4-rt1/kernel/rcutorture.c linux-2.6.20-rc4-rt1-rcubtorture/kernel/rcutorture.c
--- linux-2.6.20-rc4-rt1/kernel/rcutorture.c	2007-01-09 10:59:54.000000000 -0800
+++ linux-2.6.20-rc4-rt1-rcubtorture/kernel/rcutorture.c	2007-01-26 22:16:13.000000000 -0800
@@ -58,6 +58,7 @@ static int stat_interval;	/* Interval be
 static int verbose;		/* Print more debug info. */
 static int test_no_idle_hz;	/* Test RCU's support for tickless idle CPUs. */
 static int shuffle_interval = 5; /* Interval between shuffles (in sec)*/
+static int preempt_torture = 0;	/* Realtime task preempts torture readers. */
 static char *torture_type = "rcu"; /* What RCU implementation to torture. */
 
 module_param(nreaders, int, 0);
@@ -72,6 +73,8 @@ module_param(test_no_idle_hz, bool, 0);
 MODULE_PARM_DESC(test_no_idle_hz, "Test support for tickless idle CPUs");
 module_param(shuffle_interval, int, 0);
 MODULE_PARM_DESC(shuffle_interval, "Number of seconds between shuffles");
+module_param(preempt_torture, bool, 0);
+MODULE_PARM_DESC(preempt_torture, "Enable realtime preemption torture");
 module_param(torture_type, charp, 0);
 MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, srcu)");
 
@@ -194,6 +197,8 @@ struct rcu_torture_ops {
 	int (*completed)(void);
 	void (*deferredfree)(struct rcu_torture *p);
 	void (*sync)(void);
+	long (*preemptstart)(void);
+	void (*preemptend)(void);
 	int (*stats)(char *page);
 	char *name;
 };
@@ -258,16 +263,74 @@ static void rcu_torture_deferred_free(st
 	call_rcu(&p->rtort_rcu, rcu_torture_cb);
 }
 
+static struct task_struct *rcu_preeempt_task;
+static unsigned long rcu_torture_preempt_errors = 0;
+
+static int rcu_torture_preempt(void *arg)
+{
+	int completedstart;
+	int err;
+	time_t gcstart;
+	struct sched_param sp;
+
+	sp.sched_priority = MAX_RT_PRIO - 1;
+	err = sched_setscheduler(current, SCHED_RR, &sp);
+	if (err != 0)
+		printk(KERN_ALERT "rcu_torture_preempt() priority err: %d\n",
+		       err);
+	current->flags |= PF_NOFREEZE;
+
+	do {
+		completedstart = rcu_torture_completed();
+		gcstart = xtime.tv_sec;
+		while ((xtime.tv_sec - gcstart < 10) &&
+		       (rcu_torture_completed() == completedstart))
+			cond_resched();
+		if (rcu_torture_completed() == completedstart)
+			rcu_torture_preempt_errors++;
+		schedule_timeout_interruptible(HZ);
+	} while (!kthread_should_stop());
+	return NULL;
+}
+
+static long rcu_preempt_start(void)
+{
+	long retval = 0;
+
+	rcu_preeempt_task = kthread_run(rcu_torture_preempt, NULL,
+					"rcu_torture_preempt");
+	if (IS_ERR(rcu_preeempt_task)) {
+		VERBOSE_PRINTK_ERRSTRING("Failed to create preempter");
+		retval = PTR_ERR(rcu_preeempt_task);
+		rcu_preeempt_task = NULL;
+	}
+	return retval;
+}
+
+static void rcu_preempt_end(void)
+{
+	if (rcu_preeempt_task != NULL) {
+		VERBOSE_PRINTK_STRING("Stopping rcu_preempt task");
+		kthread_stop(rcu_preeempt_task);
+	}
+	rcu_preeempt_task = NULL;
+}
+
+static int rcu_preempt_stats(char *page) {
+	return sprintf(page,
+		       "Preemption stalls: %lu\n", rcu_torture_preempt_errors);
+}
+
 static struct rcu_torture_ops rcu_ops = {
-	.init = NULL,
-	.cleanup = NULL,
 	.readlock = rcu_torture_read_lock,
 	.readdelay = rcu_read_delay,
 	.readunlock = rcu_torture_read_unlock,
 	.completed = rcu_torture_completed,
 	.deferredfree = rcu_torture_deferred_free,
 	.sync = synchronize_rcu,
-	.stats = NULL,
+	.preemptstart = rcu_preempt_start,
+	.preemptend = rcu_preempt_end,
+	.stats = rcu_preempt_stats,
 	.name = "rcu"
 };
 
@@ -299,14 +362,12 @@ static void rcu_sync_torture_init(void)
 
 static struct rcu_torture_ops rcu_sync_ops = {
 	.init = rcu_sync_torture_init,
-	.cleanup = NULL,
 	.readlock = rcu_torture_read_lock,
 	.readdelay = rcu_read_delay,
 	.readunlock = rcu_torture_read_unlock,
 	.completed = rcu_torture_completed,
 	.deferredfree = rcu_sync_torture_deferred_free,
 	.sync = synchronize_rcu,
-	.stats = NULL,
 	.name = "rcu_sync"
 };
 
@@ -362,28 +423,23 @@ static void rcu_bh_torture_synchronize(v
 }
 
 static struct rcu_torture_ops rcu_bh_ops = {
-	.init = NULL,
-	.cleanup = NULL,
 	.readlock = rcu_bh_torture_read_lock,
 	.readdelay = rcu_read_delay,  /* just reuse rcu's version. */
 	.readunlock = rcu_bh_torture_read_unlock,
 	.completed = rcu_bh_torture_completed,
 	.deferredfree = rcu_bh_torture_deferred_free,
 	.sync = rcu_bh_torture_synchronize,
-	.stats = NULL,
 	.name = "rcu_bh"
 };
 
 static struct rcu_torture_ops rcu_bh_sync_ops = {
 	.init = rcu_sync_torture_init,
-	.cleanup = NULL,
 	.readlock = rcu_bh_torture_read_lock,
 	.readdelay = rcu_read_delay,  /* just reuse rcu's version. */
 	.readunlock = rcu_bh_torture_read_unlock,
 	.completed = rcu_bh_torture_completed,
 	.deferredfree = rcu_sync_torture_deferred_free,
 	.sync = rcu_bh_torture_synchronize,
-	.stats = NULL,
 	.name = "rcu_bh_sync"
 };
 
@@ -495,14 +551,12 @@ static void sched_torture_synchronize(vo
 
 static struct rcu_torture_ops sched_ops = {
 	.init = rcu_sync_torture_init,
-	.cleanup = NULL,
 	.readlock = sched_torture_read_lock,
 	.readdelay = rcu_read_delay,  /* just reuse rcu's version. */
 	.readunlock = sched_torture_read_unlock,
 	.completed = sched_torture_completed,
 	.deferredfree = rcu_sync_torture_deferred_free,
 	.sync = sched_torture_synchronize,
-	.stats = NULL,
 	.name = "sched"
 };
 
@@ -801,9 +855,10 @@ rcu_torture_print_module_parms(char *tag
 	printk(KERN_ALERT "%s" TORTURE_FLAG
 		"--- %s: nreaders=%d nfakewriters=%d "
 		"stat_interval=%d verbose=%d test_no_idle_hz=%d "
-		"shuffle_interval = %d\n",
+		"shuffle_interval=%d preempt_torture=%d\n",
 		torture_type, tag, nrealreaders, nfakewriters,
-		stat_interval, verbose, test_no_idle_hz, shuffle_interval);
+		stat_interval, verbose, test_no_idle_hz, shuffle_interval,
+		preempt_torture);
 }
 
 static void
@@ -856,6 +911,8 @@ rcu_torture_cleanup(void)
 		kthread_stop(stats_task);
 	}
 	stats_task = NULL;
+	if (preempt_torture && (cur_ops->preemptend != NULL))
+		cur_ops->preemptend();
 
 	/* Wait for all RCU callbacks to fire.  */
 	rcu_barrier();
@@ -997,6 +1054,11 @@ rcu_torture_init(void)
 			goto unwind;
 		}
 	}
+	if (preempt_torture && (cur_ops->preemptstart != NULL)) {
+		firsterr = cur_ops->preemptstart();
+		if (firsterr != 0)
+			goto unwind;
+	}
 	return 0;
 
 unwind:

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC PATCH -rt 2/2] RCU priority boosting additions to rcutorture
  2007-01-26  1:52           ` Paul E. McKenney
@ 2007-01-26  6:29             ` Josh Triplett
  0 siblings, 0 replies; 18+ messages in thread
From: Josh Triplett @ 2007-01-26  6:29 UTC (permalink / raw)
  To: paulmck
  Cc: linux-kernel, mingo, tglx, dipankar, tytso, dvhltc, oleg,
	twoerner.k, billh, nielsen.esben, corbet

Paul E. McKenney wrote:
> On Thu, Jan 25, 2007 at 11:06:35AM -0800, Josh Triplett wrote:
>> Paul E. McKenney wrote:
>>> On Thu, Jan 25, 2007 at 12:47:04AM -0800, Josh Triplett wrote:
>>>> One major item: this new test feature really needs a new module parameter to
>>>> enable or disable it.
>>> CONFIG_PREEMPT_RCU_BOOST is the parameter -- if not set, then no test.
>>> This parameter is provided by the accompanying RCU-boost patch.
>> It seems useful for rcutorture to use or not use the preempting thread
>> independently of CONFIG_PREEMPT_RCU_BOOST.  That would bring you from two
>> cases to four, and the two new cases both make sense:
>>
>> * CONFIG_PREEMPT_RCU_BOOST=n, but run rcutorture with the preempting thread.
>>   This configuration allows you to demonstrate the need for
>>   CONFIG_PREEMPT_RCU_BOOST, by showing what happens when you need it and don't
>>   have it.
>>
>> * CONFIG_PREEMPT_RCU_BOOST=y, but run rcutorture without the preempting
>>   thread.  This configuration allows you to test with rcutorture while running
>>   a *real* real-time workload rather than the simple preempting thread, or
>>   just test basic RCU functionality.
>>
>> A simple boolean module_param would work here.
> 
> OK, sold!  I will add this.  Perhaps CONFIG_PREEMPT_RCU_TORTURE.

Why a config option?  Why not a module parameter, settable at module load time?

static int enable_preempter;
...
module_param(enable_preempter, bool, 0);
MODULE_PARM_DESC(enable_preempter, "Enable preempting thread, to test RCU priority boosting");
...
rcu_torture_cleanup(void)
{
...
	if (enable_preempter && cur_ops->preemptend)
		cur_ops->preemptend();
...
	if (enable_preempter && cur_ops->preemptstart)
		cur_ops->preemptstart();

Then just remove the #ifdef CONFIG_PREEMPT_RCU_BOOST from rcutorture entirely,
and always supply the preempter functions.  rcutorture then doesn't depend on
CONFIG_PREEMPT_RCU_BOOST at all, and the module parameter determines whether
to run the preempter thread.

>>>> Paul E. McKenney wrote:
>>>>> diff -urpNa -X dontdiff linux-2.6.20-rc4-rt1/kernel/rcutorture.c linux-2.6.20-rc4-rt1-rcubtorture/kernel/rcutorture.c
>>>>> --- linux-2.6.20-rc4-rt1/kernel/rcutorture.c	2007-01-09 10:59:54.000000000 -0800
>>>>> +++ linux-2.6.20-rc4-rt1-rcubtorture/kernel/rcutorture.c	2007-01-23 11:27:49.000000000 -0800
>>>>> +static int rcu_torture_preempt(void *arg)
>>>>> +{
>>>>> +	int completedstart;
>>>>> +	time_t gcstart;
>>>>> +	struct sched_param sp;
>>>>> +
>>>>> +	sp.sched_priority = MAX_RT_PRIO - 1;
>>>>> +	sched_setscheduler(current, SCHED_RR, &sp);
>>>>> +	current->flags |= PF_NOFREEZE;
>>>>> +
>>>>> +	do {
>>>>> +		completedstart = rcu_torture_completed();
>>>>> +		gcstart = xtime.tv_sec;
>>>>> +		while ((xtime.tv_sec - gcstart < 10) &&
>>>>> +		       (rcu_torture_completed() == completedstart))
>>>>> +			cond_resched();
>>>>> +		if (rcu_torture_completed() == completedstart)
>>>>> +			rcu_torture_preempt_errors++;
>>>>> +		schedule_timeout_interruptible(shuffle_interval * HZ);
>>>> Why call schedule_timeout_interruptible here without actually handling
>>>> interruptions?  So that you can send it a signal to cause the shuffle early?
>>> It allows you to kill the process in order to get the module unload to
>>> happen more quickly in case someone specified an overly long interval.
>> I didn't actually know that you could kill a kthread from userspace. :)
>>
>> That rationale makes sense.
> 
> It won't actually die, but if I understand correctly (a big "if") the
> signal would cause schedule_timeout_interruptible() to return, allowing
> the kthread_should_stop() check to happen.

Ah, that makes much more sense; thanks.

- Josh Triplett


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC PATCH -rt 2/2] RCU priority boosting additions to rcutorture
  2007-01-25 19:06         ` Josh Triplett
@ 2007-01-26  1:52           ` Paul E. McKenney
  2007-01-26  6:29             ` Josh Triplett
  2007-01-29  2:11           ` Paul E. McKenney
  1 sibling, 1 reply; 18+ messages in thread
From: Paul E. McKenney @ 2007-01-26  1:52 UTC (permalink / raw)
  To: Josh Triplett
  Cc: linux-kernel, mingo, tglx, dipankar, tytso, dvhltc, oleg,
	twoerner.k, billh, nielsen.esben, corbet

On Thu, Jan 25, 2007 at 11:06:35AM -0800, Josh Triplett wrote:
> Paul E. McKenney wrote:
> > On Thu, Jan 25, 2007 at 12:47:04AM -0800, Josh Triplett wrote:
> >> One major item: this new test feature really needs a new module parameter to
> >> enable or disable it.
> > 
> > CONFIG_PREEMPT_RCU_BOOST is the parameter -- if not set, then no test.
> > This parameter is provided by the accompanying RCU-boost patch.
> 
> It seems useful for rcutorture to use or not use the preempting thread
> independently of CONFIG_PREEMPT_RCU_BOOST.  That would bring you from two
> cases to four, and the two new cases both make sense:
> 
> * CONFIG_PREEMPT_RCU_BOOST=n, but run rcutorture with the preempting thread.
>   This configuration allows you to demonstrate the need for
>   CONFIG_PREEMPT_RCU_BOOST, by showing what happens when you need it and don't
>   have it.
> 
> * CONFIG_PREEMPT_RCU_BOOST=y, but run rcutorture without the preempting
>   thread.  This configuration allows you to test with rcutorture while running
>   a *real* real-time workload rather than the simple preempting thread, or
>   just test basic RCU functionality.
> 
> A simple boolean module_param would work here.

OK, sold!  I will add this.  Perhaps CONFIG_PREEMPT_RCU_TORTURE.

> At some point, we may want to add the ability to run multiple preempting
> threads, but that doesn't need to happen for this patch.

I considered that for this initial round, but you only need to preempt
a single RCU reader to force the RCU booster to do something.  ;-)

> >> Paul E. McKenney wrote:
> >>> diff -urpNa -X dontdiff linux-2.6.20-rc4-rt1/kernel/rcutorture.c linux-2.6.20-rc4-rt1-rcubtorture/kernel/rcutorture.c
> >>> --- linux-2.6.20-rc4-rt1/kernel/rcutorture.c	2007-01-09 10:59:54.000000000 -0800
> >>> +++ linux-2.6.20-rc4-rt1-rcubtorture/kernel/rcutorture.c	2007-01-23 11:27:49.000000000 -0800
> 
> >>> +static int rcu_torture_preempt(void *arg)
> >>> +{
> >>> +	int completedstart;
> >>> +	time_t gcstart;
> >>> +	struct sched_param sp;
> >>> +
> >>> +	sp.sched_priority = MAX_RT_PRIO - 1;
> >>> +	sched_setscheduler(current, SCHED_RR, &sp);
> >>> +	current->flags |= PF_NOFREEZE;
> >>> +
> >>> +	do {
> >>> +		completedstart = rcu_torture_completed();
> >>> +		gcstart = xtime.tv_sec;
> >>> +		while ((xtime.tv_sec - gcstart < 10) &&
> >>> +		       (rcu_torture_completed() == completedstart))
> >>> +			cond_resched();
> >>> +		if (rcu_torture_completed() == completedstart)
> >>> +			rcu_torture_preempt_errors++;
> >>> +		schedule_timeout_interruptible(shuffle_interval * HZ);
> >> Why call schedule_timeout_interruptible here without actually handling
> >> interruptions?  So that you can send it a signal to cause the shuffle early?
> > 
> > It allows you to kill the process in order to get the module unload to
> > happen more quickly in case someone specified an overly long interval.
> 
> I didn't actually know that you could kill a kthread from userspace. :)
> 
> That rationale makes sense.

It won't actually die, but if I understand correctly (a big "if") the
signal would cause schedule_timeout_interruptible() to return, allowing
the kthread_should_stop() check to happen.

> > But now that you mention this, a simple one-second sleep is probably
> > appropriate here.
> 
> OK.
> 
> >>> +	} while (!kthread_should_stop());
> >>> +	return NULL;
> >>> +}
> >>> +
> >>> +static void rcu_preempt_start(void)
> >>> +{
> >>> +	rcu_preeempt_task = kthread_run(rcu_torture_preempt, NULL,
> >>> +					"rcu_torture_preempt");
> >>> +	if (IS_ERR(rcu_preeempt_task)) {
> >>> +		VERBOSE_PRINTK_ERRSTRING("Failed to create preempter");
> >> This ought to include the errno value, PTR_ERR(rcu_preempt_task).
> > 
> > Good point -- what I should do is return this value so that
> > rcu_torture_init() can return it, failing the module-load process
> > and unwinding.
> 
> Even better, yes.
> 
> >>> +		rcu_preeempt_task = NULL;
> >>> +	}
> >>> +}
> >>> +
> >>> +static void rcu_preempt_end(void)
> >>> +{
> >>> +	if (rcu_preeempt_task != NULL) {
> >> if (rcu_preempt_task) would work just as well here.
> > 
> > True, but was being consistent with usage elsewhere in this file.
> 
> Fair enough; don't worry about it for this patch, then.  I'll deal with that
> particular style cleanup later, throughout rcutorture.

Sounds good to me!  ;-)

> >>>  static struct rcu_torture_ops rcu_ops = {
> >>>  	.init = NULL,
> >>>  	.cleanup = NULL,
> >>> @@ -267,7 +334,9 @@ static struct rcu_torture_ops rcu_ops = 
> >>>  	.completed = rcu_torture_completed,
> >>>  	.deferredfree = rcu_torture_deferred_free,
> >>>  	.sync = synchronize_rcu,
> >>> -	.stats = NULL,
> >>> +	.preemptstart = rcu_preempt_start,
> >>> +	.preemptend = rcu_preempt_end,
> >>> +	.stats = rcu_preempt_stats,
> >>>  	.name = "rcu"
> >>>  };
> >>>  
> >>> @@ -306,6 +375,8 @@ static struct rcu_torture_ops rcu_sync_o
> >>>  	.completed = rcu_torture_completed,
> >>>  	.deferredfree = rcu_sync_torture_deferred_free,
> >>>  	.sync = synchronize_rcu,
> >>> +	.preemptstart = NULL,
> >>> +	.preemptend = NULL,
> >>>  	.stats = NULL,
> >>>  	.name = "rcu_sync"
> >>>  };
> >> Much like other common structures such as struct file_operations, no need to
> >> explicitly specify members as NULL here; any member you don't specify will get
> >> a NULL value.  That avoids the need to update every use of this structure
> >> whenever you add a new member used by only some of them.
> > 
> > Untrusting, aren't I?  ;-) 
> 
> Heh.  I have that problem as well; I always hestitate to trust the compiler to
> initialize values.
> 
> > I removed all the "= NULL" entries.
> 
> Thanks.
> 
> >>> @@ -856,6 +935,8 @@ rcu_torture_cleanup(void)
> >>>  		kthread_stop(stats_task);
> >>>  	}
> >>>  	stats_task = NULL;
> >>> +	if (cur_ops->preemptend != NULL)
> >> if (cur_ops->preemptend) would work as well.
> > 
> > True, though again there is a lot of existing "!= NULL" in this file
> > and elsewhere.  Many thousands of them through the kernel.  ;-)
> 
> As before, don't worry about it for this patch then.
> 
> > I will run this through the mill and repost.

But with the new kernel parameter this time.  ;-)

							Thanx, Paul

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC PATCH -rt 2/2] RCU priority boosting additions to rcutorture
  2007-01-25 18:01       ` Paul E. McKenney
@ 2007-01-25 19:06         ` Josh Triplett
  2007-01-26  1:52           ` Paul E. McKenney
  2007-01-29  2:11           ` Paul E. McKenney
  0 siblings, 2 replies; 18+ messages in thread
From: Josh Triplett @ 2007-01-25 19:06 UTC (permalink / raw)
  To: paulmck
  Cc: linux-kernel, mingo, tglx, dipankar, tytso, dvhltc, oleg,
	twoerner.k, billh, nielsen.esben, corbet

Paul E. McKenney wrote:
> On Thu, Jan 25, 2007 at 12:47:04AM -0800, Josh Triplett wrote:
>> One major item: this new test feature really needs a new module parameter to
>> enable or disable it.
> 
> CONFIG_PREEMPT_RCU_BOOST is the parameter -- if not set, then no test.
> This parameter is provided by the accompanying RCU-boost patch.

It seems useful for rcutorture to use or not use the preempting thread
independently of CONFIG_PREEMPT_RCU_BOOST.  That would bring you from two
cases to four, and the two new cases both make sense:

* CONFIG_PREEMPT_RCU_BOOST=n, but run rcutorture with the preempting thread.
  This configuration allows you to demonstrate the need for
  CONFIG_PREEMPT_RCU_BOOST, by showing what happens when you need it and don't
  have it.

* CONFIG_PREEMPT_RCU_BOOST=y, but run rcutorture without the preempting
  thread.  This configuration allows you to test with rcutorture while running
  a *real* real-time workload rather than the simple preempting thread, or
  just test basic RCU functionality.

A simple boolean module_param would work here.

At some point, we may want to add the ability to run multiple preempting
threads, but that doesn't need to happen for this patch.

>> Paul E. McKenney wrote:
>>> diff -urpNa -X dontdiff linux-2.6.20-rc4-rt1/kernel/rcutorture.c linux-2.6.20-rc4-rt1-rcubtorture/kernel/rcutorture.c
>>> --- linux-2.6.20-rc4-rt1/kernel/rcutorture.c	2007-01-09 10:59:54.000000000 -0800
>>> +++ linux-2.6.20-rc4-rt1-rcubtorture/kernel/rcutorture.c	2007-01-23 11:27:49.000000000 -0800

>>> +static int rcu_torture_preempt(void *arg)
>>> +{
>>> +	int completedstart;
>>> +	time_t gcstart;
>>> +	struct sched_param sp;
>>> +
>>> +	sp.sched_priority = MAX_RT_PRIO - 1;
>>> +	sched_setscheduler(current, SCHED_RR, &sp);
>>> +	current->flags |= PF_NOFREEZE;
>>> +
>>> +	do {
>>> +		completedstart = rcu_torture_completed();
>>> +		gcstart = xtime.tv_sec;
>>> +		while ((xtime.tv_sec - gcstart < 10) &&
>>> +		       (rcu_torture_completed() == completedstart))
>>> +			cond_resched();
>>> +		if (rcu_torture_completed() == completedstart)
>>> +			rcu_torture_preempt_errors++;
>>> +		schedule_timeout_interruptible(shuffle_interval * HZ);
>> Why call schedule_timeout_interruptible here without actually handling
>> interruptions?  So that you can send it a signal to cause the shuffle early?
> 
> It allows you to kill the process in order to get the module unload to
> happen more quickly in case someone specified an overly long interval.

I didn't actually know that you could kill a kthread from userspace. :)

That rationale makes sense.

> But now that you mention this, a simple one-second sleep is probably
> appropriate here.

OK.

>>> +	} while (!kthread_should_stop());
>>> +	return NULL;
>>> +}
>>> +
>>> +static void rcu_preempt_start(void)
>>> +{
>>> +	rcu_preeempt_task = kthread_run(rcu_torture_preempt, NULL,
>>> +					"rcu_torture_preempt");
>>> +	if (IS_ERR(rcu_preeempt_task)) {
>>> +		VERBOSE_PRINTK_ERRSTRING("Failed to create preempter");
>> This ought to include the errno value, PTR_ERR(rcu_preempt_task).
> 
> Good point -- what I should do is return this value so that
> rcu_torture_init() can return it, failing the module-load process
> and unwinding.

Even better, yes.

>>> +		rcu_preeempt_task = NULL;
>>> +	}
>>> +}
>>> +
>>> +static void rcu_preempt_end(void)
>>> +{
>>> +	if (rcu_preeempt_task != NULL) {
>> if (rcu_preempt_task) would work just as well here.
> 
> True, but was being consistent with usage elsewhere in this file.

Fair enough; don't worry about it for this patch, then.  I'll deal with that
particular style cleanup later, throughout rcutorture.

>>>  static struct rcu_torture_ops rcu_ops = {
>>>  	.init = NULL,
>>>  	.cleanup = NULL,
>>> @@ -267,7 +334,9 @@ static struct rcu_torture_ops rcu_ops = 
>>>  	.completed = rcu_torture_completed,
>>>  	.deferredfree = rcu_torture_deferred_free,
>>>  	.sync = synchronize_rcu,
>>> -	.stats = NULL,
>>> +	.preemptstart = rcu_preempt_start,
>>> +	.preemptend = rcu_preempt_end,
>>> +	.stats = rcu_preempt_stats,
>>>  	.name = "rcu"
>>>  };
>>>  
>>> @@ -306,6 +375,8 @@ static struct rcu_torture_ops rcu_sync_o
>>>  	.completed = rcu_torture_completed,
>>>  	.deferredfree = rcu_sync_torture_deferred_free,
>>>  	.sync = synchronize_rcu,
>>> +	.preemptstart = NULL,
>>> +	.preemptend = NULL,
>>>  	.stats = NULL,
>>>  	.name = "rcu_sync"
>>>  };
>> Much like other common structures such as struct file_operations, no need to
>> explicitly specify members as NULL here; any member you don't specify will get
>> a NULL value.  That avoids the need to update every use of this structure
>> whenever you add a new member used by only some of them.
> 
> Untrusting, aren't I?  ;-) 

Heh.  I have that problem as well; I always hestitate to trust the compiler to
initialize values.

> I removed all the "= NULL" entries.

Thanks.

>>> @@ -856,6 +935,8 @@ rcu_torture_cleanup(void)
>>>  		kthread_stop(stats_task);
>>>  	}
>>>  	stats_task = NULL;
>>> +	if (cur_ops->preemptend != NULL)
>> if (cur_ops->preemptend) would work as well.
> 
> True, though again there is a lot of existing "!= NULL" in this file
> and elsewhere.  Many thousands of them through the kernel.  ;-)

As before, don't worry about it for this patch then.

> I will run this through the mill and repost.

Thanks!

- Josh Triplett

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC PATCH -rt 2/2] RCU priority boosting additions to rcutorture
  2007-01-25  8:47     ` Josh Triplett
@ 2007-01-25 18:01       ` Paul E. McKenney
  2007-01-25 19:06         ` Josh Triplett
  0 siblings, 1 reply; 18+ messages in thread
From: Paul E. McKenney @ 2007-01-25 18:01 UTC (permalink / raw)
  To: Josh Triplett
  Cc: linux-kernel, mingo, tglx, dipankar, tytso, dvhltc, oleg,
	twoerner.k, billh, nielsen.esben, corbet

On Thu, Jan 25, 2007 at 12:47:04AM -0800, Josh Triplett wrote:
> Paul E. McKenney wrote:
> > This patch adds an optional preemption kernel thread to the rcutorture
> > tests.  This thread sets itself to a low RT priority and chews up
> > CPU in 10-second bursts, verifying that grace periods progress during
> > this 10-second interval.  This has thus far passed about 30 hours of
> > RCU torture testing on a 4-CPU (a pair of 2-CPU dies) 64-bit Xeon 
> > system.
> > 
> > I am experimenting with more-vicious tests, but extra viciousness thus
> > far comes at the expense of grotesque code.
> 
> Overall, the new feature seems like a good idea, and it should exercise the
> new RCU boosting code.  Some comments below.

Thank you for the review!!!

> One major item: this new test feature really needs a new module parameter to
> enable or disable it.

CONFIG_PREEMPT_RCU_BOOST is the parameter -- if not set, then no test.
This parameter is provided by the accompanying RCU-boost patch.

> > diff -urpNa -X dontdiff linux-2.6.20-rc4-rt1/kernel/rcutorture.c linux-2.6.20-rc4-rt1-rcubtorture/kernel/rcutorture.c
> > --- linux-2.6.20-rc4-rt1/kernel/rcutorture.c	2007-01-09 10:59:54.000000000 -0800
> > +++ linux-2.6.20-rc4-rt1-rcubtorture/kernel/rcutorture.c	2007-01-23 11:27:49.000000000 -0800
> > @@ -194,6 +194,8 @@ struct rcu_torture_ops {
> >  	int (*completed)(void);
> >  	void (*deferredfree)(struct rcu_torture *p);
> >  	void (*sync)(void);
> > +	void (*preemptstart)(void);
> > +	void (*preemptend)(void);
> >  	int (*stats)(char *page);
> >  	char *name;
> >  };
> > @@ -258,6 +260,71 @@ static void rcu_torture_deferred_free(st
> >  	call_rcu(&p->rtort_rcu, rcu_torture_cb);
> >  }
> >  
> > +#ifndef CONFIG_PREEMPT_RCU_BOOST
> > +static void rcu_preempt_start(void) { }
> > +static void rcu_preempt_end(void) { }
> > +static int rcu_preempt_stats(char *page) { return 0; }
> > +#else /* #ifndef CONFIG_PREEMPT_RCU_BOOST */
> > +
> > +static struct task_struct *rcu_preeempt_task;
> > +static long rcu_torture_preempt_errors = 0;
> 
> Might as well make this an unsigned long; negative values wouldn't make sense.

Good point, fixed.

> > +static int rcu_torture_preempt(void *arg)
> > +{
> > +	int completedstart;
> > +	time_t gcstart;
> > +	struct sched_param sp;
> > +
> > +	sp.sched_priority = MAX_RT_PRIO - 1;
> > +	sched_setscheduler(current, SCHED_RR, &sp);
> > +	current->flags |= PF_NOFREEZE;
> > +
> > +	do {
> > +		completedstart = rcu_torture_completed();
> > +		gcstart = xtime.tv_sec;
> > +		while ((xtime.tv_sec - gcstart < 10) &&
> > +		       (rcu_torture_completed() == completedstart))
> > +			cond_resched();
> > +		if (rcu_torture_completed() == completedstart)
> > +			rcu_torture_preempt_errors++;
> > +		schedule_timeout_interruptible(shuffle_interval * HZ);
> 
> Why call schedule_timeout_interruptible here without actually handling
> interruptions?  So that you can send it a signal to cause the shuffle early?

It allows you to kill the process in order to get the module unload to
happen more quickly in case someone specified an overly long interval.
But now that you mention this, a simple one-second sleep is probably
appropriate here.

> > +	} while (!kthread_should_stop());
> > +	return NULL;
> > +}
> > +
> > +static void rcu_preempt_start(void)
> > +{
> > +	rcu_preeempt_task = kthread_run(rcu_torture_preempt, NULL,
> > +					"rcu_torture_preempt");
> > +	if (IS_ERR(rcu_preeempt_task)) {
> > +		VERBOSE_PRINTK_ERRSTRING("Failed to create preempter");
> 
> This ought to include the errno value, PTR_ERR(rcu_preempt_task).

Good point -- what I should do is return this value so that
rcu_torture_init() can return it, failing the module-load process
and unwinding.

> > +		rcu_preeempt_task = NULL;
> > +	}
> > +}
> > +
> > +static void rcu_preempt_end(void)
> > +{
> > +	if (rcu_preeempt_task != NULL) {
> 
> if (rcu_preempt_task) would work just as well here.

True, but was being consistent with usage elsewhere in this file.

> > +		VERBOSE_PRINTK_STRING("Stopping rcu_preempt task");
> > +		kthread_stop(rcu_preeempt_task);
> > +	}
> > +	rcu_preeempt_task = NULL;
> > +}
> > +
> > +static int rcu_preempt_stats(char *page) {
> > +	int cnt = 0;
> > +
> > +	cnt += sprintf(&page[cnt],
> > +		       "Preemption stalls: %ld\n", rcu_torture_preempt_errors);
> > +	return (cnt);
> > +}
> 
> How about just:
> return sprintf(page, ...);
> ?

Good point.

> Also, if you decide to make rcu_torture_preempt_errors an unsigned long as
> suggested above, this should use %lu.

Done.

> > +#endif /* #else #ifndef CONFIG_PREEMPT_RCU_BOOST */
> > +
> > +static void rcu_preemptstart(void)
> > +{
> > +	
> > +}
> > +
> 
> This looks like a bit of stray code.

Yep, good eyes!  Deleted.

> >  static struct rcu_torture_ops rcu_ops = {
> >  	.init = NULL,
> >  	.cleanup = NULL,
> > @@ -267,7 +334,9 @@ static struct rcu_torture_ops rcu_ops = 
> >  	.completed = rcu_torture_completed,
> >  	.deferredfree = rcu_torture_deferred_free,
> >  	.sync = synchronize_rcu,
> > -	.stats = NULL,
> > +	.preemptstart = rcu_preempt_start,
> > +	.preemptend = rcu_preempt_end,
> > +	.stats = rcu_preempt_stats,
> >  	.name = "rcu"
> >  };
> >  
> > @@ -306,6 +375,8 @@ static struct rcu_torture_ops rcu_sync_o
> >  	.completed = rcu_torture_completed,
> >  	.deferredfree = rcu_sync_torture_deferred_free,
> >  	.sync = synchronize_rcu,
> > +	.preemptstart = NULL,
> > +	.preemptend = NULL,
> >  	.stats = NULL,
> >  	.name = "rcu_sync"
> >  };
> 
> Much like other common structures such as struct file_operations, no need to
> explicitly specify members as NULL here; any member you don't specify will get
> a NULL value.  That avoids the need to update every use of this structure
> whenever you add a new member used by only some of them.

Untrusting, aren't I?  ;-) 

I removed all the "= NULL" entries.

> > @@ -370,6 +441,8 @@ static struct rcu_torture_ops rcu_bh_ops
> >  	.completed = rcu_bh_torture_completed,
> >  	.deferredfree = rcu_bh_torture_deferred_free,
> >  	.sync = rcu_bh_torture_synchronize,
> > +	.preemptstart = NULL,
> > +	.preemptend = NULL,
> >  	.stats = NULL,
> >  	.name = "rcu_bh"
> >  };
> 
> Likewise.
> 
> > @@ -383,6 +456,8 @@ static struct rcu_torture_ops rcu_bh_syn
> >  	.completed = rcu_bh_torture_completed,
> >  	.deferredfree = rcu_sync_torture_deferred_free,
> >  	.sync = rcu_bh_torture_synchronize,
> > +	.preemptstart = NULL,
> > +	.preemptend = NULL,
> >  	.stats = NULL,
> >  	.name = "rcu_bh_sync"
> >  };
> 
> Likewise.
> 
> > @@ -464,6 +539,8 @@ static struct rcu_torture_ops srcu_ops =
> >  	.completed = srcu_torture_completed,
> >  	.deferredfree = rcu_sync_torture_deferred_free,
> >  	.sync = srcu_torture_synchronize,
> > +	.preemptstart = NULL,
> > +	.preemptend = NULL,
> >  	.stats = srcu_torture_stats,
> >  	.name = "srcu"
> >  };
> 
> Likewise.
> 
> > @@ -502,6 +579,8 @@ static struct rcu_torture_ops sched_ops 
> >  	.completed = sched_torture_completed,
> >  	.deferredfree = rcu_sync_torture_deferred_free,
> >  	.sync = sched_torture_synchronize,
> > +	.preemptstart = NULL,
> > +	.preemptend = NULL,
> >  	.stats = NULL,
> >  	.name = "sched"
> 
> Likewise.
> 
> > @@ -856,6 +935,8 @@ rcu_torture_cleanup(void)
> >  		kthread_stop(stats_task);
> >  	}
> >  	stats_task = NULL;
> > +	if (cur_ops->preemptend != NULL)
> 
> if (cur_ops->preemptend) would work as well.

True, though again there is a lot of existing "!= NULL" in this file
and elsewhere.  Many thousands of them through the kernel.  ;-)

> > @@ -997,6 +1078,8 @@ rcu_torture_init(void)
> >  			goto unwind;
> >  		}
> >  	}
> > +	if (cur_ops->preemptstart != NULL)
> 
> Likewise.

I will run this through the mill and repost.

							Thanx, Paul

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC PATCH -rt 2/2] RCU priority boosting additions to rcutorture
  2007-01-25  2:23   ` [RFC PATCH -rt 2/2] RCU priority boosting additions to rcutorture Paul E. McKenney
@ 2007-01-25  8:47     ` Josh Triplett
  2007-01-25 18:01       ` Paul E. McKenney
  0 siblings, 1 reply; 18+ messages in thread
From: Josh Triplett @ 2007-01-25  8:47 UTC (permalink / raw)
  To: paulmck
  Cc: linux-kernel, mingo, tglx, dipankar, tytso, dvhltc, oleg,
	twoerner.k, billh, nielsen.esben, corbet

Paul E. McKenney wrote:
> This patch adds an optional preemption kernel thread to the rcutorture
> tests.  This thread sets itself to a low RT priority and chews up
> CPU in 10-second bursts, verifying that grace periods progress during
> this 10-second interval.  This has thus far passed about 30 hours of
> RCU torture testing on a 4-CPU (a pair of 2-CPU dies) 64-bit Xeon 
> system.
> 
> I am experimenting with more-vicious tests, but extra viciousness thus
> far comes at the expense of grotesque code.

Overall, the new feature seems like a good idea, and it should exercise the
new RCU boosting code.  Some comments below.

One major item: this new test feature really needs a new module parameter to
enable or disable it.

> diff -urpNa -X dontdiff linux-2.6.20-rc4-rt1/kernel/rcutorture.c linux-2.6.20-rc4-rt1-rcubtorture/kernel/rcutorture.c
> --- linux-2.6.20-rc4-rt1/kernel/rcutorture.c	2007-01-09 10:59:54.000000000 -0800
> +++ linux-2.6.20-rc4-rt1-rcubtorture/kernel/rcutorture.c	2007-01-23 11:27:49.000000000 -0800
> @@ -194,6 +194,8 @@ struct rcu_torture_ops {
>  	int (*completed)(void);
>  	void (*deferredfree)(struct rcu_torture *p);
>  	void (*sync)(void);
> +	void (*preemptstart)(void);
> +	void (*preemptend)(void);
>  	int (*stats)(char *page);
>  	char *name;
>  };
> @@ -258,6 +260,71 @@ static void rcu_torture_deferred_free(st
>  	call_rcu(&p->rtort_rcu, rcu_torture_cb);
>  }
>  
> +#ifndef CONFIG_PREEMPT_RCU_BOOST
> +static void rcu_preempt_start(void) { }
> +static void rcu_preempt_end(void) { }
> +static int rcu_preempt_stats(char *page) { return 0; }
> +#else /* #ifndef CONFIG_PREEMPT_RCU_BOOST */
> +
> +static struct task_struct *rcu_preeempt_task;
> +static long rcu_torture_preempt_errors = 0;

Might as well make this an unsigned long; negative values wouldn't make sense.

> +static int rcu_torture_preempt(void *arg)
> +{
> +	int completedstart;
> +	time_t gcstart;
> +	struct sched_param sp;
> +
> +	sp.sched_priority = MAX_RT_PRIO - 1;
> +	sched_setscheduler(current, SCHED_RR, &sp);
> +	current->flags |= PF_NOFREEZE;
> +
> +	do {
> +		completedstart = rcu_torture_completed();
> +		gcstart = xtime.tv_sec;
> +		while ((xtime.tv_sec - gcstart < 10) &&
> +		       (rcu_torture_completed() == completedstart))
> +			cond_resched();
> +		if (rcu_torture_completed() == completedstart)
> +			rcu_torture_preempt_errors++;
> +		schedule_timeout_interruptible(shuffle_interval * HZ);

Why call schedule_timeout_interruptible here without actually handling
interruptions?  So that you can send it a signal to cause the shuffle early?

> +	} while (!kthread_should_stop());
> +	return NULL;
> +}
> +
> +static void rcu_preempt_start(void)
> +{
> +	rcu_preeempt_task = kthread_run(rcu_torture_preempt, NULL,
> +					"rcu_torture_preempt");
> +	if (IS_ERR(rcu_preeempt_task)) {
> +		VERBOSE_PRINTK_ERRSTRING("Failed to create preempter");

This ought to include the errno value, PTR_ERR(rcu_preempt_task).

> +		rcu_preeempt_task = NULL;
> +	}
> +}
> +
> +static void rcu_preempt_end(void)
> +{
> +	if (rcu_preeempt_task != NULL) {

if (rcu_preempt_task) would work just as well here.

> +		VERBOSE_PRINTK_STRING("Stopping rcu_preempt task");
> +		kthread_stop(rcu_preeempt_task);
> +	}
> +	rcu_preeempt_task = NULL;
> +}
> +
> +static int rcu_preempt_stats(char *page) {
> +	int cnt = 0;
> +
> +	cnt += sprintf(&page[cnt],
> +		       "Preemption stalls: %ld\n", rcu_torture_preempt_errors);
> +	return (cnt);
> +}

How about just:
return sprintf(page, ...);
?

Also, if you decide to make rcu_torture_preempt_errors an unsigned long as
suggested above, this should use %lu.

> +#endif /* #else #ifndef CONFIG_PREEMPT_RCU_BOOST */
> +
> +static void rcu_preemptstart(void)
> +{
> +	
> +}
> +

This looks like a bit of stray code.

>  static struct rcu_torture_ops rcu_ops = {
>  	.init = NULL,
>  	.cleanup = NULL,
> @@ -267,7 +334,9 @@ static struct rcu_torture_ops rcu_ops = 
>  	.completed = rcu_torture_completed,
>  	.deferredfree = rcu_torture_deferred_free,
>  	.sync = synchronize_rcu,
> -	.stats = NULL,
> +	.preemptstart = rcu_preempt_start,
> +	.preemptend = rcu_preempt_end,
> +	.stats = rcu_preempt_stats,
>  	.name = "rcu"
>  };
>  
> @@ -306,6 +375,8 @@ static struct rcu_torture_ops rcu_sync_o
>  	.completed = rcu_torture_completed,
>  	.deferredfree = rcu_sync_torture_deferred_free,
>  	.sync = synchronize_rcu,
> +	.preemptstart = NULL,
> +	.preemptend = NULL,
>  	.stats = NULL,
>  	.name = "rcu_sync"
>  };

Much like other common structures such as struct file_operations, no need to
explicitly specify members as NULL here; any member you don't specify will get
a NULL value.  That avoids the need to update every use of this structure
whenever you add a new member used by only some of them.

> @@ -370,6 +441,8 @@ static struct rcu_torture_ops rcu_bh_ops
>  	.completed = rcu_bh_torture_completed,
>  	.deferredfree = rcu_bh_torture_deferred_free,
>  	.sync = rcu_bh_torture_synchronize,
> +	.preemptstart = NULL,
> +	.preemptend = NULL,
>  	.stats = NULL,
>  	.name = "rcu_bh"
>  };

Likewise.

> @@ -383,6 +456,8 @@ static struct rcu_torture_ops rcu_bh_syn
>  	.completed = rcu_bh_torture_completed,
>  	.deferredfree = rcu_sync_torture_deferred_free,
>  	.sync = rcu_bh_torture_synchronize,
> +	.preemptstart = NULL,
> +	.preemptend = NULL,
>  	.stats = NULL,
>  	.name = "rcu_bh_sync"
>  };

Likewise.

> @@ -464,6 +539,8 @@ static struct rcu_torture_ops srcu_ops =
>  	.completed = srcu_torture_completed,
>  	.deferredfree = rcu_sync_torture_deferred_free,
>  	.sync = srcu_torture_synchronize,
> +	.preemptstart = NULL,
> +	.preemptend = NULL,
>  	.stats = srcu_torture_stats,
>  	.name = "srcu"
>  };

Likewise.

> @@ -502,6 +579,8 @@ static struct rcu_torture_ops sched_ops 
>  	.completed = sched_torture_completed,
>  	.deferredfree = rcu_sync_torture_deferred_free,
>  	.sync = sched_torture_synchronize,
> +	.preemptstart = NULL,
> +	.preemptend = NULL,
>  	.stats = NULL,
>  	.name = "sched"

Likewise.

> @@ -856,6 +935,8 @@ rcu_torture_cleanup(void)
>  		kthread_stop(stats_task);
>  	}
>  	stats_task = NULL;
> +	if (cur_ops->preemptend != NULL)

if (cur_ops->preemptend) would work as well.

> @@ -997,6 +1078,8 @@ rcu_torture_init(void)
>  			goto unwind;
>  		}
>  	}
> +	if (cur_ops->preemptstart != NULL)

Likewise.

- Josh Triplett

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [RFC PATCH -rt 2/2] RCU priority boosting additions to rcutorture
  2007-01-25  2:14 ` [RFC PATCH -rt 1/2] " Paul E. McKenney
@ 2007-01-25  2:23   ` Paul E. McKenney
  2007-01-25  8:47     ` Josh Triplett
  0 siblings, 1 reply; 18+ messages in thread
From: Paul E. McKenney @ 2007-01-25  2:23 UTC (permalink / raw)
  To: linux-kernel
  Cc: mingo, tglx, dipankar, tytso, dvhltc, oleg, twoerner.k, josh,
	billh, nielsen.esben, corbet

This patch adds an optional preemption kernel thread to the rcutorture
tests.  This thread sets itself to a low RT priority and chews up
CPU in 10-second bursts, verifying that grace periods progress during
this 10-second interval.  This has thus far passed about 30 hours of
RCU torture testing on a 4-CPU (a pair of 2-CPU dies) 64-bit Xeon 
system.

I am experimenting with more-vicious tests, but extra viciousness thus
far comes at the expense of grotesque code.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---

 rcutorture.c |   85 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 84 insertions(+), 1 deletion(-)

diff -urpNa -X dontdiff linux-2.6.20-rc4-rt1/kernel/rcutorture.c linux-2.6.20-rc4-rt1-rcubtorture/kernel/rcutorture.c
--- linux-2.6.20-rc4-rt1/kernel/rcutorture.c	2007-01-09 10:59:54.000000000 -0800
+++ linux-2.6.20-rc4-rt1-rcubtorture/kernel/rcutorture.c	2007-01-23 11:27:49.000000000 -0800
@@ -194,6 +194,8 @@ struct rcu_torture_ops {
 	int (*completed)(void);
 	void (*deferredfree)(struct rcu_torture *p);
 	void (*sync)(void);
+	void (*preemptstart)(void);
+	void (*preemptend)(void);
 	int (*stats)(char *page);
 	char *name;
 };
@@ -258,6 +260,71 @@ static void rcu_torture_deferred_free(st
 	call_rcu(&p->rtort_rcu, rcu_torture_cb);
 }
 
+#ifndef CONFIG_PREEMPT_RCU_BOOST
+static void rcu_preempt_start(void) { }
+static void rcu_preempt_end(void) { }
+static int rcu_preempt_stats(char *page) { return 0; }
+#else /* #ifndef CONFIG_PREEMPT_RCU_BOOST */
+
+static struct task_struct *rcu_preeempt_task;
+static long rcu_torture_preempt_errors = 0;
+
+static int rcu_torture_preempt(void *arg)
+{
+	int completedstart;
+	time_t gcstart;
+	struct sched_param sp;
+
+	sp.sched_priority = MAX_RT_PRIO - 1;
+	sched_setscheduler(current, SCHED_RR, &sp);
+	current->flags |= PF_NOFREEZE;
+
+	do {
+		completedstart = rcu_torture_completed();
+		gcstart = xtime.tv_sec;
+		while ((xtime.tv_sec - gcstart < 10) &&
+		       (rcu_torture_completed() == completedstart))
+			cond_resched();
+		if (rcu_torture_completed() == completedstart)
+			rcu_torture_preempt_errors++;
+		schedule_timeout_interruptible(shuffle_interval * HZ);
+	} while (!kthread_should_stop());
+	return NULL;
+}
+
+static void rcu_preempt_start(void)
+{
+	rcu_preeempt_task = kthread_run(rcu_torture_preempt, NULL,
+					"rcu_torture_preempt");
+	if (IS_ERR(rcu_preeempt_task)) {
+		VERBOSE_PRINTK_ERRSTRING("Failed to create preempter");
+		rcu_preeempt_task = NULL;
+	}
+}
+
+static void rcu_preempt_end(void)
+{
+	if (rcu_preeempt_task != NULL) {
+		VERBOSE_PRINTK_STRING("Stopping rcu_preempt task");
+		kthread_stop(rcu_preeempt_task);
+	}
+	rcu_preeempt_task = NULL;
+}
+
+static int rcu_preempt_stats(char *page) {
+	int cnt = 0;
+
+	cnt += sprintf(&page[cnt],
+		       "Preemption stalls: %ld\n", rcu_torture_preempt_errors);
+	return (cnt);
+}
+#endif /* #else #ifndef CONFIG_PREEMPT_RCU_BOOST */
+
+static void rcu_preemptstart(void)
+{
+	
+}
+
 static struct rcu_torture_ops rcu_ops = {
 	.init = NULL,
 	.cleanup = NULL,
@@ -267,7 +334,9 @@ static struct rcu_torture_ops rcu_ops = 
 	.completed = rcu_torture_completed,
 	.deferredfree = rcu_torture_deferred_free,
 	.sync = synchronize_rcu,
-	.stats = NULL,
+	.preemptstart = rcu_preempt_start,
+	.preemptend = rcu_preempt_end,
+	.stats = rcu_preempt_stats,
 	.name = "rcu"
 };
 
@@ -306,6 +375,8 @@ static struct rcu_torture_ops rcu_sync_o
 	.completed = rcu_torture_completed,
 	.deferredfree = rcu_sync_torture_deferred_free,
 	.sync = synchronize_rcu,
+	.preemptstart = NULL,
+	.preemptend = NULL,
 	.stats = NULL,
 	.name = "rcu_sync"
 };
@@ -370,6 +441,8 @@ static struct rcu_torture_ops rcu_bh_ops
 	.completed = rcu_bh_torture_completed,
 	.deferredfree = rcu_bh_torture_deferred_free,
 	.sync = rcu_bh_torture_synchronize,
+	.preemptstart = NULL,
+	.preemptend = NULL,
 	.stats = NULL,
 	.name = "rcu_bh"
 };
@@ -383,6 +456,8 @@ static struct rcu_torture_ops rcu_bh_syn
 	.completed = rcu_bh_torture_completed,
 	.deferredfree = rcu_sync_torture_deferred_free,
 	.sync = rcu_bh_torture_synchronize,
+	.preemptstart = NULL,
+	.preemptend = NULL,
 	.stats = NULL,
 	.name = "rcu_bh_sync"
 };
@@ -464,6 +539,8 @@ static struct rcu_torture_ops srcu_ops =
 	.completed = srcu_torture_completed,
 	.deferredfree = rcu_sync_torture_deferred_free,
 	.sync = srcu_torture_synchronize,
+	.preemptstart = NULL,
+	.preemptend = NULL,
 	.stats = srcu_torture_stats,
 	.name = "srcu"
 };
@@ -502,6 +579,8 @@ static struct rcu_torture_ops sched_ops 
 	.completed = sched_torture_completed,
 	.deferredfree = rcu_sync_torture_deferred_free,
 	.sync = sched_torture_synchronize,
+	.preemptstart = NULL,
+	.preemptend = NULL,
 	.stats = NULL,
 	.name = "sched"
 };
@@ -856,6 +935,8 @@ rcu_torture_cleanup(void)
 		kthread_stop(stats_task);
 	}
 	stats_task = NULL;
+	if (cur_ops->preemptend != NULL)
+		cur_ops->preemptend();
 
 	/* Wait for all RCU callbacks to fire.  */
 	rcu_barrier();
@@ -997,6 +1078,8 @@ rcu_torture_init(void)
 			goto unwind;
 		}
 	}
+	if (cur_ops->preemptstart != NULL)
+		cur_ops->preemptstart();
 	return 0;
 
 unwind:

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2007-02-02 15:57 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-02-01  1:21 [RFC PATCH -rt 0/2] RCU priority boosting that survives vicious testing Paul E. McKenney
2007-02-01  1:24 ` [RFC PATCH -rt 1/2] " Paul E. McKenney
2007-02-01  8:23   ` Ingo Molnar
2007-02-02 15:56     ` Paul E. McKenney
2007-02-01  1:26 ` [RFC PATCH -rt 2/2] RCU priority boosting additions to rcutorture Paul E. McKenney
2007-02-01  2:12   ` Nigel Cunningham
2007-02-01  2:31     ` Paul E. McKenney
2007-02-01  2:42       ` Nigel Cunningham
2007-02-01  5:46         ` Paul E. McKenney
2007-02-01 22:13           ` Nigel Cunningham
  -- strict thread matches above, loose matches on Subject: below --
2007-01-25  2:11 [RFC PATCH -rt 0/2] RCU priority boosting that survives semi-vicious testing Paul E. McKenney
2007-01-25  2:14 ` [RFC PATCH -rt 1/2] " Paul E. McKenney
2007-01-25  2:23   ` [RFC PATCH -rt 2/2] RCU priority boosting additions to rcutorture Paul E. McKenney
2007-01-25  8:47     ` Josh Triplett
2007-01-25 18:01       ` Paul E. McKenney
2007-01-25 19:06         ` Josh Triplett
2007-01-26  1:52           ` Paul E. McKenney
2007-01-26  6:29             ` Josh Triplett
2007-01-29  2:11           ` Paul E. McKenney
2007-01-29  6:05             ` Josh Triplett

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).