LKML Archive on lore.kernel.org
 help / color / Atom feed
From: "Ahmed S. Darwish" <a.darwish@linutronix.de>
To: Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@redhat.com>, Will Deacon <will@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>,
	"Sebastian A. Siewior" <bigeasy@linutronix.de>,
	"Paul E. McKenney" <paulmck@kernel.org>,
	Steven Rostedt <rostedt@goodmis.org>,
	LKML <linux-kernel@vger.kernel.org>,
	"Ahmed S. Darwish" <a.darwish@linutronix.de>
Subject: [PATCH v2 4/5] seqlock: seqcount_LOCKNAME_t: Introduce PREEMPT_RT support
Date: Fri,  4 Sep 2020 17:32:30 +0200
Message-ID: <20200904153231.11994-5-a.darwish@linutronix.de> (raw)
In-Reply-To: <20200904153231.11994-1-a.darwish@linutronix.de>

Preemption must be disabled before entering a sequence counter write
side critical section.  Otherwise the read side section can preempt the
write side section and spin for the entire scheduler tick.  If that
reader belongs to a real-time scheduling class, it can spin forever and
the kernel will livelock.

Disabling preemption cannot be done for PREEMPT_RT though: it can lead
to higher latencies, and the write side sections will not be able to
acquire locks which become sleeping locks (e.g. spinlock_t).

To remain preemptible, while avoiding a possible livelock caused by the
reader preempting the writer, use a different technique: let the reader
detect if a seqcount_LOCKNAME_t writer is in progress. If that's the
case, acquire then release the associated LOCKNAME writer serialization
lock. This will allow any possibly-preempted writer to make progress
until the end of its writer serialization lock critical section.

Implement this lock-unlock technique for all seqcount_LOCKNAME_t with
an associated (PREEMPT_RT) sleeping lock.

Link: https://lkml.kernel.org/r/159708609435.2571.13948681727529247231.tglx@nanos
Link: https://lkml.kernel.org/r/20200519214547.352050-1-a.darwish@linutronix.de
References: 55f3560df975 ("seqlock: Extend seqcount API with associated locks")
Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
---
 include/linux/seqlock.h | 72 +++++++++++++++++++++++++++++++++--------
 1 file changed, 58 insertions(+), 14 deletions(-)

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index ed1c6c0ff8bb..6ac5a63fc536 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -17,6 +17,7 @@
 #include <linux/kcsan-checks.h>
 #include <linux/lockdep.h>
 #include <linux/mutex.h>
+#include <linux/ww_mutex.h>
 #include <linux/preempt.h>
 #include <linux/spinlock.h>
 
@@ -131,7 +132,23 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
  * See Documentation/locking/seqlock.rst
  */
 
-#ifdef CONFIG_LOCKDEP
+/*
+ * For PREEMPT_RT, seqcount_LOCKNAME_t write side critical sections cannot
+ * disable preemption. It can lead to higher latencies, and the write side
+ * sections will not be able to acquire locks which become sleeping locks
+ * (e.g. spinlock_t).
+ *
+ * To remain preemptible while avoiding a possible livelock caused by the
+ * reader preempting the writer, use a different technique: let the reader
+ * detect if a seqcount_LOCKNAME_t writer is in progress. If that is the
+ * case, acquire then release the associated LOCKNAME writer serialization
+ * lock. This will allow any possibly-preempted writer to make progress
+ * until the end of its writer serialization lock critical section.
+ *
+ * This lock-unlock technique must be implemented for all of PREEMPT_RT
+ * sleeping locks.  See Documentation/locking/locktypes.rst
+ */
+#if defined(CONFIG_LOCKDEP) || defined(CONFIG_PREEMPT_RT)
 #define __SEQ_LOCK(expr)	expr
 #else
 #define __SEQ_LOCK(expr)
@@ -162,10 +179,12 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
  *
  * @lockname:		"LOCKNAME" part of seqcount_LOCKNAME_t
  * @locktype:		LOCKNAME canonical C data type
- * @preemptible:	preemptibility of above lockname
+ * @preemptible:	preemptibility of above locktype
  * @lockmember:		argument for lockdep_assert_held()
+ * @lockbase:		associated lock release function (prefix only)
+ * @lock_acquire:	associated lock acquisition function (full call)
  */
-#define SEQCOUNT_LOCKNAME(lockname, locktype, preemptible, lockmember)	\
+#define SEQCOUNT_LOCKNAME(lockname, locktype, preemptible, lockmember, lockbase, lock_acquire) \
 typedef struct seqcount_##lockname {					\
 	seqcount_t		seqcount;				\
 	__SEQ_LOCK(locktype	*lock);					\
@@ -187,7 +206,23 @@ __seqprop_seqcount_##lockname##_ptr(seqcount_##lockname##_t *s)		\
 static __always_inline unsigned						\
 __seqprop_seqcount_##lockname##_sequence(const seqcount_##lockname##_t *s)\
 {									\
-	return READ_ONCE(s->seqcount.sequence);				\
+	unsigned seq = READ_ONCE(s->seqcount.sequence);			\
+									\
+	if (!IS_ENABLED(CONFIG_PREEMPT_RT))				\
+		return seq;						\
+									\
+	if (preemptible && unlikely(seq & 1)) {				\
+		__SEQ_LOCK(lock_acquire);				\
+		__SEQ_LOCK(lockbase##_unlock(s->lock));			\
+									\
+		/*							\
+		 * Re-read the sequence counter since the (possibly	\
+		 * preempted) writer made progress.			\
+		 */							\
+		seq = READ_ONCE(s->seqcount.sequence);			\
+	}								\
+									\
+	return seq;							\
 }									\
 									\
 static __always_inline bool						\
@@ -226,11 +261,13 @@ static inline void __seqprop_seqcount_assert(const seqcount_t *s)
 	lockdep_assert_preemption_disabled();
 }
 
-SEQCOUNT_LOCKNAME(raw_spinlock,	raw_spinlock_t,		false,	s->lock)
-SEQCOUNT_LOCKNAME(spinlock,	spinlock_t,		false,	s->lock)
-SEQCOUNT_LOCKNAME(rwlock,	rwlock_t,		false,	s->lock)
-SEQCOUNT_LOCKNAME(mutex,	struct mutex,		true,	s->lock)
-SEQCOUNT_LOCKNAME(ww_mutex,	struct ww_mutex,	true,	&s->lock->base)
+#define __SEQ_RT	IS_ENABLED(CONFIG_PREEMPT_RT)
+
+SEQCOUNT_LOCKNAME(raw_spinlock, raw_spinlock_t,  false,    s->lock,        raw_spin, raw_spin_lock(s->lock))
+SEQCOUNT_LOCKNAME(spinlock,     spinlock_t,      __SEQ_RT, s->lock,        spin,     spin_lock(s->lock))
+SEQCOUNT_LOCKNAME(rwlock,       rwlock_t,        __SEQ_RT, s->lock,        read,     read_lock(s->lock))
+SEQCOUNT_LOCKNAME(mutex,        struct mutex,    true,     s->lock,        mutex,    mutex_lock(s->lock))
+SEQCOUNT_LOCKNAME(ww_mutex,     struct ww_mutex, true,     &s->lock->base, ww_mutex, ww_mutex_lock(s->lock, NULL))
 
 /**
  * SEQCNT_LOCKNAME_ZERO - static initializer for seqcount_LOCKNAME_t
@@ -406,13 +443,20 @@ static inline int read_seqcount_t_retry(const seqcount_t *s, unsigned start)
 	return __read_seqcount_t_retry(s, start);
 }
 
+/*
+ * Enforce non-preemptibility for all seqcount_LOCKNAME_t writers. Don't
+ * do it for PREEMPT_RT, for the reasons outlined at __SEQ_LOCK().
+ */
+#define __seq_enforce_writer_non_preemptibility(s)			\
+	(!IS_ENABLED(CONFIG_PREEMPT_RT) && __seqcount_lock_preemptible(s))
+
 /**
  * raw_write_seqcount_begin() - start a seqcount_t write section w/o lockdep
  * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
  */
 #define raw_write_seqcount_begin(s)					\
 do {									\
-	if (__seqcount_lock_preemptible(s))				\
+	if (__seq_enforce_writer_non_preemptibility(s))			\
 		preempt_disable();					\
 									\
 	raw_write_seqcount_t_begin(__seqcount_ptr(s));			\
@@ -433,7 +477,7 @@ static inline void raw_write_seqcount_t_begin(seqcount_t *s)
 do {									\
 	raw_write_seqcount_t_end(__seqcount_ptr(s));			\
 									\
-	if (__seqcount_lock_preemptible(s))				\
+	if (__seq_enforce_writer_non_preemptibility(s))			\
 		preempt_enable();					\
 } while (0)
 
@@ -456,7 +500,7 @@ static inline void raw_write_seqcount_t_end(seqcount_t *s)
 do {									\
 	__seqcount_assert_lock_held(s);					\
 									\
-	if (__seqcount_lock_preemptible(s))				\
+	if (__seq_enforce_writer_non_preemptibility(s))			\
 		preempt_disable();					\
 									\
 	write_seqcount_t_begin_nested(__seqcount_ptr(s), subclass);	\
@@ -483,7 +527,7 @@ static inline void write_seqcount_t_begin_nested(seqcount_t *s, int subclass)
 do {									\
 	__seqcount_assert_lock_held(s);					\
 									\
-	if (__seqcount_lock_preemptible(s))				\
+	if (__seq_enforce_writer_non_preemptibility(s))			\
 		preempt_disable();					\
 									\
 	write_seqcount_t_begin(__seqcount_ptr(s));			\
@@ -504,7 +548,7 @@ static inline void write_seqcount_t_begin(seqcount_t *s)
 do {									\
 	write_seqcount_t_end(__seqcount_ptr(s));			\
 									\
-	if (__seqcount_lock_preemptible(s))				\
+	if (__seq_enforce_writer_non_preemptibility(s))			\
 		preempt_enable();					\
 } while (0)
 
-- 
2.28.0


  parent reply index

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-09-04 15:32 [PATCH v2 0/5] seqlock: " Ahmed S. Darwish
2020-09-04 15:32 ` [PATCH v2 1/5] seqlock: seqcount_LOCKNAME_t: Standardize naming convention Ahmed S. Darwish
2020-09-10 15:08   ` [tip: locking/core] " tip-bot2 for Ahmed S. Darwish
2020-09-04 15:32 ` [PATCH v2 2/5] seqlock: Use unique prefix for seqcount_t property accessors Ahmed S. Darwish
2020-09-08 11:41   ` peterz
2020-09-10 15:08   ` [tip: locking/core] " tip-bot2 for Ahmed S. Darwish
2020-09-04 15:32 ` [PATCH v2 3/5] seqlock: seqcount_t: Implement all read APIs as statement expressions Ahmed S. Darwish
2020-09-10 15:08   ` [tip: locking/core] " tip-bot2 for Ahmed S. Darwish
2020-09-04 15:32 ` Ahmed S. Darwish [this message]
2020-09-08 11:45   ` [PATCH v2 4/5] seqlock: seqcount_LOCKNAME_t: Introduce PREEMPT_RT support peterz
2020-09-08 12:48     ` Ahmed S. Darwish
2020-09-04 15:32 ` [PATCH v2 5/5] seqlock: PREEMPT_RT: Do not starve seqlock_t writers Ahmed S. Darwish
2020-09-10 15:08   ` [tip: locking/core] " tip-bot2 for Ahmed S. Darwish
2020-09-15  0:20 ` [PATCH v2 0/5] seqlock: Introduce PREEMPT_RT support Qian Cai
2020-09-15 12:48   ` Boqun Feng
2020-09-15 13:10     ` Boqun Feng
2020-09-15 14:30     ` peterz
2020-09-16 12:52       ` Qian Cai
2020-09-16 12:54         ` peterz
2020-09-16 13:00           ` Qian Cai
2020-09-16 13:02             ` peterz
2020-09-17  2:31               ` Stephen Rothwell
2020-09-18  8:42       ` [tip: locking/core] seqlock: Unbreak lockdep tip-bot2 for peterz@infradead.org

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200904153231.11994-5-a.darwish@linutronix.de \
    --to=a.darwish@linutronix.de \
    --cc=bigeasy@linutronix.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=paulmck@kernel.org \
    --cc=peterz@infradead.org \
    --cc=rostedt@goodmis.org \
    --cc=tglx@linutronix.de \
    --cc=will@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

LKML Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lkml.kernel.org/lkml/0 lkml/git/0.git
	git clone --mirror https://lkml.kernel.org/lkml/1 lkml/git/1.git
	git clone --mirror https://lkml.kernel.org/lkml/2 lkml/git/2.git
	git clone --mirror https://lkml.kernel.org/lkml/3 lkml/git/3.git
	git clone --mirror https://lkml.kernel.org/lkml/4 lkml/git/4.git
	git clone --mirror https://lkml.kernel.org/lkml/5 lkml/git/5.git
	git clone --mirror https://lkml.kernel.org/lkml/6 lkml/git/6.git
	git clone --mirror https://lkml.kernel.org/lkml/7 lkml/git/7.git
	git clone --mirror https://lkml.kernel.org/lkml/8 lkml/git/8.git
	git clone --mirror https://lkml.kernel.org/lkml/9 lkml/git/9.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 lkml lkml/ https://lkml.kernel.org/lkml \
		linux-kernel@vger.kernel.org
	public-inbox-index lkml

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-kernel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git