LKML Archive on lore.kernel.org
* [PATCH v4 08/24] seqlock: lockdep assert non-preemptibility on seqcount_t write
From: Ahmed S. Darwish @ 2020-07-20 15:55 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Will Deacon
  Cc: Thomas Gleixner, Paul E. McKenney, Sebastian A. Siewior,
	Steven Rostedt, LKML, Ahmed S. Darwish

Preemption must be disabled before entering a sequence count write side
critical section.  Otherwise, the seqcount read side can preempt the
write side section and spin for the entire scheduler tick.  If that
reader belongs to a real-time scheduling class, it can spin forever and
the kernel will livelock.

Assert through lockdep that preemption is disabled for seqcount writers.
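
As an illustrative sketch (not part of this patch; the foo_*
identifiers are hypothetical), a writer whose serializing lock is a
mutex must disable preemption by hand before the new assertion is
satisfied, since mutexes do not disable preemption implicitly:

	#include <linux/mutex.h>
	#include <linux/seqlock.h>

	static DEFINE_MUTEX(foo_mutex);
	static seqcount_t foo_seqcount = SEQCNT_ZERO(foo_seqcount);
	static int foo_data;

	static void foo_update(int val)
	{
		mutex_lock(&foo_mutex);	/* serialize writers */
		preempt_disable();	/* mutexes are preemptible */

		/* lockdep_assert_preemption_disabled() now holds */
		write_seqcount_begin(&foo_seqcount);
		foo_data = val;
		write_seqcount_end(&foo_seqcount);

		preempt_enable();
		mutex_unlock(&foo_mutex);
	}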

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
---
 include/linux/seqlock.h | 29 +++++++++++++++++++++++------
 1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index e885702d8b82..54bc20496392 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -266,6 +266,12 @@ static inline void raw_write_seqcount_end(seqcount_t *s)
 	kcsan_nestable_atomic_end();
 }
 
+static inline void __write_seqcount_begin_nested(seqcount_t *s, int subclass)
+{
+	raw_write_seqcount_begin(s);
+	seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_);
+}
+
 /**
  * write_seqcount_begin_nested() - start a seqcount_t write section with
  *                                 custom lockdep nesting level
@@ -276,8 +282,19 @@ static inline void raw_write_seqcount_end(seqcount_t *s)
  */
 static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass)
 {
-	raw_write_seqcount_begin(s);
-	seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_);
+	lockdep_assert_preemption_disabled();
+	__write_seqcount_begin_nested(s, subclass);
+}
+
+/*
+ * A write_seqcount_begin() variant w/o lockdep non-preemptibility checks.
+ *
+ * Use for internal seqlock.h code where it's known that preemption is
+ * already disabled. For example, seqlock_t write side functions.
+ */
+static inline void __write_seqcount_begin(seqcount_t *s)
+{
+	__write_seqcount_begin_nested(s, 0);
 }
 
 /**
@@ -575,7 +592,7 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
 static inline void write_seqlock(seqlock_t *sl)
 {
 	spin_lock(&sl->lock);
-	write_seqcount_begin(&sl->seqcount);
+	__write_seqcount_begin(&sl->seqcount);
 }
 
 /**
@@ -601,7 +618,7 @@ static inline void write_sequnlock(seqlock_t *sl)
 static inline void write_seqlock_bh(seqlock_t *sl)
 {
 	spin_lock_bh(&sl->lock);
-	write_seqcount_begin(&sl->seqcount);
+	__write_seqcount_begin(&sl->seqcount);
 }
 
 /**
@@ -628,7 +645,7 @@ static inline void write_sequnlock_bh(seqlock_t *sl)
 static inline void write_seqlock_irq(seqlock_t *sl)
 {
 	spin_lock_irq(&sl->lock);
-	write_seqcount_begin(&sl->seqcount);
+	__write_seqcount_begin(&sl->seqcount);
 }
 
 /**
@@ -649,7 +666,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl)
 	unsigned long flags;
 
 	spin_lock_irqsave(&sl->lock, flags);
-	write_seqcount_begin(&sl->seqcount);
+	__write_seqcount_begin(&sl->seqcount);
 	return flags;
 }
 
-- 
2.20.1



* [PATCH v4 09/24] seqlock: Extend seqcount API with associated locks
From: Ahmed S. Darwish @ 2020-07-20 15:55 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Will Deacon
  Cc: Thomas Gleixner, Paul E. McKenney, Sebastian A. Siewior,
	Steven Rostedt, LKML, Ahmed S. Darwish

A sequence counter write side critical section must be protected by some
form of locking to serialize writers. If the serialization primitive is
not disabling preemption implicitly, preemption has to be explicitly
disabled before entering the write side critical section.

There is no built-in debugging mechanism to verify that the lock used
for writer serialization is held and preemption is disabled. Some usage
sites like dma-buf have explicit lockdep checks for the writer-side
lock, but this covers only a small portion of the sequence counter usage
in the kernel.

Add new sequence counter types which allow associating a lock with the
sequence counter at initialization time. The seqcount API functions are
extended to provide appropriate lockdep assertions depending on the
seqcount/lock type.

For sequence counters with associated locks that do not implicitly
disable preemption, preemption protection is enforced in the sequence
counter write side functions. This removes the need to explicitly add
preempt_disable/enable() around the write side critical sections: the
write_begin/end() functions for these new sequence counter types
automatically do this.
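
For example (an illustrative sketch; the foo_* identifiers are
hypothetical), with a seqcount_mutex_t no explicit
preempt_disable/enable() is needed around the write section:

	#include <linux/seqlock.h>

	static DEFINE_MUTEX(foo_mutex);
	static seqcount_mutex_t foo_seqcount =
		SEQCNT_MUTEX_ZERO(foo_seqcount, &foo_mutex);
	static int foo_data;

	static void foo_update(int val)
	{
		mutex_lock(&foo_mutex);
		write_seqcount_begin(&foo_seqcount);	/* disables preemption */
		foo_data = val;
		write_seqcount_end(&foo_seqcount);	/* re-enables preemption */
		mutex_unlock(&foo_mutex);
	}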

Introduce the following seqcount types with associated locks:

     seqcount_spinlock_t
     seqcount_raw_spinlock_t
     seqcount_rwlock_t
     seqcount_mutex_t
     seqcount_ww_mutex_t

Extend the seqcount read and write functions to branch out to the
specific seqcount_LOCKTYPE_t implementation at compile-time. This avoids
kernel API explosion for each new seqcount_LOCKTYPE_t added. Add such
compile-time type detection logic into a new, internal, seqlock header.

Document the proper seqcount_LOCKTYPE_t usage, and rationale, at
Documentation/locking/seqlock.rst.

If lockdep is disabled, this lock association is compiled out and has
neither storage size nor runtime overhead.
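
As a usage sketch for the lockdep side (again with hypothetical foo_*
identifiers): associating a spinlock lets lockdep verify, at
write_seqcount_begin(), that the lock is actually held:

	#include <linux/seqlock.h>

	static DEFINE_SPINLOCK(foo_lock);
	static seqcount_spinlock_t foo_seqcount =
		SEQCNT_SPINLOCK_ZERO(foo_seqcount, &foo_lock);
	static int foo_data;

	static void foo_write(int val)
	{
		spin_lock(&foo_lock);	/* lockdep-verified by write_seqcount_begin() */
		write_seqcount_begin(&foo_seqcount);
		foo_data = val;
		write_seqcount_end(&foo_seqcount);
		spin_unlock(&foo_lock);
	}

	static int foo_read(void)
	{
		unsigned int seq;
		int val;

		do {
			seq = read_seqcount_begin(&foo_seqcount);
			val = foo_data;
		} while (read_seqcount_retry(&foo_seqcount, seq));

		return val;
	}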

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
---
 Documentation/locking/seqlock.rst |  52 ++++
 include/linux/seqlock.h           | 462 +++++++++++++++++++++++++-----
 2 files changed, 446 insertions(+), 68 deletions(-)

diff --git a/Documentation/locking/seqlock.rst b/Documentation/locking/seqlock.rst
index 366dd368d90a..62c5ad98c11c 100644
--- a/Documentation/locking/seqlock.rst
+++ b/Documentation/locking/seqlock.rst
@@ -87,6 +87,58 @@ Read path::
 	} while (read_seqcount_retry(&foo_seqcount, seq));
 
 
+.. _seqcount_locktype_t:
+
+Sequence counters with associated locks (``seqcount_LOCKTYPE_t``)
+-----------------------------------------------------------------
+
+As discussed at :ref:`seqcount_t`, sequence count write side critical
+sections must be serialized and non-preemptible. This variant of
+sequence counters associates the lock used for writer serialization at
+initialization time, which enables lockdep to validate that the write
+side critical sections are properly serialized.
+
+This lock association is a NOOP if lockdep is disabled and has neither
+storage nor runtime overhead. If lockdep is enabled, the lock pointer is
+stored in struct seqcount and lockdep's "lock is held" assertions are
+injected at the beginning of the write side critical section to validate
+that it is properly protected.
+
+For lock types which do not implicitly disable preemption, preemption
+protection is enforced in the write side function.
+
+The following sequence counters with associated locks are defined:
+
+  - ``seqcount_spinlock_t``
+  - ``seqcount_raw_spinlock_t``
+  - ``seqcount_rwlock_t``
+  - ``seqcount_mutex_t``
+  - ``seqcount_ww_mutex_t``
+
+The plain seqcount read and write APIs branch out to the specific
+seqcount_LOCKTYPE_t implementation at compile-time. This avoids kernel
+API explosion for each new seqcount LOCKTYPE.
+
+Initialization (replace "LOCKTYPE" with one of the supported locks)::
+
+	/* dynamic */
+	seqcount_LOCKTYPE_t foo_seqcount;
+	seqcount_LOCKTYPE_init(&foo_seqcount, &lock);
+
+	/* static */
+	static seqcount_LOCKTYPE_t foo_seqcount =
+		SEQCNT_LOCKTYPE_ZERO(foo_seqcount, &lock);
+
+	/* C99 struct init */
+	struct {
+		seqcount_LOCKTYPE_t seq;
+	} foo = { .seq = SEQCNT_LOCKTYPE_ZERO(foo.seq, &lock) };
+
+Write path: same as in :ref:`seqcount_t`, while running from a context
+with the associated LOCKTYPE lock acquired.
+
+Read path: same as in :ref:`seqcount_t`.
+
 .. _seqlock_t:
 
 Sequential locks (``seqlock_t``)
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 54bc20496392..8c16a494c968 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -10,13 +10,17 @@
  *
  * Copyrights:
  * - Based on x86_64 vsyscall gettimeofday: Keith Owens, Andrea Arcangeli
+ * - Sequence counters with associated locks, (C) 2020 Linutronix GmbH
  */
 
-#include <linux/spinlock.h>
-#include <linux/preempt.h>
-#include <linux/lockdep.h>
 #include <linux/compiler.h>
 #include <linux/kcsan-checks.h>
+#include <linux/lockdep.h>
+#include <linux/mutex.h>
+#include <linux/preempt.h>
+#include <linux/spinlock.h>
+#include <linux/ww_mutex.h>
+
 #include <asm/processor.h>
 
 /*
@@ -48,6 +52,10 @@
  * This mechanism can't be used if the protected data contains pointers,
  * as the writer can invalidate a pointer that a reader is following.
  *
+ * If the write serialization mechanism is one of the common kernel
+ * locking primitives, use a sequence counter with associated lock
+ * (seqcount_LOCKTYPE_t) instead.
+ *
  * If it's desired to automatically handle the sequence counter writer
  * serialization and non-preemptibility requirements, use a sequential
  * lock (seqlock_t) instead.
@@ -108,9 +116,267 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
  */
 #define SEQCNT_ZERO(name) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(name) }
 
+/*
+ * Sequence counters with associated locks (seqcount_LOCKTYPE_t)
+ *
+ * A sequence counter which associates the lock used for writer
+ * serialization at initialization time. This enables lockdep to validate
+ * that the write side critical section is properly serialized.
+ *
+ * For associated locks which do not implicitly disable preemption,
+ * preemption protection is enforced in the write side function.
+ *
+ * Lockdep is never used in any of the raw write variants.
+ *
+ * See Documentation/locking/seqlock.rst
+ */
+
+#ifdef CONFIG_LOCKDEP
+#define __SEQ_LOCKDEP(expr)	expr
+#else
+#define __SEQ_LOCKDEP(expr)
+#endif
+
+#define SEQCOUNT_LOCKTYPE_ZERO(seq_name, assoc_lock) {			\
+	.seqcount		= SEQCNT_ZERO(seq_name.seqcount),	\
+	__SEQ_LOCKDEP(.lock	= (assoc_lock))				\
+}
+
+#define seqcount_locktype_init(s, assoc_lock)				\
+do {									\
+	seqcount_init(&(s)->seqcount);					\
+	__SEQ_LOCKDEP((s)->lock = (assoc_lock));			\
+} while (0)
+
+/**
+ * typedef seqcount_spinlock_t - sequence counter with spinlock associated
+ * @seqcount:	The real sequence counter
+ * @lock:	Pointer to the associated spinlock
+ *
+ * A plain sequence counter with external writer synchronization by a
+ * spinlock. The spinlock is associated to the sequence count in the
+ * static initializer or init function. This enables lockdep to validate
+ * that the write side critical section is properly serialized.
+ */
+typedef struct seqcount_spinlock {
+	seqcount_t	seqcount;
+	__SEQ_LOCKDEP(spinlock_t	*lock);
+} seqcount_spinlock_t;
+
+/**
+ * SEQCNT_SPINLOCK_ZERO - static initializer for seqcount_spinlock_t
+ * @name:	Name of the seqcount_spinlock_t instance
+ * @lock:	Pointer to the associated spinlock
+ */
+#define SEQCNT_SPINLOCK_ZERO(name, lock)				\
+	SEQCOUNT_LOCKTYPE_ZERO(name, lock)
+
+/**
+ * seqcount_spinlock_init - runtime initializer for seqcount_spinlock_t
+ * @s:		Pointer to the seqcount_spinlock_t instance
+ * @lock:	Pointer to the associated spinlock
+ */
+#define seqcount_spinlock_init(s, lock)					\
+	seqcount_locktype_init(s, lock)
+
+/**
+ * typedef seqcount_raw_spinlock_t - sequence count with raw spinlock associated
+ * @seqcount:	The real sequence counter
+ * @lock:	Pointer to the associated raw spinlock
+ *
+ * A plain sequence counter with external writer synchronization by a
+ * raw spinlock. The raw spinlock is associated to the sequence count in
+ * the static initializer or init function. This enables lockdep to
+ * validate that the write side critical section is properly serialized.
+ */
+typedef struct seqcount_raw_spinlock {
+	seqcount_t      seqcount;
+	__SEQ_LOCKDEP(raw_spinlock_t	*lock);
+} seqcount_raw_spinlock_t;
+
+/**
+ * SEQCNT_RAW_SPINLOCK_ZERO - static initializer for seqcount_raw_spinlock_t
+ * @name:	Name of the seqcount_raw_spinlock_t instance
+ * @lock:	Pointer to the associated raw_spinlock
+ */
+#define SEQCNT_RAW_SPINLOCK_ZERO(name, lock)				\
+	SEQCOUNT_LOCKTYPE_ZERO(name, lock)
+
+/**
+ * seqcount_raw_spinlock_init - runtime initializer for seqcount_raw_spinlock_t
+ * @s:		Pointer to the seqcount_raw_spinlock_t instance
+ * @lock:	Pointer to the associated raw_spinlock
+ */
+#define seqcount_raw_spinlock_init(s, lock)				\
+	seqcount_locktype_init(s, lock)
+
+/**
+ * typedef seqcount_rwlock_t - sequence count with rwlock associated
+ * @seqcount:	The real sequence counter
+ * @lock:	Pointer to the associated rwlock
+ *
+ * A plain sequence counter with external writer synchronization by a
+ * rwlock. The rwlock is associated to the sequence count in the static
+ * initializer or init function. This enables lockdep to validate that
+ * the write side critical section is properly serialized.
+ */
+typedef struct seqcount_rwlock {
+	seqcount_t      seqcount;
+	__SEQ_LOCKDEP(rwlock_t		*lock);
+} seqcount_rwlock_t;
+
+/**
+ * SEQCNT_RWLOCK_ZERO - static initializer for seqcount_rwlock_t
+ * @name:	Name of the seqcount_rwlock_t instance
+ * @lock:	Pointer to the associated rwlock
+ */
+#define SEQCNT_RWLOCK_ZERO(name, lock)					\
+	SEQCOUNT_LOCKTYPE_ZERO(name, lock)
+
+/**
+ * seqcount_rwlock_init - runtime initializer for seqcount_rwlock_t
+ * @s:		Pointer to the seqcount_rwlock_t instance
+ * @lock:	Pointer to the associated rwlock
+ */
+#define seqcount_rwlock_init(s, lock)					\
+	seqcount_locktype_init(s, lock)
+
+/**
+ * typedef seqcount_mutex_t - sequence count with mutex associated
+ * @seqcount:	The real sequence counter
+ * @lock:	Pointer to the associated mutex
+ *
+ * A plain sequence counter with external writer synchronization by a
+ * mutex. The mutex is associated to the sequence counter in the static
+ * initializer or init function. This enables lockdep to validate that
+ * the write side critical section is properly serialized.
+ *
+ * The write side API functions write_seqcount_begin()/end() automatically
+ * disable and enable preemption when used with seqcount_mutex_t.
+ */
+typedef struct seqcount_mutex {
+	seqcount_t      seqcount;
+	__SEQ_LOCKDEP(struct mutex	*lock);
+} seqcount_mutex_t;
+
+/**
+ * SEQCNT_MUTEX_ZERO - static initializer for seqcount_mutex_t
+ * @name:	Name of the seqcount_mutex_t instance
+ * @lock:	Pointer to the associated mutex
+ */
+#define SEQCNT_MUTEX_ZERO(name, lock)					\
+	SEQCOUNT_LOCKTYPE_ZERO(name, lock)
+
+/**
+ * seqcount_mutex_init - runtime initializer for seqcount_mutex_t
+ * @s:		Pointer to the seqcount_mutex_t instance
+ * @lock:	Pointer to the associated mutex
+ */
+#define seqcount_mutex_init(s, lock)					\
+	seqcount_locktype_init(s, lock)
+
+/**
+ * typedef seqcount_ww_mutex_t - sequence count with ww_mutex associated
+ * @seqcount:	The real sequence counter
+ * @lock:	Pointer to the associated ww_mutex
+ *
+ * A plain sequence counter with external writer synchronization by a
+ * ww_mutex. The ww_mutex is associated to the sequence counter in the static
+ * initializer or init function. This enables lockdep to validate that
+ * the write side critical section is properly serialized.
+ *
+ * The write side API functions write_seqcount_begin()/end() automatically
+ * disable and enable preemption when used with seqcount_ww_mutex_t.
+ */
+typedef struct seqcount_ww_mutex {
+	seqcount_t      seqcount;
+	__SEQ_LOCKDEP(struct ww_mutex	*lock);
+} seqcount_ww_mutex_t;
+
+/**
+ * SEQCNT_WW_MUTEX_ZERO - static initializer for seqcount_ww_mutex_t
+ * @name:	Name of the seqcount_ww_mutex_t instance
+ * @lock:	Pointer to the associated ww_mutex
+ */
+#define SEQCNT_WW_MUTEX_ZERO(name, lock)				\
+	SEQCOUNT_LOCKTYPE_ZERO(name, lock)
+
+/**
+ * seqcount_ww_mutex_init - runtime initializer for seqcount_ww_mutex_t
+ * @s:		Pointer to the seqcount_ww_mutex_t instance
+ * @lock:	Pointer to the associated ww_mutex
+ */
+#define seqcount_ww_mutex_init(s, lock)					\
+	seqcount_locktype_init(s, lock)
+
+/*
+ * @preempt: Is the associated write serialization lock preemptible?
+ */
+#define SEQCOUNT_LOCKTYPE(locktype, preempt, lockmember)		\
+static inline seqcount_t *						\
+__seqcount_##locktype##_ptr(seqcount_##locktype##_t *s)			\
+{									\
+	return &s->seqcount;						\
+}									\
+									\
+static inline bool							\
+__seqcount_##locktype##_preemptible(seqcount_##locktype##_t *s)		\
+{									\
+	return preempt;							\
+}									\
+									\
+static inline void							\
+__seqcount_##locktype##_assert(seqcount_##locktype##_t *s)		\
+{									\
+	__SEQ_LOCKDEP(lockdep_assert_held(lockmember));			\
+}
+
+/*
+ * Similar hooks, but for plain seqcount_t
+ */
+
+static inline seqcount_t *__seqcount_ptr(seqcount_t *s)
+{
+	return s;
+}
+
+static inline bool __seqcount_preemptible(seqcount_t *s)
+{
+	return false;
+}
+
+static inline void __seqcount_assert(seqcount_t *s)
+{
+	lockdep_assert_preemption_disabled();
+}
+
+/*
+ * @s: Pointer to seqcount_locktype_t, the generated hooks' first parameter.
+ */
+SEQCOUNT_LOCKTYPE(raw_spinlock,	false,	s->lock)
+SEQCOUNT_LOCKTYPE(spinlock,	false,	s->lock)
+SEQCOUNT_LOCKTYPE(rwlock,	false,	s->lock)
+SEQCOUNT_LOCKTYPE(mutex,	true,	s->lock)
+SEQCOUNT_LOCKTYPE(ww_mutex,	true,	&s->lock->base)
+
+#define __seqprop_case(s, locktype, prop)				\
+	seqcount_##locktype##_t: __seqcount_##locktype##_##prop((void *)(s))
+
+#define __seqprop(s, prop) _Generic(*(s),				\
+	seqcount_t:		__seqcount_##prop((void *)(s)),		\
+	__seqprop_case((s),	raw_spinlock,	prop),			\
+	__seqprop_case((s),	spinlock,	prop),			\
+	__seqprop_case((s),	rwlock,		prop),			\
+	__seqprop_case((s),	mutex,		prop),			\
+	__seqprop_case((s),	ww_mutex,	prop))
+
+#define __to_seqcount_t(s)				__seqprop(s, ptr)
+#define __associated_lock_exists_and_is_preemptible(s)	__seqprop(s, preemptible)
+#define __assert_write_section_is_protected(s)		__seqprop(s, assert)
+
 /**
  * __read_seqcount_begin() - begin a seqcount_t read section w/o barrier
- * @s: Pointer to seqcount_t
+ * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
  *
  * __read_seqcount_begin is like read_seqcount_begin, but has no smp_rmb()
  * barrier. Callers should ensure that smp_rmb() or equivalent ordering is
@@ -122,7 +388,10 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
  *
  * Return: count to be passed to read_seqcount_retry()
  */
-static inline unsigned __read_seqcount_begin(const seqcount_t *s)
+#define __read_seqcount_begin(s)					\
+	__read_seqcount_t_begin(__to_seqcount_t(s))
+
+static inline unsigned __read_seqcount_t_begin(const seqcount_t *s)
 {
 	unsigned ret;
 
@@ -138,32 +407,38 @@ static inline unsigned __read_seqcount_begin(const seqcount_t *s)
 
 /**
  * raw_read_seqcount_begin() - begin a seqcount_t read section w/o lockdep
- * @s: Pointer to seqcount_t
+ * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
  *
  * Return: count to be passed to read_seqcount_retry()
  */
-static inline unsigned raw_read_seqcount_begin(const seqcount_t *s)
+#define raw_read_seqcount_begin(s)					\
+	raw_read_seqcount_t_begin(__to_seqcount_t(s))
+
+static inline unsigned raw_read_seqcount_t_begin(const seqcount_t *s)
 {
-	unsigned ret = __read_seqcount_begin(s);
+	unsigned ret = __read_seqcount_t_begin(s);
 	smp_rmb();
 	return ret;
 }
 
 /**
  * read_seqcount_begin() - begin a seqcount_t read critical section
- * @s: Pointer to seqcount_t
+ * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
  *
  * Return: count to be passed to read_seqcount_retry()
  */
-static inline unsigned read_seqcount_begin(const seqcount_t *s)
+#define read_seqcount_begin(s)						\
+	read_seqcount_t_begin(__to_seqcount_t(s))
+
+static inline unsigned read_seqcount_t_begin(const seqcount_t *s)
 {
 	seqcount_lockdep_reader_access(s);
-	return raw_read_seqcount_begin(s);
+	return raw_read_seqcount_t_begin(s);
 }
 
 /**
  * raw_read_seqcount() - read the raw seqcount_t counter value
- * @s: Pointer to seqcount_t
+ * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
  *
  * raw_read_seqcount opens a read critical section of the given
  * seqcount_t, without any lockdep checking, and without checking or
@@ -172,7 +447,10 @@ static inline unsigned read_seqcount_begin(const seqcount_t *s)
  *
  * Return: count to be passed to read_seqcount_retry()
  */
-static inline unsigned raw_read_seqcount(const seqcount_t *s)
+#define raw_read_seqcount(s)						\
+	raw_read_seqcount_t(__to_seqcount_t(s))
+
+static inline unsigned raw_read_seqcount_t(const seqcount_t *s)
 {
 	unsigned ret = READ_ONCE(s->sequence);
 	smp_rmb();
@@ -183,7 +461,7 @@ static inline unsigned raw_read_seqcount(const seqcount_t *s)
 /**
  * raw_seqcount_begin() - begin a seqcount_t read critical section w/o
  *                        lockdep and w/o counter stabilization
- * @s: Pointer to seqcount_t
+ * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
  *
  * raw_seqcount_begin opens a read critical section of the given
  * seqcount_t. Unlike read_seqcount_begin(), this function will not wait
@@ -197,18 +475,21 @@ static inline unsigned raw_read_seqcount(const seqcount_t *s)
  *
  * Return: count to be passed to read_seqcount_retry()
  */
-static inline unsigned raw_seqcount_begin(const seqcount_t *s)
+#define raw_seqcount_begin(s)						\
+	raw_seqcount_t_begin(__to_seqcount_t(s))
+
+static inline unsigned raw_seqcount_t_begin(const seqcount_t *s)
 {
 	/*
 	 * If the counter is odd, let read_seqcount_retry() fail
 	 * by decrementing the counter.
 	 */
-	return raw_read_seqcount(s) & ~1;
+	return raw_read_seqcount_t(s) & ~1;
 }
 
 /**
  * __read_seqcount_retry() - end a seqcount_t read section w/o barrier
- * @s: Pointer to seqcount_t
+ * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
  * @start: count, from read_seqcount_begin()
  *
  * __read_seqcount_retry is like read_seqcount_retry, but has no smp_rmb()
@@ -221,7 +502,10 @@ static inline unsigned raw_seqcount_begin(const seqcount_t *s)
  *
  * Return: true if a read section retry is required, else false
  */
-static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start)
+#define __read_seqcount_retry(s, start)					\
+	__read_seqcount_t_retry(__to_seqcount_t(s), start)
+
+static inline int __read_seqcount_t_retry(const seqcount_t *s, unsigned start)
 {
 	kcsan_atomic_next(0);
 	return unlikely(READ_ONCE(s->sequence) != start);
@@ -229,7 +513,7 @@ static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start)
 
 /**
  * read_seqcount_retry() - end a seqcount_t read critical section
- * @s: Pointer to seqcount_t
+ * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
  * @start: count, from read_seqcount_begin()
  *
  * read_seqcount_retry closes the read critical section of given
@@ -238,17 +522,28 @@ static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start)
  *
  * Return: true if a read section retry is required, else false
  */
-static inline int read_seqcount_retry(const seqcount_t *s, unsigned start)
+#define read_seqcount_retry(s, start)					\
+	read_seqcount_t_retry(__to_seqcount_t(s), start)
+
+static inline int read_seqcount_t_retry(const seqcount_t *s, unsigned start)
 {
 	smp_rmb();
-	return __read_seqcount_retry(s, start);
+	return __read_seqcount_t_retry(s, start);
 }
 
 /**
  * raw_write_seqcount_begin() - start a seqcount_t write section w/o lockdep
- * @s: Pointer to seqcount_t
+ * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
  */
-static inline void raw_write_seqcount_begin(seqcount_t *s)
+#define raw_write_seqcount_begin(s)					\
+do {									\
+	if (__associated_lock_exists_and_is_preemptible(s))		\
+		preempt_disable();					\
+									\
+	raw_write_seqcount_t_begin(__to_seqcount_t(s));			\
+} while (0)
+
+static inline void raw_write_seqcount_t_begin(seqcount_t *s)
 {
 	kcsan_nestable_atomic_begin();
 	s->sequence++;
@@ -257,49 +552,50 @@ static inline void raw_write_seqcount_begin(seqcount_t *s)
 
 /**
  * raw_write_seqcount_end() - end a seqcount_t write section w/o lockdep
- * @s: Pointer to seqcount_t
+ * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
  */
-static inline void raw_write_seqcount_end(seqcount_t *s)
+#define raw_write_seqcount_end(s)					\
+do {									\
+	raw_write_seqcount_t_end(__to_seqcount_t(s));			\
+									\
+	if (__associated_lock_exists_and_is_preemptible(s))		\
+		preempt_enable();					\
+} while (0)
+
+static inline void raw_write_seqcount_t_end(seqcount_t *s)
 {
 	smp_wmb();
 	s->sequence++;
 	kcsan_nestable_atomic_end();
 }
 
-static inline void __write_seqcount_begin_nested(seqcount_t *s, int subclass)
-{
-	raw_write_seqcount_begin(s);
-	seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_);
-}
-
 /**
  * write_seqcount_begin_nested() - start a seqcount_t write section with
  *                                 custom lockdep nesting level
- * @s: Pointer to seqcount_t
+ * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
  * @subclass: lockdep nesting level
  *
  * See Documentation/locking/lockdep-design.rst
  */
-static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass)
-{
-	lockdep_assert_preemption_disabled();
-	__write_seqcount_begin_nested(s, subclass);
-}
+#define write_seqcount_begin_nested(s, subclass)			\
+do {									\
+	__assert_write_section_is_protected(s);				\
+									\
+	if (__associated_lock_exists_and_is_preemptible(s))		\
+		preempt_disable();					\
+									\
+	write_seqcount_t_begin_nested(__to_seqcount_t(s), subclass);	\
+} while (0)
 
-/*
- * A write_seqcount_begin() variant w/o lockdep non-preemptibility checks.
- *
- * Use for internal seqlock.h code where it's known that preemption is
- * already disabled. For example, seqlock_t write side functions.
- */
-static inline void __write_seqcount_begin(seqcount_t *s)
+static inline void write_seqcount_t_begin_nested(seqcount_t *s, int subclass)
 {
-	__write_seqcount_begin_nested(s, 0);
+	raw_write_seqcount_t_begin(s);
+	seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_);
 }
 
 /**
  * write_seqcount_begin() - start a seqcount_t write side critical section
- * @s: Pointer to seqcount_t
+ * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
  *
  * write_seqcount_begin opens a write side critical section of the given
  * seqcount_t.
@@ -308,26 +604,44 @@ static inline void __write_seqcount_begin(seqcount_t *s)
  * non-preemptible. If readers can be invoked from hardirq or softirq
  * context, interrupts or bottom halves must be respectively disabled.
  */
-static inline void write_seqcount_begin(seqcount_t *s)
+#define write_seqcount_begin(s)						\
+do {									\
+	__assert_write_section_is_protected(s);				\
+									\
+	if (__associated_lock_exists_and_is_preemptible(s))		\
+		preempt_disable();					\
+									\
+	write_seqcount_t_begin(__to_seqcount_t(s));			\
+} while (0)
+
+static inline void write_seqcount_t_begin(seqcount_t *s)
 {
-	write_seqcount_begin_nested(s, 0);
+	write_seqcount_t_begin_nested(s, 0);
 }
 
 /**
  * write_seqcount_end() - end a seqcount_t write side critical section
- * @s: Pointer to seqcount_t
+ * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
  *
  * The write section must've been opened with write_seqcount_begin().
  */
-static inline void write_seqcount_end(seqcount_t *s)
+#define write_seqcount_end(s)						\
+do {									\
+	write_seqcount_t_end(__to_seqcount_t(s));			\
+									\
+	if (__associated_lock_exists_and_is_preemptible(s))		\
+		preempt_enable();					\
+} while (0)
+
+static inline void write_seqcount_t_end(seqcount_t *s)
 {
 	seqcount_release(&s->dep_map, _RET_IP_);
-	raw_write_seqcount_end(s);
+	raw_write_seqcount_t_end(s);
 }
 
 /**
  * raw_write_seqcount_barrier() - do a seqcount_t write barrier
- * @s: Pointer to seqcount_t
+ * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
  *
  * This can be used to provide an ordering guarantee instead of the usual
  * consistency guarantee. It is one wmb cheaper, because it can collapse
@@ -366,7 +680,10 @@ static inline void write_seqcount_end(seqcount_t *s)
  *		WRITE_ONCE(X, false);
  *      }
  */
-static inline void raw_write_seqcount_barrier(seqcount_t *s)
+#define raw_write_seqcount_barrier(s)					\
+	raw_write_seqcount_t_barrier(__to_seqcount_t(s))
+
+static inline void raw_write_seqcount_t_barrier(seqcount_t *s)
 {
 	kcsan_nestable_atomic_begin();
 	s->sequence++;
@@ -378,12 +695,15 @@ static inline void raw_write_seqcount_barrier(seqcount_t *s)
 /**
  * write_seqcount_invalidate() - invalidate in-progress seqcount_t read
  *                               side operations
- * @s: Pointer to seqcount_t
+ * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
  *
  * After write_seqcount_invalidate, no seqcount_t read side operations
  * will complete successfully and see data older than this.
  */
-static inline void write_seqcount_invalidate(seqcount_t *s)
+#define write_seqcount_invalidate(s)					\
+	write_seqcount_t_invalidate(__to_seqcount_t(s))
+
+static inline void write_seqcount_t_invalidate(seqcount_t *s)
 {
 	smp_wmb();
 	kcsan_nestable_atomic_begin();
@@ -393,7 +713,7 @@ static inline void write_seqcount_invalidate(seqcount_t *s)
 
 /**
  * raw_read_seqcount_latch() - pick even/odd seqcount_t latch data copy
- * @s: Pointer to seqcount_t
+ * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
  *
  * Use seqcount_t latching to switch between two storage places protected
  * by a sequence counter. Doing so allows having interruptible, preemptible,
@@ -406,7 +726,10 @@ static inline void write_seqcount_invalidate(seqcount_t *s)
  * picking which data copy to read. The full counter value must then be
  * checked with read_seqcount_retry().
  */
-static inline int raw_read_seqcount_latch(seqcount_t *s)
+#define raw_read_seqcount_latch(s)					\
+	raw_read_seqcount_t_latch(__to_seqcount_t(s))
+
+static inline int raw_read_seqcount_t_latch(seqcount_t *s)
 {
 	/* Pairs with the first smp_wmb() in raw_write_seqcount_latch() */
 	int seq = READ_ONCE(s->sequence); /* ^^^ */
@@ -415,7 +738,7 @@ static inline int raw_read_seqcount_latch(seqcount_t *s)
 
 /**
  * raw_write_seqcount_latch() - redirect readers to even/odd copy
- * @s: Pointer to seqcount_t
+ * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
  *
  * The latch technique is a multiversion concurrency control method that allows
  * queries during non-atomic modifications. If you can guarantee queries never
@@ -494,7 +817,10 @@ static inline int raw_read_seqcount_latch(seqcount_t *s)
  *	When data is a dynamic data structure; one should use regular RCU
  *	patterns to manage the lifetimes of the objects within.
  */
-static inline void raw_write_seqcount_latch(seqcount_t *s)
+#define raw_write_seqcount_latch(s)					\
+	raw_write_seqcount_t_latch(__to_seqcount_t(s))
+
+static inline void raw_write_seqcount_t_latch(seqcount_t *s)
 {
        smp_wmb();      /* prior stores before incrementing "sequence" */
        s->sequence++;
@@ -592,7 +918,7 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
 static inline void write_seqlock(seqlock_t *sl)
 {
 	spin_lock(&sl->lock);
-	__write_seqcount_begin(&sl->seqcount);
+	write_seqcount_t_begin(&sl->seqcount);
 }
 
 /**
@@ -604,7 +930,7 @@ static inline void write_seqlock(seqlock_t *sl)
  */
 static inline void write_sequnlock(seqlock_t *sl)
 {
-	write_seqcount_end(&sl->seqcount);
+	write_seqcount_t_end(&sl->seqcount);
 	spin_unlock(&sl->lock);
 }
 
@@ -618,7 +944,7 @@ static inline void write_sequnlock(seqlock_t *sl)
 static inline void write_seqlock_bh(seqlock_t *sl)
 {
 	spin_lock_bh(&sl->lock);
-	__write_seqcount_begin(&sl->seqcount);
+	write_seqcount_t_begin(&sl->seqcount);
 }
 
 /**
@@ -631,7 +957,7 @@ static inline void write_seqlock_bh(seqlock_t *sl)
  */
 static inline void write_sequnlock_bh(seqlock_t *sl)
 {
-	write_seqcount_end(&sl->seqcount);
+	write_seqcount_t_end(&sl->seqcount);
 	spin_unlock_bh(&sl->lock);
 }
 
@@ -645,7 +971,7 @@ static inline void write_sequnlock_bh(seqlock_t *sl)
 static inline void write_seqlock_irq(seqlock_t *sl)
 {
 	spin_lock_irq(&sl->lock);
-	__write_seqcount_begin(&sl->seqcount);
+	write_seqcount_t_begin(&sl->seqcount);
 }
 
 /**
@@ -657,7 +983,7 @@ static inline void write_seqlock_irq(seqlock_t *sl)
  */
 static inline void write_sequnlock_irq(seqlock_t *sl)
 {
-	write_seqcount_end(&sl->seqcount);
+	write_seqcount_t_end(&sl->seqcount);
 	spin_unlock_irq(&sl->lock);
 }
 
@@ -666,7 +992,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl)
 	unsigned long flags;
 
 	spin_lock_irqsave(&sl->lock, flags);
-	__write_seqcount_begin(&sl->seqcount);
+	write_seqcount_t_begin(&sl->seqcount);
 	return flags;
 }
 
@@ -695,13 +1021,13 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl)
 static inline void
 write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags)
 {
-	write_seqcount_end(&sl->seqcount);
+	write_seqcount_t_end(&sl->seqcount);
 	spin_unlock_irqrestore(&sl->lock, flags);
 }
 
 /**
  * read_seqlock_excl() - begin a seqlock_t locking reader section
- * @sl: Pointer to seqlock_t
+ * @sl:	Pointer to seqlock_t
  *
  * read_seqlock_excl opens a seqlock_t locking reader critical section.  A
  * locking reader exclusively locks out *both* other writers *and* other
-- 
2.20.1



* [PATCH v4 10/24] seqlock: Align multi-line macros newline escapes at 72 columns
From: Ahmed S. Darwish @ 2020-07-20 15:55 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Will Deacon
  Cc: Thomas Gleixner, Paul E. McKenney, Sebastian A. Siewior,
	Steven Rostedt, LKML, Ahmed S. Darwish

The parent commit, "seqlock: Extend seqcount API with associated locks",
introduced a large number of multi-line macros that are newline-escaped
at 72 columns.

For overall cohesion, align the earlier-existing macros similarly.

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
---
 include/linux/seqlock.h | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 8c16a494c968..b48729988325 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -80,17 +80,18 @@ static inline void __seqcount_init(seqcount_t *s, const char *name,
 }
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
-# define SEQCOUNT_DEP_MAP_INIT(lockname) \
-		.dep_map = { .name = #lockname } \
+
+# define SEQCOUNT_DEP_MAP_INIT(lockname)				\
+		.dep_map = { .name = #lockname }
 
 /**
  * seqcount_init() - runtime initializer for seqcount_t
  * @s: Pointer to the seqcount_t instance
  */
-# define seqcount_init(s)				\
-	do {						\
-		static struct lock_class_key __key;	\
-		__seqcount_init((s), #s, &__key);	\
+# define seqcount_init(s)						\
+	do {								\
+		static struct lock_class_key __key;			\
+		__seqcount_init((s), #s, &__key);			\
 	} while (0)
 
 static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
@@ -842,20 +843,20 @@ typedef struct {
 	spinlock_t lock;
 } seqlock_t;
 
-#define __SEQLOCK_UNLOCKED(lockname)			\
-	{						\
-		.seqcount = SEQCNT_ZERO(lockname),	\
-		.lock =	__SPIN_LOCK_UNLOCKED(lockname)	\
+#define __SEQLOCK_UNLOCKED(lockname)					\
+	{								\
+		.seqcount = SEQCNT_ZERO(lockname),			\
+		.lock =	__SPIN_LOCK_UNLOCKED(lockname)			\
 	}
 
 /**
  * seqlock_init() - dynamic initializer for seqlock_t
  * @sl: Pointer to the seqlock_t instance
  */
-#define seqlock_init(sl)				\
-	do {						\
-		seqcount_init(&(sl)->seqcount);		\
-		spin_lock_init(&(sl)->lock);		\
+#define seqlock_init(sl)						\
+	do {								\
+		seqcount_init(&(sl)->seqcount);				\
+		spin_lock_init(&(sl)->lock);				\
 	} while (0)
 
 /**
-- 
2.20.1



* [PATCH v4 17/24] timekeeping: Use sequence counter with associated raw spinlock
From: Ahmed S. Darwish @ 2020-07-20 15:55 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Will Deacon
  Cc: Thomas Gleixner, Paul E. McKenney, Sebastian A. Siewior,
	Steven Rostedt, LKML, Ahmed S. Darwish, John Stultz,
	Stephen Boyd

A sequence counter write side critical section must be protected by some
form of locking to serialize writers. A plain seqcount_t does not
contain the information of which lock must be held when entering a write
side critical section.

Use the new seqcount_raw_spinlock_t data type, which allows associating
a raw spinlock with the sequence counter. This enables lockdep to verify
that the raw spinlock used for writer serialization is held when the
write side critical section is entered.

If lockdep is disabled, this lock association is compiled out and has
neither storage size nor runtime overhead.

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
---
 kernel/time/timekeeping.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index d20d489841c8..05ecfd8a3314 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -39,18 +39,19 @@ enum timekeeping_adv_mode {
 	TK_ADV_FREQ
 };
 
+static DEFINE_RAW_SPINLOCK(timekeeper_lock);
+
 /*
  * The most important data for readout fits into a single 64 byte
  * cache line.
  */
 static struct {
-	seqcount_t		seq;
+	seqcount_raw_spinlock_t	seq;
 	struct timekeeper	timekeeper;
 } tk_core ____cacheline_aligned = {
-	.seq = SEQCNT_ZERO(tk_core.seq),
+	.seq = SEQCNT_RAW_SPINLOCK_ZERO(tk_core.seq, &timekeeper_lock),
 };
 
-static DEFINE_RAW_SPINLOCK(timekeeper_lock);
 static struct timekeeper shadow_timekeeper;
 
 /**
@@ -63,7 +64,7 @@ static struct timekeeper shadow_timekeeper;
  * See @update_fast_timekeeper() below.
  */
 struct tk_fast {
-	seqcount_t		seq;
+	seqcount_raw_spinlock_t	seq;
 	struct tk_read_base	base[2];
 };
 
@@ -80,11 +81,13 @@ static struct clocksource dummy_clock = {
 };
 
 static struct tk_fast tk_fast_mono ____cacheline_aligned = {
+	.seq     = SEQCNT_RAW_SPINLOCK_ZERO(tk_fast_mono.seq, &timekeeper_lock),
 	.base[0] = { .clock = &dummy_clock, },
 	.base[1] = { .clock = &dummy_clock, },
 };
 
 static struct tk_fast tk_fast_raw  ____cacheline_aligned = {
+	.seq     = SEQCNT_RAW_SPINLOCK_ZERO(tk_fast_raw.seq, &timekeeper_lock),
 	.base[0] = { .clock = &dummy_clock, },
 	.base[1] = { .clock = &dummy_clock, },
 };
@@ -157,7 +160,7 @@ static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
  * tk_clock_read - atomic clocksource read() helper
  *
  * This helper is necessary to use in the read paths because, while the
- * seqlock ensures we don't return a bad value while structures are updated,
+ * seqcount ensures we don't return a bad value while structures are updated,
  * it doesn't protect from potential crashes. There is the possibility that
  * the tkr's clocksource may change between the read reference, and the
  * clock reference passed to the read function.  This can cause crashes if
@@ -222,10 +225,10 @@ static inline u64 timekeeping_get_delta(const struct tk_read_base *tkr)
 	unsigned int seq;
 
 	/*
-	 * Since we're called holding a seqlock, the data may shift
+	 * Since we're called holding a seqcount, the data may shift
 	 * under us while we're doing the calculation. This can cause
 	 * false positives, since we'd note a problem but throw the
-	 * results away. So nest another seqlock here to atomically
+	 * results away. So nest another seqcount here to atomically
 	 * grab the points we are checking with.
 	 */
 	do {
@@ -486,7 +489,7 @@ EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns);
  *
  * To keep it NMI safe since we're accessing from tracing, we're not using a
  * separate timekeeper with updates to monotonic clock and boot offset
- * protected with seqlocks. This has the following minor side effects:
+ * protected with seqcounts. This has the following minor side effects:
  *
  * (1) Its possible that a timestamp be taken after the boot offset is updated
  * but before the timekeeper is updated. If this happens, the new boot offset
-- 
2.20.1



* [PATCH v4 18/24] vfs: Use sequence counter with associated spinlock
From: Ahmed S. Darwish @ 2020-07-20 15:55 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Will Deacon
  Cc: Thomas Gleixner, Paul E. McKenney, Sebastian A. Siewior,
	Steven Rostedt, LKML, Ahmed S. Darwish, Alexander Viro,
	Mauro Carvalho Chehab, Jonathan Corbet, linux-fsdevel

A sequence counter write side critical section must be protected by some
form of locking to serialize writers. A plain seqcount_t does not
contain the information of which lock must be held when entering a write
side critical section.

Use the new seqcount_spinlock_t data type, which allows associating a
spinlock with the sequence counter. This enables lockdep to verify that
the spinlock used for writer serialization is held when the write side
critical section is entered.

If lockdep is disabled, this lock association is compiled out and has
neither storage size nor runtime overhead.

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
---
 fs/dcache.c               | 2 +-
 fs/fs_struct.c            | 4 ++--
 include/linux/dcache.h    | 2 +-
 include/linux/fs_struct.h | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index 361ea7ab30ea..ea0485861d93 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1746,7 +1746,7 @@ static struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
 	dentry->d_lockref.count = 1;
 	dentry->d_flags = 0;
 	spin_lock_init(&dentry->d_lock);
-	seqcount_init(&dentry->d_seq);
+	seqcount_spinlock_init(&dentry->d_seq, &dentry->d_lock);
 	dentry->d_inode = NULL;
 	dentry->d_parent = dentry;
 	dentry->d_sb = sb;
diff --git a/fs/fs_struct.c b/fs/fs_struct.c
index ca639ed967b7..04b3f5b9c629 100644
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -117,7 +117,7 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old)
 		fs->users = 1;
 		fs->in_exec = 0;
 		spin_lock_init(&fs->lock);
-		seqcount_init(&fs->seq);
+		seqcount_spinlock_init(&fs->seq, &fs->lock);
 		fs->umask = old->umask;
 
 		spin_lock(&old->lock);
@@ -163,6 +163,6 @@ EXPORT_SYMBOL(current_umask);
 struct fs_struct init_fs = {
 	.users		= 1,
 	.lock		= __SPIN_LOCK_UNLOCKED(init_fs.lock),
-	.seq		= SEQCNT_ZERO(init_fs.seq),
+	.seq		= SEQCNT_SPINLOCK_ZERO(init_fs.seq, &init_fs.lock),
 	.umask		= 0022,
 };
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index a81f0c3cf352..65d975bf9390 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -89,7 +89,7 @@ extern struct dentry_stat_t dentry_stat;
 struct dentry {
 	/* RCU lookup touched fields */
 	unsigned int d_flags;		/* protected by d_lock */
-	seqcount_t d_seq;		/* per dentry seqlock */
+	seqcount_spinlock_t d_seq;	/* per dentry seqlock */
 	struct hlist_bl_node d_hash;	/* lookup hash list */
 	struct dentry *d_parent;	/* parent directory */
 	struct qstr d_name;
diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h
index cf1015abfbf2..783b48dedb72 100644
--- a/include/linux/fs_struct.h
+++ b/include/linux/fs_struct.h
@@ -9,7 +9,7 @@
 struct fs_struct {
 	int users;
 	spinlock_t lock;
-	seqcount_t seq;
+	seqcount_spinlock_t seq;
 	int umask;
 	int in_exec;
 	struct path root, pwd;
-- 
2.20.1



* [PATCH v4 21/24] NFSv4: Use sequence counter with associated spinlock
From: Ahmed S. Darwish @ 2020-07-20 15:55 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Will Deacon
  Cc: Thomas Gleixner, Paul E. McKenney, Sebastian A. Siewior,
	Steven Rostedt, LKML, Ahmed S. Darwish, Trond Myklebust,
	Anna Schumaker, linux-nfs

A sequence counter write side critical section must be protected by some
form of locking to serialize writers. A plain seqcount_t does not
contain the information of which lock must be held when entering a write
side critical section.

Use the new seqcount_spinlock_t data type, which allows associating a
spinlock with the sequence counter. This enables lockdep to verify that
the spinlock used for writer serialization is held when the write side
critical section is entered.

If lockdep is disabled, this lock association is compiled out and has
neither storage size nor runtime overhead.

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
---
 fs/nfs/nfs4_fs.h   | 2 +-
 fs/nfs/nfs4state.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 2b7f6dcd2eb8..210e590e1f71 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -117,7 +117,7 @@ struct nfs4_state_owner {
 	unsigned long	     so_flags;
 	struct list_head     so_states;
 	struct nfs_seqid_counter so_seqid;
-	seqcount_t	     so_reclaim_seqcount;
+	seqcount_spinlock_t  so_reclaim_seqcount;
 	struct mutex	     so_delegreturn_mutex;
 };
 
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index a8dc25ce48bb..b1dba24918f8 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -509,7 +509,7 @@ nfs4_alloc_state_owner(struct nfs_server *server,
 	nfs4_init_seqid_counter(&sp->so_seqid);
 	atomic_set(&sp->so_count, 1);
 	INIT_LIST_HEAD(&sp->so_lru);
-	seqcount_init(&sp->so_reclaim_seqcount);
+	seqcount_spinlock_init(&sp->so_reclaim_seqcount, &sp->so_lock);
 	mutex_init(&sp->so_delegreturn_mutex);
 	return sp;
 }
-- 
2.20.1



* [PATCH v4 24/24] hrtimer: Use sequence counter with associated raw spinlock
From: Ahmed S. Darwish @ 2020-07-20 15:55 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Will Deacon
  Cc: Thomas Gleixner, Paul E. McKenney, Sebastian A. Siewior,
	Steven Rostedt, LKML, Ahmed S. Darwish

A sequence counter write side critical section must be protected by some
form of locking to serialize writers. A plain seqcount_t does not
contain the information of which lock must be held when entering a write
side critical section.

Use the new seqcount_raw_spinlock_t data type, which allows associating
a raw spinlock with the sequence counter. This enables lockdep to verify
that the raw spinlock used for writer serialization is held when the
write side critical section is entered.

If lockdep is disabled, this lock association is compiled out and has
neither storage size nor runtime overhead.

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
---
 include/linux/hrtimer.h |  2 +-
 kernel/time/hrtimer.c   | 13 ++++++++++---
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 15c8ac313678..25993b86ac5c 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -159,7 +159,7 @@ struct hrtimer_clock_base {
 	struct hrtimer_cpu_base	*cpu_base;
 	unsigned int		index;
 	clockid_t		clockid;
-	seqcount_t		seq;
+	seqcount_raw_spinlock_t	seq;
 	struct hrtimer		*running;
 	struct timerqueue_head	active;
 	ktime_t			(*get_time)(void);
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index d89da1c7e005..c4038511d5c9 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -135,7 +135,11 @@ static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = {
  * timer->base->cpu_base
  */
 static struct hrtimer_cpu_base migration_cpu_base = {
-	.clock_base = { { .cpu_base = &migration_cpu_base, }, },
+	.clock_base = { {
+		.cpu_base = &migration_cpu_base,
+		.seq      = SEQCNT_RAW_SPINLOCK_ZERO(migration_cpu_base.seq,
+						     &migration_cpu_base.lock),
+	}, },
 };
 
 #define migration_base	migration_cpu_base.clock_base[0]
@@ -1998,8 +2002,11 @@ int hrtimers_prepare_cpu(unsigned int cpu)
 	int i;
 
 	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
-		cpu_base->clock_base[i].cpu_base = cpu_base;
-		timerqueue_init_head(&cpu_base->clock_base[i].active);
+		struct hrtimer_clock_base *clock_b = &cpu_base->clock_base[i];
+
+		clock_b->cpu_base = cpu_base;
+		seqcount_raw_spinlock_init(&clock_b->seq, &cpu_base->lock);
+		timerqueue_init_head(&clock_b->active);
 	}
 
 	cpu_base->cpu = cpu;
-- 
2.20.1



* [PATCH v4 23/24] kvm/eventfd: Use sequence counter with associated spinlock
From: Ahmed S. Darwish @ 2020-07-20 15:55 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Will Deacon
  Cc: Thomas Gleixner, Paul E. McKenney, Sebastian A. Siewior,
	Steven Rostedt, LKML, Ahmed S. Darwish, Paolo Bonzini

A sequence counter write side critical section must be protected by some
form of locking to serialize writers. A plain seqcount_t does not
contain the information of which lock must be held when entering a write
side critical section.

Use the new seqcount_spinlock_t data type, which allows associating a
spinlock with the sequence counter. This enables lockdep to verify that
the spinlock used for writer serialization is held when the write side
critical section is entered.

If lockdep is disabled, this lock association is compiled out and has
neither storage size nor runtime overhead.

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_irqfd.h | 2 +-
 virt/kvm/eventfd.c        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h
index dc1da020305b..dac047abdba7 100644
--- a/include/linux/kvm_irqfd.h
+++ b/include/linux/kvm_irqfd.h
@@ -42,7 +42,7 @@ struct kvm_kernel_irqfd {
 	wait_queue_entry_t wait;
 	/* Update side is protected by irqfds.lock */
 	struct kvm_kernel_irq_routing_entry irq_entry;
-	seqcount_t irq_entry_sc;
+	seqcount_spinlock_t irq_entry_sc;
 	/* Used for level IRQ fast-path */
 	int gsi;
 	struct work_struct inject;
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index ef7ed916ad4a..d6408bb497dc 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -303,7 +303,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 	INIT_LIST_HEAD(&irqfd->list);
 	INIT_WORK(&irqfd->inject, irqfd_inject);
 	INIT_WORK(&irqfd->shutdown, irqfd_shutdown);
-	seqcount_init(&irqfd->irq_entry_sc);
+	seqcount_spinlock_init(&irqfd->irq_entry_sc, &kvm->irqfds.lock);
 
 	f = fdget(args->fd);
 	if (!f.file) {
-- 
2.20.1



* [PATCH v4 19/24] raid5: Use sequence counter with associated spinlock
From: Ahmed S. Darwish @ 2020-07-20 15:55 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Will Deacon
  Cc: Thomas Gleixner, Paul E. McKenney, Sebastian A. Siewior,
	Steven Rostedt, LKML, Ahmed S. Darwish, Song Liu, linux-raid

A sequence counter write side critical section must be protected by some
form of locking to serialize writers. A plain seqcount_t does not
contain the information of which lock must be held when entering a write
side critical section.

Use the new seqcount_spinlock_t data type, which allows associating a
spinlock with the sequence counter. This enables lockdep to verify that
the spinlock used for writer serialization is held when the write side
critical section is entered.

If lockdep is disabled, this lock association is compiled out and has
neither storage size nor runtime overhead.

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
---
 drivers/md/raid5.c | 2 +-
 drivers/md/raid5.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index ab8067f9ce8c..892aefe88fa7 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -6935,7 +6935,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
 	} else
 		goto abort;
 	spin_lock_init(&conf->device_lock);
-	seqcount_init(&conf->gen_lock);
+	seqcount_spinlock_init(&conf->gen_lock, &conf->device_lock);
 	mutex_init(&conf->cache_size_mutex);
 	init_waitqueue_head(&conf->wait_for_quiescent);
 	init_waitqueue_head(&conf->wait_for_stripe);
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index f90e0704bed9..a2c9e9e9f5ac 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -589,7 +589,7 @@ struct r5conf {
 	int			prev_chunk_sectors;
 	int			prev_algo;
 	short			generation; /* increments with every reshape */
-	seqcount_t		gen_lock;	/* lock against generation changes */
+	seqcount_spinlock_t	gen_lock;	/* lock against generation changes */
 	unsigned long		reshape_checkpoint; /* Time we last updated
 						     * metadata */
 	long long		min_offset_diff; /* minimum difference between
-- 
2.20.1



* [PATCH v4 22/24] userfaultfd: Use sequence counter with associated spinlock
From: Ahmed S. Darwish @ 2020-07-20 15:55 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Will Deacon
  Cc: Thomas Gleixner, Paul E. McKenney, Sebastian A. Siewior,
	Steven Rostedt, LKML, Ahmed S. Darwish, Alexander Viro,
	linux-fsdevel

A sequence counter write side critical section must be protected by some
form of locking to serialize writers. A plain seqcount_t does not record
which lock must be held when entering a write side critical section.

Use the new seqcount_spinlock_t data type, which associates a spinlock
with the sequence counter. This enables lockdep to verify that the
spinlock used for writer serialization is held when the write side
critical section is entered.

If lockdep is disabled, this lock association is compiled out and incurs
neither storage nor runtime overhead.
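
As an illustration of what the association protects: per the struct
comment in the diff below, the refile writer runs under the
fault_pending_wqh waitqueue lock, while readers keep the usual lockless
retry loop. A simplified sketch (both critical sections are elided):

	/* writer, serialized by the associated lock */
	spin_lock(&ctx->fault_pending_wqh.lock);
	write_seqcount_begin(&ctx->refile_seq);
	/* ... refile the waitqueue entry ... */
	write_seqcount_end(&ctx->refile_seq);
	spin_unlock(&ctx->fault_pending_wqh.lock);

	/* lockless reader */
	do {
		seq = read_seqcount_begin(&ctx->refile_seq);
		/* ... sample the refile state ... */
	} while (read_seqcount_retry(&ctx->refile_seq, seq));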

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
---
 fs/userfaultfd.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 52de29000c7e..26e8b23594fb 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -61,7 +61,7 @@ struct userfaultfd_ctx {
 	/* waitqueue head for events */
 	wait_queue_head_t event_wqh;
 	/* a refile sequence protected by fault_pending_wqh lock */
-	struct seqcount refile_seq;
+	seqcount_spinlock_t refile_seq;
 	/* pseudo fd refcounting */
 	refcount_t refcount;
 	/* userfaultfd syscall flags */
@@ -1998,7 +1998,7 @@ static void init_once_userfaultfd_ctx(void *mem)
 	init_waitqueue_head(&ctx->fault_wqh);
 	init_waitqueue_head(&ctx->event_wqh);
 	init_waitqueue_head(&ctx->fd_wqh);
-	seqcount_init(&ctx->refile_seq);
+	seqcount_spinlock_init(&ctx->refile_seq, &ctx->fault_pending_wqh.lock);
 }
 
 SYSCALL_DEFINE1(userfaultfd, int, flags)
-- 
2.20.1


^ permalink raw reply	[relevance 95%]

* [PATCH v4 20/24] iocost: Use sequence counter with associated spinlock
                       ` (14 preceding siblings ...)
  2020-07-20 15:55 94%   ` [PATCH v4 19/24] raid5: " Ahmed S. Darwish
@ 2020-07-20 15:55 94%   ` Ahmed S. Darwish
  2020-07-29 14:33 80%     ` [tip: locking/core] " tip-bot2 for Ahmed S. Darwish
  2020-07-20 15:55 95%   ` [PATCH v4 21/24] NFSv4: " Ahmed S. Darwish
                     ` (4 subsequent siblings)
  20 siblings, 1 reply; 200+ results
From: Ahmed S. Darwish @ 2020-07-20 15:55 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Will Deacon
  Cc: Thomas Gleixner, Paul E. McKenney, Sebastian A. Siewior,
	Steven Rostedt, LKML, Ahmed S. Darwish, Daniel Wagner

A sequence counter write side critical section must be protected by some
form of locking to serialize writers. A plain seqcount_t does not record
which lock must be held when entering a write side critical section.

Use the new seqcount_spinlock_t data type, which associates a spinlock
with the sequence counter. This enables lockdep to verify that the
spinlock used for writer serialization is held when the write side
critical section is entered.

If lockdep is disabled, this lock association is compiled out and incurs
neither storage nor runtime overhead.
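
As a side effect, the explicit lockdep_assert_held() in
ioc_start_period() becomes redundant, since write_seqcount_begin() on a
seqcount_spinlock_t now performs an equivalent lockdep check; the diff
below drops it. Roughly:

	/* before: manual assertion */
	lockdep_assert_held(&ioc->lock);
	write_seqcount_begin(&ioc->period_seqcount);

	/* after: the seqcount/lock association makes the check implicit */
	write_seqcount_begin(&ioc->period_seqcount);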

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Reviewed-by: Daniel Wagner <dwagner@suse.de>
---
 block/blk-iocost.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 8ac4aad66ebc..8e940c27c27c 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -406,7 +406,7 @@ struct ioc {
 	enum ioc_running		running;
 	atomic64_t			vtime_rate;
 
-	seqcount_t			period_seqcount;
+	seqcount_spinlock_t		period_seqcount;
 	u32				period_at;	/* wallclock starttime */
 	u64				period_at_vtime; /* vtime starttime */
 
@@ -873,7 +873,6 @@ static void ioc_now(struct ioc *ioc, struct ioc_now *now)
 
 static void ioc_start_period(struct ioc *ioc, struct ioc_now *now)
 {
-	lockdep_assert_held(&ioc->lock);
 	WARN_ON_ONCE(ioc->running != IOC_RUNNING);
 
 	write_seqcount_begin(&ioc->period_seqcount);
@@ -2001,7 +2000,7 @@ static int blk_iocost_init(struct request_queue *q)
 
 	ioc->running = IOC_IDLE;
 	atomic64_set(&ioc->vtime_rate, VTIME_PER_USEC);
-	seqcount_init(&ioc->period_seqcount);
+	seqcount_spinlock_init(&ioc->period_seqcount, &ioc->lock);
 	ioc->period_at = ktime_to_us(ktime_get());
 	atomic64_set(&ioc->cur_period, 0);
 	atomic_set(&ioc->hweight_gen, 0);
-- 
2.20.1


^ permalink raw reply	[relevance 94%]

* [PATCH v4 16/24] xfrm: policy: Use sequence counters with associated lock
                       ` (10 preceding siblings ...)
  2020-07-20 15:55 94%   ` [PATCH v4 15/24] netfilter: nft_set_rbtree: Use sequence counter with associated rwlock Ahmed S. Darwish
@ 2020-07-20 15:55 89%   ` Ahmed S. Darwish
  2020-07-29 14:33 77%     ` [tip: locking/core] " tip-bot2 for Ahmed S. Darwish
  2020-07-20 15:55 82%   ` [PATCH v4 17/24] timekeeping: Use sequence counter with associated raw spinlock Ahmed S. Darwish
                     ` (8 subsequent siblings)
  20 siblings, 1 reply; 200+ results
From: Ahmed S. Darwish @ 2020-07-20 15:55 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Will Deacon
  Cc: Thomas Gleixner, Paul E. McKenney, Sebastian A. Siewior,
	Steven Rostedt, LKML, Ahmed S. Darwish, Steffen Klassert,
	Herbert Xu, David S. Miller, Jakub Kicinski, netdev

A sequence counter write side critical section must be protected by some
form of locking to serialize writers. If the serialization primitive does
not implicitly disable preemption, preemption must be explicitly disabled
before entering the sequence counter write side critical section.

A plain seqcount_t does not record which lock must be held when entering
a write side critical section.

Use the new seqcount_spinlock_t and seqcount_mutex_t data types instead,
which associate a lock with the sequence counter. This enables lockdep
to verify that the lock used for writer serialization is held when the
write side critical section is entered.

If lockdep is disabled, this lock association is compiled out and incurs
neither storage nor runtime overhead.
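
Note that seqcount_mutex_t associates a sleeping lock: as with the
seqcount_ww_mutex_t conversion elsewhere in this series, the generic
write_seqcount_begin()/write_seqcount_end() disable and re-enable
preemption internally for such seqcounts, since the mutex itself does
not. A simplified sketch of the hash-resize writer under this model
(the real locking details are elided):

	mutex_lock(&hash_resize_mutex);
	write_seqcount_begin(&xfrm_policy_hash_generation);
	/* ... rehash the policy tables ... */
	write_seqcount_end(&xfrm_policy_hash_generation);
	mutex_unlock(&hash_resize_mutex);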

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
---
 net/xfrm/xfrm_policy.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 564aa6492e7c..732a940468b0 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -122,7 +122,7 @@ struct xfrm_pol_inexact_bin {
 	/* list containing '*:*' policies */
 	struct hlist_head hhead;
 
-	seqcount_t count;
+	seqcount_spinlock_t count;
 	/* tree sorted by daddr/prefix */
 	struct rb_root root_d;
 
@@ -155,7 +155,7 @@ static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1]
 						__read_mostly;
 
 static struct kmem_cache *xfrm_dst_cache __ro_after_init;
-static __read_mostly seqcount_t xfrm_policy_hash_generation;
+static __read_mostly seqcount_mutex_t xfrm_policy_hash_generation;
 
 static struct rhashtable xfrm_policy_inexact_table;
 static const struct rhashtable_params xfrm_pol_inexact_params;
@@ -719,7 +719,7 @@ xfrm_policy_inexact_alloc_bin(const struct xfrm_policy *pol, u8 dir)
 	INIT_HLIST_HEAD(&bin->hhead);
 	bin->root_d = RB_ROOT;
 	bin->root_s = RB_ROOT;
-	seqcount_init(&bin->count);
+	seqcount_spinlock_init(&bin->count, &net->xfrm.xfrm_policy_lock);
 
 	prev = rhashtable_lookup_get_insert_key(&xfrm_policy_inexact_table,
 						&bin->k, &bin->head,
@@ -1906,7 +1906,7 @@ static int xfrm_policy_match(const struct xfrm_policy *pol,
 
 static struct xfrm_pol_inexact_node *
 xfrm_policy_lookup_inexact_addr(const struct rb_root *r,
-				seqcount_t *count,
+				seqcount_spinlock_t *count,
 				const xfrm_address_t *addr, u16 family)
 {
 	const struct rb_node *parent;
@@ -4153,7 +4153,7 @@ void __init xfrm_init(void)
 {
 	register_pernet_subsys(&xfrm_net_ops);
 	xfrm_dev_init();
-	seqcount_init(&xfrm_policy_hash_generation);
+	seqcount_mutex_init(&xfrm_policy_hash_generation, &hash_resize_mutex);
 	xfrm_input_init();
 
 #ifdef CONFIG_INET_ESPINTCP
-- 
2.20.1


^ permalink raw reply	[relevance 89%]

* [PATCH v4 15/24] netfilter: nft_set_rbtree: Use sequence counter with associated rwlock
                       ` (9 preceding siblings ...)
  2020-07-20 15:55 91%   ` [PATCH v4 14/24] netfilter: conntrack: " Ahmed S. Darwish
@ 2020-07-20 15:55 94%   ` Ahmed S. Darwish
  2020-07-29 14:33 83%     ` [tip: locking/core] " tip-bot2 for Ahmed S. Darwish
  2020-07-20 15:55 89%   ` [PATCH v4 16/24] xfrm: policy: Use sequence counters with associated lock Ahmed S. Darwish
                     ` (9 subsequent siblings)
  20 siblings, 1 reply; 200+ results
From: Ahmed S. Darwish @ 2020-07-20 15:55 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Will Deacon
  Cc: Thomas Gleixner, Paul E. McKenney, Sebastian A. Siewior,
	Steven Rostedt, LKML, Ahmed S. Darwish, Pablo Neira Ayuso,
	Jozsef Kadlecsik, Florian Westphal, David S. Miller,
	Jakub Kicinski, netfilter-devel, coreteam, netdev

A sequence counter write side critical section must be protected by some
form of locking to serialize writers. A plain seqcount_t does not record
which lock must be held when entering a write side critical section.

Use the new seqcount_rwlock_t data type, which associates an rwlock with
the sequence counter. This enables lockdep to verify that the rwlock
used for writer serialization is held when the write side critical
section is entered.

If lockdep is disabled, this lock association is compiled out and incurs
neither storage nor runtime overhead.
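
For the rwlock case, writers must hold the lock in write mode around the
seqcount write section. A simplified sketch (the _bh variant and the
rbtree modification are illustrative):

	write_lock_bh(&priv->lock);
	write_seqcount_begin(&priv->count);
	/* ... insert into / erase from priv->root ... */
	write_seqcount_end(&priv->count);
	write_unlock_bh(&priv->lock);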

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
---
 net/netfilter/nft_set_rbtree.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index b6aad3fc46c3..4b2834fd17b2 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -18,7 +18,7 @@
 struct nft_rbtree {
 	struct rb_root		root;
 	rwlock_t		lock;
-	seqcount_t		count;
+	seqcount_rwlock_t	count;
 	struct delayed_work	gc_work;
 };
 
@@ -523,7 +523,7 @@ static int nft_rbtree_init(const struct nft_set *set,
 	struct nft_rbtree *priv = nft_set_priv(set);
 
 	rwlock_init(&priv->lock);
-	seqcount_init(&priv->count);
+	seqcount_rwlock_init(&priv->count, &priv->lock);
 	priv->root = RB_ROOT;
 
 	INIT_DEFERRABLE_WORK(&priv->gc_work, nft_rbtree_gc);
-- 
2.20.1


^ permalink raw reply	[relevance 94%]

* [PATCH v4 14/24] netfilter: conntrack: Use sequence counter with associated spinlock
                       ` (8 preceding siblings ...)
  2020-07-20 15:55 92%   ` [PATCH v4 13/24] sched: tasks: Use sequence counter with associated spinlock Ahmed S. Darwish
@ 2020-07-20 15:55 91%   ` Ahmed S. Darwish
  2020-07-29 14:33 80%     ` [tip: locking/core] " tip-bot2 for Ahmed S. Darwish
  2020-07-20 15:55 94%   ` [PATCH v4 15/24] netfilter: nft_set_rbtree: Use sequence counter with associated rwlock Ahmed S. Darwish
                     ` (10 subsequent siblings)
  20 siblings, 1 reply; 200+ results
From: Ahmed S. Darwish @ 2020-07-20 15:55 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Will Deacon
  Cc: Thomas Gleixner, Paul E. McKenney, Sebastian A. Siewior,
	Steven Rostedt, LKML, Ahmed S. Darwish, Pablo Neira Ayuso,
	Jozsef Kadlecsik, Florian Westphal, David S. Miller,
	Jakub Kicinski, netfilter-devel, coreteam, netdev

A sequence counter write side critical section must be protected by some
form of locking to serialize writers. A plain seqcount_t does not record
which lock must be held when entering a write side critical section.

Use the new seqcount_spinlock_t data type, which associates a spinlock
with the sequence counter. This enables lockdep to verify that the
spinlock used for writer serialization is held when the write side
critical section is entered.

If lockdep is disabled, this lock association is compiled out and incurs
neither storage nor runtime overhead.
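
The writer covered by this association is the hash resize path, which is
serialized by nf_conntrack_locks_all_lock. A simplified sketch (helper
names approximate the conntrack locking code; bottom-half handling is
elided):

	nf_conntrack_all_lock();	/* takes nf_conntrack_locks_all_lock */
	write_seqcount_begin(&nf_conntrack_generation);
	/* ... install the new hash table ... */
	write_seqcount_end(&nf_conntrack_generation);
	nf_conntrack_all_unlock();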

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
---
 include/net/netfilter/nf_conntrack.h | 2 +-
 net/netfilter/nf_conntrack_core.c    | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 90690e37a56f..ea4e2010b246 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -286,7 +286,7 @@ int nf_conntrack_hash_resize(unsigned int hashsize);
 
 extern struct hlist_nulls_head *nf_conntrack_hash;
 extern unsigned int nf_conntrack_htable_size;
-extern seqcount_t nf_conntrack_generation;
+extern seqcount_spinlock_t nf_conntrack_generation;
 extern unsigned int nf_conntrack_max;
 
 /* must be called with rcu read lock held */
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 79cd9dde457b..b8c54d390f93 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -180,7 +180,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
 
 unsigned int nf_conntrack_max __read_mostly;
 EXPORT_SYMBOL_GPL(nf_conntrack_max);
-seqcount_t nf_conntrack_generation __read_mostly;
+seqcount_spinlock_t nf_conntrack_generation __read_mostly;
 static unsigned int nf_conntrack_hash_rnd __read_mostly;
 
 static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple,
@@ -2598,7 +2598,8 @@ int nf_conntrack_init_start(void)
 	/* struct nf_ct_ext uses u8 to store offsets/size */
 	BUILD_BUG_ON(total_extension_size() > 255u);
 
-	seqcount_init(&nf_conntrack_generation);
+	seqcount_spinlock_init(&nf_conntrack_generation,
+			       &nf_conntrack_locks_all_lock);
 
 	for (i = 0; i < CONNTRACK_LOCKS; i++)
 		spin_lock_init(&nf_conntrack_locks[i]);
-- 
2.20.1


^ permalink raw reply	[relevance 91%]

* [PATCH v4 13/24] sched: tasks: Use sequence counter with associated spinlock
                       ` (7 preceding siblings ...)
  2020-07-20 15:55 82%   ` [PATCH v4 12/24] dma-buf: Use sequence counter with associated wound/wait mutex Ahmed S. Darwish
@ 2020-07-20 15:55 92%   ` Ahmed S. Darwish
  2020-07-29 14:33 80%     ` [tip: locking/core] " tip-bot2 for Ahmed S. Darwish
  2020-07-20 15:55 91%   ` [PATCH v4 14/24] netfilter: conntrack: " Ahmed S. Darwish
                     ` (11 subsequent siblings)
  20 siblings, 1 reply; 200+ results
From: Ahmed S. Darwish @ 2020-07-20 15:55 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Will Deacon
  Cc: Thomas Gleixner, Paul E. McKenney, Sebastian A. Siewior,
	Steven Rostedt, LKML, Ahmed S. Darwish, Juri Lelli,
	Vincent Guittot, Dietmar Eggemann, Al Viro

A sequence counter write side critical section must be protected by some
form of locking to serialize writers. A plain seqcount_t does not record
which lock must be held when entering a write side critical section.

Use the new seqcount_spinlock_t data type, which associates a spinlock
with the sequence counter. This enables lockdep to verify that the
spinlock used for writer serialization is held when the write side
critical section is entered.

If lockdep is disabled, this lock association is compiled out and incurs
neither storage nor runtime overhead.
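
The writer covered here is the cpuset code, which updates
->mems_allowed under task_lock(), i.e. under ->alloc_lock. A simplified
sketch (new_mems is a placeholder):

	task_lock(tsk);			/* acquires tsk->alloc_lock */
	write_seqcount_begin(&tsk->mems_allowed_seq);
	tsk->mems_allowed = new_mems;
	write_seqcount_end(&tsk->mems_allowed_seq);
	task_unlock(tsk);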

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
---
 include/linux/sched.h | 2 +-
 init/init_task.c      | 3 ++-
 kernel/fork.c         | 2 +-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3903a9500926..02b7fbd17bf6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1054,7 +1054,7 @@ struct task_struct {
 	/* Protected by ->alloc_lock: */
 	nodemask_t			mems_allowed;
 	/* Seqence number to catch updates: */
-	seqcount_t			mems_allowed_seq;
+	seqcount_spinlock_t		mems_allowed_seq;
 	int				cpuset_mem_spread_rotor;
 	int				cpuset_slab_spread_rotor;
 #endif
diff --git a/init/init_task.c b/init/init_task.c
index 15089d15010a..94fe3ba1bb60 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -154,7 +154,8 @@ struct task_struct init_task
 	.trc_holdout_list = LIST_HEAD_INIT(init_task.trc_holdout_list),
 #endif
 #ifdef CONFIG_CPUSETS
-	.mems_allowed_seq = SEQCNT_ZERO(init_task.mems_allowed_seq),
+	.mems_allowed_seq = SEQCNT_SPINLOCK_ZERO(init_task.mems_allowed_seq,
+						 &init_task.alloc_lock),
 #endif
 #ifdef CONFIG_RT_MUTEXES
 	.pi_waiters	= RB_ROOT_CACHED,
diff --git a/kernel/fork.c b/kernel/fork.c
index 70d9d0a4de2a..fc72f09a61b2 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2032,7 +2032,7 @@ static __latent_entropy struct task_struct *copy_process(
 #ifdef CONFIG_CPUSETS
 	p->cpuset_mem_spread_rotor = NUMA_NO_NODE;
 	p->cpuset_slab_spread_rotor = NUMA_NO_NODE;
-	seqcount_init(&p->mems_allowed_seq);
+	seqcount_spinlock_init(&p->mems_allowed_seq, &p->alloc_lock);
 #endif
 #ifdef CONFIG_TRACE_IRQFLAGS
 	p->irq_events = 0;
-- 
2.20.1


^ permalink raw reply	[relevance 92%]

* [PATCH v4 12/24] dma-buf: Use sequence counter with associated wound/wait mutex
                       ` (6 preceding siblings ...)
  2020-07-20 15:55 91%   ` [PATCH v4 11/24] dma-buf: Remove custom seqcount lockdep class key Ahmed S. Darwish
@ 2020-07-20 15:55 82%   ` Ahmed S. Darwish
  2020-07-29 14:33 69%     ` [tip: locking/core] " tip-bot2 for Ahmed S. Darwish
  2020-07-20 15:55 92%   ` [PATCH v4 13/24] sched: tasks: Use sequence counter with associated spinlock Ahmed S. Darwish
                     ` (12 subsequent siblings)
  20 siblings, 1 reply; 200+ results
From: Ahmed S. Darwish @ 2020-07-20 15:55 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Will Deacon
  Cc: Thomas Gleixner, Paul E. McKenney, Sebastian A. Siewior,
	Steven Rostedt, LKML, Ahmed S. Darwish, Daniel Vetter

A sequence counter write side critical section must be protected by some
form of locking to serialize writers. If the serialization primitive does
not implicitly disable preemption, preemption must be explicitly disabled
before entering the sequence counter write side critical section.

The dma-buf reservation subsystem uses plain sequence counters to manage
updates to reservations. Writer serialization is accomplished through a
wound/wait mutex.

Acquiring a wound/wait mutex does not disable preemption, so preemption
must be disabled manually before the write side critical section and
re-enabled after it.

Use the newly-added seqcount_ww_mutex_t instead:

  - It associates the ww_mutex with the sequence count, which enables
    lockdep to validate that the write side critical section is properly
    serialized.

  - It removes the need to explicitly add preempt_disable/enable()
    around the write side critical section because the
    write_seqcount_begin/end() functions for this new data type do this
    automatically.

If lockdep is disabled, this ww_mutex lock association is compiled out
and incurs neither storage nor runtime overhead.
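
A before/after sketch of a dma-resv writer, mirroring the hunks below
(the ww_mutex acquire context and the fence updates are elided):

	/* before: manual preemption control */
	ww_mutex_lock(&obj->lock, NULL);
	preempt_disable();
	write_seqcount_begin(&obj->seq);
	/* ... update the fence pointers ... */
	write_seqcount_end(&obj->seq);
	preempt_enable();
	ww_mutex_unlock(&obj->lock);

	/* after: write_seqcount_begin/end() handle preemption
	 * internally for seqcount_ww_mutex_t */
	ww_mutex_lock(&obj->lock, NULL);
	write_seqcount_begin(&obj->seq);
	/* ... update the fence pointers ... */
	write_seqcount_end(&obj->seq);
	ww_mutex_unlock(&obj->lock);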

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/dma-buf/dma-resv.c                       | 8 +-------
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 2 --
 include/linux/dma-resv.h                         | 2 +-
 3 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
index 15efa0c2dacb..a7631352a486 100644
--- a/drivers/dma-buf/dma-resv.c
+++ b/drivers/dma-buf/dma-resv.c
@@ -129,7 +129,7 @@ subsys_initcall(dma_resv_lockdep);
 void dma_resv_init(struct dma_resv *obj)
 {
 	ww_mutex_init(&obj->lock, &reservation_ww_class);
-	seqcount_init(&obj->seq);
+	seqcount_ww_mutex_init(&obj->seq, &obj->lock);
 
 	RCU_INIT_POINTER(obj->fence, NULL);
 	RCU_INIT_POINTER(obj->fence_excl, NULL);
@@ -260,7 +260,6 @@ void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence)
 	fobj = dma_resv_get_list(obj);
 	count = fobj->shared_count;
 
-	preempt_disable();
 	write_seqcount_begin(&obj->seq);
 
 	for (i = 0; i < count; ++i) {
@@ -282,7 +281,6 @@ void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence)
 	smp_store_mb(fobj->shared_count, count);
 
 	write_seqcount_end(&obj->seq);
-	preempt_enable();
 	dma_fence_put(old);
 }
 EXPORT_SYMBOL(dma_resv_add_shared_fence);
@@ -309,14 +307,12 @@ void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence)
 	if (fence)
 		dma_fence_get(fence);
 
-	preempt_disable();
 	write_seqcount_begin(&obj->seq);
 	/* write_seqcount_begin provides the necessary memory barrier */
 	RCU_INIT_POINTER(obj->fence_excl, fence);
 	if (old)
 		old->shared_count = 0;
 	write_seqcount_end(&obj->seq);
-	preempt_enable();
 
 	/* inplace update, no shared fences */
 	while (i--)
@@ -394,13 +390,11 @@ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src)
 	src_list = dma_resv_get_list(dst);
 	old = dma_resv_get_excl(dst);
 
-	preempt_disable();
 	write_seqcount_begin(&dst->seq);
 	/* write_seqcount_begin provides the necessary memory barrier */
 	RCU_INIT_POINTER(dst->fence_excl, new);
 	RCU_INIT_POINTER(dst->fence, dst_list);
 	write_seqcount_end(&dst->seq);
-	preempt_enable();
 
 	dma_resv_list_free(src_list);
 	dma_fence_put(old);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index b91b5171270f..ff4b583cb96a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -258,11 +258,9 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
 	new->shared_count = k;
 
 	/* Install the new fence list, seqcount provides the barriers */
-	preempt_disable();
 	write_seqcount_begin(&resv->seq);
 	RCU_INIT_POINTER(resv->fence, new);
 	write_seqcount_end(&resv->seq);
-	preempt_enable();
 
 	/* Drop the references to the removed fences or move them to ef_list */
 	for (i = j, k = 0; i < old->shared_count; ++i) {
diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
index a6538ae7d93f..d44a77e8a7e3 100644
--- a/include/linux/dma-resv.h
+++ b/include/linux/dma-resv.h
@@ -69,7 +69,7 @@ struct dma_resv_list {
  */
 struct dma_resv {
 	struct ww_mutex lock;
-	seqcount_t seq;
+	seqcount_ww_mutex_t seq;
 
 	struct dma_fence __rcu *fence_excl;
 	struct dma_resv_list __rcu *fence;
-- 
2.20.1


^ permalink raw reply	[relevance 82%]

* [PATCH v4 11/24] dma-buf: Remove custom seqcount lockdep class key
                       ` (5 preceding siblings ...)
  2020-07-20 15:55 96%   ` [PATCH v4 10/24] seqlock: Align multi-line macros newline escapes at 72 columns Ahmed S. Darwish
@ 2020-07-20 15:55 91%   ` Ahmed S. Darwish
  2020-07-29 14:33 77%     ` [tip: locking/core] " tip-bot2 for Ahmed S. Darwish
  2020-07-20 15:55 82%   ` [PATCH v4 12/24] dma-buf: Use sequence counter with associated wound/wait mutex Ahmed S. Darwish
                     ` (13 subsequent siblings)
  20 siblings, 1 reply; 200+ results
From: Ahmed S. Darwish @ 2020-07-20 15:55 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Will Deacon
  Cc: Thomas Gleixner, Paul E. McKenney, Sebastian A. Siewior,
	Steven Rostedt, LKML, Ahmed S. Darwish, Daniel Vetter

Commit 3c3b177a9369 ("reservation: add support for read-only access
using rcu") introduced a sequence counter to manage updates to
reservations. Back then, the reservation object initializer
reservation_object_init() was always inlined.

Having the sequence counter initialization inlined meant that each of
the call sites would have a different lockdep class key, which would've
broken lockdep's deadlock detection. The aforementioned commit thus
introduced, and exported, a custom seqcount lockdep class key and name.

Commit 8735f16803f00 ("dma-buf: cleanup reservation_object_init...")
transformed the reservation object initializer into a normal,
non-inlined C function. seqcount_init(), which automatically defines the
seqcount lockdep class key and must not be called from inlined code, can
now be safely used.
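
For reference, with lockdep enabled seqcount_init() is a macro that
defines a static lockdep class key at its expansion site, roughly:

	#define seqcount_init(s)				\
		do {						\
			static struct lock_class_key __key;	\
			__seqcount_init((s), #s, &__key);	\
		} while (0)

so expanding it inside an inlined initializer would hand every call site
its own key.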

Remove the seqcount custom lockdep class key, name, and export. Use
seqcount_init() inside the dma reservation object initializer.

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Reviewed-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/dma-buf/dma-resv.c | 9 +--------
 include/linux/dma-resv.h   | 2 --
 2 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
index b45f8514dc82..15efa0c2dacb 100644
--- a/drivers/dma-buf/dma-resv.c
+++ b/drivers/dma-buf/dma-resv.c
@@ -51,12 +51,6 @@
 DEFINE_WD_CLASS(reservation_ww_class);
 EXPORT_SYMBOL(reservation_ww_class);
 
-struct lock_class_key reservation_seqcount_class;
-EXPORT_SYMBOL(reservation_seqcount_class);
-
-const char reservation_seqcount_string[] = "reservation_seqcount";
-EXPORT_SYMBOL(reservation_seqcount_string);
-
 /**
  * dma_resv_list_alloc - allocate fence list
  * @shared_max: number of fences we need space for
@@ -135,9 +129,8 @@ subsys_initcall(dma_resv_lockdep);
 void dma_resv_init(struct dma_resv *obj)
 {
 	ww_mutex_init(&obj->lock, &reservation_ww_class);
+	seqcount_init(&obj->seq);
 
-	__seqcount_init(&obj->seq, reservation_seqcount_string,
-			&reservation_seqcount_class);
 	RCU_INIT_POINTER(obj->fence, NULL);
 	RCU_INIT_POINTER(obj->fence_excl, NULL);
 }
diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
index ee50d10f052b..a6538ae7d93f 100644
--- a/include/linux/dma-resv.h
+++ b/include/linux/dma-resv.h
@@ -46,8 +46,6 @@
 #include <linux/rcupdate.h>
 
 extern struct ww_class reservation_ww_class;
-extern struct lock_class_key reservation_seqcount_class;
-extern const char reservation_seqcount_string[];
 
 /**
  * struct dma_resv_list - a list of shared fences
-- 
2.20.1


^ permalink raw reply	[relevance 91%]

* [PATCH v4 04/24] seqlock: Reorder seqcount_t and seqlock_t API definitions
    2020-07-20 15:55 54%   ` [PATCH v4 01/24] Documentation: locking: Describe seqlock design and usage Ahmed S. Darwish
  2020-07-20 15:55 75%   ` [PATCH v4 02/24] seqlock: Properly format kernel-doc code samples Ahmed S. Darwish
@ 2020-07-20 15:55 70%   ` Ahmed S. Darwish
  2020-07-29 14:33 55%     ` [tip: locking/core] " tip-bot2 for Ahmed S. Darwish
  2020-07-20 15:55 91%   ` [PATCH v4 08/24] seqlock: lockdep assert non-preemptibility on seqcount_t write Ahmed S. Darwish
                     ` (17 subsequent siblings)
  20 siblings, 1 reply; 200+ results
From: Ahmed S. Darwish @ 2020-07-20 15:55 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Will Deacon
  Cc: Thomas Gleixner, Paul E. McKenney, Sebastian A. Siewior,
	Steven Rostedt, LKML, Ahmed S. Darwish

The seqlock.h seqcount_t and seqlock_t API definitions are presented in
the chronological order of their development rather than the order that
makes the most sense to readers. This makes the header file code hard to
follow and understand.

Group and reorder all of the exported seqlock.h functions according to
their purpose.

First, group together the seqcount_t standard read path functions:

    - __read_seqcount_begin()
    - raw_read_seqcount_begin()
    - read_seqcount_begin()

since each function is implemented exactly in terms of the one above
it. Then, group the special-case seqcount_t readers on their own as:

    - raw_read_seqcount()
    - raw_seqcount_begin()

since the only difference between the two functions is that the second
one masks the sequence counter LSB while the first one does not. Note
that raw_seqcount_begin() can actually be implemented in terms of
raw_read_seqcount(), which will be done in a follow-up commit.
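
For reference, that follow-up is expected to reduce raw_seqcount_begin()
to roughly the following sketch:

	static inline unsigned raw_seqcount_begin(const seqcount_t *s)
	{
		/*
		 * Mask out the LSB: an odd (in-progress) count still lets
		 * the reader enter, and read_seqcount_retry() then forces
		 * a retry.
		 */
		return raw_read_seqcount(s) & ~1;
	}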

Then, group the seqcount_t write path functions, instead of injecting
unrelated seqcount_t latch functions between them, and order them as:

    - raw_write_seqcount_begin()
    - raw_write_seqcount_end()
    - write_seqcount_begin_nested()
    - write_seqcount_begin()
    - write_seqcount_end()
    - raw_write_seqcount_barrier()
    - write_seqcount_invalidate()

which is the expected natural order. This also isolates the seqcount_t
latch functions into their own area, at the end of the sequence counters
section, and before jumping to the next one: sequential locks
(seqlock_t).

Do a similar grouping and reordering for seqlock_t "locking" readers vs.
the "conditionally locking or lockless" ones.

No implementation code was changed in any of the reordering above.

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
---
 include/linux/seqlock.h | 174 ++++++++++++++++++++--------------------
 1 file changed, 86 insertions(+), 88 deletions(-)

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index d724b5e5408d..4c1456008d89 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -128,23 +128,6 @@ static inline unsigned __read_seqcount_begin(const seqcount_t *s)
 	return ret;
 }
 
-/**
- * raw_read_seqcount - Read the raw seqcount
- * @s: pointer to seqcount_t
- * Returns: count to be passed to read_seqcount_retry
- *
- * raw_read_seqcount opens a read critical section of the given
- * seqcount without any lockdep checking and without checking or
- * masking the LSB. Calling code is responsible for handling that.
- */
-static inline unsigned raw_read_seqcount(const seqcount_t *s)
-{
-	unsigned ret = READ_ONCE(s->sequence);
-	smp_rmb();
-	kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);
-	return ret;
-}
-
 /**
  * raw_read_seqcount_begin - start seq-read critical section w/o lockdep
  * @s: pointer to seqcount_t
@@ -176,6 +159,23 @@ static inline unsigned read_seqcount_begin(const seqcount_t *s)
 	return raw_read_seqcount_begin(s);
 }
 
+/**
+ * raw_read_seqcount - Read the raw seqcount
+ * @s: pointer to seqcount_t
+ * Returns: count to be passed to read_seqcount_retry
+ *
+ * raw_read_seqcount opens a read critical section of the given
+ * seqcount without any lockdep checking and without checking or
+ * masking the LSB. Calling code is responsible for handling that.
+ */
+static inline unsigned raw_read_seqcount(const seqcount_t *s)
+{
+	unsigned ret = READ_ONCE(s->sequence);
+	smp_rmb();
+	kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);
+	return ret;
+}
+
 /**
  * raw_seqcount_begin - begin a seq-read critical section
  * @s: pointer to seqcount_t
@@ -234,8 +234,6 @@ static inline int read_seqcount_retry(const seqcount_t *s, unsigned start)
 	return __read_seqcount_retry(s, start);
 }
 
-
-
 static inline void raw_write_seqcount_begin(seqcount_t *s)
 {
 	kcsan_nestable_atomic_begin();
@@ -250,6 +248,23 @@ static inline void raw_write_seqcount_end(seqcount_t *s)
 	kcsan_nestable_atomic_end();
 }
 
+static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass)
+{
+	raw_write_seqcount_begin(s);
+	seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_);
+}
+
+static inline void write_seqcount_begin(seqcount_t *s)
+{
+	write_seqcount_begin_nested(s, 0);
+}
+
+static inline void write_seqcount_end(seqcount_t *s)
+{
+	seqcount_release(&s->dep_map, _RET_IP_);
+	raw_write_seqcount_end(s);
+}
+
 /**
  * raw_write_seqcount_barrier - do a seq write barrier
  * @s: pointer to seqcount_t
@@ -300,6 +315,21 @@ static inline void raw_write_seqcount_barrier(seqcount_t *s)
 	kcsan_nestable_atomic_end();
 }
 
+/**
+ * write_seqcount_invalidate - invalidate in-progress read-side seq operations
+ * @s: pointer to seqcount_t
+ *
+ * After write_seqcount_invalidate, no read-side seq operations will complete
+ * successfully and see data older than this.
+ */
+static inline void write_seqcount_invalidate(seqcount_t *s)
+{
+	smp_wmb();
+	kcsan_nestable_atomic_begin();
+	s->sequence+=2;
+	kcsan_nestable_atomic_end();
+}
+
 static inline int raw_read_seqcount_latch(seqcount_t *s)
 {
 	/* Pairs with the first smp_wmb() in raw_write_seqcount_latch() */
@@ -395,38 +425,6 @@ static inline void raw_write_seqcount_latch(seqcount_t *s)
        smp_wmb();      /* increment "sequence" before following stores */
 }
 
-static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass)
-{
-	raw_write_seqcount_begin(s);
-	seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_);
-}
-
-static inline void write_seqcount_begin(seqcount_t *s)
-{
-	write_seqcount_begin_nested(s, 0);
-}
-
-static inline void write_seqcount_end(seqcount_t *s)
-{
-	seqcount_release(&s->dep_map, _RET_IP_);
-	raw_write_seqcount_end(s);
-}
-
-/**
- * write_seqcount_invalidate - invalidate in-progress read-side seq operations
- * @s: pointer to seqcount_t
- *
- * After write_seqcount_invalidate, no read-side seq operations will complete
- * successfully and see data older than this.
- */
-static inline void write_seqcount_invalidate(seqcount_t *s)
-{
-	smp_wmb();
-	kcsan_nestable_atomic_begin();
-	s->sequence+=2;
-	kcsan_nestable_atomic_end();
-}
-
 /*
  * Sequential locks (seqlock_t)
  *
@@ -555,6 +553,43 @@ static inline void read_sequnlock_excl(seqlock_t *sl)
 	spin_unlock(&sl->lock);
 }
 
+static inline void read_seqlock_excl_bh(seqlock_t *sl)
+{
+	spin_lock_bh(&sl->lock);
+}
+
+static inline void read_sequnlock_excl_bh(seqlock_t *sl)
+{
+	spin_unlock_bh(&sl->lock);
+}
+
+static inline void read_seqlock_excl_irq(seqlock_t *sl)
+{
+	spin_lock_irq(&sl->lock);
+}
+
+static inline void read_sequnlock_excl_irq(seqlock_t *sl)
+{
+	spin_unlock_irq(&sl->lock);
+}
+
+static inline unsigned long __read_seqlock_excl_irqsave(seqlock_t *sl)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&sl->lock, flags);
+	return flags;
+}
+
+#define read_seqlock_excl_irqsave(lock, flags)				\
+	do { flags = __read_seqlock_excl_irqsave(lock); } while (0)
+
+static inline void
+read_sequnlock_excl_irqrestore(seqlock_t *sl, unsigned long flags)
+{
+	spin_unlock_irqrestore(&sl->lock, flags);
+}
+
 /**
  * read_seqbegin_or_lock - begin a sequence number check or locking block
  * @lock: sequence lock
@@ -584,43 +619,6 @@ static inline void done_seqretry(seqlock_t *lock, int seq)
 		read_sequnlock_excl(lock);
 }
 
-static inline void read_seqlock_excl_bh(seqlock_t *sl)
-{
-	spin_lock_bh(&sl->lock);
-}
-
-static inline void read_sequnlock_excl_bh(seqlock_t *sl)
-{
-	spin_unlock_bh(&sl->lock);
-}
-
-static inline void read_seqlock_excl_irq(seqlock_t *sl)
-{
-	spin_lock_irq(&sl->lock);
-}
-
-static inline void read_sequnlock_excl_irq(seqlock_t *sl)
-{
-	spin_unlock_irq(&sl->lock);
-}
-
-static inline unsigned long __read_seqlock_excl_irqsave(seqlock_t *sl)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&sl->lock, flags);
-	return flags;
-}
-
-#define read_seqlock_excl_irqsave(lock, flags)				\
-	do { flags = __read_seqlock_excl_irqsave(lock); } while (0)
-
-static inline void
-read_sequnlock_excl_irqrestore(seqlock_t *sl, unsigned long flags)
-{
-	spin_unlock_irqrestore(&sl->lock, flags);
-}
-
 static inline unsigned long
 read_seqbegin_or_lock_irqsave(seqlock_t *lock, int *seq)
 {
-- 
2.20.1


^ permalink raw reply	[relevance 70%]

* [PATCH v4 02/24] seqlock: Properly format kernel-doc code samples
    2020-07-20 15:55 54%   ` [PATCH v4 01/24] Documentation: locking: Describe seqlock design and usage Ahmed S. Darwish
@ 2020-07-20 15:55 75%   ` Ahmed S. Darwish
  2020-07-29 14:33 62%     ` [tip: locking/core] " tip-bot2 for Ahmed S. Darwish
  2020-07-20 15:55 70%   ` [PATCH v4 04/24] seqlock: Reorder seqcount_t and seqlock_t API definitions Ahmed S. Darwish
                     ` (18 subsequent siblings)
  20 siblings, 1 reply; 200+ results
From: Ahmed S. Darwish @ 2020-07-20 15:55 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Will Deacon
  Cc: Thomas Gleixner, Paul E. McKenney, Sebastian A. Siewior,
	Steven Rostedt, LKML, Ahmed S. Darwish, Jonathan Corbet,
	linux-doc

Align the code samples and note sections inside kernel-doc comments with
tabs. This way they can be properly parsed and rendered by Sphinx. It
also makes the code samples easier to read in text editors.

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
---
 include/linux/seqlock.h | 108 +++++++++++++++++++++-------------------
 1 file changed, 56 insertions(+), 52 deletions(-)

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 299d68f10325..6c4f68ef1393 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -263,32 +263,32 @@ static inline void raw_write_seqcount_end(seqcount_t *s)
  * atomically, avoiding compiler optimizations; b) to document which writes are
  * meant to propagate to the reader critical section. This is necessary because
  * neither writes before and after the barrier are enclosed in a seq-writer
- * critical section that would ensure readers are aware of ongoing writes.
+ * critical section that would ensure readers are aware of ongoing writes::
  *
- *      seqcount_t seq;
- *      bool X = true, Y = false;
+ *	seqcount_t seq;
+ *	bool X = true, Y = false;
  *
- *      void read(void)
- *      {
- *              bool x, y;
+ *	void read(void)
+ *	{
+ *		bool x, y;
  *
- *              do {
- *                      int s = read_seqcount_begin(&seq);
+ *		do {
+ *			int s = read_seqcount_begin(&seq);
  *
- *                      x = X; y = Y;
+ *			x = X; y = Y;
  *
- *              } while (read_seqcount_retry(&seq, s));
+ *		} while (read_seqcount_retry(&seq, s));
  *
- *              BUG_ON(!x && !y);
+ *		BUG_ON(!x && !y);
  *      }
  *
  *      void write(void)
  *      {
- *              WRITE_ONCE(Y, true);
+ *		WRITE_ONCE(Y, true);
  *
- *              raw_write_seqcount_barrier(seq);
+ *		raw_write_seqcount_barrier(seq);
  *
- *              WRITE_ONCE(X, false);
+ *		WRITE_ONCE(X, false);
  *      }
  */
 static inline void raw_write_seqcount_barrier(seqcount_t *s)
@@ -325,64 +325,68 @@ static inline int raw_read_seqcount_latch(seqcount_t *s)
  * Very simply put: we first modify one copy and then the other. This ensures
  * there is always one copy in a stable state, ready to give us an answer.
  *
- * The basic form is a data structure like:
+ * The basic form is a data structure like::
  *
- * struct latch_struct {
- *	seqcount_t		seq;
- *	struct data_struct	data[2];
- * };
+ *	struct latch_struct {
+ *		seqcount_t		seq;
+ *		struct data_struct	data[2];
+ *	};
  *
  * Where a modification, which is assumed to be externally serialized, does the
- * following:
+ * following::
  *
- * void latch_modify(struct latch_struct *latch, ...)
- * {
- *	smp_wmb();	<- Ensure that the last data[1] update is visible
- *	latch->seq++;
- *	smp_wmb();	<- Ensure that the seqcount update is visible
+ *	void latch_modify(struct latch_struct *latch, ...)
+ *	{
+ *		smp_wmb();	// Ensure that the last data[1] update is visible
+ *		latch->seq++;
+ *		smp_wmb();	// Ensure that the seqcount update is visible
  *
- *	modify(latch->data[0], ...);
+ *		modify(latch->data[0], ...);
  *
- *	smp_wmb();	<- Ensure that the data[0] update is visible
- *	latch->seq++;
- *	smp_wmb();	<- Ensure that the seqcount update is visible
+ *		smp_wmb();	// Ensure that the data[0] update is visible
+ *		latch->seq++;
+ *		smp_wmb();	// Ensure that the seqcount update is visible
  *
- *	modify(latch->data[1], ...);
- * }
+ *		modify(latch->data[1], ...);
+ *	}
  *
- * The query will have a form like:
+ * The query will have a form like::
  *
- * struct entry *latch_query(struct latch_struct *latch, ...)
- * {
- *	struct entry *entry;
- *	unsigned seq, idx;
+ *	struct entry *latch_query(struct latch_struct *latch, ...)
+ *	{
+ *		struct entry *entry;
+ *		unsigned seq, idx;
  *
- *	do {
- *		seq = raw_read_seqcount_latch(&latch->seq);
+ *		do {
+ *			seq = raw_read_seqcount_latch(&latch->seq);
  *
- *		idx = seq & 0x01;
- *		entry = data_query(latch->data[idx], ...);
+ *			idx = seq & 0x01;
+ *			entry = data_query(latch->data[idx], ...);
  *
- *		smp_rmb();
- *	} while (seq != latch->seq);
+ *			smp_rmb();
+ *		} while (seq != latch->seq);
  *
- *	return entry;
- * }
+ *		return entry;
+ *	}
  *
  * So during the modification, queries are first redirected to data[1]. Then we
  * modify data[0]. When that is complete, we redirect queries back to data[0]
  * and we can modify data[1].
  *
- * NOTE: The non-requirement for atomic modifications does _NOT_ include
- *       the publishing of new entries in the case where data is a dynamic
- *       data structure.
+ * NOTE:
  *
- *       An iteration might start in data[0] and get suspended long enough
- *       to miss an entire modification sequence, once it resumes it might
- *       observe the new entry.
+ *	The non-requirement for atomic modifications does _NOT_ include
+ *	the publishing of new entries in the case where data is a dynamic
+ *	data structure.
  *
- * NOTE: When data is a dynamic data structure; one should use regular RCU
- *       patterns to manage the lifetimes of the objects within.
+ *	An iteration might start in data[0] and get suspended long enough
+ *	to miss an entire modification sequence, once it resumes it might
+ *	observe the new entry.
+ *
+ * NOTE:
+ *
+ *	When data is a dynamic data structure; one should use regular RCU
+ *	patterns to manage the lifetimes of the objects within.
  */
 static inline void raw_write_seqcount_latch(seqcount_t *s)
 {
-- 
2.20.1


^ permalink raw reply	[relevance 75%]

* [PATCH v4 01/24] Documentation: locking: Describe seqlock design and usage
  @ 2020-07-20 15:55 54%   ` Ahmed S. Darwish
                         ` (2 more replies)
  2020-07-20 15:55 75%   ` [PATCH v4 02/24] seqlock: Properly format kernel-doc code samples Ahmed S. Darwish
                     ` (19 subsequent siblings)
  20 siblings, 3 replies; 200+ results
From: Ahmed S. Darwish @ 2020-07-20 15:55 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Will Deacon
  Cc: Thomas Gleixner, Paul E. McKenney, Sebastian A. Siewior,
	Steven Rostedt, LKML, Ahmed S. Darwish, Jonathan Corbet,
	linux-doc

Proper documentation for the design and usage of sequence counters and
sequential locks does not exist. Complete the seqlock.h documentation as
follows:

  - Divide all documentation on a seqcount_t vs. seqlock_t basis. The
    description for both mechanisms was intermingled, which is incorrect
    since the usage constraints for each type are vastly different.

  - Add an introductory paragraph describing the internal design of, and
    rationale for, sequence counters.

  - Document seqcount_t writer non-preemptibility requirement, which was
    not previously documented anywhere, and provide a clear rationale.

  - Provide template code for seqcount_t and seqlock_t initialization
    and reader/writer critical sections.

  - Recommend using seqlock_t by default. It implicitly handles the
    serialization and non-preemptibility requirements of writers.

At seqlock.h:

  - Remove references to brlocks as they've long been removed from the
    kernel.

  - Remove references to gcc-3.x since the kernel's minimum supported
    gcc version is 4.9.

References: 0f6ed63b1707 ("no need to keep brlock macros anymore...")
References: 6ec4476ac825 ("Raise gcc version requirement to 4.9")
Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
---
 Documentation/locking/index.rst   |   1 +
 Documentation/locking/seqlock.rst | 170 ++++++++++++++++++++++++++++++
 include/linux/seqlock.h           |  81 +++++++-------
 3 files changed, 209 insertions(+), 43 deletions(-)
 create mode 100644 Documentation/locking/seqlock.rst

diff --git a/Documentation/locking/index.rst b/Documentation/locking/index.rst
index d785878cad65..7003bd5aeff4 100644
--- a/Documentation/locking/index.rst
+++ b/Documentation/locking/index.rst
@@ -14,6 +14,7 @@ locking
     mutex-design
     rt-mutex-design
     rt-mutex
+    seqlock
     spinlocks
     ww-mutex-design
     preempt-locking
diff --git a/Documentation/locking/seqlock.rst b/Documentation/locking/seqlock.rst
new file mode 100644
index 000000000000..366dd368d90a
--- /dev/null
+++ b/Documentation/locking/seqlock.rst
@@ -0,0 +1,170 @@
+======================================
+Sequence counters and sequential locks
+======================================
+
+Introduction
+============
+
+Sequence counters are a reader-writer consistency mechanism with
+lockless readers (read-only retry loops), and no writer starvation. They
+are used for data that's rarely written to (e.g. system time), where the
+reader wants a consistent set of information and is willing to retry if
+that information changes.
+
+A data set is consistent when the sequence count at the beginning of the
+read side critical section is even and the same sequence count value is
+read again at the end of the critical section. The data in the set must
+be copied out inside the read side critical section. If the sequence
+count has changed between the start and the end of the critical section,
+the reader must retry.
+
+Writers increment the sequence count at the start and the end of their
+critical section. After starting the critical section the sequence count
+is odd and indicates to the readers that an update is in progress. At
+the end of the write side critical section the sequence count becomes
+even again which lets readers make progress.
+
+A sequence counter write side critical section must never be preempted
+or interrupted by read side sections. Otherwise the reader will spin for
+the entire scheduler tick due to the odd sequence count value and the
+interrupted writer. If that reader belongs to a real-time scheduling
+class, it can spin forever and the kernel will livelock.
+
+This mechanism cannot be used if the protected data contains pointers,
+as the writer can invalidate a pointer that the reader is following.
+
+
+.. _seqcount_t:
+
+Sequence counters (``seqcount_t``)
+==================================
+
+This is the raw counting mechanism, which does not protect against
+multiple writers.  Write side critical sections must thus be serialized
+by an external lock.
+
+If the write serialization primitive is not implicitly disabling
+preemption, preemption must be explicitly disabled before entering the
+write side section. If the read section can be invoked from hardirq or
+softirq contexts, interrupts or bottom halves must also be respectively
+disabled before entering the write section.
+
+If it's desired to automatically handle the sequence counter
+requirements of writer serialization and non-preemptibility, use
+:ref:`seqlock_t` instead.
+
+Initialization::
+
+	/* dynamic */
+	seqcount_t foo_seqcount;
+	seqcount_init(&foo_seqcount);
+
+	/* static */
+	static seqcount_t foo_seqcount = SEQCNT_ZERO(foo_seqcount);
+
+	/* C99 struct init */
+	struct {
+		.seq   = SEQCNT_ZERO(foo.seq),
+	} foo;
+
+Write path::
+
+	/* Serialized context with disabled preemption */
+
+	write_seqcount_begin(&foo_seqcount);
+
+	/* ... [[write-side critical section]] ... */
+
+	write_seqcount_end(&foo_seqcount);
+
+Read path::
+
+	do {
+		seq = read_seqcount_begin(&foo_seqcount);
+
+		/* ... [[read-side critical section]] ... */
+
+	} while (read_seqcount_retry(&foo_seqcount, seq));
+
+
+.. _seqlock_t:
+
+Sequential locks (``seqlock_t``)
+================================
+
+This contains the :ref:`seqcount_t` mechanism earlier discussed, plus an
+embedded spinlock for writer serialization and non-preemptibility.
+
+If the read side section can be invoked from hardirq or softirq context,
+use the write side function variants which disable interrupts or bottom
+halves respectively.
+
+Initialization::
+
+	/* dynamic */
+	seqlock_t foo_seqlock;
+	seqlock_init(&foo_seqlock);
+
+	/* static */
+	static DEFINE_SEQLOCK(foo_seqlock);
+
+	/* C99 struct init */
+	struct {
+		.seql   = __SEQLOCK_UNLOCKED(foo.seql)
+	} foo;
+
+Write path::
+
+	write_seqlock(&foo_seqlock);
+
+	/* ... [[write-side critical section]] ... */
+
+	write_sequnlock(&foo_seqlock);
+
+Read path, three categories:
+
+1. Normal Sequence readers which never block a writer but they must
+   retry if a writer is in progress by detecting change in the sequence
+   number.  Writers do not wait for a sequence reader::
+
+	do {
+		seq = read_seqbegin(&foo_seqlock);
+
+		/* ... [[read-side critical section]] ... */
+
+	} while (read_seqretry(&foo_seqlock, seq));
+
+2. Locking readers which will wait if a writer or another locking reader
+   is in progress. A locking reader in progress will also block a writer
+   from entering its critical section. This read lock is
+   exclusive. Unlike rwlock_t, only one locking reader can acquire it::
+
+	read_seqlock_excl(&foo_seqlock);
+
+	/* ... [[read-side critical section]] ... */
+
+	read_sequnlock_excl(&foo_seqlock);
+
+3. Conditional lockless reader (as in 1), or locking reader (as in 2),
+   according to a passed marker. This is used to avoid lockless reader
+   starvation (too many retry loops) in case of a sharp spike in write
+   activity. First, a lockless read is tried (even marker passed). If
+   that trial fails (odd sequence counter is returned, which is used as
+   the next iteration marker), the lockless read is transformed to a
+   full locking read and no retry loop is necessary::
+
+	/* marker; even initialization */
+	int seq = 0;
+	do {
+		read_seqbegin_or_lock(&foo_seqlock, &seq);
+
+		/* ... [[read-side critical section]] ... */
+
+	} while (need_seqretry(&foo_seqlock, seq));
+	done_seqretry(&foo_seqlock, seq);
+
+
+API documentation
+=================
+
+.. kernel-doc:: include/linux/seqlock.h
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 8b97204f35a7..299d68f10325 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -1,36 +1,15 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __LINUX_SEQLOCK_H
 #define __LINUX_SEQLOCK_H
+
 /*
- * Reader/writer consistent mechanism without starving writers. This type of
- * lock for data where the reader wants a consistent set of information
- * and is willing to retry if the information changes. There are two types
- * of readers:
- * 1. Sequence readers which never block a writer but they may have to retry
- *    if a writer is in progress by detecting change in sequence number.
- *    Writers do not wait for a sequence reader.
- * 2. Locking readers which will wait if a writer or another locking reader
- *    is in progress. A locking reader in progress will also block a writer
- *    from going forward. Unlike the regular rwlock, the read lock here is
- *    exclusive so that only one locking reader can get it.
+ * seqcount_t / seqlock_t - a reader-writer consistency mechanism with
+ * lockless readers (read-only retry loops), and no writer starvation.
  *
- * This is not as cache friendly as brlock. Also, this may not work well
- * for data that contains pointers, because any writer could
- * invalidate a pointer that a reader was following.
+ * See Documentation/locking/seqlock.rst
  *
- * Expected non-blocking reader usage:
- * 	do {
- *	    seq = read_seqbegin(&foo);
- * 	...
- *      } while (read_seqretry(&foo, seq));
- *
- *
- * On non-SMP the spin locks disappear but the writer still needs
- * to increment the sequence variables because an interrupt routine could
- * change the state of the data.
- *
- * Based on x86_64 vsyscall gettimeofday 
- * by Keith Owens and Andrea Arcangeli
+ * Copyrights:
+ * - Based on x86_64 vsyscall gettimeofday: Keith Owens, Andrea Arcangeli
  */
 
 #include <linux/spinlock.h>
@@ -41,8 +20,8 @@
 #include <asm/processor.h>
 
 /*
- * The seqlock interface does not prescribe a precise sequence of read
- * begin/retry/end. For readers, typically there is a call to
+ * The seqlock seqcount_t interface does not prescribe a precise sequence of
+ * read begin/retry/end. For readers, typically there is a call to
  * read_seqcount_begin() and read_seqcount_retry(), however, there are more
  * esoteric cases which do not follow this pattern.
  *
@@ -50,16 +29,30 @@
  * via seqcount_t under KCSAN: upon beginning a seq-reader critical section,
  * pessimistically mark the next KCSAN_SEQLOCK_REGION_MAX memory accesses as
  * atomics; if there is a matching read_seqcount_retry() call, no following
- * memory operations are considered atomic. Usage of seqlocks via seqlock_t
- * interface is not affected.
+ * memory operations are considered atomic. Usage of the seqlock_t interface
+ * is not affected.
  */
 #define KCSAN_SEQLOCK_REGION_MAX 1000
 
 /*
- * Version using sequence counter only.
- * This can be used when code has its own mutex protecting the
- * updating starting before the write_seqcountbeqin() and ending
- * after the write_seqcount_end().
+ * Sequence counters (seqcount_t)
+ *
+ * This is the raw counting mechanism, without any writer protection.
+ *
+ * Write side critical sections must be serialized and non-preemptible.
+ *
+ * If readers can be invoked from hardirq or softirq contexts,
+ * interrupts or bottom halves must also be respectively disabled before
+ * entering the write section.
+ *
+ * This mechanism can't be used if the protected data contains pointers,
+ * as the writer can invalidate a pointer that a reader is following.
+ *
+ * If it's desired to automatically handle the sequence counter writer
+ * serialization and non-preemptibility requirements, use a sequential
+ * lock (seqlock_t) instead.
+ *
+ * See Documentation/locking/seqlock.rst
  */
 typedef struct seqcount {
 	unsigned sequence;
@@ -398,10 +391,6 @@ static inline void raw_write_seqcount_latch(seqcount_t *s)
        smp_wmb();      /* increment "sequence" before following stores */
 }
 
-/*
- * Sequence counter only version assumes that callers are using their
- * own mutexing.
- */
 static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass)
 {
 	raw_write_seqcount_begin(s);
@@ -434,15 +423,21 @@ static inline void write_seqcount_invalidate(seqcount_t *s)
 	kcsan_nestable_atomic_end();
 }
 
+/*
+ * Sequential locks (seqlock_t)
+ *
+ * Sequence counters with an embedded spinlock for writer serialization
+ * and non-preemptibility.
+ *
+ * For more info, see:
+ *    - Comments on top of seqcount_t
+ *    - Documentation/locking/seqlock.rst
+ */
 typedef struct {
 	struct seqcount seqcount;
 	spinlock_t lock;
 } seqlock_t;
 
-/*
- * These macros triggered gcc-3.x compile-time problems.  We think these are
- * OK now.  Be cautious.
- */
 #define __SEQLOCK_UNLOCKED(lockname)			\
 	{						\
 		.seqcount = SEQCNT_ZERO(lockname),	\
-- 
2.20.1


^ permalink raw reply	[relevance 54%]

* Re: [PATCH v4 00/24] seqlock: Extend seqcount API with associated locks
  @ 2020-07-20 17:33 99%     ` Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: Ahmed S. Darwish @ 2020-07-20 17:33 UTC (permalink / raw)
  To: Eric Biggers
  Cc: Peter Zijlstra, Ingo Molnar, Will Deacon, Thomas Gleixner,
	Paul E. McKenney, Sebastian A. Siewior, Steven Rostedt, LKML,
	Jonathan Corbet, linux-doc, David S. Miller, netdev,
	Alexander Viro, linux-fsdevel

On Mon, Jul 20, 2020 at 09:49:12AM -0700, Eric Biggers wrote:
> On Mon, Jul 20, 2020 at 05:55:06PM +0200, Ahmed S. Darwish wrote:
> > Hi,
> >
> > This is v4 of the seqlock patch series:
> >
> >    [PATCH v1 00/25]
> >    https://lore.kernel.org/lkml/20200519214547.352050-1-a.darwish@linutronix.de
> >
> >    [PATCH v2 00/06] (bugfixes-only, merged)
> >    https://lore.kernel.org/lkml/20200603144949.1122421-1-a.darwish@linutronix.de
> >
> >    [PATCH v2 00/18]
> >    https://lore.kernel.org/lkml/20200608005729.1874024-1-a.darwish@linutronix.de
> >
> >    [PATCH v3 00/20]
> >    https://lore.kernel.org/lkml/20200630054452.3675847-1-a.darwish@linutronix.de
> >
> > It is based over:
> >
> >    git://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git :: locking/core
> >
>
> Please include an explanation of the patch series in the cover letter.  It looks
> like you sent it in v1 and then stopped including it.  That doesn't work;
> reviewers shouldn't have to read all previous versions too and try to guess
> which parts are still up-to-date.
>

Noted. Thanks for the heads-up.

--
Ahmed S. Darwish
Linutronix GmbH

^ permalink raw reply	[relevance 99%]

* Re: [PATCH v4 01/24] Documentation: locking: Describe seqlock design and usage
  @ 2020-07-21  5:34 99%       ` Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: Ahmed S. Darwish @ 2020-07-21  5:34 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Peter Zijlstra, Ingo Molnar, Will Deacon, Thomas Gleixner,
	Paul E. McKenney, Sebastian A. Siewior, LKML, Jonathan Corbet,
	linux-doc

On Mon, Jul 20, 2020 at 09:35:51PM -0400, Steven Rostedt wrote:
> On Mon, 20 Jul 2020 17:55:07 +0200
> "Ahmed S. Darwish" <a.darwish@linutronix.de> wrote:
> > +Read path, three categories:
> > +
> > +1. Normal Sequence readers which never block a writer but they must
> > +   retry if a writer is in progress by detecting change in the sequence
> > +   number.  Writers do not wait for a sequence reader::
> > +
> > +	do {
> > +		seq = read_seqbegin(&foo_seqlock);
> > +
> > +		/* ... [[read-side critical section]] ... */
> > +
> > +	} while (read_seqretry(&foo_seqlock, seq));
> > +
> > +2. Locking readers which will wait if a writer or another locking reader
> > +   is in progress. A locking reader in progress will also block a writer
> > +   from entering its critical section. This read lock is
> > +   exclusive. Unlike rwlock_t, only one locking reader can acquire it::
>
> Nit, but I would mention that this acts similar to a normal spin_lock,
> and even disables preemption (in the non-RT case).

will do.

Thanks,

--
Ahmed S. Darwish
Linutronix GmbH

^ permalink raw reply	[relevance 99%]

* Re: [PATCH v4 01/24] Documentation: locking: Describe seqlock design and usage
  @ 2020-07-21  7:15 85%         ` Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: Ahmed S. Darwish @ 2020-07-21  7:15 UTC (permalink / raw)
  To: Steven Rostedt, Peter Zijlstra
  Cc: Ingo Molnar, Will Deacon, Thomas Gleixner, Paul E. McKenney,
	Sebastian A. Siewior, LKML, Jonathan Corbet, linux-doc

On Mon, Jul 20, 2020 at 09:51:15PM -0400, Steven Rostedt wrote:
> On Mon, 20 Jul 2020 21:44:00 -0400
> Steven Rostedt <rostedt@goodmis.org> wrote:
>
> > > - * This is not as cache friendly as brlock. Also, this may not work well
> > > - * for data that contains pointers, because any writer could
> > > - * invalidate a pointer that a reader was following.
> > > + * See Documentation/locking/seqlock.rst
> >
> > I absolutely hate it when I see this.
> >
> > I much rather have the documentation next to the code. Because
> > honestly, I trust that comments next to the code will get updated if
> > the code changes much more likely than comments buried in the
> > Documentation directory.
> >
> > It's also more likely that I wont even bother looking at the doc
> > (because I wont trust it to be up to date) and just read the code and
> > try to figure it out. Or look at how others have used it.
>
> Never mind,
>
> I see that kerneldoc is added in patch 5, which helps.
>

Even looking at the current patch #1 *in isolation*, no relevant
comments were removed from seqlock.h at all. They were just moved closer
to the seqcount_t and seqlock_t structure definitions.

The original comments were horrible. They intermingled seqcount_t and
seqlock_t descriptions in a *very* ambiguous manner, sometimes even
within the same sentence. It was misleading, as the usage constraints
for each data type (especially on the write side) are vastly different.

Even the new KCSAN comment, which was freshly merged this release cycle,
got tainted by such already-existing incoherence. It kept talking about
the "seqlock interface" while it actually meant the seqcount_t
interface.  Then at the end, it acknowledged the semantic ambiguity and
noted how the "seqlock interface" does *not* cover seqlock_t.

Moreover, this seqcount_t/seqlock_t documentation intermingling meant
that the critical usage constraint of disabling preemption before
entering a seqcount_t write side critical section was never explicitly
mentioned. This has (naturally) led to a considerable number of buggy
call sites, and the fixes are now merged.
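
As a minimal sketch of the constraint in question (illustrative foo_*
names only, not taken from any real call site):

static DEFINE_SPINLOCK(foo_lock);
static seqcount_t foo_seqcount = SEQCNT_ZERO(foo_seqcount);

static void foo_update(void)
{
	/* Serializes writers; spin_lock() also disables preemption. */
	spin_lock(&foo_lock);
	write_seqcount_begin(&foo_seqcount);

	/* ... update the protected data ... */

	write_seqcount_end(&foo_seqcount);
	spin_unlock(&foo_lock);
}

If the writer serialization primitive were a mutex instead, an explicit
preempt_disable()/preempt_enable() pair around the seqcount write
section would be needed.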

I've tried to fix the problem at the roots, and basically identified
three major problems:

  1. The seqcount_t/seqlock_t intermingling is confusing everyone. That
     is, both call-site developers *and* core kernel engineers.

  2. The big picture of seqlock.h was never expressed in a sufficient
     detail.

  3. The usage constraints for the exported seqlock.h APIs were never
     explicitly mentioned, leading to buggy call sites.

To fix #1, I've changed all of the existing seqlock.h comments to
explicitly mention either "seqcount_t" or "seqlock_t". Then, divided
the top seqlock.h comment into a seqcount_t part and a seqlock_t one.
Afterwards, the whole seqlock.h header file code was grouped into
"sections", as seen in patch #4.

To fix #2, Documentation/locking/seqlock.rst was introduced and boldly
referenced at the header-file top comment.

To fix #3, kernel-doc was added for all of the seqlock.h exported APIs.

@Peter, kindly note that all of the above *is exactly why* I've resisted
hiding the new seqcount_LOCKTYPE_t APIs introduced by this patch series
inside generative macros. All the parts that will be referenced by
call-site developers are kept both explicit and kernel-doc commented.

Thanks!

--
Ahmed S. Darwish
Linutronix GmbH

^ permalink raw reply	[relevance 85%]

* Re: [PATCH 1/5] seqlock: s/__SEQ_LOCKDEP/__SEQ_LOCK/g
  @ 2020-07-29 14:29 99%   ` Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: Ahmed S. Darwish @ 2020-07-29 14:29 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: mingo, will, tglx, paulmck, bigeasy, rostedt, linux-kernel,
	corbet, davem, netdev, linux-doc, viro, linux-fsdevel

On Wed, Jul 29, 2020 at 03:52:50PM +0200, Peter Zijlstra wrote:
> __SEQ_LOCKDEP() is an expression gate for the
> seqcount_LOCKNAME_t::lock member. Rename it to be about the member,
> not the gate condition.
>
> Later (PREEMPT_RT) patches will make the member available for !LOCKDEP
> configs.
>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---

Acked-by: Ahmed S. Darwish <a.darwish@linutronix.de>

^ permalink raw reply	[relevance 99%]

* [tip: locking/core] userfaultfd: Use sequence counter with associated spinlock
  2020-07-20 15:55 95%   ` [PATCH v4 22/24] userfaultfd: " Ahmed S. Darwish
@ 2020-07-29 14:33 82%     ` tip-bot2 for Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: tip-bot2 for Ahmed S. Darwish @ 2020-07-29 14:33 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Ahmed S. Darwish, Peter Zijlstra (Intel), x86, LKML

The following commit has been merged into the locking/core branch of tip:

Commit-ID:     2ca97ac8bdcc31fdc868f40c73c017f0a648dc07
Gitweb:        https://git.kernel.org/tip/2ca97ac8bdcc31fdc868f40c73c017f0a648dc07
Author:        Ahmed S. Darwish <a.darwish@linutronix.de>
AuthorDate:    Mon, 20 Jul 2020 17:55:28 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Wed, 29 Jul 2020 16:14:28 +02:00

userfaultfd: Use sequence counter with associated spinlock

A sequence counter write side critical section must be protected by some
form of locking to serialize writers. A plain seqcount_t does not
contain the information of which lock must be held when entering a write
side critical section.

Use the new seqcount_spinlock_t data type, which allows associating a
spinlock with the sequence counter. This enables lockdep to verify that
the spinlock used for writer serialization is held when the write side
critical section is entered.

If lockdep is disabled this lock association is compiled out and has
neither storage size nor runtime overhead.
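
For illustration, a minimal sketch of the association pattern with
hypothetical foo_* names (the actual conversion follows in the diff
below):

struct foo {
	spinlock_t		lock;
	seqcount_spinlock_t	seq;
};

static void foo_init(struct foo *f)
{
	spin_lock_init(&f->lock);
	seqcount_spinlock_init(&f->seq, &f->lock);
}

static void foo_write(struct foo *f)
{
	spin_lock(&f->lock);

	/* With lockdep enabled, asserts that f->lock is held */
	write_seqcount_begin(&f->seq);

	/* ... update the protected data ... */

	write_seqcount_end(&f->seq);
	spin_unlock(&f->lock);
}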

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200720155530.1173732-23-a.darwish@linutronix.de
---
 fs/userfaultfd.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 52de290..26e8b23 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -61,7 +61,7 @@ struct userfaultfd_ctx {
 	/* waitqueue head for events */
 	wait_queue_head_t event_wqh;
 	/* a refile sequence protected by fault_pending_wqh lock */
-	struct seqcount refile_seq;
+	seqcount_spinlock_t refile_seq;
 	/* pseudo fd refcounting */
 	refcount_t refcount;
 	/* userfaultfd syscall flags */
@@ -1998,7 +1998,7 @@ static void init_once_userfaultfd_ctx(void *mem)
 	init_waitqueue_head(&ctx->fault_wqh);
 	init_waitqueue_head(&ctx->event_wqh);
 	init_waitqueue_head(&ctx->fd_wqh);
-	seqcount_init(&ctx->refile_seq);
+	seqcount_spinlock_init(&ctx->refile_seq, &ctx->fault_pending_wqh.lock);
 }
 
 SYSCALL_DEFINE1(userfaultfd, int, flags)

^ permalink raw reply	[relevance 82%]

* [tip: locking/core] NFSv4: Use sequence counter with associated spinlock
  2020-07-20 15:55 95%   ` [PATCH v4 21/24] NFSv4: " Ahmed S. Darwish
@ 2020-07-29 14:33 82%     ` tip-bot2 for Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: tip-bot2 for Ahmed S. Darwish @ 2020-07-29 14:33 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Ahmed S. Darwish, Peter Zijlstra (Intel), x86, LKML

The following commit has been merged into the locking/core branch of tip:

Commit-ID:     76246c9219726c71e3f470344d8c6a566fa2535b
Gitweb:        https://git.kernel.org/tip/76246c9219726c71e3f470344d8c6a566fa2535b
Author:        Ahmed S. Darwish <a.darwish@linutronix.de>
AuthorDate:    Mon, 20 Jul 2020 17:55:27 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Wed, 29 Jul 2020 16:14:28 +02:00

NFSv4: Use sequence counter with associated spinlock

A sequence counter write side critical section must be protected by some
form of locking to serialize writers. A plain seqcount_t does not
contain the information of which lock must be held when entering a write
side critical section.

Use the new seqcount_spinlock_t data type, which allows associating a
spinlock with the sequence counter. This enables lockdep to verify that
the spinlock used for writer serialization is held when the write side
critical section is entered.

If lockdep is disabled this lock association is compiled out and has
neither storage size nor runtime overhead.

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200720155530.1173732-22-a.darwish@linutronix.de
---
 fs/nfs/nfs4_fs.h   | 2 +-
 fs/nfs/nfs4state.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 2b7f6dc..210e590 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -117,7 +117,7 @@ struct nfs4_state_owner {
 	unsigned long	     so_flags;
 	struct list_head     so_states;
 	struct nfs_seqid_counter so_seqid;
-	seqcount_t	     so_reclaim_seqcount;
+	seqcount_spinlock_t  so_reclaim_seqcount;
 	struct mutex	     so_delegreturn_mutex;
 };
 
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index a8dc25c..b1dba24 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -509,7 +509,7 @@ nfs4_alloc_state_owner(struct nfs_server *server,
 	nfs4_init_seqid_counter(&sp->so_seqid);
 	atomic_set(&sp->so_count, 1);
 	INIT_LIST_HEAD(&sp->so_lru);
-	seqcount_init(&sp->so_reclaim_seqcount);
+	seqcount_spinlock_init(&sp->so_reclaim_seqcount, &sp->so_lock);
 	mutex_init(&sp->so_delegreturn_mutex);
 	return sp;
 }

^ permalink raw reply	[relevance 82%]

* [tip: locking/core] timekeeping: Use sequence counter with associated raw spinlock
  2020-07-20 15:55 82%   ` [PATCH v4 17/24] timekeeping: Use sequence counter with associated raw spinlock Ahmed S. Darwish
@ 2020-07-29 14:33 69%     ` tip-bot2 for Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: tip-bot2 for Ahmed S. Darwish @ 2020-07-29 14:33 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Ahmed S. Darwish, Peter Zijlstra (Intel), x86, LKML

The following commit has been merged into the locking/core branch of tip:

Commit-ID:     025e82bcbc34cd071390e72fd0b31593282f9425
Gitweb:        https://git.kernel.org/tip/025e82bcbc34cd071390e72fd0b31593282f9425
Author:        Ahmed S. Darwish <a.darwish@linutronix.de>
AuthorDate:    Mon, 20 Jul 2020 17:55:23 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Wed, 29 Jul 2020 16:14:27 +02:00

timekeeping: Use sequence counter with associated raw spinlock

A sequence counter write side critical section must be protected by some
form of locking to serialize writers. A plain seqcount_t does not
contain the information of which lock must be held when entering a write
side critical section.

Use the new seqcount_raw_spinlock_t data type, which allows associating
a raw spinlock with the sequence counter. This enables lockdep to verify
that the raw spinlock used for writer serialization is held when the
write side critical section is entered.

If lockdep is disabled this lock association is compiled out and has
neither storage size nor runtime overhead.

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200720155530.1173732-18-a.darwish@linutronix.de
---
 kernel/time/timekeeping.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index d20d489..05ecfd8 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -39,18 +39,19 @@ enum timekeeping_adv_mode {
 	TK_ADV_FREQ
 };
 
+static DEFINE_RAW_SPINLOCK(timekeeper_lock);
+
 /*
  * The most important data for readout fits into a single 64 byte
  * cache line.
  */
 static struct {
-	seqcount_t		seq;
+	seqcount_raw_spinlock_t	seq;
 	struct timekeeper	timekeeper;
 } tk_core ____cacheline_aligned = {
-	.seq = SEQCNT_ZERO(tk_core.seq),
+	.seq = SEQCNT_RAW_SPINLOCK_ZERO(tk_core.seq, &timekeeper_lock),
 };
 
-static DEFINE_RAW_SPINLOCK(timekeeper_lock);
 static struct timekeeper shadow_timekeeper;
 
 /**
@@ -63,7 +64,7 @@ static struct timekeeper shadow_timekeeper;
  * See @update_fast_timekeeper() below.
  */
 struct tk_fast {
-	seqcount_t		seq;
+	seqcount_raw_spinlock_t	seq;
 	struct tk_read_base	base[2];
 };
 
@@ -80,11 +81,13 @@ static struct clocksource dummy_clock = {
 };
 
 static struct tk_fast tk_fast_mono ____cacheline_aligned = {
+	.seq     = SEQCNT_RAW_SPINLOCK_ZERO(tk_fast_mono.seq, &timekeeper_lock),
 	.base[0] = { .clock = &dummy_clock, },
 	.base[1] = { .clock = &dummy_clock, },
 };
 
 static struct tk_fast tk_fast_raw  ____cacheline_aligned = {
+	.seq     = SEQCNT_RAW_SPINLOCK_ZERO(tk_fast_raw.seq, &timekeeper_lock),
 	.base[0] = { .clock = &dummy_clock, },
 	.base[1] = { .clock = &dummy_clock, },
 };
@@ -157,7 +160,7 @@ static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
  * tk_clock_read - atomic clocksource read() helper
  *
  * This helper is necessary to use in the read paths because, while the
- * seqlock ensures we don't return a bad value while structures are updated,
+ * seqcount ensures we don't return a bad value while structures are updated,
  * it doesn't protect from potential crashes. There is the possibility that
  * the tkr's clocksource may change between the read reference, and the
  * clock reference passed to the read function.  This can cause crashes if
@@ -222,10 +225,10 @@ static inline u64 timekeeping_get_delta(const struct tk_read_base *tkr)
 	unsigned int seq;
 
 	/*
-	 * Since we're called holding a seqlock, the data may shift
+	 * Since we're called holding a seqcount, the data may shift
 	 * under us while we're doing the calculation. This can cause
 	 * false positives, since we'd note a problem but throw the
-	 * results away. So nest another seqlock here to atomically
+	 * results away. So nest another seqcount here to atomically
 	 * grab the points we are checking with.
 	 */
 	do {
@@ -486,7 +489,7 @@ EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns);
  *
  * To keep it NMI safe since we're accessing from tracing, we're not using a
  * separate timekeeper with updates to monotonic clock and boot offset
- * protected with seqlocks. This has the following minor side effects:
+ * protected with seqcounts. This has the following minor side effects:
  *
  * (1) Its possible that a timestamp be taken after the boot offset is updated
  * but before the timekeeper is updated. If this happens, the new boot offset

^ permalink raw reply	[relevance 69%]

* [tip: locking/core] netfilter: nft_set_rbtree: Use sequence counter with associated rwlock
  2020-07-20 15:55 94%   ` [PATCH v4 15/24] netfilter: nft_set_rbtree: Use sequence counter with associated rwlock Ahmed S. Darwish
@ 2020-07-29 14:33 83%     ` tip-bot2 for Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: tip-bot2 for Ahmed S. Darwish @ 2020-07-29 14:33 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Ahmed S. Darwish, Peter Zijlstra (Intel), x86, LKML

The following commit has been merged into the locking/core branch of tip:

Commit-ID:     b901892b51317b321c7bc96e2ccd2f522d1380ee
Gitweb:        https://git.kernel.org/tip/b901892b51317b321c7bc96e2ccd2f522d1380ee
Author:        Ahmed S. Darwish <a.darwish@linutronix.de>
AuthorDate:    Mon, 20 Jul 2020 17:55:21 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Wed, 29 Jul 2020 16:14:26 +02:00

netfilter: nft_set_rbtree: Use sequence counter with associated rwlock

A sequence counter write side critical section must be protected by some
form of locking to serialize writers. A plain seqcount_t does not
contain the information of which lock must be held when entering a write
side critical section.

Use the new seqcount_rwlock_t data type, which allows associating a
rwlock with the sequence counter. This enables lockdep to verify that
the rwlock used for writer serialization is held when the write side
critical section is entered.

If lockdep is disabled this lock association is compiled out and has
neither storage size nor runtime overhead.

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200720155530.1173732-16-a.darwish@linutronix.de
---
 net/netfilter/nft_set_rbtree.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index b6aad3f..4b2834f 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -18,7 +18,7 @@
 struct nft_rbtree {
 	struct rb_root		root;
 	rwlock_t		lock;
-	seqcount_t		count;
+	seqcount_rwlock_t	count;
 	struct delayed_work	gc_work;
 };
 
@@ -523,7 +523,7 @@ static int nft_rbtree_init(const struct nft_set *set,
 	struct nft_rbtree *priv = nft_set_priv(set);
 
 	rwlock_init(&priv->lock);
-	seqcount_init(&priv->count);
+	seqcount_rwlock_init(&priv->count, &priv->lock);
 	priv->root = RB_ROOT;
 
 	INIT_DEFERRABLE_WORK(&priv->gc_work, nft_rbtree_gc);

^ permalink raw reply	[relevance 83%]

* [tip: locking/core] seqlock: lockdep assert non-preemptibility on seqcount_t write
  2020-07-20 15:55 91%   ` [PATCH v4 08/24] seqlock: lockdep assert non-preemptibility on seqcount_t write Ahmed S. Darwish
@ 2020-07-29 14:33 77%     ` tip-bot2 for Ahmed S. Darwish
    1 sibling, 0 replies; 200+ results
From: tip-bot2 for Ahmed S. Darwish @ 2020-07-29 14:33 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Ahmed S. Darwish, Peter Zijlstra (Intel), x86, LKML

The following commit has been merged into the locking/core branch of tip:

Commit-ID:     859247d39fb008ea812e8f0c398a58a20c12899e
Gitweb:        https://git.kernel.org/tip/859247d39fb008ea812e8f0c398a58a20c12899e
Author:        Ahmed S. Darwish <a.darwish@linutronix.de>
AuthorDate:    Mon, 20 Jul 2020 17:55:14 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Wed, 29 Jul 2020 16:14:24 +02:00

seqlock: lockdep assert non-preemptibility on seqcount_t write

Preemption must be disabled before entering a sequence count write side
critical section.  Failing to do so, the seqcount read side can preempt
the write side section and spin for the entire scheduler tick.  If that
reader belongs to a real-time scheduling class, it can spin forever and
the kernel will livelock.

Assert through lockdep that preemption is disabled for seqcount writers.

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200720155530.1173732-9-a.darwish@linutronix.de
---
 include/linux/seqlock.h | 29 +++++++++++++++++++++++------
 1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index e885702..54bc204 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -266,6 +266,12 @@ static inline void raw_write_seqcount_end(seqcount_t *s)
 	kcsan_nestable_atomic_end();
 }
 
+static inline void __write_seqcount_begin_nested(seqcount_t *s, int subclass)
+{
+	raw_write_seqcount_begin(s);
+	seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_);
+}
+
 /**
  * write_seqcount_begin_nested() - start a seqcount_t write section with
  *                                 custom lockdep nesting level
@@ -276,8 +282,19 @@ static inline void raw_write_seqcount_end(seqcount_t *s)
  */
 static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass)
 {
-	raw_write_seqcount_begin(s);
-	seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_);
+	lockdep_assert_preemption_disabled();
+	__write_seqcount_begin_nested(s, subclass);
+}
+
+/*
+ * A write_seqcount_begin() variant w/o lockdep non-preemptibility checks.
+ *
+ * Use for internal seqlock.h code where it's known that preemption is
+ * already disabled. For example, seqlock_t write side functions.
+ */
+static inline void __write_seqcount_begin(seqcount_t *s)
+{
+	__write_seqcount_begin_nested(s, 0);
 }
 
 /**
@@ -575,7 +592,7 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
 static inline void write_seqlock(seqlock_t *sl)
 {
 	spin_lock(&sl->lock);
-	write_seqcount_begin(&sl->seqcount);
+	__write_seqcount_begin(&sl->seqcount);
 }
 
 /**
@@ -601,7 +618,7 @@ static inline void write_sequnlock(seqlock_t *sl)
 static inline void write_seqlock_bh(seqlock_t *sl)
 {
 	spin_lock_bh(&sl->lock);
-	write_seqcount_begin(&sl->seqcount);
+	__write_seqcount_begin(&sl->seqcount);
 }
 
 /**
@@ -628,7 +645,7 @@ static inline void write_sequnlock_bh(seqlock_t *sl)
 static inline void write_seqlock_irq(seqlock_t *sl)
 {
 	spin_lock_irq(&sl->lock);
-	write_seqcount_begin(&sl->seqcount);
+	__write_seqcount_begin(&sl->seqcount);
 }
 
 /**
@@ -649,7 +666,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl)
 	unsigned long flags;
 
 	spin_lock_irqsave(&sl->lock, flags);
-	write_seqcount_begin(&sl->seqcount);
+	__write_seqcount_begin(&sl->seqcount);
 	return flags;
 }
 

^ permalink raw reply	[relevance 77%]

* [tip: locking/core] lockdep: Add preemption enabled/disabled assertion APIs
  @ 2020-07-29 14:33 81% ` tip-bot2 for Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: tip-bot2 for Ahmed S. Darwish @ 2020-07-29 14:33 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Ahmed S. Darwish, Peter Zijlstra (Intel), x86, LKML

The following commit has been merged into the locking/core branch of tip:

Commit-ID:     8fd8ad5c5dfcb09cf62abadd4043eaf1afbbd0ce
Gitweb:        https://git.kernel.org/tip/8fd8ad5c5dfcb09cf62abadd4043eaf1afbbd0ce
Author:        Ahmed S. Darwish <a.darwish@linutronix.de>
AuthorDate:    Mon, 20 Jul 2020 17:55:13 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Wed, 29 Jul 2020 16:14:24 +02:00

lockdep: Add preemption enabled/disabled assertion APIs

Asserting that preemption is enabled or disabled is a critical sanity
check.  Developers are usually reluctant to add such a check in a
fastpath as reading the preemption count can be costly.

Extend the lockdep API with macros asserting that preemption is disabled
or enabled. If lockdep is disabled, or if the underlying architecture
does not support kernel preemption, this assert has no runtime overhead.
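
For illustration, a hedged sketch with hypothetical foo_* names of how a
fastpath can now document and verify its calling context:

static DEFINE_PER_CPU(unsigned long, foo_events);

static void foo_fastpath(void)
{
	/*
	 * Cheap sanity check: compiled out when lockdep is disabled,
	 * a one-time WARN when the context requirement is violated.
	 */
	lockdep_assert_preemption_disabled();

	/* Per-CPU update that is only safe with preemption off */
	__this_cpu_inc(foo_events);
}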

References: f54bb2ec02c8 ("locking/lockdep: Add IRQs disabled/enabled assertion APIs: ...")
Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200720155530.1173732-8-a.darwish@linutronix.de
---
 include/linux/lockdep.h | 19 +++++++++++++++++++
 lib/Kconfig.debug       |  1 +
 2 files changed, 20 insertions(+)

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 7aafba0..39a3569 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -549,6 +549,22 @@ do {									\
 	WARN_ON_ONCE(debug_locks && !this_cpu_read(hardirq_context));	\
 } while (0)
 
+#define lockdep_assert_preemption_enabled()				\
+do {									\
+	WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_COUNT)	&&		\
+		     debug_locks			&&		\
+		     (preempt_count() != 0		||		\
+		      !this_cpu_read(hardirqs_enabled)));		\
+} while (0)
+
+#define lockdep_assert_preemption_disabled()				\
+do {									\
+	WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_COUNT)	&&		\
+		     debug_locks			&&		\
+		     (preempt_count() == 0		&&		\
+		      this_cpu_read(hardirqs_enabled)));		\
+} while (0)
+
 #else
 # define might_lock(lock) do { } while (0)
 # define might_lock_read(lock) do { } while (0)
@@ -557,6 +573,9 @@ do {									\
 # define lockdep_assert_irqs_enabled() do { } while (0)
 # define lockdep_assert_irqs_disabled() do { } while (0)
 # define lockdep_assert_in_irq() do { } while (0)
+
+# define lockdep_assert_preemption_enabled() do { } while (0)
+# define lockdep_assert_preemption_disabled() do { } while (0)
 #endif
 
 #ifdef CONFIG_PROVE_RAW_LOCK_NESTING
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 9ad9210..5379931 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1117,6 +1117,7 @@ config PROVE_LOCKING
 	select DEBUG_RWSEMS
 	select DEBUG_WW_MUTEX_SLOWPATH
 	select DEBUG_LOCK_ALLOC
+	select PREEMPT_COUNT if !ARCH_NO_PREEMPT
 	select TRACE_IRQFLAGS
 	default n
 	help

^ permalink raw reply	[relevance 81%]

* [tip: locking/core] seqlock: Implement raw_seqcount_begin() in terms of raw_read_seqcount()
  @ 2020-07-29 14:33 86% ` tip-bot2 for Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: tip-bot2 for Ahmed S. Darwish @ 2020-07-29 14:33 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Ahmed S. Darwish, Peter Zijlstra (Intel), x86, LKML

The following commit has been merged into the locking/core branch of tip:

Commit-ID:     932e46365226324d2cf26d8bdec8b51ceb296948
Gitweb:        https://git.kernel.org/tip/932e46365226324d2cf26d8bdec8b51ceb296948
Author:        Ahmed S. Darwish <a.darwish@linutronix.de>
AuthorDate:    Mon, 20 Jul 2020 17:55:12 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Wed, 29 Jul 2020 16:14:24 +02:00

seqlock: Implement raw_seqcount_begin() in terms of raw_read_seqcount()

raw_seqcount_begin() has the same code as raw_read_seqcount(), with the
exception of masking the sequence counter's LSB before returning it to
the caller.

Note, the LSB is masked before the count is returned so that a
subsequent read_seqcount_retry() is forced to fail if the counter was
odd -- without the overhead of an extra branching instruction.
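
For a concrete illustration: if a writer is in progress, the counter is
odd, say 7. raw_seqcount_begin() then returns 7 & ~1 = 6, and any
subsequent read_seqcount_retry(s, 6) must observe a counter != 6 (it is
still 7, or has moved on), so the read section is retried.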

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200720155530.1173732-7-a.darwish@linutronix.de
---
 include/linux/seqlock.h |  9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 85fb3ac..e885702 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -199,10 +199,11 @@ static inline unsigned raw_read_seqcount(const seqcount_t *s)
  */
 static inline unsigned raw_seqcount_begin(const seqcount_t *s)
 {
-	unsigned ret = READ_ONCE(s->sequence);
-	smp_rmb();
-	kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);
-	return ret & ~1;
+	/*
+	 * If the counter is odd, let read_seqcount_retry() fail
+	 * by decrementing the counter.
+	 */
+	return raw_read_seqcount(s) & ~1;
 }
 
 /**

^ permalink raw reply	[relevance 86%]

* [tip: locking/core] seqlock: Extend seqcount API with associated locks
  2020-07-20 15:55 36%   ` [PATCH v4 09/24] seqlock: Extend seqcount API with associated locks Ahmed S. Darwish
@ 2020-07-29 14:33 28%     ` tip-bot2 for Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: tip-bot2 for Ahmed S. Darwish @ 2020-07-29 14:33 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Ahmed S. Darwish, Peter Zijlstra (Intel), x86, LKML

The following commit has been merged into the locking/core branch of tip:

Commit-ID:     55f3560df975f557c48aa6afc636808f31ecb87a
Gitweb:        https://git.kernel.org/tip/55f3560df975f557c48aa6afc636808f31ecb87a
Author:        Ahmed S. Darwish <a.darwish@linutronix.de>
AuthorDate:    Mon, 20 Jul 2020 17:55:15 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Wed, 29 Jul 2020 16:14:25 +02:00

seqlock: Extend seqcount API with associated locks

A sequence counter write side critical section must be protected by some
form of locking to serialize writers. If the serialization primitive is
not disabling preemption implicitly, preemption has to be explicitly
disabled before entering the write side critical section.

There is no built-in debugging mechanism to verify that the lock used
for writer serialization is held and preemption is disabled. Some usage
sites like dma-buf have explicit lockdep checks for the writer-side
lock, but this covers only a small portion of the sequence counter usage
in the kernel.

Add new sequence counter types which allow associating a lock with the
sequence counter at initialization time. The seqcount API functions are
extended to provide appropriate lockdep assertions depending on the
seqcount/lock type.

For sequence counters with associated locks that do not implicitly
disable preemption, preemption protection is enforced in the sequence
counter write side functions. This removes the need to explicitly add
preempt_disable/enable() around the write side critical sections: the
write_begin/end() functions for these new sequence counter types
automatically do this.

Introduce the following seqcount types with associated locks:

     seqcount_spinlock_t
     seqcount_raw_spinlock_t
     seqcount_rwlock_t
     seqcount_mutex_t
     seqcount_ww_mutex_t

Extend the seqcount read and write functions to branch out to the
specific seqcount_LOCKTYPE_t implementation at compile-time. This avoids
kernel API explosion for each new seqcount_LOCKTYPE_t added. Add such
compile-time type detection logic into a new, internal, seqlock header.

Document the proper seqcount_LOCKTYPE_t usage, and rationale, at
Documentation/locking/seqlock.rst.

If lockdep is disabled, this lock association is compiled out and has
neither storage size nor runtime overhead.
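
For illustration, a minimal sketch with hypothetical foo_* names showing
a preemptible associated lock; no manual preempt_disable() is needed:

static DEFINE_MUTEX(foo_mutex);
static seqcount_mutex_t foo_seqcount =
	SEQCNT_MUTEX_ZERO(foo_seqcount, &foo_mutex);

static void foo_update(void)
{
	mutex_lock(&foo_mutex);

	/*
	 * Lockdep asserts foo_mutex is held; preemption is disabled
	 * automatically because the associated lock is preemptible.
	 */
	write_seqcount_begin(&foo_seqcount);

	/* ... update the protected data ... */

	write_seqcount_end(&foo_seqcount);	/* re-enables preemption */
	mutex_unlock(&foo_mutex);
}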

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200720155530.1173732-10-a.darwish@linutronix.de
---
 Documentation/locking/seqlock.rst |  52 +++-
 include/linux/seqlock.h           | 464 ++++++++++++++++++++++++-----
 2 files changed, 447 insertions(+), 69 deletions(-)

diff --git a/Documentation/locking/seqlock.rst b/Documentation/locking/seqlock.rst
index 366dd36..62c5ad9 100644
--- a/Documentation/locking/seqlock.rst
+++ b/Documentation/locking/seqlock.rst
@@ -87,6 +87,58 @@ Read path::
 	} while (read_seqcount_retry(&foo_seqcount, seq));
 
 
+.. _seqcount_locktype_t:
+
+Sequence counters with associated locks (``seqcount_LOCKTYPE_t``)
+-----------------------------------------------------------------
+
+As discussed at :ref:`seqcount_t`, sequence count write side critical
+sections must be serialized and non-preemptible. This variant of
+sequence counters associate the lock used for writer serialization at
+initialization time, which enables lockdep to validate that the write
+side critical sections are properly serialized.
+
+This lock association is a NOOP if lockdep is disabled and has neither
+storage nor runtime overhead. If lockdep is enabled, the lock pointer is
+stored in struct seqcount and lockdep's "lock is held" assertions are
+injected at the beginning of the write side critical section to validate
+that it is properly protected.
+
+For lock types which do not implicitly disable preemption, preemption
+protection is enforced in the write side function.
+
+The following sequence counters with associated locks are defined:
+
+  - ``seqcount_spinlock_t``
+  - ``seqcount_raw_spinlock_t``
+  - ``seqcount_rwlock_t``
+  - ``seqcount_mutex_t``
+  - ``seqcount_ww_mutex_t``
+
+The plain seqcount read and write APIs branch out to the specific
+seqcount_LOCKTYPE_t implementation at compile-time. This avoids kernel
+API explosion for each new seqcount LOCKTYPE.
+
+Initialization (replace "LOCKTYPE" with one of the supported locks)::
+
+	/* dynamic */
+	seqcount_LOCKTYPE_t foo_seqcount;
+	seqcount_LOCKTYPE_init(&foo_seqcount, &lock);
+
+	/* static */
+	static seqcount_LOCKTYPE_t foo_seqcount =
+		SEQCNT_LOCKTYPE_ZERO(foo_seqcount, &lock);
+
+	/* C99 struct init */
+	struct {
+		.seq   = SEQCNT_LOCKTYPE_ZERO(foo.seq, &lock),
+	} foo;
+
+Write path: same as in :ref:`seqcount_t`, while running from a context
+with the associated LOCKTYPE lock acquired.
+
+Read path: same as in :ref:`seqcount_t`.
+
 .. _seqlock_t:
 
 Sequential locks (``seqlock_t``)
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 54bc204..8c16a49 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -10,13 +10,17 @@
  *
  * Copyrights:
  * - Based on x86_64 vsyscall gettimeofday: Keith Owens, Andrea Arcangeli
+ * - Sequence counters with associated locks, (C) 2020 Linutronix GmbH
  */
 
-#include <linux/spinlock.h>
-#include <linux/preempt.h>
-#include <linux/lockdep.h>
 #include <linux/compiler.h>
 #include <linux/kcsan-checks.h>
+#include <linux/lockdep.h>
+#include <linux/mutex.h>
+#include <linux/preempt.h>
+#include <linux/spinlock.h>
+#include <linux/ww_mutex.h>
+
 #include <asm/processor.h>
 
 /*
@@ -48,6 +52,10 @@
  * This mechanism can't be used if the protected data contains pointers,
  * as the writer can invalidate a pointer that a reader is following.
  *
+ * If the write serialization mechanism is one of the common kernel
+ * locking primitives, use a sequence counter with associated lock
+ * (seqcount_LOCKTYPE_t) instead.
+ *
  * If it's desired to automatically handle the sequence counter writer
  * serialization and non-preemptibility requirements, use a sequential
  * lock (seqlock_t) instead.
@@ -108,9 +116,267 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
  */
 #define SEQCNT_ZERO(name) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(name) }
 
+/*
+ * Sequence counters with associated locks (seqcount_LOCKTYPE_t)
+ *
+ * A sequence counter which associates the lock used for writer
+ * serialization at initialization time. This enables lockdep to validate
+ * that the write side critical section is properly serialized.
+ *
+ * For associated locks which do not implicitly disable preemption,
+ * preemption protection is enforced in the write side function.
+ *
+ * Lockdep is never used in any of the raw write variants.
+ *
+ * See Documentation/locking/seqlock.rst
+ */
+
+#ifdef CONFIG_LOCKDEP
+#define __SEQ_LOCKDEP(expr)	expr
+#else
+#define __SEQ_LOCKDEP(expr)
+#endif
+
+#define SEQCOUNT_LOCKTYPE_ZERO(seq_name, assoc_lock) {			\
+	.seqcount		= SEQCNT_ZERO(seq_name.seqcount),	\
+	__SEQ_LOCKDEP(.lock	= (assoc_lock))				\
+}
+
+#define seqcount_locktype_init(s, assoc_lock)				\
+do {									\
+	seqcount_init(&(s)->seqcount);					\
+	__SEQ_LOCKDEP((s)->lock = (assoc_lock));			\
+} while (0)
+
+/**
+ * typedef seqcount_spinlock_t - sequence counter with spinlock associated
+ * @seqcount:	The real sequence counter
+ * @lock:	Pointer to the associated spinlock
+ *
+ * A plain sequence counter with external writer synchronization by a
+ * spinlock. The spinlock is associated to the sequence count in the
+ * static initializer or init function. This enables lockdep to validate
+ * that the write side critical section is properly serialized.
+ */
+typedef struct seqcount_spinlock {
+	seqcount_t	seqcount;
+	__SEQ_LOCKDEP(spinlock_t	*lock);
+} seqcount_spinlock_t;
+
+/**
+ * SEQCNT_SPINLOCK_ZERO - static initializer for seqcount_spinlock_t
+ * @name:	Name of the seqcount_spinlock_t instance
+ * @lock:	Pointer to the associated spinlock
+ */
+#define SEQCNT_SPINLOCK_ZERO(name, lock)				\
+	SEQCOUNT_LOCKTYPE_ZERO(name, lock)
+
+/**
+ * seqcount_spinlock_init - runtime initializer for seqcount_spinlock_t
+ * @s:		Pointer to the seqcount_spinlock_t instance
+ * @lock:	Pointer to the associated spinlock
+ */
+#define seqcount_spinlock_init(s, lock)					\
+	seqcount_locktype_init(s, lock)
+
+/**
+ * typedef seqcount_raw_spinlock_t - sequence count with raw spinlock associated
+ * @seqcount:	The real sequence counter
+ * @lock:	Pointer to the associated raw spinlock
+ *
+ * A plain sequence counter with external writer synchronization by a
+ * raw spinlock. The raw spinlock is associated to the sequence count in
+ * the static initializer or init function. This enables lockdep to
+ * validate that the write side critical section is properly serialized.
+ */
+typedef struct seqcount_raw_spinlock {
+	seqcount_t      seqcount;
+	__SEQ_LOCKDEP(raw_spinlock_t	*lock);
+} seqcount_raw_spinlock_t;
+
+/**
+ * SEQCNT_RAW_SPINLOCK_ZERO - static initializer for seqcount_raw_spinlock_t
+ * @name:	Name of the seqcount_raw_spinlock_t instance
+ * @lock:	Pointer to the associated raw_spinlock
+ */
+#define SEQCNT_RAW_SPINLOCK_ZERO(name, lock)				\
+	SEQCOUNT_LOCKTYPE_ZERO(name, lock)
+
+/**
+ * seqcount_raw_spinlock_init - runtime initializer for seqcount_raw_spinlock_t
+ * @s:		Pointer to the seqcount_raw_spinlock_t instance
+ * @lock:	Pointer to the associated raw_spinlock
+ */
+#define seqcount_raw_spinlock_init(s, lock)				\
+	seqcount_locktype_init(s, lock)
+
+/**
+ * typedef seqcount_rwlock_t - sequence count with rwlock associated
+ * @seqcount:	The real sequence counter
+ * @lock:	Pointer to the associated rwlock
+ *
+ * A plain sequence counter with external writer synchronization by a
+ * rwlock. The rwlock is associated to the sequence count in the static
+ * initializer or init function. This enables lockdep to validate that
+ * the write side critical section is properly serialized.
+ */
+typedef struct seqcount_rwlock {
+	seqcount_t      seqcount;
+	__SEQ_LOCKDEP(rwlock_t		*lock);
+} seqcount_rwlock_t;
+
+/**
+ * SEQCNT_RWLOCK_ZERO - static initializer for seqcount_rwlock_t
+ * @name:	Name of the seqcount_rwlock_t instance
+ * @lock:	Pointer to the associated rwlock
+ */
+#define SEQCNT_RWLOCK_ZERO(name, lock)					\
+	SEQCOUNT_LOCKTYPE_ZERO(name, lock)
+
+/**
+ * seqcount_rwlock_init - runtime initializer for seqcount_rwlock_t
+ * @s:		Pointer to the seqcount_rwlock_t instance
+ * @lock:	Pointer to the associated rwlock
+ */
+#define seqcount_rwlock_init(s, lock)					\
+	seqcount_locktype_init(s, lock)
+
+/**
+ * typedef seqcount_mutex_t - sequence count with mutex associated
+ * @seqcount:	The real sequence counter
+ * @lock:	Pointer to the associated mutex
+ *
+ * A plain sequence counter with external writer synchronization by a
+ * mutex. The mutex is associated to the sequence counter in the static
+ * initializer or init function. This enables lockdep to validate that
+ * the write side critical section is properly serialized.
+ *
+ * The write side API functions write_seqcount_begin()/end() automatically
+ * disable and enable preemption when used with seqcount_mutex_t.
+ */
+typedef struct seqcount_mutex {
+	seqcount_t      seqcount;
+	__SEQ_LOCKDEP(struct mutex	*lock);
+} seqcount_mutex_t;
+
+/**
+ * SEQCNT_MUTEX_ZERO - static initializer for seqcount_mutex_t
+ * @name:	Name of the seqcount_mutex_t instance
+ * @lock:	Pointer to the associated mutex
+ */
+#define SEQCNT_MUTEX_ZERO(name, lock)					\
+	SEQCOUNT_LOCKTYPE_ZERO(name, lock)
+
+/**
+ * seqcount_mutex_init - runtime initializer for seqcount_mutex_t
+ * @s:		Pointer to the seqcount_mutex_t instance
+ * @lock:	Pointer to the associated mutex
+ */
+#define seqcount_mutex_init(s, lock)					\
+	seqcount_locktype_init(s, lock)
+
+/**
+ * typedef seqcount_ww_mutex_t - sequence count with ww_mutex associated
+ * @seqcount:	The real sequence counter
+ * @lock:	Pointer to the associated ww_mutex
+ *
+ * A plain sequence counter with external writer synchronization by a
+ * ww_mutex. The ww_mutex is associated to the sequence counter in the static
+ * initializer or init function. This enables lockdep to validate that
+ * the write side critical section is properly serialized.
+ *
+ * The write side API functions write_seqcount_begin()/end() automatically
+ * disable and enable preemption when used with seqcount_ww_mutex_t.
+ */
+typedef struct seqcount_ww_mutex {
+	seqcount_t      seqcount;
+	__SEQ_LOCKDEP(struct ww_mutex	*lock);
+} seqcount_ww_mutex_t;
+
+/**
+ * SEQCNT_WW_MUTEX_ZERO - static initializer for seqcount_ww_mutex_t
+ * @name:	Name of the seqcount_ww_mutex_t instance
+ * @lock:	Pointer to the associated ww_mutex
+ */
+#define SEQCNT_WW_MUTEX_ZERO(name, lock)				\
+	SEQCOUNT_LOCKTYPE_ZERO(name, lock)
+
+/**
+ * seqcount_ww_mutex_init - runtime initializer for seqcount_ww_mutex_t
+ * @s:		Pointer to the seqcount_ww_mutex_t instance
+ * @lock:	Pointer to the associated ww_mutex
+ */
+#define seqcount_ww_mutex_init(s, lock)					\
+	seqcount_locktype_init(s, lock)
+
+/*
+ * @preempt: Is the associated write serialization lock preemptible?
+ */
+#define SEQCOUNT_LOCKTYPE(locktype, preempt, lockmember)		\
+static inline seqcount_t *						\
+__seqcount_##locktype##_ptr(seqcount_##locktype##_t *s)			\
+{									\
+	return &s->seqcount;						\
+}									\
+									\
+static inline bool							\
+__seqcount_##locktype##_preemptible(seqcount_##locktype##_t *s)		\
+{									\
+	return preempt;							\
+}									\
+									\
+static inline void							\
+__seqcount_##locktype##_assert(seqcount_##locktype##_t *s)		\
+{									\
+	__SEQ_LOCKDEP(lockdep_assert_held(lockmember));			\
+}
+
+/*
+ * Similar hooks, but for plain seqcount_t
+ */
+
+static inline seqcount_t *__seqcount_ptr(seqcount_t *s)
+{
+	return s;
+}
+
+static inline bool __seqcount_preemptible(seqcount_t *s)
+{
+	return false;
+}
+
+static inline void __seqcount_assert(seqcount_t *s)
+{
+	lockdep_assert_preemption_disabled();
+}
+
+/*
+ * @s: Pointer to seqcount_locktype_t, the generated hooks' first parameter.
+ */
+SEQCOUNT_LOCKTYPE(raw_spinlock,	false,	s->lock)
+SEQCOUNT_LOCKTYPE(spinlock,	false,	s->lock)
+SEQCOUNT_LOCKTYPE(rwlock,	false,	s->lock)
+SEQCOUNT_LOCKTYPE(mutex,	true,	s->lock)
+SEQCOUNT_LOCKTYPE(ww_mutex,	true,	&s->lock->base)
+
+#define __seqprop_case(s, locktype, prop)				\
+	seqcount_##locktype##_t: __seqcount_##locktype##_##prop((void *)(s))
+
+#define __seqprop(s, prop) _Generic(*(s),				\
+	seqcount_t:		__seqcount_##prop((void *)(s)),		\
+	__seqprop_case((s),	raw_spinlock,	prop),			\
+	__seqprop_case((s),	spinlock,	prop),			\
+	__seqprop_case((s),	rwlock,		prop),			\
+	__seqprop_case((s),	mutex,		prop),			\
+	__seqprop_case((s),	ww_mutex,	prop))
+
+#define __to_seqcount_t(s)				__seqprop(s, ptr)
+#define __associated_lock_exists_and_is_preemptible(s)	__seqprop(s, preemptible)
+#define __assert_write_section_is_protected(s)		__seqprop(s, assert)
+
 /**
  * __read_seqcount_begin() - begin a seqcount_t read section w/o barrier
- * @s: Pointer to seqcount_t
+ * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
  *
  * __read_seqcount_begin is like read_seqcount_begin, but has no smp_rmb()
  * barrier. Callers should ensure that smp_rmb() or equivalent ordering is
@@ -122,7 +388,10 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
  *
  * Return: count to be passed to read_seqcount_retry()
  */
-static inline unsigned __read_seqcount_begin(const seqcount_t *s)
+#define __read_seqcount_begin(s)					\
+	__read_seqcount_t_begin(__to_seqcount_t(s))
+
+static inline unsigned __read_seqcount_t_begin(const seqcount_t *s)
 {
 	unsigned ret;
 
@@ -138,32 +407,38 @@ repeat:
 
 /**
  * raw_read_seqcount_begin() - begin a seqcount_t read section w/o lockdep
- * @s: Pointer to seqcount_t
+ * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
  *
  * Return: count to be passed to read_seqcount_retry()
  */
-static inline unsigned raw_read_seqcount_begin(const seqcount_t *s)
+#define raw_read_seqcount_begin(s)					\
+	raw_read_seqcount_t_begin(__to_seqcount_t(s))
+
+static inline unsigned raw_read_seqcount_t_begin(const seqcount_t *s)
 {
-	unsigned ret = __read_seqcount_begin(s);
+	unsigned ret = __read_seqcount_t_begin(s);
 	smp_rmb();
 	return ret;
 }
 
 /**
  * read_seqcount_begin() - begin a seqcount_t read critical section
- * @s: Pointer to seqcount_t
+ * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
  *
  * Return: count to be passed to read_seqcount_retry()
  */
-static inline unsigned read_seqcount_begin(const seqcount_t *s)
+#define read_seqcount_begin(s)						\
+	read_seqcount_t_begin(__to_seqcount_t(s))
+
+static inline unsigned read_seqcount_t_begin(const seqcount_t *s)
 {
 	seqcount_lockdep_reader_access(s);
-	return raw_read_seqcount_begin(s);
+	return raw_read_seqcount_t_begin(s);
 }
 
 /**
  * raw_read_seqcount() - read the raw seqcount_t counter value
- * @s: Pointer to seqcount_t
+ * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
  *
  * raw_read_seqcount opens a read critical section of the given
  * seqcount_t, without any lockdep checking, and without checking or
@@ -172,7 +447,10 @@ static inline unsigned read_seqcount_begin(const seqcount_t *s)
  *
  * Return: count to be passed to read_seqcount_retry()
  */
-static inline unsigned raw_read_seqcount(const seqcount_t *s)
+#define raw_read_seqcount(s)						\
+	raw_read_seqcount_t(__to_seqcount_t(s))
+
+static inline unsigned raw_read_seqcount_t(const seqcount_t *s)
 {
 	unsigned ret = READ_ONCE(s->sequence);
 	smp_rmb();
@@ -183,7 +461,7 @@ static inline unsigned raw_read_seqcount(const seqcount_t *s)
 /**
  * raw_seqcount_begin() - begin a seqcount_t read critical section w/o
  *                        lockdep and w/o counter stabilization
- * @s: Pointer to seqcount_t
+ * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
  *
  * raw_seqcount_begin opens a read critical section of the given
  * seqcount_t. Unlike read_seqcount_begin(), this function will not wait
@@ -197,18 +475,21 @@ static inline unsigned raw_read_seqcount(const seqcount_t *s)
  *
  * Return: count to be passed to read_seqcount_retry()
  */
-static inline unsigned raw_seqcount_begin(const seqcount_t *s)
+#define raw_seqcount_begin(s)						\
+	raw_seqcount_t_begin(__to_seqcount_t(s))
+
+static inline unsigned raw_seqcount_t_begin(const seqcount_t *s)
 {
 	/*
 	 * If the counter is odd, let read_seqcount_retry() fail
 	 * by decrementing the counter.
 	 */
-	return raw_read_seqcount(s) & ~1;
+	return raw_read_seqcount_t(s) & ~1;
 }
 
 /**
  * __read_seqcount_retry() - end a seqcount_t read section w/o barrier
- * @s: Pointer to seqcount_t
+ * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
  * @start: count, from read_seqcount_begin()
  *
  * __read_seqcount_retry is like read_seqcount_retry, but has no smp_rmb()
@@ -221,7 +502,10 @@ static inline unsigned raw_seqcount_begin(const seqcount_t *s)
  *
  * Return: true if a read section retry is required, else false
  */
-static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start)
+#define __read_seqcount_retry(s, start)					\
+	__read_seqcount_t_retry(__to_seqcount_t(s), start)
+
+static inline int __read_seqcount_t_retry(const seqcount_t *s, unsigned start)
 {
 	kcsan_atomic_next(0);
 	return unlikely(READ_ONCE(s->sequence) != start);
@@ -229,7 +513,7 @@ static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start)
 
 /**
  * read_seqcount_retry() - end a seqcount_t read critical section
- * @s: Pointer to seqcount_t
+ * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
  * @start: count, from read_seqcount_begin()
  *
  * read_seqcount_retry closes the read critical section of given
@@ -238,17 +522,28 @@ static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start)
  *
  * Return: true if a read section retry is required, else false
  */
-static inline int read_seqcount_retry(const seqcount_t *s, unsigned start)
+#define read_seqcount_retry(s, start)					\
+	read_seqcount_t_retry(__to_seqcount_t(s), start)
+
+static inline int read_seqcount_t_retry(const seqcount_t *s, unsigned start)
 {
 	smp_rmb();
-	return __read_seqcount_retry(s, start);
+	return __read_seqcount_t_retry(s, start);
 }
 
 /**
  * raw_write_seqcount_begin() - start a seqcount_t write section w/o lockdep
- * @s: Pointer to seqcount_t
+ * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
  */
-static inline void raw_write_seqcount_begin(seqcount_t *s)
+#define raw_write_seqcount_begin(s)					\
+do {									\
+	if (__associated_lock_exists_and_is_preemptible(s))		\
+		preempt_disable();					\
+									\
+	raw_write_seqcount_t_begin(__to_seqcount_t(s));			\
+} while (0)
+
+static inline void raw_write_seqcount_t_begin(seqcount_t *s)
 {
 	kcsan_nestable_atomic_begin();
 	s->sequence++;
@@ -257,49 +552,50 @@ static inline void raw_write_seqcount_begin(seqcount_t *s)
 
 /**
  * raw_write_seqcount_end() - end a seqcount_t write section w/o lockdep
- * @s: Pointer to seqcount_t
+ * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
  */
-static inline void raw_write_seqcount_end(seqcount_t *s)
+#define raw_write_seqcount_end(s)					\
+do {									\
+	raw_write_seqcount_t_end(__to_seqcount_t(s));			\
+									\
+	if (__associated_lock_exists_and_is_preemptible(s))		\
+		preempt_enable();					\
+} while (0)
+
+static inline void raw_write_seqcount_t_end(seqcount_t *s)
 {
 	smp_wmb();
 	s->sequence++;
 	kcsan_nestable_atomic_end();
 }
 
-static inline void __write_seqcount_begin_nested(seqcount_t *s, int subclass)
-{
-	raw_write_seqcount_begin(s);
-	seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_);
-}
-
 /**
  * write_seqcount_begin_nested() - start a seqcount_t write section with
  *                                 custom lockdep nesting level
- * @s: Pointer to seqcount_t
+ * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
  * @subclass: lockdep nesting level
  *
  * See Documentation/locking/lockdep-design.rst
  */
-static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass)
-{
-	lockdep_assert_preemption_disabled();
-	__write_seqcount_begin_nested(s, subclass);
-}
-
-/*
- * A write_seqcount_begin() variant w/o lockdep non-preemptibility checks.
- *
- * Use for internal seqlock.h code where it's known that preemption is
- * already disabled. For example, seqlock_t write side functions.
- */
-static inline void __write_seqcount_begin(seqcount_t *s)
+#define write_seqcount_begin_nested(s, subclass)			\
+do {									\
+	__assert_write_section_is_protected(s);				\
+									\
+	if (__associated_lock_exists_and_is_preemptible(s))		\
+		preempt_disable();					\
+									\
+	write_seqcount_t_begin_nested(__to_seqcount_t(s), subclass);	\
+} while (0)
+
+static inline void write_seqcount_t_begin_nested(seqcount_t *s, int subclass)
 {
-	__write_seqcount_begin_nested(s, 0);
+	raw_write_seqcount_t_begin(s);
+	seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_);
 }
 
 /**
  * write_seqcount_begin() - start a seqcount_t write side critical section
- * @s: Pointer to seqcount_t
+ * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
  *
  * write_seqcount_begin opens a write side critical section of the given
  * seqcount_t.
@@ -308,26 +604,44 @@ static inline void __write_seqcount_begin(seqcount_t *s)
  * non-preemptible. If readers can be invoked from hardirq or softirq
  * context, interrupts or bottom halves must be respectively disabled.
  */
-static inline void write_seqcount_begin(seqcount_t *s)
+#define write_seqcount_begin(s)						\
+do {									\
+	__assert_write_section_is_protected(s);				\
+									\
+	if (__associated_lock_exists_and_is_preemptible(s))		\
+		preempt_disable();					\
+									\
+	write_seqcount_t_begin(__to_seqcount_t(s));			\
+} while (0)
+
+static inline void write_seqcount_t_begin(seqcount_t *s)
 {
-	write_seqcount_begin_nested(s, 0);
+	write_seqcount_t_begin_nested(s, 0);
 }
 
 /**
  * write_seqcount_end() - end a seqcount_t write side critical section
- * @s: Pointer to seqcount_t
+ * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
  *
  * The write section must've been opened with write_seqcount_begin().
  */
-static inline void write_seqcount_end(seqcount_t *s)
+#define write_seqcount_end(s)						\
+do {									\
+	write_seqcount_t_end(__to_seqcount_t(s));			\
+									\
+	if (__associated_lock_exists_and_is_preemptible(s))		\
+		preempt_enable();					\
+} while (0)
+
+static inline void write_seqcount_t_end(seqcount_t *s)
 {
 	seqcount_release(&s->dep_map, _RET_IP_);
-	raw_write_seqcount_end(s);
+	raw_write_seqcount_t_end(s);
 }
 
 /**
  * raw_write_seqcount_barrier() - do a seqcount_t write barrier
- * @s: Pointer to seqcount_t
+ * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
  *
  * This can be used to provide an ordering guarantee instead of the usual
  * consistency guarantee. It is one wmb cheaper, because it can collapse
@@ -366,7 +680,10 @@ static inline void write_seqcount_end(seqcount_t *s)
  *		WRITE_ONCE(X, false);
  *      }
  */
-static inline void raw_write_seqcount_barrier(seqcount_t *s)
+#define raw_write_seqcount_barrier(s)					\
+	raw_write_seqcount_t_barrier(__to_seqcount_t(s))
+
+static inline void raw_write_seqcount_t_barrier(seqcount_t *s)
 {
 	kcsan_nestable_atomic_begin();
 	s->sequence++;
@@ -378,12 +695,15 @@ static inline void raw_write_seqcount_barrier(seqcount_t *s)
 /**
  * write_seqcount_invalidate() - invalidate in-progress seqcount_t read
  *                               side operations
- * @s: Pointer to seqcount_t
+ * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
  *
  * After write_seqcount_invalidate, no seqcount_t read side operations
  * will complete successfully and see data older than this.
  */
-static inline void write_seqcount_invalidate(seqcount_t *s)
+#define write_seqcount_invalidate(s)					\
+	write_seqcount_t_invalidate(__to_seqcount_t(s))
+
+static inline void write_seqcount_t_invalidate(seqcount_t *s)
 {
 	smp_wmb();
 	kcsan_nestable_atomic_begin();
@@ -393,7 +713,7 @@ static inline void write_seqcount_invalidate(seqcount_t *s)
 
 /**
  * raw_read_seqcount_latch() - pick even/odd seqcount_t latch data copy
- * @s: Pointer to seqcount_t
+ * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
  *
  * Use seqcount_t latching to switch between two storage places protected
  * by a sequence counter. Doing so allows having interruptible, preemptible,
@@ -406,7 +726,10 @@ static inline void write_seqcount_invalidate(seqcount_t *s)
  * picking which data copy to read. The full counter value must then be
  * checked with read_seqcount_retry().
  */
-static inline int raw_read_seqcount_latch(seqcount_t *s)
+#define raw_read_seqcount_latch(s)					\
+	raw_read_seqcount_t_latch(__to_seqcount_t(s))
+
+static inline int raw_read_seqcount_t_latch(seqcount_t *s)
 {
 	/* Pairs with the first smp_wmb() in raw_write_seqcount_latch() */
 	int seq = READ_ONCE(s->sequence); /* ^^^ */
@@ -415,7 +738,7 @@ static inline int raw_read_seqcount_latch(seqcount_t *s)
 
 /**
  * raw_write_seqcount_latch() - redirect readers to even/odd copy
- * @s: Pointer to seqcount_t
+ * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
  *
  * The latch technique is a multiversion concurrency control method that allows
  * queries during non-atomic modifications. If you can guarantee queries never
@@ -494,7 +817,10 @@ static inline int raw_read_seqcount_latch(seqcount_t *s)
  *	When data is a dynamic data structure; one should use regular RCU
  *	patterns to manage the lifetimes of the objects within.
  */
-static inline void raw_write_seqcount_latch(seqcount_t *s)
+#define raw_write_seqcount_latch(s)					\
+	raw_write_seqcount_t_latch(__to_seqcount_t(s))
+
+static inline void raw_write_seqcount_t_latch(seqcount_t *s)
 {
        smp_wmb();      /* prior stores before incrementing "sequence" */
        s->sequence++;
@@ -592,7 +918,7 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
 static inline void write_seqlock(seqlock_t *sl)
 {
 	spin_lock(&sl->lock);
-	__write_seqcount_begin(&sl->seqcount);
+	write_seqcount_t_begin(&sl->seqcount);
 }
 
 /**
@@ -604,7 +930,7 @@ static inline void write_seqlock(seqlock_t *sl)
  */
 static inline void write_sequnlock(seqlock_t *sl)
 {
-	write_seqcount_end(&sl->seqcount);
+	write_seqcount_t_end(&sl->seqcount);
 	spin_unlock(&sl->lock);
 }
 
@@ -618,7 +944,7 @@ static inline void write_sequnlock(seqlock_t *sl)
 static inline void write_seqlock_bh(seqlock_t *sl)
 {
 	spin_lock_bh(&sl->lock);
-	__write_seqcount_begin(&sl->seqcount);
+	write_seqcount_t_begin(&sl->seqcount);
 }
 
 /**
@@ -631,7 +957,7 @@ static inline void write_seqlock_bh(seqlock_t *sl)
  */
 static inline void write_sequnlock_bh(seqlock_t *sl)
 {
-	write_seqcount_end(&sl->seqcount);
+	write_seqcount_t_end(&sl->seqcount);
 	spin_unlock_bh(&sl->lock);
 }
 
@@ -645,7 +971,7 @@ static inline void write_sequnlock_bh(seqlock_t *sl)
 static inline void write_seqlock_irq(seqlock_t *sl)
 {
 	spin_lock_irq(&sl->lock);
-	__write_seqcount_begin(&sl->seqcount);
+	write_seqcount_t_begin(&sl->seqcount);
 }
 
 /**
@@ -657,7 +983,7 @@ static inline void write_seqlock_irq(seqlock_t *sl)
  */
 static inline void write_sequnlock_irq(seqlock_t *sl)
 {
-	write_seqcount_end(&sl->seqcount);
+	write_seqcount_t_end(&sl->seqcount);
 	spin_unlock_irq(&sl->lock);
 }
 
@@ -666,7 +992,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl)
 	unsigned long flags;
 
 	spin_lock_irqsave(&sl->lock, flags);
-	__write_seqcount_begin(&sl->seqcount);
+	write_seqcount_t_begin(&sl->seqcount);
 	return flags;
 }
 
@@ -695,13 +1021,13 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl)
 static inline void
 write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags)
 {
-	write_seqcount_end(&sl->seqcount);
+	write_seqcount_t_end(&sl->seqcount);
 	spin_unlock_irqrestore(&sl->lock, flags);
 }
 
 /**
  * read_seqlock_excl() - begin a seqlock_t locking reader section
- * @sl: Pointer to seqlock_t
+ * @sl:	Pointer to seqlock_t
  *
  * read_seqlock_excl opens a seqlock_t locking reader critical section.  A
  * locking reader exclusively locks out *both* other writers *and* other
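
With the seqlock_t write path above, callers never manage preemption or the
sequence count directly: write_seqlock() acquires the embedded spinlock and
then enters the seqcount write section through write_seqcount_t_begin(). A
minimal usage sketch, assuming a hypothetical foo_seqlock protecting two
hypothetical fields:

	#include <linux/seqlock.h>

	static DEFINE_SEQLOCK(foo_seqlock);
	static u64 foo_a, foo_b;	/* protected by foo_seqlock */

	static void foo_update(u64 a, u64 b)
	{
		write_seqlock(&foo_seqlock);	/* spin_lock() + seqcount begin */
		foo_a = a;
		foo_b = b;
		write_sequnlock(&foo_seqlock);	/* seqcount end + spin_unlock() */
	}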

^ permalink raw reply	[relevance 28%]

* [tip: locking/core] seqlock: Reorder seqcount_t and seqlock_t API definitions
  2020-07-20 15:55 70%   ` [PATCH v4 04/24] seqlock: Reorder seqcount_t and seqlock_t API definitions Ahmed S. Darwish
@ 2020-07-29 14:33 55%     ` tip-bot2 for Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: tip-bot2 for Ahmed S. Darwish @ 2020-07-29 14:33 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Ahmed S. Darwish, Peter Zijlstra (Intel), x86, LKML

The following commit has been merged into the locking/core branch of tip:

Commit-ID:     f4a27cbcec90ac04ee60e04b222e1449dcdba0bd
Gitweb:        https://git.kernel.org/tip/f4a27cbcec90ac04ee60e04b222e1449dcdba0bd
Author:        Ahmed S. Darwish <a.darwish@linutronix.de>
AuthorDate:    Mon, 20 Jul 2020 17:55:10 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Wed, 29 Jul 2020 16:14:23 +02:00

seqlock: Reorder seqcount_t and seqlock_t API definitions

The seqlock.h seqcount_t and seqlock_t API definitions are presented in
the chronological order of their development rather than the order that
makes the most sense to readers. This makes the header file code hard to
follow and understand.

Group and reorder all of the exported seqlock.h functions according to
their purpose.

First, group together the seqcount_t standard read path functions:

    - __read_seqcount_begin()
    - raw_read_seqcount_begin()
    - read_seqcount_begin()

since each function is implemented exactly in terms of the one above
it. Then, group the special-case seqcount_t readers on their own as:

    - raw_read_seqcount()
    - raw_seqcount_begin()

since the only difference between the two functions is that the second
one masks the sequence counter LSB while the first one does not. Note
that raw_seqcount_begin() can actually be implemented in terms of
raw_read_seqcount(), which will be done in a follow-up commit.

Then, group the seqcount_t write path functions, instead of injecting
unrelated seqcount_t latch functions between them, and order them as:

    - raw_write_seqcount_begin()
    - raw_write_seqcount_end()
    - write_seqcount_begin_nested()
    - write_seqcount_begin()
    - write_seqcount_end()
    - raw_write_seqcount_barrier()
    - write_seqcount_invalidate()

which is the expected natural order. This also isolates the seqcount_t
latch functions into their own area, at the end of the sequence counters
section, and before jumping to the next one: sequential locks
(seqlock_t).

Do a similar grouping and reordering for seqlock_t "locking" readers vs.
the "conditionally locking or lockless" ones.

No implementation code was changed in any of the reordering above.

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200720155530.1173732-5-a.darwish@linutronix.de
---
 include/linux/seqlock.h | 158 +++++++++++++++++++--------------------
 1 file changed, 78 insertions(+), 80 deletions(-)

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index d724b5e..4c14560 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -129,23 +129,6 @@ repeat:
 }
 
 /**
- * raw_read_seqcount - Read the raw seqcount
- * @s: pointer to seqcount_t
- * Returns: count to be passed to read_seqcount_retry
- *
- * raw_read_seqcount opens a read critical section of the given
- * seqcount without any lockdep checking and without checking or
- * masking the LSB. Calling code is responsible for handling that.
- */
-static inline unsigned raw_read_seqcount(const seqcount_t *s)
-{
-	unsigned ret = READ_ONCE(s->sequence);
-	smp_rmb();
-	kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);
-	return ret;
-}
-
-/**
  * raw_read_seqcount_begin - start seq-read critical section w/o lockdep
  * @s: pointer to seqcount_t
  * Returns: count to be passed to read_seqcount_retry
@@ -177,6 +160,23 @@ static inline unsigned read_seqcount_begin(const seqcount_t *s)
 }
 
 /**
+ * raw_read_seqcount - Read the raw seqcount
+ * @s: pointer to seqcount_t
+ * Returns: count to be passed to read_seqcount_retry
+ *
+ * raw_read_seqcount opens a read critical section of the given
+ * seqcount without any lockdep checking and without checking or
+ * masking the LSB. Calling code is responsible for handling that.
+ */
+static inline unsigned raw_read_seqcount(const seqcount_t *s)
+{
+	unsigned ret = READ_ONCE(s->sequence);
+	smp_rmb();
+	kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);
+	return ret;
+}
+
+/**
  * raw_seqcount_begin - begin a seq-read critical section
  * @s: pointer to seqcount_t
  * Returns: count to be passed to read_seqcount_retry
@@ -234,8 +234,6 @@ static inline int read_seqcount_retry(const seqcount_t *s, unsigned start)
 	return __read_seqcount_retry(s, start);
 }
 
-
-
 static inline void raw_write_seqcount_begin(seqcount_t *s)
 {
 	kcsan_nestable_atomic_begin();
@@ -250,6 +248,23 @@ static inline void raw_write_seqcount_end(seqcount_t *s)
 	kcsan_nestable_atomic_end();
 }
 
+static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass)
+{
+	raw_write_seqcount_begin(s);
+	seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_);
+}
+
+static inline void write_seqcount_begin(seqcount_t *s)
+{
+	write_seqcount_begin_nested(s, 0);
+}
+
+static inline void write_seqcount_end(seqcount_t *s)
+{
+	seqcount_release(&s->dep_map, _RET_IP_);
+	raw_write_seqcount_end(s);
+}
+
 /**
  * raw_write_seqcount_barrier - do a seq write barrier
  * @s: pointer to seqcount_t
@@ -300,6 +315,21 @@ static inline void raw_write_seqcount_barrier(seqcount_t *s)
 	kcsan_nestable_atomic_end();
 }
 
+/**
+ * write_seqcount_invalidate - invalidate in-progress read-side seq operations
+ * @s: pointer to seqcount_t
+ *
+ * After write_seqcount_invalidate, no read-side seq operations will complete
+ * successfully and see data older than this.
+ */
+static inline void write_seqcount_invalidate(seqcount_t *s)
+{
+	smp_wmb();
+	kcsan_nestable_atomic_begin();
+	s->sequence+=2;
+	kcsan_nestable_atomic_end();
+}
+
 static inline int raw_read_seqcount_latch(seqcount_t *s)
 {
 	/* Pairs with the first smp_wmb() in raw_write_seqcount_latch() */
@@ -395,38 +425,6 @@ static inline void raw_write_seqcount_latch(seqcount_t *s)
        smp_wmb();      /* increment "sequence" before following stores */
 }
 
-static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass)
-{
-	raw_write_seqcount_begin(s);
-	seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_);
-}
-
-static inline void write_seqcount_begin(seqcount_t *s)
-{
-	write_seqcount_begin_nested(s, 0);
-}
-
-static inline void write_seqcount_end(seqcount_t *s)
-{
-	seqcount_release(&s->dep_map, _RET_IP_);
-	raw_write_seqcount_end(s);
-}
-
-/**
- * write_seqcount_invalidate - invalidate in-progress read-side seq operations
- * @s: pointer to seqcount_t
- *
- * After write_seqcount_invalidate, no read-side seq operations will complete
- * successfully and see data older than this.
- */
-static inline void write_seqcount_invalidate(seqcount_t *s)
-{
-	smp_wmb();
-	kcsan_nestable_atomic_begin();
-	s->sequence+=2;
-	kcsan_nestable_atomic_end();
-}
-
 /*
  * Sequential locks (seqlock_t)
  *
@@ -555,35 +553,6 @@ static inline void read_sequnlock_excl(seqlock_t *sl)
 	spin_unlock(&sl->lock);
 }
 
-/**
- * read_seqbegin_or_lock - begin a sequence number check or locking block
- * @lock: sequence lock
- * @seq : sequence number to be checked
- *
- * First try it once optimistically without taking the lock. If that fails,
- * take the lock. The sequence number is also used as a marker for deciding
- * whether to be a reader (even) or writer (odd).
- * N.B. seq must be initialized to an even number to begin with.
- */
-static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq)
-{
-	if (!(*seq & 1))	/* Even */
-		*seq = read_seqbegin(lock);
-	else			/* Odd */
-		read_seqlock_excl(lock);
-}
-
-static inline int need_seqretry(seqlock_t *lock, int seq)
-{
-	return !(seq & 1) && read_seqretry(lock, seq);
-}
-
-static inline void done_seqretry(seqlock_t *lock, int seq)
-{
-	if (seq & 1)
-		read_sequnlock_excl(lock);
-}
-
 static inline void read_seqlock_excl_bh(seqlock_t *sl)
 {
 	spin_lock_bh(&sl->lock);
@@ -621,6 +590,35 @@ read_sequnlock_excl_irqrestore(seqlock_t *sl, unsigned long flags)
 	spin_unlock_irqrestore(&sl->lock, flags);
 }
 
+/**
+ * read_seqbegin_or_lock - begin a sequence number check or locking block
+ * @lock: sequence lock
+ * @seq : sequence number to be checked
+ *
+ * First try it once optimistically without taking the lock. If that fails,
+ * take the lock. The sequence number is also used as a marker for deciding
+ * whether to be a reader (even) or writer (odd).
+ * N.B. seq must be initialized to an even number to begin with.
+ */
+static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq)
+{
+	if (!(*seq & 1))	/* Even */
+		*seq = read_seqbegin(lock);
+	else			/* Odd */
+		read_seqlock_excl(lock);
+}
+
+static inline int need_seqretry(seqlock_t *lock, int seq)
+{
+	return !(seq & 1) && read_seqretry(lock, seq);
+}
+
+static inline void done_seqretry(seqlock_t *lock, int seq)
+{
+	if (seq & 1)
+		read_sequnlock_excl(lock);
+}
+
 static inline unsigned long
 read_seqbegin_or_lock_irqsave(seqlock_t *lock, int *seq)
 {
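
For reference, the read_seqbegin_or_lock() trio that this reordering groups
with the other seqlock_t reader APIs is used in a retry loop that starts out
lockless and upgrades to a locking reader on failure. A short sketch of the
pattern, assuming a hypothetical foo_seqlock:

	static DEFINE_SEQLOCK(foo_seqlock);

	static void foo_read(void)
	{
		int seq = 0;	/* must start even: try the lockless path first */

		do {
			read_seqbegin_or_lock(&foo_seqlock, &seq);

			/* ... read-side critical section ... */

		} while (need_seqretry(&foo_seqlock, seq));
		done_seqretry(&foo_seqlock, seq);
	}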

^ permalink raw reply	[relevance 55%]

* [tip: locking/core] seqlock: seqcount_t latch: End read sections with read_seqcount_retry()
  @ 2020-07-29 14:33 86% ` tip-bot2 for Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: tip-bot2 for Ahmed S. Darwish @ 2020-07-29 14:33 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Ahmed S. Darwish, Peter Zijlstra (Intel), x86, LKML

The following commit has been merged into the locking/core branch of tip:

Commit-ID:     d3b35b87f436c1b226a8061bee9c8875ba6658bd
Gitweb:        https://git.kernel.org/tip/d3b35b87f436c1b226a8061bee9c8875ba6658bd
Author:        Ahmed S. Darwish <a.darwish@linutronix.de>
AuthorDate:    Mon, 20 Jul 2020 17:55:09 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Wed, 29 Jul 2020 16:14:23 +02:00

seqlock: seqcount_t latch: End read sections with read_seqcount_retry()

The seqcount_t latch reader example at the raw_write_seqcount_latch()
kernel-doc comment ends the latch read section with a manual smp memory
barrier and sequence counter comparison.

This is technically correct, but it is suboptimal: read_seqcount_retry()
already contains the same smp memory barrier and sequence counter
comparison logic.

End the latch read critical section example with read_seqcount_retry().

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200720155530.1173732-4-a.darwish@linutronix.de
---
 include/linux/seqlock.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 6c4f68e..d724b5e 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -363,8 +363,8 @@ static inline int raw_read_seqcount_latch(seqcount_t *s)
  *			idx = seq & 0x01;
  *			entry = data_query(latch->data[idx], ...);
  *
- *			smp_rmb();
- *		} while (seq != latch->seq);
+ *		// read_seqcount_retry() includes needed smp_rmb()
+ *		} while (read_seqcount_retry(&latch->seq, seq));
  *
  *		return entry;
  *	}
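
With that change applied, the complete latch reader from the kernel-doc
example reads as follows (struct latch_struct and data_query() are the
illustrative names used by the comment itself):

	struct entry *latch_query(struct latch_struct *latch, ...)
	{
		struct entry *entry;
		unsigned seq, idx;

		do {
			seq = raw_read_seqcount_latch(&latch->seq);
			idx = seq & 0x01;
			entry = data_query(latch->data[idx], ...);

			/* read_seqcount_retry() includes the needed smp_rmb() */
		} while (read_seqcount_retry(&latch->seq, seq));

		return entry;
	}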

^ permalink raw reply	[relevance 86%]

* [tip: locking/core] seqlock: Properly format kernel-doc code samples
  2020-07-20 15:55 75%   ` [PATCH v4 02/24] seqlock: Properly format kernel-doc code samples Ahmed S. Darwish
@ 2020-07-29 14:33 62%     ` tip-bot2 for Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: tip-bot2 for Ahmed S. Darwish @ 2020-07-29 14:33 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Ahmed S. Darwish, Peter Zijlstra (Intel), x86, LKML

The following commit has been merged into the locking/core branch of tip:

Commit-ID:     15cbe67bbd3adeb4854c42713dbeaf2ff876beee
Gitweb:        https://git.kernel.org/tip/15cbe67bbd3adeb4854c42713dbeaf2ff876beee
Author:        Ahmed S. Darwish <a.darwish@linutronix.de>
AuthorDate:    Mon, 20 Jul 2020 17:55:08 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Wed, 29 Jul 2020 16:14:23 +02:00

seqlock: Properly format kernel-doc code samples

Align the code samples and note sections inside kernel-doc comments with
tabs. This way they can be properly parsed and rendered by Sphinx. It
also makes the code samples easier to read in text editors.

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200720155530.1173732-3-a.darwish@linutronix.de
---
 include/linux/seqlock.h | 108 ++++++++++++++++++++-------------------
 1 file changed, 56 insertions(+), 52 deletions(-)

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 299d68f..6c4f68e 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -263,32 +263,32 @@ static inline void raw_write_seqcount_end(seqcount_t *s)
  * atomically, avoiding compiler optimizations; b) to document which writes are
  * meant to propagate to the reader critical section. This is necessary because
  * neither writes before and after the barrier are enclosed in a seq-writer
- * critical section that would ensure readers are aware of ongoing writes.
+ * critical section that would ensure readers are aware of ongoing writes::
  *
- *      seqcount_t seq;
- *      bool X = true, Y = false;
+ *	seqcount_t seq;
+ *	bool X = true, Y = false;
  *
- *      void read(void)
- *      {
- *              bool x, y;
+ *	void read(void)
+ *	{
+ *		bool x, y;
  *
- *              do {
- *                      int s = read_seqcount_begin(&seq);
+ *		do {
+ *			int s = read_seqcount_begin(&seq);
  *
- *                      x = X; y = Y;
+ *			x = X; y = Y;
  *
- *              } while (read_seqcount_retry(&seq, s));
+ *		} while (read_seqcount_retry(&seq, s));
  *
- *              BUG_ON(!x && !y);
+ *		BUG_ON(!x && !y);
  *      }
  *
  *      void write(void)
  *      {
- *              WRITE_ONCE(Y, true);
+ *		WRITE_ONCE(Y, true);
  *
- *              raw_write_seqcount_barrier(seq);
+ *		raw_write_seqcount_barrier(seq);
  *
- *              WRITE_ONCE(X, false);
+ *		WRITE_ONCE(X, false);
  *      }
  */
 static inline void raw_write_seqcount_barrier(seqcount_t *s)
@@ -325,64 +325,68 @@ static inline int raw_read_seqcount_latch(seqcount_t *s)
  * Very simply put: we first modify one copy and then the other. This ensures
  * there is always one copy in a stable state, ready to give us an answer.
  *
- * The basic form is a data structure like:
+ * The basic form is a data structure like::
  *
- * struct latch_struct {
- *	seqcount_t		seq;
- *	struct data_struct	data[2];
- * };
+ *	struct latch_struct {
+ *		seqcount_t		seq;
+ *		struct data_struct	data[2];
+ *	};
  *
  * Where a modification, which is assumed to be externally serialized, does the
- * following:
+ * following::
  *
- * void latch_modify(struct latch_struct *latch, ...)
- * {
- *	smp_wmb();	<- Ensure that the last data[1] update is visible
- *	latch->seq++;
- *	smp_wmb();	<- Ensure that the seqcount update is visible
+ *	void latch_modify(struct latch_struct *latch, ...)
+ *	{
+ *		smp_wmb();	// Ensure that the last data[1] update is visible
+ *		latch->seq++;
+ *		smp_wmb();	// Ensure that the seqcount update is visible
  *
- *	modify(latch->data[0], ...);
+ *		modify(latch->data[0], ...);
  *
- *	smp_wmb();	<- Ensure that the data[0] update is visible
- *	latch->seq++;
- *	smp_wmb();	<- Ensure that the seqcount update is visible
+ *		smp_wmb();	// Ensure that the data[0] update is visible
+ *		latch->seq++;
+ *		smp_wmb();	// Ensure that the seqcount update is visible
  *
- *	modify(latch->data[1], ...);
- * }
+ *		modify(latch->data[1], ...);
+ *	}
  *
- * The query will have a form like:
+ * The query will have a form like::
  *
- * struct entry *latch_query(struct latch_struct *latch, ...)
- * {
- *	struct entry *entry;
- *	unsigned seq, idx;
+ *	struct entry *latch_query(struct latch_struct *latch, ...)
+ *	{
+ *		struct entry *entry;
+ *		unsigned seq, idx;
  *
- *	do {
- *		seq = raw_read_seqcount_latch(&latch->seq);
+ *		do {
+ *			seq = raw_read_seqcount_latch(&latch->seq);
  *
- *		idx = seq & 0x01;
- *		entry = data_query(latch->data[idx], ...);
+ *			idx = seq & 0x01;
+ *			entry = data_query(latch->data[idx], ...);
  *
- *		smp_rmb();
- *	} while (seq != latch->seq);
+ *			smp_rmb();
+ *		} while (seq != latch->seq);
  *
- *	return entry;
- * }
+ *		return entry;
+ *	}
  *
  * So during the modification, queries are first redirected to data[1]. Then we
  * modify data[0]. When that is complete, we redirect queries back to data[0]
  * and we can modify data[1].
  *
- * NOTE: The non-requirement for atomic modifications does _NOT_ include
- *       the publishing of new entries in the case where data is a dynamic
- *       data structure.
+ * NOTE:
+ *
+ *	The non-requirement for atomic modifications does _NOT_ include
+ *	the publishing of new entries in the case where data is a dynamic
+ *	data structure.
+ *
+ *	An iteration might start in data[0] and get suspended long enough
+ *	to miss an entire modification sequence, once it resumes it might
+ *	observe the new entry.
  *
- *       An iteration might start in data[0] and get suspended long enough
- *       to miss an entire modification sequence, once it resumes it might
- *       observe the new entry.
+ * NOTE:
  *
- * NOTE: When data is a dynamic data structure; one should use regular RCU
- *       patterns to manage the lifetimes of the objects within.
+ *	When data is a dynamic data structure; one should use regular RCU
+ *	patterns to manage the lifetimes of the objects within.
  */
 static inline void raw_write_seqcount_latch(seqcount_t *s)
 {

^ permalink raw reply	[relevance 62%]

* [tip: locking/core] Documentation: locking: Describe seqlock design and usage
  2020-07-20 15:55 54%   ` [PATCH v4 01/24] Documentation: locking: Describe seqlock design and usage Ahmed S. Darwish
    @ 2020-07-29 14:33 43%     ` tip-bot2 for Ahmed S. Darwish
  2 siblings, 0 replies; 200+ results
From: tip-bot2 for Ahmed S. Darwish @ 2020-07-29 14:33 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Ahmed S. Darwish, Peter Zijlstra (Intel), x86, LKML

The following commit has been merged into the locking/core branch of tip:

Commit-ID:     0d24f65e933ca89d55d17f6dbdb2a72ca88f0992
Gitweb:        https://git.kernel.org/tip/0d24f65e933ca89d55d17f6dbdb2a72ca88f0992
Author:        Ahmed S. Darwish <a.darwish@linutronix.de>
AuthorDate:    Mon, 20 Jul 2020 17:55:07 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Wed, 29 Jul 2020 16:14:22 +02:00

Documentation: locking: Describe seqlock design and usage

Proper documentation for the design and usage of sequence counters and
sequential locks does not exist. Complete the seqlock.h documentation as
follows:

  - Divide all documentation on a seqcount_t vs. seqlock_t basis. The
    description for both mechanisms was intermingled, which is incorrect
    since the usage constraints for each type are vastly different.

  - Add an introductory paragraph describing the internal design of, and
    rationale for, sequence counters.

  - Document seqcount_t writer non-preemptibility requirement, which was
    not previously documented anywhere, and provide a clear rationale.

  - Provide template code for seqcount_t and seqlock_t initialization
    and reader/writer critical sections.

  - Recommend using seqlock_t by default. It implicitly handles the
    serialization and non-preemptibility requirements of writers.

At seqlock.h:

  - Remove references to brlocks as they've long been removed from the
    kernel.

  - Remove references to gcc-3.x since the kernel's minimum supported
    gcc version is 4.9.

References: 0f6ed63b1707 ("no need to keep brlock macros anymore...")
References: 6ec4476ac825 ("Raise gcc version requirement to 4.9")
Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200720155530.1173732-2-a.darwish@linutronix.de
---
 Documentation/locking/index.rst   |   1 +-
 Documentation/locking/seqlock.rst | 170 +++++++++++++++++++++++++++++-
 include/linux/seqlock.h           |  85 +++++++--------
 3 files changed, 211 insertions(+), 45 deletions(-)
 create mode 100644 Documentation/locking/seqlock.rst

diff --git a/Documentation/locking/index.rst b/Documentation/locking/index.rst
index d785878..7003bd5 100644
--- a/Documentation/locking/index.rst
+++ b/Documentation/locking/index.rst
@@ -14,6 +14,7 @@ locking
     mutex-design
     rt-mutex-design
     rt-mutex
+    seqlock
     spinlocks
     ww-mutex-design
     preempt-locking
diff --git a/Documentation/locking/seqlock.rst b/Documentation/locking/seqlock.rst
new file mode 100644
index 0000000..366dd36
--- /dev/null
+++ b/Documentation/locking/seqlock.rst
@@ -0,0 +1,170 @@
+======================================
+Sequence counters and sequential locks
+======================================
+
+Introduction
+============
+
+Sequence counters are a reader-writer consistency mechanism with
+lockless readers (read-only retry loops), and no writer starvation. They
+are used for data that's rarely written to (e.g. system time), where the
+reader wants a consistent set of information and is willing to retry if
+that information changes.
+
+A data set is consistent when the sequence count at the beginning of the
+read side critical section is even and the same sequence count value is
+read again at the end of the critical section. The data in the set must
+be copied out inside the read side critical section. If the sequence
+count has changed between the start and the end of the critical section,
+the reader must retry.
+
+Writers increment the sequence count at the start and the end of their
+critical section. After starting the critical section the sequence count
+is odd and indicates to the readers that an update is in progress. At
+the end of the write side critical section the sequence count becomes
+even again which lets readers make progress.
+
+A sequence counter write side critical section must never be preempted
+or interrupted by read side sections. Otherwise the reader will spin for
+the entire scheduler tick due to the odd sequence count value and the
+interrupted writer. If that reader belongs to a real-time scheduling
+class, it can spin forever and the kernel will livelock.
+
+This mechanism cannot be used if the protected data contains pointers,
+as the writer can invalidate a pointer that the reader is following.
+
+
+.. _seqcount_t:
+
+Sequence counters (``seqcount_t``)
+==================================
+
+This is the raw counting mechanism, which does not protect against
+multiple writers.  Write side critical sections must thus be serialized
+by an external lock.
+
+If the write serialization primitive is not implicitly disabling
+preemption, preemption must be explicitly disabled before entering the
+write side section. If the read section can be invoked from hardirq or
+softirq contexts, interrupts or bottom halves must also be respectively
+disabled before entering the write section.
+
+If it's desired to automatically handle the sequence counter
+requirements of writer serialization and non-preemptibility, use
+:ref:`seqlock_t` instead.
+
+Initialization::
+
+	/* dynamic */
+	seqcount_t foo_seqcount;
+	seqcount_init(&foo_seqcount);
+
+	/* static */
+	static seqcount_t foo_seqcount = SEQCNT_ZERO(foo_seqcount);
+
+	/* C99 struct init */
+	struct {
+		.seq   = SEQCNT_ZERO(foo.seq),
+	} foo;
+
+Write path::
+
+	/* Serialized context with disabled preemption */
+
+	write_seqcount_begin(&foo_seqcount);
+
+	/* ... [[write-side critical section]] ... */
+
+	write_seqcount_end(&foo_seqcount);
+
+Read path::
+
+	do {
+		seq = read_seqcount_begin(&foo_seqcount);
+
+		/* ... [[read-side critical section]] ... */
+
+	} while (read_seqcount_retry(&foo_seqcount, seq));
+
+
+.. _seqlock_t:
+
+Sequential locks (``seqlock_t``)
+================================
+
+This contains the :ref:`seqcount_t` mechanism earlier discussed, plus an
+embedded spinlock for writer serialization and non-preemptibility.
+
+If the read side section can be invoked from hardirq or softirq context,
+use the write side function variants which disable interrupts or bottom
+halves respectively.
+
+Initialization::
+
+	/* dynamic */
+	seqlock_t foo_seqlock;
+	seqlock_init(&foo_seqlock);
+
+	/* static */
+	static DEFINE_SEQLOCK(foo_seqlock);
+
+	/* C99 struct init */
+	struct {
+		.seql   = __SEQLOCK_UNLOCKED(foo.seql)
+	} foo;
+
+Write path::
+
+	write_seqlock(&foo_seqlock);
+
+	/* ... [[write-side critical section]] ... */
+
+	write_sequnlock(&foo_seqlock);
+
+Read path, three categories:
+
+1. Normal Sequence readers which never block a writer but they must
+   retry if a writer is in progress by detecting change in the sequence
+   number.  Writers do not wait for a sequence reader::
+
+	do {
+		seq = read_seqbegin(&foo_seqlock);
+
+		/* ... [[read-side critical section]] ... */
+
+	} while (read_seqretry(&foo_seqlock, seq));
+
+2. Locking readers which will wait if a writer or another locking reader
+   is in progress. A locking reader in progress will also block a writer
+   from entering its critical section. This read lock is
+   exclusive. Unlike rwlock_t, only one locking reader can acquire it::
+
+	read_seqlock_excl(&foo_seqlock);
+
+	/* ... [[read-side critical section]] ... */
+
+	read_sequnlock_excl(&foo_seqlock);
+
+3. Conditional lockless reader (as in 1), or locking reader (as in 2),
+   according to a passed marker. This is used to avoid lockless readers
+   starvation (too many retry loops) in case of a sharp spike in write
+   activity. First, a lockless read is tried (even marker passed). If
+   that trial fails (odd sequence counter is returned, which is used as
+   the next iteration marker), the lockless read is transformed to a
+   full locking read and no retry loop is necessary::
+
+	/* marker; even initialization */
+	int seq = 0;
+	do {
+		read_seqbegin_or_lock(&foo_seqlock, &seq);
+
+		/* ... [[read-side critical section]] ... */
+
+	} while (need_seqretry(&foo_seqlock, seq));
+	done_seqretry(&foo_seqlock, seq);
+
+
+API documentation
+=================
+
+.. kernel-doc:: include/linux/seqlock.h
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 8b97204..299d68f 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -1,36 +1,15 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __LINUX_SEQLOCK_H
 #define __LINUX_SEQLOCK_H
+
 /*
- * Reader/writer consistent mechanism without starving writers. This type of
- * lock for data where the reader wants a consistent set of information
- * and is willing to retry if the information changes. There are two types
- * of readers:
- * 1. Sequence readers which never block a writer but they may have to retry
- *    if a writer is in progress by detecting change in sequence number.
- *    Writers do not wait for a sequence reader.
- * 2. Locking readers which will wait if a writer or another locking reader
- *    is in progress. A locking reader in progress will also block a writer
- *    from going forward. Unlike the regular rwlock, the read lock here is
- *    exclusive so that only one locking reader can get it.
- *
- * This is not as cache friendly as brlock. Also, this may not work well
- * for data that contains pointers, because any writer could
- * invalidate a pointer that a reader was following.
- *
- * Expected non-blocking reader usage:
- * 	do {
- *	    seq = read_seqbegin(&foo);
- * 	...
- *      } while (read_seqretry(&foo, seq));
- *
- *
- * On non-SMP the spin locks disappear but the writer still needs
- * to increment the sequence variables because an interrupt routine could
- * change the state of the data.
- *
- * Based on x86_64 vsyscall gettimeofday 
- * by Keith Owens and Andrea Arcangeli
+ * seqcount_t / seqlock_t - a reader-writer consistency mechanism with
+ * lockless readers (read-only retry loops), and no writer starvation.
+ *
+ * See Documentation/locking/seqlock.rst
+ *
+ * Copyrights:
+ * - Based on x86_64 vsyscall gettimeofday: Keith Owens, Andrea Arcangeli
  */
 
 #include <linux/spinlock.h>
@@ -41,8 +20,8 @@
 #include <asm/processor.h>
 
 /*
- * The seqlock interface does not prescribe a precise sequence of read
- * begin/retry/end. For readers, typically there is a call to
+ * The seqlock seqcount_t interface does not prescribe a precise sequence of
+ * read begin/retry/end. For readers, typically there is a call to
  * read_seqcount_begin() and read_seqcount_retry(), however, there are more
  * esoteric cases which do not follow this pattern.
  *
@@ -50,16 +29,30 @@
  * via seqcount_t under KCSAN: upon beginning a seq-reader critical section,
  * pessimistically mark the next KCSAN_SEQLOCK_REGION_MAX memory accesses as
  * atomics; if there is a matching read_seqcount_retry() call, no following
- * memory operations are considered atomic. Usage of seqlocks via seqlock_t
- * interface is not affected.
+ * memory operations are considered atomic. Usage of the seqlock_t interface
+ * is not affected.
  */
 #define KCSAN_SEQLOCK_REGION_MAX 1000
 
 /*
- * Version using sequence counter only.
- * This can be used when code has its own mutex protecting the
- * updating starting before the write_seqcountbeqin() and ending
- * after the write_seqcount_end().
+ * Sequence counters (seqcount_t)
+ *
+ * This is the raw counting mechanism, without any writer protection.
+ *
+ * Write side critical sections must be serialized and non-preemptible.
+ *
+ * If readers can be invoked from hardirq or softirq contexts,
+ * interrupts or bottom halves must also be respectively disabled before
+ * entering the write section.
+ *
+ * This mechanism can't be used if the protected data contains pointers,
+ * as the writer can invalidate a pointer that a reader is following.
+ *
+ * If it's desired to automatically handle the sequence counter writer
+ * serialization and non-preemptibility requirements, use a sequential
+ * lock (seqlock_t) instead.
+ *
+ * See Documentation/locking/seqlock.rst
  */
 typedef struct seqcount {
 	unsigned sequence;
@@ -398,10 +391,6 @@ static inline void raw_write_seqcount_latch(seqcount_t *s)
        smp_wmb();      /* increment "sequence" before following stores */
 }
 
-/*
- * Sequence counter only version assumes that callers are using their
- * own mutexing.
- */
 static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass)
 {
 	raw_write_seqcount_begin(s);
@@ -434,15 +423,21 @@ static inline void write_seqcount_invalidate(seqcount_t *s)
 	kcsan_nestable_atomic_end();
 }
 
+/*
+ * Sequential locks (seqlock_t)
+ *
+ * Sequence counters with an embedded spinlock for writer serialization
+ * and non-preemptibility.
+ *
+ * For more info, see:
+ *    - Comments on top of seqcount_t
+ *    - Documentation/locking/seqlock.rst
+ */
 typedef struct {
 	struct seqcount seqcount;
 	spinlock_t lock;
 } seqlock_t;
 
-/*
- * These macros triggered gcc-3.x compile-time problems.  We think these are
- * OK now.  Be cautious.
- */
 #define __SEQLOCK_UNLOCKED(lockname)			\
 	{						\
 		.seqcount = SEQCNT_ZERO(lockname),	\

^ permalink raw reply	[relevance 43%]

* [tip: locking/core] seqlock: Add kernel-doc for seqcount_t and seqlock_t APIs
  @ 2020-07-29 14:33 28% ` tip-bot2 for Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: tip-bot2 for Ahmed S. Darwish @ 2020-07-29 14:33 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Ahmed S. Darwish, Peter Zijlstra (Intel), x86, LKML

The following commit has been merged into the locking/core branch of tip:

Commit-ID:     89b88845e05752b3d684eaf147f457c8dfa99c5f
Gitweb:        https://git.kernel.org/tip/89b88845e05752b3d684eaf147f457c8dfa99c5f
Author:        Ahmed S. Darwish <a.darwish@linutronix.de>
AuthorDate:    Mon, 20 Jul 2020 17:55:11 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Wed, 29 Jul 2020 16:14:23 +02:00

seqlock: Add kernel-doc for seqcount_t and seqlock_t APIs

seqlock.h is now included in the kernel's RST documentation, but only a
small number of the exported seqlock.h functions are kernel-doc annotated.

Add kernel-doc for all seqlock.h exported APIs.

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200720155530.1173732-6-a.darwish@linutronix.de
---
 include/linux/seqlock.h | 425 +++++++++++++++++++++++++++++++--------
 1 file changed, 348 insertions(+), 77 deletions(-)

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 4c14560..85fb3ac 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -75,6 +75,10 @@ static inline void __seqcount_init(seqcount_t *s, const char *name,
 # define SEQCOUNT_DEP_MAP_INIT(lockname) \
 		.dep_map = { .name = #lockname } \
 
+/**
+ * seqcount_init() - runtime initializer for seqcount_t
+ * @s: Pointer to the seqcount_t instance
+ */
 # define seqcount_init(s)				\
 	do {						\
 		static struct lock_class_key __key;	\
@@ -98,13 +102,15 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
 # define seqcount_lockdep_reader_access(x)
 #endif
 
-#define SEQCNT_ZERO(lockname) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(lockname)}
-
+/**
+ * SEQCNT_ZERO() - static initializer for seqcount_t
+ * @name: Name of the seqcount_t instance
+ */
+#define SEQCNT_ZERO(name) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(name) }
 
 /**
- * __read_seqcount_begin - begin a seq-read critical section (without barrier)
- * @s: pointer to seqcount_t
- * Returns: count to be passed to read_seqcount_retry
+ * __read_seqcount_begin() - begin a seqcount_t read section w/o barrier
+ * @s: Pointer to seqcount_t
  *
  * __read_seqcount_begin is like read_seqcount_begin, but has no smp_rmb()
  * barrier. Callers should ensure that smp_rmb() or equivalent ordering is
@@ -113,6 +119,8 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
  *
  * Use carefully, only in critical code, and comment how the barrier is
  * provided.
+ *
+ * Return: count to be passed to read_seqcount_retry()
  */
 static inline unsigned __read_seqcount_begin(const seqcount_t *s)
 {
@@ -129,13 +137,10 @@ repeat:
 }
 
 /**
- * raw_read_seqcount_begin - start seq-read critical section w/o lockdep
- * @s: pointer to seqcount_t
- * Returns: count to be passed to read_seqcount_retry
+ * raw_read_seqcount_begin() - begin a seqcount_t read section w/o lockdep
+ * @s: Pointer to seqcount_t
  *
- * raw_read_seqcount_begin opens a read critical section of the given
- * seqcount, but without any lockdep checking. Validity of the critical
- * section is tested by checking read_seqcount_retry function.
+ * Return: count to be passed to read_seqcount_retry()
  */
 static inline unsigned raw_read_seqcount_begin(const seqcount_t *s)
 {
@@ -145,13 +150,10 @@ static inline unsigned raw_read_seqcount_begin(const seqcount_t *s)
 }
 
 /**
- * read_seqcount_begin - begin a seq-read critical section
- * @s: pointer to seqcount_t
- * Returns: count to be passed to read_seqcount_retry
+ * read_seqcount_begin() - begin a seqcount_t read critical section
+ * @s: Pointer to seqcount_t
  *
- * read_seqcount_begin opens a read critical section of the given seqcount.
- * Validity of the critical section is tested by checking read_seqcount_retry
- * function.
+ * Return: count to be passed to read_seqcount_retry()
  */
 static inline unsigned read_seqcount_begin(const seqcount_t *s)
 {
@@ -160,13 +162,15 @@ static inline unsigned read_seqcount_begin(const seqcount_t *s)
 }
 
 /**
- * raw_read_seqcount - Read the raw seqcount
- * @s: pointer to seqcount_t
- * Returns: count to be passed to read_seqcount_retry
+ * raw_read_seqcount() - read the raw seqcount_t counter value
+ * @s: Pointer to seqcount_t
  *
  * raw_read_seqcount opens a read critical section of the given
- * seqcount without any lockdep checking and without checking or
- * masking the LSB. Calling code is responsible for handling that.
+ * seqcount_t, without any lockdep checking, and without checking or
+ * masking the sequence counter LSB. Calling code is responsible for
+ * handling that.
+ *
+ * Return: count to be passed to read_seqcount_retry()
  */
 static inline unsigned raw_read_seqcount(const seqcount_t *s)
 {
@@ -177,18 +181,21 @@ static inline unsigned raw_read_seqcount(const seqcount_t *s)
 }
 
 /**
- * raw_seqcount_begin - begin a seq-read critical section
- * @s: pointer to seqcount_t
- * Returns: count to be passed to read_seqcount_retry
+ * raw_seqcount_begin() - begin a seqcount_t read critical section w/o
+ *                        lockdep and w/o counter stabilization
+ * @s: Pointer to seqcount_t
  *
- * raw_seqcount_begin opens a read critical section of the given seqcount.
- * Validity of the critical section is tested by checking read_seqcount_retry
- * function.
+ * raw_seqcount_begin opens a read critical section of the given
+ * seqcount_t. Unlike read_seqcount_begin(), this function will not wait
+ * for the count to stabilize. If a writer is active when it begins, it
+ * will fail the read_seqcount_retry() at the end of the read critical
+ * section instead of stabilizing at the beginning of it.
  *
- * Unlike read_seqcount_begin(), this function will not wait for the count
- * to stabilize. If a writer is active when we begin, we will fail the
- * read_seqcount_retry() instead of stabilizing at the beginning of the
- * critical section.
+ * Use this only in special kernel hot paths where the read section is
+ * small and has a high probability of success through other external
+ * means. It will save a single branching instruction.
+ *
+ * Return: count to be passed to read_seqcount_retry()
  */
 static inline unsigned raw_seqcount_begin(const seqcount_t *s)
 {
@@ -199,10 +206,9 @@ static inline unsigned raw_seqcount_begin(const seqcount_t *s)
 }
 
 /**
- * __read_seqcount_retry - end a seq-read critical section (without barrier)
- * @s: pointer to seqcount_t
- * @start: count, from read_seqcount_begin
- * Returns: 1 if retry is required, else 0
+ * __read_seqcount_retry() - end a seqcount_t read section w/o barrier
+ * @s: Pointer to seqcount_t
+ * @start: count, from read_seqcount_begin()
  *
  * __read_seqcount_retry is like read_seqcount_retry, but has no smp_rmb()
  * barrier. Callers should ensure that smp_rmb() or equivalent ordering is
@@ -211,6 +217,8 @@ static inline unsigned raw_seqcount_begin(const seqcount_t *s)
  *
  * Use carefully, only in critical code, and comment how the barrier is
  * provided.
+ *
+ * Return: true if a read section retry is required, else false
  */
 static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start)
 {
@@ -219,14 +227,15 @@ static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start)
 }
 
 /**
- * read_seqcount_retry - end a seq-read critical section
- * @s: pointer to seqcount_t
- * @start: count, from read_seqcount_begin
- * Returns: 1 if retry is required, else 0
+ * read_seqcount_retry() - end a seqcount_t read critical section
+ * @s: Pointer to seqcount_t
+ * @start: count, from read_seqcount_begin()
  *
- * read_seqcount_retry closes a read critical section of the given seqcount.
- * If the critical section was invalid, it must be ignored (and typically
- * retried).
+ * read_seqcount_retry closes the read critical section of given
+ * seqcount_t.  If the critical section was invalid, it must be ignored
+ * (and typically retried).
+ *
+ * Return: true if a read section retry is required, else false
  */
 static inline int read_seqcount_retry(const seqcount_t *s, unsigned start)
 {
@@ -234,6 +243,10 @@ static inline int read_seqcount_retry(const seqcount_t *s, unsigned start)
 	return __read_seqcount_retry(s, start);
 }
 
+/**
+ * raw_write_seqcount_begin() - start a seqcount_t write section w/o lockdep
+ * @s: Pointer to seqcount_t
+ */
 static inline void raw_write_seqcount_begin(seqcount_t *s)
 {
 	kcsan_nestable_atomic_begin();
@@ -241,6 +254,10 @@ static inline void raw_write_seqcount_begin(seqcount_t *s)
 	smp_wmb();
 }
 
+/**
+ * raw_write_seqcount_end() - end a seqcount_t write section w/o lockdep
+ * @s: Pointer to seqcount_t
+ */
 static inline void raw_write_seqcount_end(seqcount_t *s)
 {
 	smp_wmb();
@@ -248,17 +265,42 @@ static inline void raw_write_seqcount_end(seqcount_t *s)
 	kcsan_nestable_atomic_end();
 }
 
+/**
+ * write_seqcount_begin_nested() - start a seqcount_t write section with
+ *                                 custom lockdep nesting level
+ * @s: Pointer to seqcount_t
+ * @subclass: lockdep nesting level
+ *
+ * See Documentation/locking/lockdep-design.rst
+ */
 static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass)
 {
 	raw_write_seqcount_begin(s);
 	seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_);
 }
 
+/**
+ * write_seqcount_begin() - start a seqcount_t write side critical section
+ * @s: Pointer to seqcount_t
+ *
+ * write_seqcount_begin opens a write side critical section of the given
+ * seqcount_t.
+ *
+ * Context: seqcount_t write side critical sections must be serialized and
+ * non-preemptible. If readers can be invoked from hardirq or softirq
+ * context, interrupts or bottom halves must be respectively disabled.
+ */
 static inline void write_seqcount_begin(seqcount_t *s)
 {
 	write_seqcount_begin_nested(s, 0);
 }
 
+/**
+ * write_seqcount_end() - end a seqcount_t write side critical section
+ * @s: Pointer to seqcount_t
+ *
+ * The write section must've been opened with write_seqcount_begin().
+ */
 static inline void write_seqcount_end(seqcount_t *s)
 {
 	seqcount_release(&s->dep_map, _RET_IP_);
@@ -266,12 +308,12 @@ static inline void write_seqcount_end(seqcount_t *s)
 }
 
 /**
- * raw_write_seqcount_barrier - do a seq write barrier
- * @s: pointer to seqcount_t
+ * raw_write_seqcount_barrier() - do a seqcount_t write barrier
+ * @s: Pointer to seqcount_t
  *
- * This can be used to provide an ordering guarantee instead of the
- * usual consistency guarantee. It is one wmb cheaper, because we can
- * collapse the two back-to-back wmb()s.
+ * This can be used to provide an ordering guarantee instead of the usual
+ * consistency guarantee. It is one wmb cheaper, because it can collapse
+ * the two back-to-back wmb()s.
  *
  * Note that writes surrounding the barrier should be declared atomic (e.g.
  * via WRITE_ONCE): a) to ensure the writes become visible to other threads
@@ -316,11 +358,12 @@ static inline void raw_write_seqcount_barrier(seqcount_t *s)
 }
 
 /**
- * write_seqcount_invalidate - invalidate in-progress read-side seq operations
- * @s: pointer to seqcount_t
+ * write_seqcount_invalidate() - invalidate in-progress seqcount_t read
+ *                               side operations
+ * @s: Pointer to seqcount_t
  *
- * After write_seqcount_invalidate, no read-side seq operations will complete
- * successfully and see data older than this.
+ * After write_seqcount_invalidate, no seqcount_t read side operations
+ * will complete successfully and see data older than this.
  */
 static inline void write_seqcount_invalidate(seqcount_t *s)
 {
@@ -330,6 +373,21 @@ static inline void write_seqcount_invalidate(seqcount_t *s)
 	kcsan_nestable_atomic_end();
 }
 
+/**
+ * raw_read_seqcount_latch() - pick even/odd seqcount_t latch data copy
+ * @s: Pointer to seqcount_t
+ *
+ * Use seqcount_t latching to switch between two storage places protected
+ * by a sequence counter. Doing so allows having interruptible, preemptible,
+ * seqcount_t write side critical sections.
+ *
+ * Check raw_write_seqcount_latch() for more details and a full reader and
+ * writer usage example.
+ *
+ * Return: sequence counter raw value. Use the lowest bit as an index for
+ * picking which data copy to read. The full counter value must then be
+ * checked with read_seqcount_retry().
+ */
 static inline int raw_read_seqcount_latch(seqcount_t *s)
 {
 	/* Pairs with the first smp_wmb() in raw_write_seqcount_latch() */
@@ -338,8 +396,8 @@ static inline int raw_read_seqcount_latch(seqcount_t *s)
 }
 
 /**
- * raw_write_seqcount_latch - redirect readers to even/odd copy
- * @s: pointer to seqcount_t
+ * raw_write_seqcount_latch() - redirect readers to even/odd copy
+ * @s: Pointer to seqcount_t
  *
  * The latch technique is a multiversion concurrency control method that allows
  * queries during non-atomic modifications. If you can guarantee queries never
@@ -446,17 +504,28 @@ typedef struct {
 		.lock =	__SPIN_LOCK_UNLOCKED(lockname)	\
 	}
 
-#define seqlock_init(x)					\
+/**
+ * seqlock_init() - dynamic initializer for seqlock_t
+ * @sl: Pointer to the seqlock_t instance
+ */
+#define seqlock_init(sl)				\
 	do {						\
-		seqcount_init(&(x)->seqcount);		\
-		spin_lock_init(&(x)->lock);		\
+		seqcount_init(&(sl)->seqcount);		\
+		spin_lock_init(&(sl)->lock);		\
 	} while (0)
 
-#define DEFINE_SEQLOCK(x) \
-		seqlock_t x = __SEQLOCK_UNLOCKED(x)
+/**
+ * DEFINE_SEQLOCK() - Define a statically allocated seqlock_t
+ * @sl: Name of the seqlock_t instance
+ */
+#define DEFINE_SEQLOCK(sl) \
+		seqlock_t sl = __SEQLOCK_UNLOCKED(sl)
 
-/*
- * Read side functions for starting and finalizing a read side section.
+/**
+ * read_seqbegin() - start a seqlock_t read side critical section
+ * @sl: Pointer to seqlock_t
+ *
+ * Return: count, to be passed to read_seqretry()
  */
 static inline unsigned read_seqbegin(const seqlock_t *sl)
 {
@@ -467,6 +536,17 @@ static inline unsigned read_seqbegin(const seqlock_t *sl)
 	return ret;
 }
 
+/**
+ * read_seqretry() - end a seqlock_t read side section
+ * @sl: Pointer to seqlock_t
+ * @start: count, from read_seqbegin()
+ *
+ * read_seqretry closes the read side critical section of given seqlock_t.
+ * If the critical section was invalid, it must be ignored (and typically
+ * retried).
+ *
+ * Return: true if a read section retry is required, else false
+ */
 static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
 {
 	/*
@@ -478,10 +558,18 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
 	return read_seqcount_retry(&sl->seqcount, start);
 }
 
-/*
- * Lock out other writers and update the count.
- * Acts like a normal spin_lock/unlock.
- * Don't need preempt_disable() because that is in the spin_lock already.
+/**
+ * write_seqlock() - start a seqlock_t write side critical section
+ * @sl: Pointer to seqlock_t
+ *
+ * write_seqlock opens a write side critical section for the given
+ * seqlock_t.  It also implicitly acquires the spinlock_t embedded inside
+ * that sequential lock. All seqlock_t write side sections are thus
+ * automatically serialized and non-preemptible.
+ *
+ * Context: if the seqlock_t read section, or other write side critical
+ * sections, can be invoked from hardirq or softirq contexts, use the
+ * _irqsave or _bh variants of this function instead.
  */
 static inline void write_seqlock(seqlock_t *sl)
 {
@@ -489,30 +577,66 @@ static inline void write_seqlock(seqlock_t *sl)
 	write_seqcount_begin(&sl->seqcount);
 }
 
+/**
+ * write_sequnlock() - end a seqlock_t write side critical section
+ * @sl: Pointer to seqlock_t
+ *
+ * write_sequnlock closes the (serialized and non-preemptible) write side
+ * critical section of given seqlock_t.
+ */
 static inline void write_sequnlock(seqlock_t *sl)
 {
 	write_seqcount_end(&sl->seqcount);
 	spin_unlock(&sl->lock);
 }
 
+/**
+ * write_seqlock_bh() - start a softirqs-disabled seqlock_t write section
+ * @sl: Pointer to seqlock_t
+ *
+ * _bh variant of write_seqlock(). Use only if the read side section, or
+ * other write side sections, can be invoked from softirq contexts.
+ */
 static inline void write_seqlock_bh(seqlock_t *sl)
 {
 	spin_lock_bh(&sl->lock);
 	write_seqcount_begin(&sl->seqcount);
 }
 
+/**
+ * write_sequnlock_bh() - end a softirqs-disabled seqlock_t write section
+ * @sl: Pointer to seqlock_t
+ *
+ * write_sequnlock_bh closes the serialized, non-preemptible, and
+ * softirqs-disabled, seqlock_t write side critical section opened with
+ * write_seqlock_bh().
+ */
 static inline void write_sequnlock_bh(seqlock_t *sl)
 {
 	write_seqcount_end(&sl->seqcount);
 	spin_unlock_bh(&sl->lock);
 }
 
+/**
+ * write_seqlock_irq() - start a non-interruptible seqlock_t write section
+ * @sl: Pointer to seqlock_t
+ *
+ * _irq variant of write_seqlock(). Use only if the read side section, or
+ * other write sections, can be invoked from hardirq contexts.
+ */
 static inline void write_seqlock_irq(seqlock_t *sl)
 {
 	spin_lock_irq(&sl->lock);
 	write_seqcount_begin(&sl->seqcount);
 }
 
+/**
+ * write_sequnlock_irq() - end a non-interruptible seqlock_t write section
+ * @sl: Pointer to seqlock_t
+ *
+ * write_sequnlock_irq closes the serialized and non-interruptible
+ * seqlock_t write side section opened with write_seqlock_irq().
+ */
 static inline void write_sequnlock_irq(seqlock_t *sl)
 {
 	write_seqcount_end(&sl->seqcount);
@@ -528,9 +652,28 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl)
 	return flags;
 }
 
+/**
+ * write_seqlock_irqsave() - start a non-interruptible seqlock_t write
+ *                           section
+ * @lock:  Pointer to seqlock_t
+ * @flags: Stack-allocated storage for saving caller's local interrupt
+ *         state, to be passed to write_sequnlock_irqrestore().
+ *
+ * _irqsave variant of write_seqlock(). Use it only if the read side
+ * section, or other write sections, can be invoked from hardirq context.
+ */
 #define write_seqlock_irqsave(lock, flags)				\
 	do { flags = __write_seqlock_irqsave(lock); } while (0)
 
+/**
+ * write_sequnlock_irqrestore() - end non-interruptible seqlock_t write
+ *                                section
+ * @sl:    Pointer to seqlock_t
+ * @flags: Caller's saved interrupt state, from write_seqlock_irqsave()
+ *
+ * write_sequnlock_irqrestore closes the serialized and non-interruptible
+ * seqlock_t write section previously opened with write_seqlock_irqsave().
+ */
 static inline void
 write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags)
 {
@@ -538,36 +681,79 @@ write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags)
 	spin_unlock_irqrestore(&sl->lock, flags);
 }
 
-/*
- * A locking reader exclusively locks out other writers and locking readers,
- * but doesn't update the sequence number. Acts like a normal spin_lock/unlock.
- * Don't need preempt_disable() because that is in the spin_lock already.
+/**
+ * read_seqlock_excl() - begin a seqlock_t locking reader section
+ * @sl: Pointer to seqlock_t
+ *
+ * read_seqlock_excl opens a seqlock_t locking reader critical section.  A
+ * locking reader exclusively locks out *both* other writers *and* other
+ * locking readers, but it does not update the embedded sequence number.
+ *
+ * Locking readers act like a normal spin_lock()/spin_unlock().
+ *
+ * Context: if the seqlock_t write section, *or other read sections*, can
+ * be invoked from hardirq or softirq contexts, use the _irqsave or _bh
+ * variant of this function instead.
+ *
+ * The opened read section must be closed with read_sequnlock_excl().
  */
 static inline void read_seqlock_excl(seqlock_t *sl)
 {
 	spin_lock(&sl->lock);
 }
 
+/**
+ * read_sequnlock_excl() - end a seqlock_t locking reader critical section
+ * @sl: Pointer to seqlock_t
+ */
 static inline void read_sequnlock_excl(seqlock_t *sl)
 {
 	spin_unlock(&sl->lock);
 }
 
+/**
+ * read_seqlock_excl_bh() - start a seqlock_t locking reader section with
+ *			    softirqs disabled
+ * @sl: Pointer to seqlock_t
+ *
+ * _bh variant of read_seqlock_excl(). Use this variant only if the
+ * seqlock_t write side section, *or other read sections*, can be invoked
+ * from softirq contexts.
+ */
 static inline void read_seqlock_excl_bh(seqlock_t *sl)
 {
 	spin_lock_bh(&sl->lock);
 }
 
+/**
+ * read_sequnlock_excl_bh() - stop a seqlock_t softirq-disabled locking
+ *			      reader section
+ * @sl: Pointer to seqlock_t
+ */
 static inline void read_sequnlock_excl_bh(seqlock_t *sl)
 {
 	spin_unlock_bh(&sl->lock);
 }
 
+/**
+ * read_seqlock_excl_irq() - start a non-interruptible seqlock_t locking
+ *			     reader section
+ * @sl: Pointer to seqlock_t
+ *
+ * _irq variant of read_seqlock_excl(). Use this only if the seqlock_t
+ * write side section, *or other read sections*, can be invoked from a
+ * hardirq context.
+ */
 static inline void read_seqlock_excl_irq(seqlock_t *sl)
 {
 	spin_lock_irq(&sl->lock);
 }
 
+/**
+ * read_sequnlock_excl_irq() - end an interrupts-disabled seqlock_t
+ *                             locking reader section
+ * @sl: Pointer to seqlock_t
+ */
 static inline void read_sequnlock_excl_irq(seqlock_t *sl)
 {
 	spin_unlock_irq(&sl->lock);
@@ -581,9 +767,26 @@ static inline unsigned long __read_seqlock_excl_irqsave(seqlock_t *sl)
 	return flags;
 }
 
+/**
+ * read_seqlock_excl_irqsave() - start a non-interruptible seqlock_t
+ *				 locking reader section
+ * @lock:  Pointer to seqlock_t
+ * @flags: Stack-allocated storage for saving caller's local interrupt
+ *         state, to be passed to read_sequnlock_excl_irqrestore().
+ *
+ * _irqsave variant of read_seqlock_excl(). Use this only if the seqlock_t
+ * write side section, *or other read sections*, can be invoked from a
+ * hardirq context.
+ */
 #define read_seqlock_excl_irqsave(lock, flags)				\
 	do { flags = __read_seqlock_excl_irqsave(lock); } while (0)
 
+/**
+ * read_sequnlock_excl_irqrestore() - end non-interruptible seqlock_t
+ *				      locking reader section
+ * @sl:    Pointer to seqlock_t
+ * @flags: Caller saved interrupt state, from read_seqlock_excl_irqsave()
+ */
 static inline void
 read_sequnlock_excl_irqrestore(seqlock_t *sl, unsigned long flags)
 {
@@ -591,14 +794,35 @@ read_sequnlock_excl_irqrestore(seqlock_t *sl, unsigned long flags)
 }
 
 /**
- * read_seqbegin_or_lock - begin a sequence number check or locking block
- * @lock: sequence lock
- * @seq : sequence number to be checked
- *
- * First try it once optimistically without taking the lock. If that fails,
- * take the lock. The sequence number is also used as a marker for deciding
- * whether to be a reader (even) or writer (odd).
- * N.B. seq must be initialized to an even number to begin with.
+ * read_seqbegin_or_lock() - begin a seqlock_t lockless or locking reader
+ * @lock: Pointer to seqlock_t
+ * @seq : Marker and return parameter. If the passed value is even, the
+ * reader will become a *lockless* seqlock_t reader as in read_seqbegin().
+ * If the passed value is odd, the reader will become a *locking* reader
+ * as in read_seqlock_excl().  In the first call to this function, the
+ * caller *must* initialize and pass an even value to @seq; this way, a
+ * lockless read can be optimistically tried first.
+ *
+ * read_seqbegin_or_lock is an API designed to optimistically try a normal
+ * lockless seqlock_t read section first.  If an odd counter is found, the
+ * lockless read trial has failed, and the next read iteration transforms
+ * itself into a full seqlock_t locking reader.
+ *
+ * This is typically used to avoid starving seqlock_t lockless readers
+ * (too many retry loops) in the case of a sharp spike in write side
+ * activity.
+ *
+ * Context: if the seqlock_t write section, *or other read sections*, can
+ * be invoked from hardirq or softirq contexts, use the _irqsave or _bh
+ * variant of this function instead.
+ *
+ * Check Documentation/locking/seqlock.rst for template example code.
+ *
+ * Return: the encountered sequence counter value, through the @seq
+ * parameter, which is overloaded as a return parameter. This returned
+ * value must be checked with need_seqretry(). If the read section needs
+ * to be retried, this returned value must also be passed as the @seq
+ * parameter of the next read_seqbegin_or_lock() iteration.
  */
 static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq)
 {
@@ -608,17 +832,52 @@ static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq)
 		read_seqlock_excl(lock);
 }
 
+/**
+ * need_seqretry() - validate seqlock_t "locking or lockless" read section
+ * @lock: Pointer to seqlock_t
+ * @seq: sequence count, from read_seqbegin_or_lock()
+ *
+ * Return: true if a read section retry is required, false otherwise
+ */
 static inline int need_seqretry(seqlock_t *lock, int seq)
 {
 	return !(seq & 1) && read_seqretry(lock, seq);
 }
 
+/**
+ * done_seqretry() - end seqlock_t "locking or lockless" reader section
+ * @lock: Pointer to seqlock_t
+ * @seq: sequence count, from read_seqbegin_or_lock()
+ *
+ * done_seqretry() finishes the seqlock_t read side critical section
+ * started with read_seqbegin_or_lock() and validated by need_seqretry().
+ */
 static inline void done_seqretry(seqlock_t *lock, int seq)
 {
 	if (seq & 1)
 		read_sequnlock_excl(lock);
 }
 
+/**
+ * read_seqbegin_or_lock_irqsave() - begin a seqlock_t lockless reader, or
+ *                                   a non-interruptible locking reader
+ * @lock: Pointer to seqlock_t
+ * @seq:  Marker and return parameter. Check read_seqbegin_or_lock().
+ *
+ * This is the _irqsave variant of read_seqbegin_or_lock(). Use it only if
+ * the seqlock_t write section, *or other read sections*, can be invoked
+ * from hardirq context.
+ *
+ * Note: Interrupts will be disabled only for "locking reader" mode.
+ *
+ * Return:
+ *
+ *   1. The saved local interrupt state in case of a locking reader, to
+ *      be passed to done_seqretry_irqrestore().
+ *
+ *   2. The encountered sequence counter value, returned through @seq
+ *      overloaded as a return parameter. Check read_seqbegin_or_lock().
+ */
 static inline unsigned long
 read_seqbegin_or_lock_irqsave(seqlock_t *lock, int *seq)
 {
@@ -632,6 +891,18 @@ read_seqbegin_or_lock_irqsave(seqlock_t *lock, int *seq)
 	return flags;
 }
 
+/**
+ * done_seqretry_irqrestore() - end a seqlock_t lockless reader, or a
+ *				non-interruptible locking reader section
+ * @lock:  Pointer to seqlock_t
+ * @seq:   Count, from read_seqbegin_or_lock_irqsave()
+ * @flags: Caller's saved local interrupt state in case of a locking
+ *	   reader, also from read_seqbegin_or_lock_irqsave()
+ *
+ * This is the _irqrestore variant of done_seqretry(). The read section
+ * must have been opened with read_seqbegin_or_lock_irqsave() and
+ * validated by need_seqretry().
+ */
 static inline void
 done_seqretry_irqrestore(seqlock_t *lock, int seq, unsigned long flags)
 {

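For reference, the Documentation/locking/seqlock.rst template that the
kernel-doc above points to looks roughly like the sketch below
(foo_seqlock is a hypothetical seqlock_t; the marker starts even so the
first pass is a lockless read, and a failed pass turns the next
iteration into a locking read):

	/* marker; must start even */
	int seq = 0;

	do {
		read_seqbegin_or_lock(&foo_seqlock, &seq);

		/* ... [[read-side critical section]] ... */

	} while (need_seqretry(&foo_seqlock, seq));
	done_seqretry(&foo_seqlock, seq);
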
^ permalink raw reply	[relevance 28%]

* [tip: locking/core] dma-buf: Remove custom seqcount lockdep class key
  2020-07-20 15:55 91%   ` [PATCH v4 11/24] dma-buf: Remove custom seqcount lockdep class key Ahmed S. Darwish
@ 2020-07-29 14:33 77%     ` tip-bot2 for Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: tip-bot2 for Ahmed S. Darwish @ 2020-07-29 14:33 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Ahmed S. Darwish, Peter Zijlstra (Intel),
	Sebastian Andrzej Siewior, Daniel Vetter, x86, LKML

The following commit has been merged into the locking/core branch of tip:

Commit-ID:     318ce71f3e3ae4108c1665f3860afa8a2a4c9f02
Gitweb:        https://git.kernel.org/tip/318ce71f3e3ae4108c1665f3860afa8a2a4c9f02
Author:        Ahmed S. Darwish <a.darwish@linutronix.de>
AuthorDate:    Mon, 20 Jul 2020 17:55:17 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Wed, 29 Jul 2020 16:14:25 +02:00

dma-buf: Remove custom seqcount lockdep class key

Commit 3c3b177a9369 ("reservation: add support for read-only access
using rcu") introduced a sequence counter to manage updates to
reservations. Back then, the reservation object initializer
reservation_object_init() was always inlined.

Having the sequence counter initialization inlined meant that each of
the call sites would have a different lockdep class key, which would've
broken lockdep's deadlock detection. The aforementioned commit thus
introduced, and exported, a custom seqcount lockdep class key and name.

Commit 8735f16803f00 ("dma-buf: cleanup reservation_object_init...")
transformed the reservation object initializer into a normal, non-inlined
C function. seqcount_init(), which automatically defines the seqcount
lockdep class key and must not be called from inlined code, can now be
safely used.

Remove the seqcount custom lockdep class key, name, and export. Use
seqcount_init() inside the dma reservation object initializer.

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://lkml.kernel.org/r/20200720155530.1173732-12-a.darwish@linutronix.de
---
 drivers/dma-buf/dma-resv.c |  9 +--------
 include/linux/dma-resv.h   |  2 --
 2 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
index b45f851..15efa0c 100644
--- a/drivers/dma-buf/dma-resv.c
+++ b/drivers/dma-buf/dma-resv.c
@@ -51,12 +51,6 @@
 DEFINE_WD_CLASS(reservation_ww_class);
 EXPORT_SYMBOL(reservation_ww_class);
 
-struct lock_class_key reservation_seqcount_class;
-EXPORT_SYMBOL(reservation_seqcount_class);
-
-const char reservation_seqcount_string[] = "reservation_seqcount";
-EXPORT_SYMBOL(reservation_seqcount_string);
-
 /**
  * dma_resv_list_alloc - allocate fence list
  * @shared_max: number of fences we need space for
@@ -135,9 +129,8 @@ subsys_initcall(dma_resv_lockdep);
 void dma_resv_init(struct dma_resv *obj)
 {
 	ww_mutex_init(&obj->lock, &reservation_ww_class);
+	seqcount_init(&obj->seq);
 
-	__seqcount_init(&obj->seq, reservation_seqcount_string,
-			&reservation_seqcount_class);
 	RCU_INIT_POINTER(obj->fence, NULL);
 	RCU_INIT_POINTER(obj->fence_excl, NULL);
 }
diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
index ee50d10..a6538ae 100644
--- a/include/linux/dma-resv.h
+++ b/include/linux/dma-resv.h
@@ -46,8 +46,6 @@
 #include <linux/rcupdate.h>
 
 extern struct ww_class reservation_ww_class;
-extern struct lock_class_key reservation_seqcount_class;
-extern const char reservation_seqcount_string[];
 
 /**
  * struct dma_resv_list - a list of shared fences

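To sketch the mechanism described in the commit message: seqcount_init()
is a macro that declares a static lock_class_key at each expansion site,
so inlined initializers mint one key per call site, while a single
out-of-line initializer yields one shared key. The call sites below are
hypothetical:

	/* inlined initializer: every expansion site declares its own
	 * static __key, so objects land in different lockdep classes */
	seqcount_init(&resv_a->seq);		/* fileA.c: key #1 */
	seqcount_init(&resv_b->seq);		/* fileB.c: key #2 */

	/* out-of-line initializer: all callers funnel through one
	 * expansion, hence one key and one class for all objects */
	void dma_resv_init(struct dma_resv *obj)
	{
		seqcount_init(&obj->seq);	/* single static __key */
	}
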
^ permalink raw reply	[relevance 77%]

* [tip: locking/core] dma-buf: Use sequence counter with associated wound/wait mutex
  2020-07-20 15:55 82%   ` [PATCH v4 12/24] dma-buf: Use sequence counter with associated wound/wait mutex Ahmed S. Darwish
@ 2020-07-29 14:33 69%     ` tip-bot2 for Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: tip-bot2 for Ahmed S. Darwish @ 2020-07-29 14:33 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Ahmed S. Darwish, Peter Zijlstra (Intel), Daniel Vetter, x86, LKML

The following commit has been merged into the locking/core branch of tip:

Commit-ID:     cd29f22019ec4ab998d2e1e8c831c7c42db4aa7d
Gitweb:        https://git.kernel.org/tip/cd29f22019ec4ab998d2e1e8c831c7c42db4aa7d
Author:        Ahmed S. Darwish <a.darwish@linutronix.de>
AuthorDate:    Mon, 20 Jul 2020 17:55:18 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Wed, 29 Jul 2020 16:14:25 +02:00

dma-buf: Use sequence counter with associated wound/wait mutex

A sequence counter write side critical section must be protected by some
form of locking to serialize writers. If the serialization primitive is
not disabling preemption implicitly, preemption has to be explicitly
disabled before entering the sequence counter write side critical
section.

The dma-buf reservation subsystem uses plain sequence counters to manage
updates to reservations. Writer serialization is accomplished through a
wound/wait mutex.

Acquiring a wound/wait mutex does not disable preemption, so preemption
has to be disabled manually before the write side critical section and
re-enabled after it.

Use the newly-added seqcount_ww_mutex_t instead:

  - It associates the ww_mutex with the sequence count, which enables
    lockdep to validate that the write side critical section is properly
    serialized.

  - It removes the need to explicitly add preempt_disable/enable()
    around the write side critical section because the write_begin/end()
    functions for this new data type automatically do this.

If lockdep is disabled this ww_mutex lock association is compiled out
and has neither storage size nor runtime overhead.

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://lkml.kernel.org/r/20200720155530.1173732-13-a.darwish@linutronix.de
---
 drivers/dma-buf/dma-resv.c                       | 8 +-------
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 2 --
 include/linux/dma-resv.h                         | 2 +-
 3 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
index 15efa0c..a763135 100644
--- a/drivers/dma-buf/dma-resv.c
+++ b/drivers/dma-buf/dma-resv.c
@@ -129,7 +129,7 @@ subsys_initcall(dma_resv_lockdep);
 void dma_resv_init(struct dma_resv *obj)
 {
 	ww_mutex_init(&obj->lock, &reservation_ww_class);
-	seqcount_init(&obj->seq);
+	seqcount_ww_mutex_init(&obj->seq, &obj->lock);
 
 	RCU_INIT_POINTER(obj->fence, NULL);
 	RCU_INIT_POINTER(obj->fence_excl, NULL);
@@ -260,7 +260,6 @@ void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence)
 	fobj = dma_resv_get_list(obj);
 	count = fobj->shared_count;
 
-	preempt_disable();
 	write_seqcount_begin(&obj->seq);
 
 	for (i = 0; i < count; ++i) {
@@ -282,7 +281,6 @@ replace:
 	smp_store_mb(fobj->shared_count, count);
 
 	write_seqcount_end(&obj->seq);
-	preempt_enable();
 	dma_fence_put(old);
 }
 EXPORT_SYMBOL(dma_resv_add_shared_fence);
@@ -309,14 +307,12 @@ void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence)
 	if (fence)
 		dma_fence_get(fence);
 
-	preempt_disable();
 	write_seqcount_begin(&obj->seq);
 	/* write_seqcount_begin provides the necessary memory barrier */
 	RCU_INIT_POINTER(obj->fence_excl, fence);
 	if (old)
 		old->shared_count = 0;
 	write_seqcount_end(&obj->seq);
-	preempt_enable();
 
 	/* inplace update, no shared fences */
 	while (i--)
@@ -394,13 +390,11 @@ retry:
 	src_list = dma_resv_get_list(dst);
 	old = dma_resv_get_excl(dst);
 
-	preempt_disable();
 	write_seqcount_begin(&dst->seq);
 	/* write_seqcount_begin provides the necessary memory barrier */
 	RCU_INIT_POINTER(dst->fence_excl, new);
 	RCU_INIT_POINTER(dst->fence, dst_list);
 	write_seqcount_end(&dst->seq);
-	preempt_enable();
 
 	dma_resv_list_free(src_list);
 	dma_fence_put(old);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index b91b517..ff4b583 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -258,11 +258,9 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
 	new->shared_count = k;
 
 	/* Install the new fence list, seqcount provides the barriers */
-	preempt_disable();
 	write_seqcount_begin(&resv->seq);
 	RCU_INIT_POINTER(resv->fence, new);
 	write_seqcount_end(&resv->seq);
-	preempt_enable();
 
 	/* Drop the references to the removed fences or move them to ef_list */
 	for (i = j, k = 0; i < old->shared_count; ++i) {
diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
index a6538ae..d44a77e 100644
--- a/include/linux/dma-resv.h
+++ b/include/linux/dma-resv.h
@@ -69,7 +69,7 @@ struct dma_resv_list {
  */
 struct dma_resv {
 	struct ww_mutex lock;
-	seqcount_t seq;
+	seqcount_ww_mutex_t seq;
 
 	struct dma_fence __rcu *fence_excl;
 	struct dma_resv_list __rcu *fence;

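The resulting write side, as a minimal sketch (per the commit message,
write_seqcount_begin() on a seqcount_ww_mutex_t disables preemption
itself and lets lockdep verify that the associated ww_mutex is held):

	struct dma_resv r;

	dma_resv_init(&r);		/* associates r.seq with r.lock */

	ww_mutex_lock(&r.lock, NULL);	/* writer serialization */
	write_seqcount_begin(&r.seq);	/* no manual preempt_disable() */
	/* ... update the fence pointers ... */
	write_seqcount_end(&r.seq);
	ww_mutex_unlock(&r.lock);
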
^ permalink raw reply	[relevance 69%]

* [tip: locking/core] seqlock: Align multi-line macros newline escapes at 72 columns
  2020-07-20 15:55 96%   ` [PATCH v4 10/24] seqlock: Align multi-line macros newline escapes at 72 columns Ahmed S. Darwish
@ 2020-07-29 14:33 82%     ` tip-bot2 for Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: tip-bot2 for Ahmed S. Darwish @ 2020-07-29 14:33 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Ahmed S. Darwish, Peter Zijlstra (Intel), x86, LKML

The following commit has been merged into the locking/core branch of tip:

Commit-ID:     ec8702da570ebb59f38471007bf71359c51b027b
Gitweb:        https://git.kernel.org/tip/ec8702da570ebb59f38471007bf71359c51b027b
Author:        Ahmed S. Darwish <a.darwish@linutronix.de>
AuthorDate:    Mon, 20 Jul 2020 17:55:16 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Wed, 29 Jul 2020 16:14:25 +02:00

seqlock: Align multi-line macros newline escapes at 72 columns

Parent commit, "seqlock: Extend seqcount API with associated locks",
introduced a large number of multi-line macros that are newline-escaped
at 72 columns.

For overall cohesion, align the earlier-existing macros similarly.

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200720155530.1173732-11-a.darwish@linutronix.de
---
 include/linux/seqlock.h | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 8c16a49..b487299 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -80,17 +80,18 @@ static inline void __seqcount_init(seqcount_t *s, const char *name,
 }
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
-# define SEQCOUNT_DEP_MAP_INIT(lockname) \
-		.dep_map = { .name = #lockname } \
+
+# define SEQCOUNT_DEP_MAP_INIT(lockname)				\
+		.dep_map = { .name = #lockname }
 
 /**
  * seqcount_init() - runtime initializer for seqcount_t
  * @s: Pointer to the seqcount_t instance
  */
-# define seqcount_init(s)				\
-	do {						\
-		static struct lock_class_key __key;	\
-		__seqcount_init((s), #s, &__key);	\
+# define seqcount_init(s)						\
+	do {								\
+		static struct lock_class_key __key;			\
+		__seqcount_init((s), #s, &__key);			\
 	} while (0)
 
 static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
@@ -842,20 +843,20 @@ typedef struct {
 	spinlock_t lock;
 } seqlock_t;
 
-#define __SEQLOCK_UNLOCKED(lockname)			\
-	{						\
-		.seqcount = SEQCNT_ZERO(lockname),	\
-		.lock =	__SPIN_LOCK_UNLOCKED(lockname)	\
+#define __SEQLOCK_UNLOCKED(lockname)					\
+	{								\
+		.seqcount = SEQCNT_ZERO(lockname),			\
+		.lock =	__SPIN_LOCK_UNLOCKED(lockname)			\
 	}
 
 /**
  * seqlock_init() - dynamic initializer for seqlock_t
  * @sl: Pointer to the seqlock_t instance
  */
-#define seqlock_init(sl)				\
-	do {						\
-		seqcount_init(&(sl)->seqcount);		\
-		spin_lock_init(&(sl)->lock);		\
+#define seqlock_init(sl)						\
+	do {								\
+		seqcount_init(&(sl)->seqcount);				\
+		spin_lock_init(&(sl)->lock);				\
 	} while (0)
 
 /**

^ permalink raw reply	[relevance 82%]

* [tip: locking/core] netfilter: conntrack: Use sequence counter with associated spinlock
  2020-07-20 15:55 91%   ` [PATCH v4 14/24] netfilter: conntrack: " Ahmed S. Darwish
@ 2020-07-29 14:33 80%     ` tip-bot2 for Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: tip-bot2 for Ahmed S. Darwish @ 2020-07-29 14:33 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Ahmed S. Darwish, Peter Zijlstra (Intel), x86, LKML

The following commit has been merged into the locking/core branch of tip:

Commit-ID:     8201d923f492703a7d6c980cff3034759a452b86
Gitweb:        https://git.kernel.org/tip/8201d923f492703a7d6c980cff3034759a452b86
Author:        Ahmed S. Darwish <a.darwish@linutronix.de>
AuthorDate:    Mon, 20 Jul 2020 17:55:20 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Wed, 29 Jul 2020 16:14:26 +02:00

netfilter: conntrack: Use sequence counter with associated spinlock

A sequence counter write side critical section must be protected by some
form of locking to serialize writers. A plain seqcount_t does not
contain the information of which lock must be held when entering a write
side critical section.

Use the new seqcount_spinlock_t data type, which allows associating a
spinlock with the sequence counter. This enables lockdep to verify that
the spinlock used for writer serialization is held when the write side
critical section is entered.

If lockdep is disabled this lock association is compiled out and has
neither storage size nor runtime overhead.

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200720155530.1173732-15-a.darwish@linutronix.de
---
 include/net/netfilter/nf_conntrack.h | 2 +-
 net/netfilter/nf_conntrack_core.c    | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 90690e3..ea4e201 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -286,7 +286,7 @@ int nf_conntrack_hash_resize(unsigned int hashsize);
 
 extern struct hlist_nulls_head *nf_conntrack_hash;
 extern unsigned int nf_conntrack_htable_size;
-extern seqcount_t nf_conntrack_generation;
+extern seqcount_spinlock_t nf_conntrack_generation;
 extern unsigned int nf_conntrack_max;
 
 /* must be called with rcu read lock held */
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index f33d72c..b597b5b 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -180,7 +180,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
 
 unsigned int nf_conntrack_max __read_mostly;
 EXPORT_SYMBOL_GPL(nf_conntrack_max);
-seqcount_t nf_conntrack_generation __read_mostly;
+seqcount_spinlock_t nf_conntrack_generation __read_mostly;
 static unsigned int nf_conntrack_hash_rnd __read_mostly;
 
 static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple,
@@ -2600,7 +2600,8 @@ int nf_conntrack_init_start(void)
 	/* struct nf_ct_ext uses u8 to store offsets/size */
 	BUILD_BUG_ON(total_extension_size() > 255u);
 
-	seqcount_init(&nf_conntrack_generation);
+	seqcount_spinlock_init(&nf_conntrack_generation,
+			       &nf_conntrack_locks_all_lock);
 
 	for (i = 0; i < CONNTRACK_LOCKS; i++)
 		spin_lock_init(&nf_conntrack_locks[i]);

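A minimal sketch of the write side this lets lockdep check (not the
actual conntrack resize path; generation writers serialize on
nf_conntrack_locks_all_lock, which is now the counter's associated
lock):

	spin_lock(&nf_conntrack_locks_all_lock);
	write_seqcount_begin(&nf_conntrack_generation);
	/* ... install the new hash table ... */
	write_seqcount_end(&nf_conntrack_generation);
	spin_unlock(&nf_conntrack_locks_all_lock);

	/* with lockdep enabled, entering the write section without
	 * holding the associated spinlock now triggers a splat */
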
^ permalink raw reply	[relevance 80%]

* [tip: locking/core] xfrm: policy: Use sequence counters with associated lock
  2020-07-20 15:55 89%   ` [PATCH v4 16/24] xfrm: policy: Use sequence counters with associated lock Ahmed S. Darwish
@ 2020-07-29 14:33 77%     ` tip-bot2 for Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: tip-bot2 for Ahmed S. Darwish @ 2020-07-29 14:33 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Ahmed S. Darwish, Peter Zijlstra (Intel), x86, LKML

The following commit has been merged into the locking/core branch of tip:

Commit-ID:     77cc278f7b202e4f16f8596837219d02cb090b96
Gitweb:        https://git.kernel.org/tip/77cc278f7b202e4f16f8596837219d02cb090b96
Author:        Ahmed S. Darwish <a.darwish@linutronix.de>
AuthorDate:    Mon, 20 Jul 2020 17:55:22 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Wed, 29 Jul 2020 16:14:27 +02:00

xfrm: policy: Use sequence counters with associated lock

A sequence counter write side critical section must be protected by some
form of locking to serialize writers. If the serialization primitive is
not disabling preemption implicitly, preemption has to be explicitly
disabled before entering the sequence counter write side critical
section.

A plain seqcount_t does not contain the information of which lock must
be held when entering a write side critical section.

Use the new seqcount_spinlock_t and seqcount_mutex_t data types instead,
which allow associating a lock with the sequence counter. This enables
lockdep to verify that the lock used for writer serialization is held
when the write side critical section is entered.

If lockdep is disabled this lock association is compiled out and has
neither storage size nor runtime overhead.

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200720155530.1173732-17-a.darwish@linutronix.de
---
 net/xfrm/xfrm_policy.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 564aa64..732a940 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -122,7 +122,7 @@ struct xfrm_pol_inexact_bin {
 	/* list containing '*:*' policies */
 	struct hlist_head hhead;
 
-	seqcount_t count;
+	seqcount_spinlock_t count;
 	/* tree sorted by daddr/prefix */
 	struct rb_root root_d;
 
@@ -155,7 +155,7 @@ static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1]
 						__read_mostly;
 
 static struct kmem_cache *xfrm_dst_cache __ro_after_init;
-static __read_mostly seqcount_t xfrm_policy_hash_generation;
+static __read_mostly seqcount_mutex_t xfrm_policy_hash_generation;
 
 static struct rhashtable xfrm_policy_inexact_table;
 static const struct rhashtable_params xfrm_pol_inexact_params;
@@ -719,7 +719,7 @@ xfrm_policy_inexact_alloc_bin(const struct xfrm_policy *pol, u8 dir)
 	INIT_HLIST_HEAD(&bin->hhead);
 	bin->root_d = RB_ROOT;
 	bin->root_s = RB_ROOT;
-	seqcount_init(&bin->count);
+	seqcount_spinlock_init(&bin->count, &net->xfrm.xfrm_policy_lock);
 
 	prev = rhashtable_lookup_get_insert_key(&xfrm_policy_inexact_table,
 						&bin->k, &bin->head,
@@ -1906,7 +1906,7 @@ static int xfrm_policy_match(const struct xfrm_policy *pol,
 
 static struct xfrm_pol_inexact_node *
 xfrm_policy_lookup_inexact_addr(const struct rb_root *r,
-				seqcount_t *count,
+				seqcount_spinlock_t *count,
 				const xfrm_address_t *addr, u16 family)
 {
 	const struct rb_node *parent;
@@ -4153,7 +4153,7 @@ void __init xfrm_init(void)
 {
 	register_pernet_subsys(&xfrm_net_ops);
 	xfrm_dev_init();
-	seqcount_init(&xfrm_policy_hash_generation);
+	seqcount_mutex_init(&xfrm_policy_hash_generation, &hash_resize_mutex);
 	xfrm_input_init();
 
 #ifdef CONFIG_INET_ESPINTCP

^ permalink raw reply	[relevance 77%]

* [tip: locking/core] sched: tasks: Use sequence counter with associated spinlock
  2020-07-20 15:55 92%   ` [PATCH v4 13/24] sched: tasks: Use sequence counter with associated spinlock Ahmed S. Darwish
@ 2020-07-29 14:33 80%     ` tip-bot2 for Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: tip-bot2 for Ahmed S. Darwish @ 2020-07-29 14:33 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Ahmed S. Darwish, Peter Zijlstra (Intel), x86, LKML

The following commit has been merged into the locking/core branch of tip:

Commit-ID:     b75058614fdd3140074a640b514f6a0b4d485a2d
Gitweb:        https://git.kernel.org/tip/b75058614fdd3140074a640b514f6a0b4d485a2d
Author:        Ahmed S. Darwish <a.darwish@linutronix.de>
AuthorDate:    Mon, 20 Jul 2020 17:55:19 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Wed, 29 Jul 2020 16:14:26 +02:00

sched: tasks: Use sequence counter with associated spinlock

A sequence counter write side critical section must be protected by some
form of locking to serialize writers. A plain seqcount_t does not
contain the information of which lock must be held when entering a write
side critical section.

Use the new seqcount_spinlock_t data type, which allows associating a
spinlock with the sequence counter. This enables lockdep to verify that
the spinlock used for writer serialization is held when the write side
critical section is entered.

If lockdep is disabled this lock association is compiled out and has
neither storage size nor runtime overhead.

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200720155530.1173732-14-a.darwish@linutronix.de
---
 include/linux/sched.h | 2 +-
 init/init_task.c      | 3 ++-
 kernel/fork.c         | 2 +-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8d1de02..9a9d826 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1050,7 +1050,7 @@ struct task_struct {
 	/* Protected by ->alloc_lock: */
 	nodemask_t			mems_allowed;
 	/* Seqence number to catch updates: */
-	seqcount_t			mems_allowed_seq;
+	seqcount_spinlock_t		mems_allowed_seq;
 	int				cpuset_mem_spread_rotor;
 	int				cpuset_slab_spread_rotor;
 #endif
diff --git a/init/init_task.c b/init/init_task.c
index 15089d1..94fe3ba 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -154,7 +154,8 @@ struct task_struct init_task
 	.trc_holdout_list = LIST_HEAD_INIT(init_task.trc_holdout_list),
 #endif
 #ifdef CONFIG_CPUSETS
-	.mems_allowed_seq = SEQCNT_ZERO(init_task.mems_allowed_seq),
+	.mems_allowed_seq = SEQCNT_SPINLOCK_ZERO(init_task.mems_allowed_seq,
+						 &init_task.alloc_lock),
 #endif
 #ifdef CONFIG_RT_MUTEXES
 	.pi_waiters	= RB_ROOT_CACHED,
diff --git a/kernel/fork.c b/kernel/fork.c
index 70d9d0a..fc72f09 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2032,7 +2032,7 @@ static __latent_entropy struct task_struct *copy_process(
 #ifdef CONFIG_CPUSETS
 	p->cpuset_mem_spread_rotor = NUMA_NO_NODE;
 	p->cpuset_slab_spread_rotor = NUMA_NO_NODE;
-	seqcount_init(&p->mems_allowed_seq);
+	seqcount_spinlock_init(&p->mems_allowed_seq, &p->alloc_lock);
 #endif
 #ifdef CONFIG_TRACE_IRQFLAGS
 	p->irq_events = 0;

^ permalink raw reply	[relevance 80%]

* [tip: locking/core] vfs: Use sequence counter with associated spinlock
  2020-07-20 15:55 88%   ` [PATCH v4 18/24] vfs: Use sequence counter with associated spinlock Ahmed S. Darwish
@ 2020-07-29 14:33 76%     ` tip-bot2 for Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: tip-bot2 for Ahmed S. Darwish @ 2020-07-29 14:33 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Ahmed S. Darwish, Peter Zijlstra (Intel), x86, LKML

The following commit has been merged into the locking/core branch of tip:

Commit-ID:     26475371976c69489d3a8e6c8bbf35afbbc25055
Gitweb:        https://git.kernel.org/tip/26475371976c69489d3a8e6c8bbf35afbbc25055
Author:        Ahmed S. Darwish <a.darwish@linutronix.de>
AuthorDate:    Mon, 20 Jul 2020 17:55:24 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Wed, 29 Jul 2020 16:14:27 +02:00

vfs: Use sequence counter with associated spinlock

A sequence counter write side critical section must be protected by some
form of locking to serialize writers. A plain seqcount_t does not
contain the information of which lock must be held when entering a write
side critical section.

Use the new seqcount_spinlock_t data type, which allows associating a
spinlock with the sequence counter. This enables lockdep to verify that
the spinlock used for writer serialization is held when the write side
critical section is entered.

If lockdep is disabled this lock association is compiled out and has
neither storage size nor runtime overhead.

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200720155530.1173732-19-a.darwish@linutronix.de
---
 fs/dcache.c               | 2 +-
 fs/fs_struct.c            | 4 ++--
 include/linux/dcache.h    | 2 +-
 include/linux/fs_struct.h | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index 361ea7a..ea04858 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1746,7 +1746,7 @@ static struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
 	dentry->d_lockref.count = 1;
 	dentry->d_flags = 0;
 	spin_lock_init(&dentry->d_lock);
-	seqcount_init(&dentry->d_seq);
+	seqcount_spinlock_init(&dentry->d_seq, &dentry->d_lock);
 	dentry->d_inode = NULL;
 	dentry->d_parent = dentry;
 	dentry->d_sb = sb;
diff --git a/fs/fs_struct.c b/fs/fs_struct.c
index ca639ed..04b3f5b 100644
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -117,7 +117,7 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old)
 		fs->users = 1;
 		fs->in_exec = 0;
 		spin_lock_init(&fs->lock);
-		seqcount_init(&fs->seq);
+		seqcount_spinlock_init(&fs->seq, &fs->lock);
 		fs->umask = old->umask;
 
 		spin_lock(&old->lock);
@@ -163,6 +163,6 @@ EXPORT_SYMBOL(current_umask);
 struct fs_struct init_fs = {
 	.users		= 1,
 	.lock		= __SPIN_LOCK_UNLOCKED(init_fs.lock),
-	.seq		= SEQCNT_ZERO(init_fs.seq),
+	.seq		= SEQCNT_SPINLOCK_ZERO(init_fs.seq, &init_fs.lock),
 	.umask		= 0022,
 };
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index a81f0c3..65d975b 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -89,7 +89,7 @@ extern struct dentry_stat_t dentry_stat;
 struct dentry {
 	/* RCU lookup touched fields */
 	unsigned int d_flags;		/* protected by d_lock */
-	seqcount_t d_seq;		/* per dentry seqlock */
+	seqcount_spinlock_t d_seq;	/* per dentry seqlock */
 	struct hlist_bl_node d_hash;	/* lookup hash list */
 	struct dentry *d_parent;	/* parent directory */
 	struct qstr d_name;
diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h
index cf1015a..783b48d 100644
--- a/include/linux/fs_struct.h
+++ b/include/linux/fs_struct.h
@@ -9,7 +9,7 @@
 struct fs_struct {
 	int users;
 	spinlock_t lock;
-	seqcount_t seq;
+	seqcount_spinlock_t seq;
 	int umask;
 	int in_exec;
 	struct path root, pwd;

^ permalink raw reply	[relevance 76%]

* [tip: locking/core] iocost: Use sequence counter with associated spinlock
  2020-07-20 15:55 94%   ` [PATCH v4 20/24] iocost: " Ahmed S. Darwish
@ 2020-07-29 14:33 80%     ` tip-bot2 for Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: tip-bot2 for Ahmed S. Darwish @ 2020-07-29 14:33 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Ahmed S. Darwish, Peter Zijlstra (Intel), Daniel Wagner, x86, LKML

The following commit has been merged into the locking/core branch of tip:

Commit-ID:     67b7b641ca69cafb467f7560316b09b8ff0fa5c9
Gitweb:        https://git.kernel.org/tip/67b7b641ca69cafb467f7560316b09b8ff0fa5c9
Author:        Ahmed S. Darwish <a.darwish@linutronix.de>
AuthorDate:    Mon, 20 Jul 2020 17:55:26 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Wed, 29 Jul 2020 16:14:28 +02:00

iocost: Use sequence counter with associated spinlock

A sequence counter write side critical section must be protected by some
form of locking to serialize writers. A plain seqcount_t does not
contain the information of which lock must be held when entering a write
side critical section.

Use the new seqcount_spinlock_t data type, which allows associating a
spinlock with the sequence counter. This enables lockdep to verify that
the spinlock used for writer serialization is held when the write side
critical section is entered.

If lockdep is disabled this lock association is compiled out and has
neither storage size nor runtime overhead.

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Daniel Wagner <dwagner@suse.de>
Link: https://lkml.kernel.org/r/20200720155530.1173732-21-a.darwish@linutronix.de
---
 block/blk-iocost.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 8ac4aad..8e940c2 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -406,7 +406,7 @@ struct ioc {
 	enum ioc_running		running;
 	atomic64_t			vtime_rate;
 
-	seqcount_t			period_seqcount;
+	seqcount_spinlock_t		period_seqcount;
 	u32				period_at;	/* wallclock starttime */
 	u64				period_at_vtime; /* vtime starttime */
 
@@ -873,7 +873,6 @@ static void ioc_now(struct ioc *ioc, struct ioc_now *now)
 
 static void ioc_start_period(struct ioc *ioc, struct ioc_now *now)
 {
-	lockdep_assert_held(&ioc->lock);
 	WARN_ON_ONCE(ioc->running != IOC_RUNNING);
 
 	write_seqcount_begin(&ioc->period_seqcount);
@@ -2001,7 +2000,7 @@ static int blk_iocost_init(struct request_queue *q)
 
 	ioc->running = IOC_IDLE;
 	atomic64_set(&ioc->vtime_rate, VTIME_PER_USEC);
-	seqcount_init(&ioc->period_seqcount);
+	seqcount_spinlock_init(&ioc->period_seqcount, &ioc->lock);
 	ioc->period_at = ktime_to_us(ktime_get());
 	atomic64_set(&ioc->cur_period, 0);
 	atomic_set(&ioc->hweight_gen, 0);

^ permalink raw reply	[relevance 80%]

* [tip: locking/core] kvm/eventfd: Use sequence counter with associated spinlock
  2020-07-20 15:55 94%   ` [PATCH v4 23/24] kvm/eventfd: " Ahmed S. Darwish
@ 2020-07-29 14:33 81%     ` tip-bot2 for Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: tip-bot2 for Ahmed S. Darwish @ 2020-07-29 14:33 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Ahmed S. Darwish, Peter Zijlstra (Intel), Paolo Bonzini, x86, LKML

The following commit has been merged into the locking/core branch of tip:

Commit-ID:     5c73b9a2b1b4ecc809a914aa64970157b3d8c936
Gitweb:        https://git.kernel.org/tip/5c73b9a2b1b4ecc809a914aa64970157b3d8c936
Author:        Ahmed S. Darwish <a.darwish@linutronix.de>
AuthorDate:    Mon, 20 Jul 2020 17:55:29 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Wed, 29 Jul 2020 16:14:29 +02:00

kvm/eventfd: Use sequence counter with associated spinlock

A sequence counter write side critical section must be protected by some
form of locking to serialize writers. A plain seqcount_t does not
contain the information of which lock must be held when entering a write
side critical section.

Use the new seqcount_spinlock_t data type, which allows associating a
spinlock with the sequence counter. This enables lockdep to verify that
the spinlock used for writer serialization is held when the write side
critical section is entered.

If lockdep is disabled this lock association is compiled out and has
neither storage size nor runtime overhead.

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Link: https://lkml.kernel.org/r/20200720155530.1173732-24-a.darwish@linutronix.de
---
 include/linux/kvm_irqfd.h | 2 +-
 virt/kvm/eventfd.c        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h
index dc1da02..dac047a 100644
--- a/include/linux/kvm_irqfd.h
+++ b/include/linux/kvm_irqfd.h
@@ -42,7 +42,7 @@ struct kvm_kernel_irqfd {
 	wait_queue_entry_t wait;
 	/* Update side is protected by irqfds.lock */
 	struct kvm_kernel_irq_routing_entry irq_entry;
-	seqcount_t irq_entry_sc;
+	seqcount_spinlock_t irq_entry_sc;
 	/* Used for level IRQ fast-path */
 	int gsi;
 	struct work_struct inject;
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index ef7ed91..d6408bb 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -303,7 +303,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 	INIT_LIST_HEAD(&irqfd->list);
 	INIT_WORK(&irqfd->inject, irqfd_inject);
 	INIT_WORK(&irqfd->shutdown, irqfd_shutdown);
-	seqcount_init(&irqfd->irq_entry_sc);
+	seqcount_spinlock_init(&irqfd->irq_entry_sc, &kvm->irqfds.lock);
 
 	f = fdget(args->fd);
 	if (!f.file) {

^ permalink raw reply	[relevance 81%]

* [tip: locking/core] raid5: Use sequence counter with associated spinlock
  2020-07-20 15:55 94%   ` [PATCH v4 19/24] raid5: " Ahmed S. Darwish
@ 2020-07-29 14:33 80%     ` tip-bot2 for Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: tip-bot2 for Ahmed S. Darwish @ 2020-07-29 14:33 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Ahmed S. Darwish, Peter Zijlstra (Intel), Song Liu, x86, LKML

The following commit has been merged into the locking/core branch of tip:

Commit-ID:     0a87b25ff2eb6169403c88b0d5f3c97bdaa3c930
Gitweb:        https://git.kernel.org/tip/0a87b25ff2eb6169403c88b0d5f3c97bdaa3c930
Author:        Ahmed S. Darwish <a.darwish@linutronix.de>
AuthorDate:    Mon, 20 Jul 2020 17:55:25 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Wed, 29 Jul 2020 16:14:27 +02:00

raid5: Use sequence counter with associated spinlock

A sequence counter write side critical section must be protected by some
form of locking to serialize writers. A plain seqcount_t does not
contain the information of which lock must be held when entering a write
side critical section.

Use the new seqcount_spinlock_t data type, which allows associating a
spinlock with the sequence counter. This enables lockdep to verify that
the spinlock used for writer serialization is held when the write side
critical section is entered.

If lockdep is disabled this lock association is compiled out and has
neither storage size nor runtime overhead.

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Song Liu <song@kernel.org>
Link: https://lkml.kernel.org/r/20200720155530.1173732-20-a.darwish@linutronix.de
---
 drivers/md/raid5.c | 2 +-
 drivers/md/raid5.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index ab8067f..892aefe 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -6935,7 +6935,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
 	} else
 		goto abort;
 	spin_lock_init(&conf->device_lock);
-	seqcount_init(&conf->gen_lock);
+	seqcount_spinlock_init(&conf->gen_lock, &conf->device_lock);
 	mutex_init(&conf->cache_size_mutex);
 	init_waitqueue_head(&conf->wait_for_quiescent);
 	init_waitqueue_head(&conf->wait_for_stripe);
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index f90e070..a2c9e9e 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -589,7 +589,7 @@ struct r5conf {
 	int			prev_chunk_sectors;
 	int			prev_algo;
 	short			generation; /* increments with every reshape */
-	seqcount_t		gen_lock;	/* lock against generation changes */
+	seqcount_spinlock_t	gen_lock;	/* lock against generation changes */
 	unsigned long		reshape_checkpoint; /* Time we last updated
 						     * metadata */
 	long long		min_offset_diff; /* minimum difference between

^ permalink raw reply	[relevance 80%]

* [tip: locking/core] hrtimer: Use sequence counter with associated raw spinlock
  2020-07-20 15:55 93%   ` [PATCH v4 24/24] hrtimer: Use sequence counter with associated raw spinlock Ahmed S. Darwish
@ 2020-07-29 14:33 79%     ` tip-bot2 for Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: tip-bot2 for Ahmed S. Darwish @ 2020-07-29 14:33 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Ahmed S. Darwish, Peter Zijlstra (Intel), x86, LKML

The following commit has been merged into the locking/core branch of tip:

Commit-ID:     af5a06b582ec3d7b0160b4faaa65f73d8dcf989f
Gitweb:        https://git.kernel.org/tip/af5a06b582ec3d7b0160b4faaa65f73d8dcf989f
Author:        Ahmed S. Darwish <a.darwish@linutronix.de>
AuthorDate:    Mon, 20 Jul 2020 17:55:30 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Wed, 29 Jul 2020 16:14:29 +02:00

hrtimer: Use sequence counter with associated raw spinlock

A sequence counter write side critical section must be protected by some
form of locking to serialize writers. A plain seqcount_t does not
contain the information of which lock must be held when entering a write
side critical section.

Use the new seqcount_raw_spinlock_t data type, which allows associating
a raw spinlock with the sequence counter. This enables lockdep to verify
that the raw spinlock used for writer serialization is held when the
write side critical section is entered.

If lockdep is disabled this lock association is compiled out and has
neither storage size nor runtime overhead.

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200720155530.1173732-25-a.darwish@linutronix.de
---
 include/linux/hrtimer.h |  2 +-
 kernel/time/hrtimer.c   | 13 ++++++++++---
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 15c8ac3..25993b8 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -159,7 +159,7 @@ struct hrtimer_clock_base {
 	struct hrtimer_cpu_base	*cpu_base;
 	unsigned int		index;
 	clockid_t		clockid;
-	seqcount_t		seq;
+	seqcount_raw_spinlock_t	seq;
 	struct hrtimer		*running;
 	struct timerqueue_head	active;
 	ktime_t			(*get_time)(void);
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index d89da1c..c403851 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -135,7 +135,11 @@ static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = {
  * timer->base->cpu_base
  */
 static struct hrtimer_cpu_base migration_cpu_base = {
-	.clock_base = { { .cpu_base = &migration_cpu_base, }, },
+	.clock_base = { {
+		.cpu_base = &migration_cpu_base,
+		.seq      = SEQCNT_RAW_SPINLOCK_ZERO(migration_cpu_base.seq,
+						     &migration_cpu_base.lock),
+	}, },
 };
 
 #define migration_base	migration_cpu_base.clock_base[0]
@@ -1998,8 +2002,11 @@ int hrtimers_prepare_cpu(unsigned int cpu)
 	int i;
 
 	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
-		cpu_base->clock_base[i].cpu_base = cpu_base;
-		timerqueue_init_head(&cpu_base->clock_base[i].active);
+		struct hrtimer_clock_base *clock_b = &cpu_base->clock_base[i];
+
+		clock_b->cpu_base = cpu_base;
+		seqcount_raw_spinlock_init(&clock_b->seq, &cpu_base->lock);
+		timerqueue_init_head(&clock_b->active);
 	}
 
 	cpu_base->cpu = cpu;

^ permalink raw reply	[relevance 79%]

* Re: [PATCH 2/5] seqlock: Fold seqcount_LOCKNAME_t definition
  @ 2020-07-29 14:38 99%   ` Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: Ahmed S. Darwish @ 2020-07-29 14:38 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: mingo, will, tglx, paulmck, bigeasy, rostedt, linux-kernel,
	corbet, davem, netdev, linux-doc, viro, linux-fsdevel

On Wed, Jul 29, 2020 at 03:52:51PM +0200, Peter Zijlstra wrote:
> Manual repetition is boring and error prone.
>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---

...

> +/**
> + * typedef seqcount_LOCKNAME_t - sequence counter with spinlock associated

                                                        ^ associated lock

> + * @seqcount:	The real sequence counter
> + * @lock:	Pointer to the associated spinlock
> + *

              ^ Pointer to the associated write serialization lock

> + * A plain sequence counter with external writer synchronization by a
> + * spinlock. The spinlock is associated to the sequence count in the
> + * static initializer or init function. This enables lockdep to validate
> + * that the write side critical section is properly serialized.

ditto, you forgot to change the associated spinlock language to generic
locks ;-)

Thanks,

--
Ahmed S. Darwish
Linutronix GmbH

^ permalink raw reply	[relevance 99%]

* Re: [PATCH 5/5] seqcount: More consistent seqprop names
  @ 2020-07-29 14:39 99%   ` Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: Ahmed S. Darwish @ 2020-07-29 14:39 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: mingo, will, tglx, paulmck, bigeasy, rostedt, linux-kernel,
	corbet, davem, netdev, linux-doc, viro, linux-fsdevel

On Wed, Jul 29, 2020 at 03:52:54PM +0200, Peter Zijlstra wrote:
> Attempt uniformity and brevity.
>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---

Acked-by: Ahmed S. Darwish <a.darwish@linutronix.de>

^ permalink raw reply	[relevance 99%]

* Re: [PATCH v4 08/24] seqlock: lockdep assert non-preemptibility on seqcount_t write
  @ 2020-08-09 18:42 99%       ` Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: Ahmed S. Darwish @ 2020-08-09 18:42 UTC (permalink / raw)
  To: Guenter Roeck
  Cc: Peter Zijlstra, Ingo Molnar, Will Deacon, Thomas Gleixner,
	Paul E. McKenney, Sebastian A. Siewior, Steven Rostedt, LKML

On Sat, Aug 08, 2020 at 04:21:22PM -0700, Guenter Roeck wrote:
> On Mon, Jul 20, 2020 at 05:55:14PM +0200, Ahmed S. Darwish wrote:
> > Preemption must be disabled before entering a sequence count write side
> > critical section.  Failing to do so, the seqcount read side can preempt
> > the write side section and spin for the entire scheduler tick.  If that
> > reader belongs to a real-time scheduling class, it can spin forever and
> > the kernel will livelock.
> >
> > Assert through lockdep that preemption is disabled for seqcount writers.
> >
>
> This patch is causing compile failures for various images (eg arm:allmodconfig,
> arm:imx_v6_v7_defconfig, mips:allmodconfig).
>
> In file included from arch/arm/include/asm/bug.h:60,
>                  from include/linux/bug.h:5,
>                  from include/linux/thread_info.h:12,
>                  from include/asm-generic/current.h:5,
>                  from ./arch/arm/include/generated/asm/current.h:1,
>                  from include/linux/sched.h:12,
>                  from arch/arm/kernel/asm-offsets.c:11:
> include/linux/seqlock.h: In function 'write_seqcount_begin_nested':
> include/asm-generic/percpu.h:31:40: error: implicit declaration of function 'raw_smp_processor_id'
>
> Reverting it fixes the problem. Is this being addressed ?
>

@Peter, I think let's revert this one for now?

Shall I also send you a version of:

    [PATCH v4 09/24] seqlock: Extend seqcount API with associated locks
    https://lkml.kernel.org/r/20200720155530.1173732-10-a.darwish@linutronix.de/

with the problematic "lockdep_assert_preemption_disabled()" removed?

> Guenter
>

Thanks,

--
Ahmed S. Darwish
Linutronix GmbH

^ permalink raw reply	[relevance 99%]

* [PATCH] Revert "seqlock: lockdep assert non-preemptibility on seqcount_t write"
  @ 2020-08-10  9:54 88% ` Ahmed S. Darwish
    0 siblings, 1 reply; 200+ results
From: Ahmed S. Darwish @ 2020-08-10  9:54 UTC (permalink / raw)
  To: gregkh
  Cc: a.darwish, bigeasy, linux-kernel, linux, mingo, paulmck, peterz,
	rostedt, tglx, will

This reverts commit 859247d39fb008ea812e8f0c398a58a20c12899e.

Current implementation of lockdep_assert_preemption_disabled() uses
per-CPU variables, which was done to untangle the existing
seqlock.h<=>sched.h 'current->' task_struct circular dependency.

Using per-CPU variables did not fully untangle the dependency for
various non-x86 architectures though, resulting in multiple broken
builds. For the affected architectures, raw_smp_processor_id() led
back to 'current->', thus leaving the original seqlock.h<=>sched.h
dependency in full effect.

For now, revert adding lockdep_assert_preemption_disabled() to
seqlock.h.

Reported-by: Guenter Roeck <linux@roeck-us.net>
Link: https://lkml.kernel.org/r/20200808232122.GA176509@roeck-us.net
Link: https://lkml.kernel.org/r/20200810085954.GA1591892@kroah.com
References: Commit a21ee6055c30 ("lockdep: Change hardirq{s_enabled,_context} to per-cpu variables")
Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
---

Notes:
    My apologies for the mess on this one :-(

 include/linux/seqlock.h | 29 ++++++-----------------------
 1 file changed, 6 insertions(+), 23 deletions(-)

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 54bc20496392..e885702d8b82 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -266,12 +266,6 @@ static inline void raw_write_seqcount_end(seqcount_t *s)
 	kcsan_nestable_atomic_end();
 }
 
-static inline void __write_seqcount_begin_nested(seqcount_t *s, int subclass)
-{
-	raw_write_seqcount_begin(s);
-	seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_);
-}
-
 /**
  * write_seqcount_begin_nested() - start a seqcount_t write section with
  *                                 custom lockdep nesting level
@@ -282,19 +276,8 @@ static inline void __write_seqcount_begin_nested(seqcount_t *s, int subclass)
  */
 static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass)
 {
-	lockdep_assert_preemption_disabled();
-	__write_seqcount_begin_nested(s, subclass);
-}
-
-/*
- * A write_seqcount_begin() variant w/o lockdep non-preemptibility checks.
- *
- * Use for internal seqlock.h code where it's known that preemption is
- * already disabled. For example, seqlock_t write side functions.
- */
-static inline void __write_seqcount_begin(seqcount_t *s)
-{
-	__write_seqcount_begin_nested(s, 0);
+	raw_write_seqcount_begin(s);
+	seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_);
 }
 
 /**
@@ -592,7 +575,7 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
 static inline void write_seqlock(seqlock_t *sl)
 {
 	spin_lock(&sl->lock);
-	__write_seqcount_begin(&sl->seqcount);
+	write_seqcount_begin(&sl->seqcount);
 }
 
 /**
@@ -618,7 +601,7 @@ static inline void write_sequnlock(seqlock_t *sl)
 static inline void write_seqlock_bh(seqlock_t *sl)
 {
 	spin_lock_bh(&sl->lock);
-	__write_seqcount_begin(&sl->seqcount);
+	write_seqcount_begin(&sl->seqcount);
 }
 
 /**
@@ -645,7 +628,7 @@ static inline void write_sequnlock_bh(seqlock_t *sl)
 static inline void write_seqlock_irq(seqlock_t *sl)
 {
 	spin_lock_irq(&sl->lock);
-	__write_seqcount_begin(&sl->seqcount);
+	write_seqcount_begin(&sl->seqcount);
 }
 
 /**
@@ -666,7 +649,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl)
 	unsigned long flags;
 
 	spin_lock_irqsave(&sl->lock, flags);
-	__write_seqcount_begin(&sl->seqcount);
+	write_seqcount_begin(&sl->seqcount);
 	return flags;
 }
 
-- 
2.28.0


^ permalink raw reply	[relevance 88%]

* Re: [PATCH] Revert "seqlock: lockdep assert non-preemptibility on seqcount_t write"
  @ 2020-08-10 10:35 99%     ` Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: Ahmed S. Darwish @ 2020-08-10 10:35 UTC (permalink / raw)
  To: Greg KH
  Cc: bigeasy, linux-kernel, linux, mingo, paulmck, peterz, rostedt,
	tglx, will

On Mon, Aug 10, 2020 at 12:05:02PM +0200, Greg KH wrote:
> On Mon, Aug 10, 2020 at 11:54:28AM +0200, Ahmed S. Darwish wrote:
> > This reverts commit 859247d39fb008ea812e8f0c398a58a20c12899e.
> >
> > Current implementation of lockdep_assert_preemption_disabled() uses
> > per-CPU variables, which was done to untangle the existing
> > seqlock.h<=>sched.h 'current->' task_struct circular dependency.
> >
> > Using per-CPU variables did not fully untangle the dependency for
> > various non-x86 architectures though, resulting in multiple broken
> > builds. For the affected architectures, raw_smp_processor_id() led
> > back to 'current->', thus having the original seqlock.h<=>sched.h
> > dependency in full-effect.
> >
> > For now, revert adding lockdep_assert_preemption_disabled() to
> > seqlock.h.
> >
> > Reported-by: Guenter Roeck <linux@roeck-us.net>
> > Link: https://lkml.kernel.org/r/20200808232122.GA176509@roeck-us.net
> > Link: https://lkml.kernel.org/r/20200810085954.GA1591892@kroah.com
> > References: Commit a21ee6055c30 ("lockdep: Change hardirq{s_enabled,_context} to per-cpu variables")
> > Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
>
> Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
>
> Even after this, there are still some build errors on arm32, but I don't
> think they are due to this change:
>
> 	ERROR: modpost: "__aeabi_uldivmod" [drivers/net/ethernet/sfc/sfc.ko] undefined!
> 	ERROR: modpost: "__bad_udelay" [drivers/net/ethernet/aquantia/atlantic/atlantic.ko] undefined!
>

Yes, they are unrelated to the seqlock.h changes.

(I've locally reverted the whole series just to be sure, and the same
 linking errors as above were still there for an ARM allyesconfig.)

Thanks,

--
Ahmed S. Darwish
Linutronix GmbH

^ permalink raw reply	[relevance 99%]

* v5.9-rc1 commit reliably breaks pci nvme detection
@ 2020-08-17 13:50 97% Ahmed S. Darwish
    0 siblings, 1 reply; 200+ results
From: Ahmed S. Darwish @ 2020-08-17 13:50 UTC (permalink / raw)
  To: Keith Busch, Jens Axboe, Sagi Grimberg
  Cc: Chaitanya Kulkarni, linux-nvme, linux-kernel

Hello,

The v5.9-rc1 commit below reliably breaks my boot on a Thinkpad e480
laptop. PCI NVMe detection fails, and the kernel is then unable to
find the rootfs / parse "root=".

Bisecting v5.8=>v5.9-rc1 blames that commit. Reverting it *reliably*
fixes the problem and lets me boot v5.9-rc1.

Please advise.

commit 61f3b89630973037f67d8e25e5d26e80a51a7b37
Author: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Date:   Wed Jun 17 10:05:13 2020 +0200

    nvme-pci: use unsigned for io queue depth
    
    The NVMe PCIe driver declares the module parameter io_queue_depth as
    int. Change it to u16, as a queue depth can never be negative. To
    reflect this, switch the module parameter getter from param_get_int()
    to param_get_uint(), and the setter from param_set_int() to
    param_set_ushort(), with the type of its local variable n changed
    from int to u16. Finally, change the struct nvme_dev q_depth member
    to u16, use u16 in the min_t() that computes dev->q_depth in
    nvme_pci_enable() (since q_depth is now u16), and use unsigned int
    instead of int when calculating dev->tagset.queue_depth in
    nvme_dev_add(), as the target variable tagset->queue_depth is of
    type unsigned int.

Thanks,

--
Ahmed S. Darwish
Linutronix GmbH

^ permalink raw reply	[relevance 97%]

* Re: v5.9-rc1 commit reliably breaks pci nvme detection
  @ 2020-08-20 17:07 99%       ` Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: Ahmed S. Darwish @ 2020-08-20 17:07 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Keith Busch, linux-nvme, Sagi Grimberg, Chaitanya Kulkarni, linux-kernel

On Thu, Aug 20, 2020 at 09:35:38AM -0600, Jens Axboe wrote:
> On 8/17/20 9:58 AM, Jens Axboe wrote:
> > On 8/17/20 8:56 AM, Keith Busch wrote:
> >> On Mon, Aug 17, 2020 at 03:50:11PM +0200, Ahmed S. Darwish wrote:
> >>> Hello,
> >>>
> >>> Below v5.9-rc1 commit reliably breaks my boot on a Thinkpad e480
> >>> laptop. PCI nvme detection fails, and the kernel becomes not able
> >>> anymore to find the rootfs / parse "root=".
> >>>
> >>> Bisecting v5.8=>v5.9-rc1 blames that commit. Reverting it *reliably*
> >>> fixes the problem and makes me able to boot v5.9-rc1.
> >>
> >> The fix is staged in the nvme tree here:
> >>
> >>   http://git.infradead.org/nvme.git/commit/286155561ecd13b6c85a78eaf2880d3baea03b9e
> >
> > That would have been nice to have in -rc1...
>
> And now we're getting very close to shipping items for -rc2, and it's still
> not in. Can we please get the nvme pull request out for -rc2?
>

I keep wondering about that myself. Completely breaking the boot like this is
really not nice -- and for x86-64 laptops no less :-(

The fix is really small and isolated. "Urgent pull requests", containing
only a fix or two, were created *exactly* for this reason...

Thanks,

--
Ahmed S. Darwish
Linutronix GmbH

^ permalink raw reply	[relevance 99%]

* [PATCH v1 0/8] seqlock: Introduce seqcount_latch_t
    @ 2020-08-27 11:40 91% ` Ahmed S. Darwish
  2020-08-27 11:40 97%   ` [PATCH v1 1/8] time/sched_clock: Use raw_read_seqcount_latch() during suspend Ahmed S. Darwish
                     ` (7 more replies)
  2020-08-28  1:07 95% ` [PATCH v1 0/5] seqlock: Introduce PREEMPT_RT support Ahmed S. Darwish
  2020-09-10 15:08 63% ` [tip: locking/core] seqlock: seqcount_LOCKNAME_t: " tip-bot2 for Ahmed S. Darwish
  3 siblings, 8 replies; 200+ results
From: Ahmed S. Darwish @ 2020-08-27 11:40 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Will Deacon
  Cc: Thomas Gleixner, Sebastian A. Siewior, LKML, Ahmed S. Darwish,
	Borislav Petkov, x86, H. Peter Anvin

Latch sequence counters are a multiversion concurrency control mechanism
where the embedded seqcount_t counter's even/odd value is used to switch
between two copies of protected data. This allows the sequence counter
read side to be invoked from NMIs and safely interrupt its own write
side critical section.

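A rough sketch of the mechanism, modeled on the example in seqlock.h
(latch_struct and its query function are hypothetical):

	struct latch_struct {
		seqcount_latch_t	seq;
		struct data		data[2];
	};

	static struct data *latch_query(struct latch_struct *latch)
	{
		struct data *entry;
		unsigned int seq, idx;

		do {
			seq = raw_read_seqcount_latch(&latch->seq);
			idx = seq & 0x01;	/* even/odd picks the copy */
			entry = &latch->data[idx];
			/* ... read *entry ... */
		} while (read_seqcount_latch_retry(&latch->seq, seq));

		return entry;
	}
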
Initially, latch sequence counters were implemented as a single write
function on top of plain seqcount_t, raw_write_seqcount_latch(). The
read path was expected to use the plain seqcount_t raw_read_seqcount().

A specialized latch read function, raw_read_seqcount_latch(), was later
added and became the standardized way for latch read paths. Thanks to
its dependent load, it needs one less read memory barrier than the more
generic raw_read_seqcount() API.

Only raw_write_seqcount_latch() and raw_read_seqcount_latch() should be
used with latch sequence counters. Having unique read and write path
APIs means that latch sequence counters are actually a data type of
their own -- just inappropriately overloading plain seqcount_t.

Introduce seqcount_latch_t. This adds type-safety and ensures that only
the correct latch-safe APIs are used.

To avoid breaking bisection, let the latch APIs also accept plain
seqcount_t or seqcount_raw_spinlock_t for now. After converting all call
sites to seqcount_latch_t (patches #4 => #7), only allow
seqcount_latch_t.

Thanks,

8<--------------

Ahmed S. Darwish (8):
  time/sched_clock: Use raw_read_seqcount_latch() during suspend
  mm/swap: Do not abuse the seqcount_t latching API
  seqlock: Introduce seqcount_latch_t
  time/sched_clock: Use seqcount_latch_t
  timekeeping: Use seqcount_latch_t
  x86/tsc: Use seqcount_latch_t
  rbtree_latch: Use seqcount_latch_t
  seqlock: seqcount latch APIs: Only allow seqcount_latch_t

 Documentation/locking/seqlock.rst | 18 ++++++
 arch/x86/kernel/tsc.c             | 12 ++--
 include/linux/rbtree_latch.h      |  6 +-
 include/linux/seqlock.h           | 96 +++++++++++++++++++++----------
 kernel/time/sched_clock.c         |  6 +-
 kernel/time/timekeeping.c         | 10 ++--
 mm/swap.c                         | 65 +++++++++++++++++----
 7 files changed, 156 insertions(+), 57 deletions(-)

base-commit: d012a7190fc1fd72ed48911e77ca97ba4521bccd
--
2.28.0

^ permalink raw reply	[relevance 91%]

* [PATCH v1 4/8] time/sched_clock: Use seqcount_latch_t
  2020-08-27 11:40 91% ` [PATCH v1 0/8] seqlock: Introduce seqcount_latch_t Ahmed S. Darwish
                     ` (2 preceding siblings ...)
  2020-08-27 11:40 67%   ` [PATCH v1 3/8] seqlock: Introduce seqcount_latch_t Ahmed S. Darwish
@ 2020-08-27 11:40 99%   ` Ahmed S. Darwish
  2020-09-10 15:08 86%     ` [tip: locking/core] " tip-bot2 for Ahmed S. Darwish
  2020-08-27 11:40 88%   ` [PATCH v1 5/8] timekeeping: " Ahmed S. Darwish
                     ` (3 subsequent siblings)
  7 siblings, 1 reply; 200+ results
From: Ahmed S. Darwish @ 2020-08-27 11:40 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Will Deacon
  Cc: Thomas Gleixner, Sebastian A. Siewior, LKML, Ahmed S. Darwish

Latch sequence counters have unique read and write APIs, and thus
seqcount_latch_t was recently introduced in seqlock.h.

Use that new data type instead of plain seqcount_t. This adds the
necessary type-safety and ensures that only latching-safe seqcount APIs
are used.

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
---
 kernel/time/sched_clock.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index 8c6b5febd7a0..0642013dace4 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -35,7 +35,7 @@
  * into a single 64-byte cache line.
  */
 struct clock_data {
-	seqcount_t		seq;
+	seqcount_latch_t	seq;
 	struct clock_read_data	read_data[2];
 	ktime_t			wrap_kt;
 	unsigned long		rate;
@@ -76,7 +76,7 @@ struct clock_read_data *sched_clock_read_begin(unsigned int *seq)
 
 int sched_clock_read_retry(unsigned int seq)
 {
-	return read_seqcount_retry(&cd.seq, seq);
+	return read_seqcount_latch_retry(&cd.seq, seq);
 }
 
 unsigned long long notrace sched_clock(void)
-- 
2.28.0


^ permalink raw reply	[relevance 99%]

* [PATCH v1 8/8] seqlock: seqcount latch APIs: Only allow seqcount_latch_t
  2020-08-27 11:40 91% ` [PATCH v1 0/8] seqlock: Introduce seqcount_latch_t Ahmed S. Darwish
                     ` (6 preceding siblings ...)
  2020-08-27 11:40 99%   ` [PATCH v1 7/8] rbtree_latch: " Ahmed S. Darwish
@ 2020-08-27 11:40 89%   ` Ahmed S. Darwish
  2020-09-10 15:08 75%     ` [tip: locking/core] " tip-bot2 for Ahmed S. Darwish
  7 siblings, 1 reply; 200+ results
From: Ahmed S. Darwish @ 2020-08-27 11:40 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Will Deacon
  Cc: Thomas Gleixner, Sebastian A. Siewior, LKML, Ahmed S. Darwish

All latch sequence counter call-sites have now been converted from plain
seqcount_t to the new seqcount_latch_t data type.

Enforce type-safety by modifying seqlock.h latch APIs to only accept
seqcount_latch_t.

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
---
 include/linux/seqlock.h | 36 +++++++++++++++---------------------
 1 file changed, 15 insertions(+), 21 deletions(-)

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index f83fbf2180db..04db28499331 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -620,7 +620,7 @@ static inline void seqcount_latch_init(seqcount_latch_t *s)
 
 /**
  * raw_read_seqcount_latch() - pick even/odd latch data copy
- * @s: Pointer to seqcount_t, seqcount_raw_spinlock_t, or seqcount_latch_t
+ * @s: Pointer to seqcount_latch_t
  *
  * See raw_write_seqcount_latch() for details and a full reader/writer
  * usage example.
@@ -629,17 +629,14 @@ static inline void seqcount_latch_init(seqcount_latch_t *s)
  * picking which data copy to read. The full counter must then be checked
  * with read_seqcount_latch_retry().
  */
-#define raw_read_seqcount_latch(s)						\
-({										\
-	/*									\
-	 * Pairs with the first smp_wmb() in raw_write_seqcount_latch().	\
-	 * Due to the dependent load, a full smp_rmb() is not needed.		\
-	 */									\
-	_Generic(*(s),								\
-		 seqcount_t:		  READ_ONCE(((seqcount_t *)s)->sequence),			\
-		 seqcount_raw_spinlock_t: READ_ONCE(((seqcount_raw_spinlock_t *)s)->seqcount.sequence),	\
-		 seqcount_latch_t:	  READ_ONCE(((seqcount_latch_t *)s)->seqcount.sequence));	\
-})
+static inline unsigned raw_read_seqcount_latch(const seqcount_latch_t *s)
+{
+	/*
+	 * Pairs with the first smp_wmb() in raw_write_seqcount_latch().
+	 * Due to the dependent load, a full smp_rmb() is not needed.
+	 */
+	return READ_ONCE(s->seqcount.sequence);
+}
 
 /**
  * read_seqcount_latch_retry() - end a seqcount_latch_t read section
@@ -656,7 +653,7 @@ read_seqcount_latch_retry(const seqcount_latch_t *s, unsigned start)
 
 /**
  * raw_write_seqcount_latch() - redirect latch readers to even/odd copy
- * @s: Pointer to seqcount_t, seqcount_raw_spinlock_t, or seqcount_latch_t
+ * @s: Pointer to seqcount_latch_t
  *
  * The latch technique is a multiversion concurrency control method that allows
  * queries during non-atomic modifications. If you can guarantee queries never
@@ -735,14 +732,11 @@ read_seqcount_latch_retry(const seqcount_latch_t *s, unsigned start)
  *	When data is a dynamic data structure; one should use regular RCU
  *	patterns to manage the lifetimes of the objects within.
  */
-#define raw_write_seqcount_latch(s)						\
-{										\
-       smp_wmb();      /* prior stores before incrementing "sequence" */	\
-       _Generic(*(s),								\
-		seqcount_t:		((seqcount_t *)s)->sequence++,		\
-		seqcount_raw_spinlock_t:((seqcount_raw_spinlock_t *)s)->seqcount.sequence++, \
-		seqcount_latch_t:	((seqcount_latch_t *)s)->seqcount.sequence++); \
-       smp_wmb();      /* increment "sequence" before following stores */	\
+static inline void raw_write_seqcount_latch(seqcount_latch_t *s)
+{
+	smp_wmb();	/* prior stores before incrementing "sequence" */
+	s->seqcount.sequence++;
+	smp_wmb();      /* increment "sequence" before following stores */
 }
 
 /*
-- 
2.28.0


^ permalink raw reply	[relevance 89%]

* [PATCH v1 7/8] rbtree_latch: Use seqcount_latch_t
  2020-08-27 11:40 91% ` [PATCH v1 0/8] seqlock: Introduce seqcount_latch_t Ahmed S. Darwish
                     ` (5 preceding siblings ...)
  2020-08-27 11:40 94%   ` [PATCH v1 6/8] x86/tsc: " Ahmed S. Darwish
@ 2020-08-27 11:40 99%   ` Ahmed S. Darwish
  2020-09-10 15:08 86%     ` [tip: locking/core] " tip-bot2 for Ahmed S. Darwish
  2020-08-27 11:40 89%   ` [PATCH v1 8/8] seqlock: seqcount latch APIs: Only allow seqcount_latch_t Ahmed S. Darwish
  7 siblings, 1 reply; 200+ results
From: Ahmed S. Darwish @ 2020-08-27 11:40 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Will Deacon
  Cc: Thomas Gleixner, Sebastian A. Siewior, LKML, Ahmed S. Darwish

Latch sequence counters have unique read and write APIs, and thus
seqcount_latch_t was recently introduced in seqlock.h.

Use that new data type instead of plain seqcount_t. This adds the
necessary type-safety and ensures that only latching-safe seqcount APIs
are used.

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
---
 include/linux/rbtree_latch.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/linux/rbtree_latch.h b/include/linux/rbtree_latch.h
index 7d012faa509a..3d1a9e716b80 100644
--- a/include/linux/rbtree_latch.h
+++ b/include/linux/rbtree_latch.h
@@ -42,8 +42,8 @@ struct latch_tree_node {
 };
 
 struct latch_tree_root {
-	seqcount_t	seq;
-	struct rb_root	tree[2];
+	seqcount_latch_t	seq;
+	struct rb_root		tree[2];
 };
 
 /**
@@ -206,7 +206,7 @@ latch_tree_find(void *key, struct latch_tree_root *root,
 	do {
 		seq = raw_read_seqcount_latch(&root->seq);
 		node = __lt_find(key, root, seq & 1, ops->comp);
-	} while (read_seqcount_retry(&root->seq, seq));
+	} while (read_seqcount_latch_retry(&root->seq, seq));
 
 	return node;
 }
-- 
2.28.0


^ permalink raw reply	[relevance 99%]

* [PATCH v1 6/8] x86/tsc: Use seqcount_latch_t
  2020-08-27 11:40 91% ` [PATCH v1 0/8] seqlock: Introduce seqcount_latch_t Ahmed S. Darwish
                     ` (4 preceding siblings ...)
  2020-08-27 11:40 88%   ` [PATCH v1 5/8] timekeeping: " Ahmed S. Darwish
@ 2020-08-27 11:40 94%   ` Ahmed S. Darwish
    2020-09-10 15:08 81%     ` [tip: locking/core] " tip-bot2 for Ahmed S. Darwish
  2020-08-27 11:40 99%   ` [PATCH v1 7/8] rbtree_latch: " Ahmed S. Darwish
  2020-08-27 11:40 89%   ` [PATCH v1 8/8] seqlock: seqcount latch APIs: Only allow seqcount_latch_t Ahmed S. Darwish
  7 siblings, 2 replies; 200+ results
From: Ahmed S. Darwish @ 2020-08-27 11:40 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Will Deacon
  Cc: Thomas Gleixner, Sebastian A. Siewior, LKML, Ahmed S. Darwish,
	Borislav Petkov, x86, H. Peter Anvin

Latch sequence counters have unique read and write APIs, and thus
seqcount_latch_t was recently introduced in seqlock.h.

Use that new data type instead of plain seqcount_t. This adds the
necessary type-safety and ensures that only latching-safe seqcount APIs
are used.

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
---
 arch/x86/kernel/tsc.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 49d925043171..cbf17e9f1d03 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -54,7 +54,7 @@ struct clocksource *art_related_clocksource;
 
 struct cyc2ns {
 	struct cyc2ns_data data[2];	/*  0 + 2*16 = 32 */
-	seqcount_t	   seq;		/* 32 + 4    = 36 */
+	seqcount_latch_t   seq;		/* 32 + 4    = 36 */
 
 }; /* fits one cacheline */
 
@@ -68,19 +68,21 @@ early_param("tsc_early_khz", tsc_early_khz_setup);
 
 __always_inline void cyc2ns_read_begin(struct cyc2ns_data *data)
 {
+	seqcount_latch_t *seqcount;
 	int seq, idx;
 
 	preempt_disable_notrace();
 
+	seqcount = &this_cpu_ptr(&cyc2ns)->seq;
 	do {
-		seq = this_cpu_read(cyc2ns.seq.sequence);
+		seq = raw_read_seqcount_latch(seqcount);
 		idx = seq & 1;
 
 		data->cyc2ns_offset = this_cpu_read(cyc2ns.data[idx].cyc2ns_offset);
 		data->cyc2ns_mul    = this_cpu_read(cyc2ns.data[idx].cyc2ns_mul);
 		data->cyc2ns_shift  = this_cpu_read(cyc2ns.data[idx].cyc2ns_shift);
 
-	} while (unlikely(seq != this_cpu_read(cyc2ns.seq.sequence)));
+	} while (read_seqcount_latch_retry(seqcount, seq));
 }
 
 __always_inline void cyc2ns_read_end(void)
@@ -186,7 +188,7 @@ static void __init cyc2ns_init_boot_cpu(void)
 {
 	struct cyc2ns *c2n = this_cpu_ptr(&cyc2ns);
 
-	seqcount_init(&c2n->seq);
+	seqcount_latch_init(&c2n->seq);
 	__set_cyc2ns_scale(tsc_khz, smp_processor_id(), rdtsc());
 }
 
@@ -203,7 +205,7 @@ static void __init cyc2ns_init_secondary_cpus(void)
 
 	for_each_possible_cpu(cpu) {
 		if (cpu != this_cpu) {
-			seqcount_init(&c2n->seq);
+			seqcount_latch_init(&c2n->seq);
 			c2n = per_cpu_ptr(&cyc2ns, cpu);
 			c2n->data[0] = data[0];
 			c2n->data[1] = data[1];
-- 
2.28.0


^ permalink raw reply	[relevance 94%]

* [PATCH v1 5/8] timekeeping: Use seqcount_latch_t
  2020-08-27 11:40 91% ` [PATCH v1 0/8] seqlock: Introduce seqcount_latch_t Ahmed S. Darwish
                     ` (3 preceding siblings ...)
  2020-08-27 11:40 99%   ` [PATCH v1 4/8] time/sched_clock: Use seqcount_latch_t Ahmed S. Darwish
@ 2020-08-27 11:40 88%   ` Ahmed S. Darwish
  2020-09-10 15:08 74%     ` [tip: locking/core] " tip-bot2 for Ahmed S. Darwish
  2020-08-27 11:40 94%   ` [PATCH v1 6/8] x86/tsc: " Ahmed S. Darwish
                     ` (2 subsequent siblings)
  7 siblings, 1 reply; 200+ results
From: Ahmed S. Darwish @ 2020-08-27 11:40 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Will Deacon
  Cc: Thomas Gleixner, Sebastian A. Siewior, LKML, Ahmed S. Darwish

Latch sequence counters are a multiversion concurrency control mechanism
where the seqcount_t counter even/odd value is used to switch between
two data storage copies. This allows the seqcount_t read path to safely
interrupt its write side critical section (e.g. from NMIs).

Initially, latch sequence counters were implemented as a single write
function, raw_write_seqcount_latch(), on top of plain seqcount_t. The
read path was expected to use the plain seqcount_t raw_read_seqcount().

A specialized read function was later added, raw_read_seqcount_latch(),
and became the standardized way for latch read paths. Having unique read
and write APIs meant that latch sequence counters are basically a data
type of their own -- just inappropriately overloading plain seqcount_t.
The seqcount_latch_t data type was thus introduced in seqlock.h.

Use that new data type instead of seqcount_raw_spinlock_t. This ensures
that only latch-safe APIs are used with the sequence counter.

Note that the use of seqcount_raw_spinlock_t was not very useful in the
first place: only the "raw_" subset of the seqcount_t APIs is used in
timekeeping.c. That subset was created for contexts where lockdep cannot
be used, so seqcount_LOCKTYPE_t's raison d'être -- verifying that the
seqcount_t writer serialization lock is held -- cannot be fulfilled
there.

References: 0c3351d451ae ("seqlock: Use raw_ prefix instead of _no_lockdep")
References: 55f3560df975 ("seqlock: Extend seqcount API with associated locks")
Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
---
 kernel/time/timekeeping.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 4c47f388a83f..999c981ae766 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -64,7 +64,7 @@ static struct timekeeper shadow_timekeeper;
  * See @update_fast_timekeeper() below.
  */
 struct tk_fast {
-	seqcount_raw_spinlock_t	seq;
+	seqcount_latch_t	seq;
 	struct tk_read_base	base[2];
 };
 
@@ -81,13 +81,13 @@ static struct clocksource dummy_clock = {
 };
 
 static struct tk_fast tk_fast_mono ____cacheline_aligned = {
-	.seq     = SEQCNT_RAW_SPINLOCK_ZERO(tk_fast_mono.seq, &timekeeper_lock),
+	.seq     = SEQCNT_LATCH_ZERO(tk_fast_mono.seq),
 	.base[0] = { .clock = &dummy_clock, },
 	.base[1] = { .clock = &dummy_clock, },
 };
 
 static struct tk_fast tk_fast_raw  ____cacheline_aligned = {
-	.seq     = SEQCNT_RAW_SPINLOCK_ZERO(tk_fast_raw.seq, &timekeeper_lock),
+	.seq     = SEQCNT_LATCH_ZERO(tk_fast_raw.seq),
 	.base[0] = { .clock = &dummy_clock, },
 	.base[1] = { .clock = &dummy_clock, },
 };
@@ -467,7 +467,7 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
 					tk_clock_read(tkr),
 					tkr->cycle_last,
 					tkr->mask));
-	} while (read_seqcount_retry(&tkf->seq, seq));
+	} while (read_seqcount_latch_retry(&tkf->seq, seq));
 
 	return now;
 }
@@ -533,7 +533,7 @@ static __always_inline u64 __ktime_get_real_fast_ns(struct tk_fast *tkf)
 					tk_clock_read(tkr),
 					tkr->cycle_last,
 					tkr->mask));
-	} while (read_seqcount_retry(&tkf->seq, seq));
+	} while (read_seqcount_latch_retry(&tkf->seq, seq));
 
 	return now;
 }
-- 
2.28.0


^ permalink raw reply	[relevance 88%]

* [PATCH v1 2/8] mm/swap: Do not abuse the seqcount_t latching API
  2020-08-27 11:40 91% ` [PATCH v1 0/8] seqlock: Introduce seqcount_latch_t Ahmed S. Darwish
  2020-08-27 11:40 97%   ` [PATCH v1 1/8] time/sched_clock: Use raw_read_seqcount_latch() during suspend Ahmed S. Darwish
@ 2020-08-27 11:40 74%   ` Ahmed S. Darwish
  2020-08-27 11:40 67%   ` [PATCH v1 3/8] seqlock: Introduce seqcount_latch_t Ahmed S. Darwish
                     ` (5 subsequent siblings)
  7 siblings, 0 replies; 200+ results
From: Ahmed S. Darwish @ 2020-08-27 11:40 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Will Deacon, Andrew Morton,
	Konstantin Khlebnikov, linux-mm
  Cc: Thomas Gleixner, Sebastian A. Siewior, LKML, Ahmed S. Darwish

Commit eef1a429f234 ("mm/swap.c: piggyback lru_add_drain_all() calls")
implemented an optimization mechanism to exit the to-be-started LRU
drain operation (call it A) if another drain operation *started and
finished* while (A) was blocked on the LRU draining mutex.

This was done through a seqcount_t latch, which is an abuse of its
semantics:

  1. seqcount_t latching should be used for the purpose of switching
     between two storage places with sequence protection to allow
     interruptible, preemptible writer sections. The referenced
     optimization mechanism has absolutely nothing to do with that.

  2. The used raw_write_seqcount_latch() has two SMP write memory
     barriers to ensure one consistent storage place out of the two
     storage places available. A full memory barrier is required
     instead: to guarantee that the pagevec counter stores visible to
     the local CPU are also visible to other CPUs -- before loading the
     current drain generation.

Besides the seqcount_t API abuse, the semantics of a latch sequence
counter were force-fitted into the referenced optimization. What was
actually meant is to track "generations" of LRU draining operations,
where "global lru draining generation = x" implies that all generations
0 < n <= x are already *scheduled* for draining -- thus nothing needs
to be done if the current generation number n <= x.

Remove the conceptually-inappropriate seqcount_t latch usage. Manually
implement the referenced optimization using a counter and SMP memory
barriers.

Note, while at it, use the non-atomic variant of cpumask_set_cpu(),
__cpumask_set_cpu(), due to the already existing mutex protection.
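
A condensed sketch of the replacement mechanism described above, with
the per-CPU work scheduling elided (see the diff below for the real
code and the full memory-barrier comments):

	void lru_add_drain_all(void)
	{
		static unsigned int lru_drain_gen;
		static DEFINE_MUTEX(lock);
		unsigned int this_gen;

		smp_mb();	/* pagevec stores before generation load */
		this_gen = smp_load_acquire(&lru_drain_gen);

		mutex_lock(&lock);

		/* A newer generation was already scheduled: nothing to do. */
		if (this_gen != lru_drain_gen)
			goto done;

		WRITE_ONCE(lru_drain_gen, lru_drain_gen + 1);
		smp_mb();	/* generation store before pagevec loads */

		/* ... queue and flush lru_add_drain_per_cpu() work ... */
	done:
		mutex_unlock(&lock);
	}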

Link: https://lkml.kernel.org/r/CALYGNiPSr-cxV9MX9czaVh6Wz_gzSv3H_8KPvgjBTGbJywUJpA@mail.gmail.com
Link: https://lkml.kernel.org/r/87y2pg9erj.fsf@vostro.fn.ogness.net
Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
---
 mm/swap.c | 65 +++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 54 insertions(+), 11 deletions(-)

diff --git a/mm/swap.c b/mm/swap.c
index d16d65d9b4e0..a1ec807e325d 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -763,10 +763,20 @@ static void lru_add_drain_per_cpu(struct work_struct *dummy)
  */
 void lru_add_drain_all(void)
 {
-	static seqcount_t seqcount = SEQCNT_ZERO(seqcount);
-	static DEFINE_MUTEX(lock);
+	/*
+	 * lru_drain_gen - Global pages generation number
+	 *
+	 * (A) Definition: global lru_drain_gen = x implies that all generations
+	 *     0 < n <= x are already *scheduled* for draining.
+	 *
+	 * This is an optimization for the highly-contended use case where a
+	 * user space workload keeps constantly generating a flow of pages for
+	 * each CPU.
+	 */
+	static unsigned int lru_drain_gen;
 	static struct cpumask has_work;
-	int cpu, seq;
+	static DEFINE_MUTEX(lock);
+	unsigned cpu, this_gen;
 
 	/*
 	 * Make sure nobody triggers this path before mm_percpu_wq is fully
@@ -775,21 +785,54 @@ void lru_add_drain_all(void)
 	if (WARN_ON(!mm_percpu_wq))
 		return;
 
-	seq = raw_read_seqcount_latch(&seqcount);
+	/*
+	 * Guarantee pagevec counter stores visible by this CPU are visible to
+	 * other CPUs before loading the current drain generation.
+	 */
+	smp_mb();
+
+	/*
+	 * (B) Locally cache global LRU draining generation number
+	 *
+	 * The read barrier ensures that the counter is loaded before the mutex
+	 * is taken. It pairs with smp_mb() inside the mutex critical section
+	 * at (D).
+	 */
+	this_gen = smp_load_acquire(&lru_drain_gen);
 
 	mutex_lock(&lock);
 
 	/*
-	 * Piggyback on drain started and finished while we waited for lock:
-	 * all pages pended at the time of our enter were drained from vectors.
+	 * (C) Exit the draining operation if a newer generation, from another
+	 * lru_add_drain_all(), was already scheduled for draining. Check (A).
 	 */
-	if (__read_seqcount_retry(&seqcount, seq))
+	if (unlikely(this_gen != lru_drain_gen))
 		goto done;
 
-	raw_write_seqcount_latch(&seqcount);
+	/*
+	 * (D) Increment global generation number
+	 *
+	 * Pairs with smp_load_acquire() at (B), outside of the critical
+	 * section. Use a full memory barrier to guarantee that the new global
+	 * drain generation number is stored before loading pagevec counters.
+	 *
+	 * This pairing must be done here, before the for_each_online_cpu loop
+	 * below which drains the page vectors.
+	 *
+	 * Let x, y, and z represent some system CPU numbers, where x < y < z.
+	 * Assume CPU #z is in the middle of the for_each_online_cpu loop
+	 * below and has already reached CPU #y's per-cpu data. CPU #x comes
+	 * along, adds some pages to its per-cpu vectors, then calls
+	 * lru_add_drain_all().
+	 *
+	 * If the paired barrier is done at any later step, e.g. after the
+	 * loop, CPU #x will just exit at (C) and miss flushing out all of its
+	 * added pages.
+	 */
+	WRITE_ONCE(lru_drain_gen, lru_drain_gen + 1);
+	smp_mb();
 
 	cpumask_clear(&has_work);
-
 	for_each_online_cpu(cpu) {
 		struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
 
@@ -801,7 +844,7 @@ void lru_add_drain_all(void)
 		    need_activate_page_drain(cpu)) {
 			INIT_WORK(work, lru_add_drain_per_cpu);
 			queue_work_on(cpu, mm_percpu_wq, work);
-			cpumask_set_cpu(cpu, &has_work);
+			__cpumask_set_cpu(cpu, &has_work);
 		}
 	}
 
@@ -816,7 +859,7 @@ void lru_add_drain_all(void)
 {
 	lru_add_drain();
 }
-#endif
+#endif /* CONFIG_SMP */
 
 /**
  * release_pages - batched put_page()
-- 
2.28.0


^ permalink raw reply	[relevance 74%]

* [PATCH v1 1/8] time/sched_clock: Use raw_read_seqcount_latch() during suspend
  2020-08-27 11:40 91% ` [PATCH v1 0/8] seqlock: Introduce seqcount_latch_t Ahmed S. Darwish
@ 2020-08-27 11:40 97%   ` Ahmed S. Darwish
  2020-08-27 11:40 74%   ` [PATCH v1 2/8] mm/swap: Do not abuse the seqcount_t latching API Ahmed S. Darwish
                     ` (6 subsequent siblings)
  7 siblings, 0 replies; 200+ results
From: Ahmed S. Darwish @ 2020-08-27 11:40 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Will Deacon
  Cc: Thomas Gleixner, Sebastian A. Siewior, LKML, Ahmed S. Darwish

sched_clock uses seqcount_t latching to switch between two storage
places protected by the sequence counter. This allows it to have
interruptible, NMI-safe, seqcount_t write side critical sections.

Since commit 7fc26327b756 ("seqlock: Introduce raw_read_seqcount_latch()"),
raw_read_seqcount_latch() has been the standardized way for seqcount_t
latch read paths. Thanks to its dependent load, it needs one less read
memory barrier than the currently used raw_read_seqcount() API.

Use raw_read_seqcount_latch() for the suspend path.

Commit aadd6e5caaac ("time/sched_clock: Use raw_read_seqcount_latch()")
missed changing that instance of raw_read_seqcount().

Link: https://lkml.kernel.org/r/20200625085745.GD117543@hirez.programming.kicks-ass.net
Link: https://lkml.kernel.org/r/20200715092345.GA231464@debian-buster-darwi.lab.linutronix.de
References: 1809bfa44e10 ("timers, sched/clock: Avoid deadlock during read from NMI")
Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
---
 kernel/time/sched_clock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index 1c03eec6ca9b..8c6b5febd7a0 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -258,7 +258,7 @@ void __init generic_sched_clock_init(void)
  */
 static u64 notrace suspended_sched_clock_read(void)
 {
-	unsigned int seq = raw_read_seqcount(&cd.seq);
+	unsigned int seq = raw_read_seqcount_latch(&cd.seq);
 
 	return cd.read_data[seq & 1].epoch_cyc;
 }
-- 
2.28.0


^ permalink raw reply	[relevance 97%]

* [PATCH v1 3/8] seqlock: Introduce seqcount_latch_t
  2020-08-27 11:40 91% ` [PATCH v1 0/8] seqlock: Introduce seqcount_latch_t Ahmed S. Darwish
  2020-08-27 11:40 97%   ` [PATCH v1 1/8] time/sched_clock: Use raw_read_seqcount_latch() during suspend Ahmed S. Darwish
  2020-08-27 11:40 74%   ` [PATCH v1 2/8] mm/swap: Do not abuse the seqcount_t latching API Ahmed S. Darwish
@ 2020-08-27 11:40 67%   ` Ahmed S. Darwish
  2020-09-10 15:08 55%     ` [tip: locking/core] " tip-bot2 for Ahmed S. Darwish
  2020-08-27 11:40 99%   ` [PATCH v1 4/8] time/sched_clock: Use seqcount_latch_t Ahmed S. Darwish
                     ` (4 subsequent siblings)
  7 siblings, 1 reply; 200+ results
From: Ahmed S. Darwish @ 2020-08-27 11:40 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Will Deacon
  Cc: Thomas Gleixner, Sebastian A. Siewior, LKML, Ahmed S. Darwish

Latch sequence counters are a multiversion concurrency control mechanism
where the seqcount_t counter even/odd value is used to switch between
two copies of protected data. This allows the seqcount_t read path to
safely interrupt its write side critical section (e.g. from NMIs).

Initially, latch sequence counters were implemented as a single write
function on top of plain seqcount_t: raw_write_seqcount_latch(). The
read side was expected to use the plain seqcount_t raw_read_seqcount().

A specialized latch read function, raw_read_seqcount_latch(), was later
added and became the standardized way for latch read paths. Thanks to
its dependent load, it needs one less read memory barrier than the
plain seqcount_t raw_read_seqcount() API.

Only raw_write_seqcount_latch() and raw_read_seqcount_latch() should be
used with latch sequence counters. Having *unique* read and write path
APIs means that latch sequence counters are actually a data type of
their own -- just inappropriately overloading plain seqcount_t.

Introduce seqcount_latch_t. This adds type-safety and ensures that only
the correct latch-safe APIs are used.

To avoid breaking bisection, let the latch APIs also accept plain
seqcount_t or seqcount_raw_spinlock_t for now. After converting all
call sites to seqcount_latch_t, only that new data type will be
allowed.

References: 9b0fd802e8c0 ("seqcount: Add raw_write_seqcount_latch()")
References: 7fc26327b756 ("seqlock: Introduce raw_read_seqcount_latch()")
References: aadd6e5caaac ("time/sched_clock: Use raw_read_seqcount_latch()")
Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
---
 Documentation/locking/seqlock.rst |  18 ++++++
 include/linux/seqlock.h           | 104 +++++++++++++++++++++---------
 2 files changed, 91 insertions(+), 31 deletions(-)

diff --git a/Documentation/locking/seqlock.rst b/Documentation/locking/seqlock.rst
index 62c5ad98c11c..a334b584f2b3 100644
--- a/Documentation/locking/seqlock.rst
+++ b/Documentation/locking/seqlock.rst
@@ -139,6 +139,24 @@ with the associated LOCKTYPE lock acquired.
 
 Read path: same as in :ref:`seqcount_t`.
 
+
+.. _seqcount_latch_t:
+
+Latch sequence counters (``seqcount_latch_t``)
+----------------------------------------------
+
+Latch sequence counters are a multiversion concurrency control mechanism
+where the embedded seqcount_t counter even/odd value is used to switch
+between two copies of protected data. This allows the sequence counter
+read path to safely interrupt its own write side critical section.
+
+Use seqcount_latch_t when the write side sections cannot be protected
+from interruption by readers. This is typically the case when the read
+side can be invoked from NMI handlers.
+
+Check `raw_write_seqcount_latch()` for more information.
+
+
 .. _seqlock_t:
 
 Sequential locks (``seqlock_t``)
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 962d9768945f..f83fbf2180db 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -587,34 +587,76 @@ static inline void write_seqcount_t_invalidate(seqcount_t *s)
 	kcsan_nestable_atomic_end();
 }
 
-/**
- * raw_read_seqcount_latch() - pick even/odd seqcount_t latch data copy
- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
+/*
+ * Latch sequence counters (seqcount_latch_t)
+ *
+ * A sequence counter variant where the counter even/odd value is used to
+ * switch between two copies of protected data. This allows the read path,
+ * typically NMIs, to safely interrupt the write side critical section.
  *
- * Use seqcount_t latching to switch between two storage places protected
- * by a sequence counter. Doing so allows having interruptible, preemptible,
- * seqcount_t write side critical sections.
+ * As the write sections are fully preemptible, no special handling for
+ * PREEMPT_RT is needed.
+ */
+typedef struct {
+	seqcount_t seqcount;
+} seqcount_latch_t;
+
+/**
+ * SEQCNT_LATCH_ZERO() - static initializer for seqcount_latch_t
+ * @seq_name: Name of the seqcount_latch_t instance
+ */
+#define SEQCNT_LATCH_ZERO(seq_name) {					\
+	.seqcount		= SEQCNT_ZERO(seq_name.seqcount),	\
+}
+
+/**
+ * seqcount_latch_init() - runtime initializer for seqcount_latch_t
+ * @s: Pointer to the seqcount_latch_t instance
+ */
+static inline void seqcount_latch_init(seqcount_latch_t *s)
+{
+	seqcount_init(&s->seqcount);
+}
+
+/**
+ * raw_read_seqcount_latch() - pick even/odd latch data copy
+ * @s: Pointer to seqcount_t, seqcount_raw_spinlock_t, or seqcount_latch_t
  *
- * Check raw_write_seqcount_latch() for more details and a full reader and
- * writer usage example.
+ * See raw_write_seqcount_latch() for details and a full reader/writer
+ * usage example.
  *
  * Return: sequence counter raw value. Use the lowest bit as an index for
- * picking which data copy to read. The full counter value must then be
- * checked with read_seqcount_retry().
+ * picking which data copy to read. The full counter must then be checked
+ * with read_seqcount_latch_retry().
  */
-#define raw_read_seqcount_latch(s)					\
-	raw_read_seqcount_t_latch(__seqcount_ptr(s))
+#define raw_read_seqcount_latch(s)						\
+({										\
+	/*									\
+	 * Pairs with the first smp_wmb() in raw_write_seqcount_latch().	\
+	 * Due to the dependent load, a full smp_rmb() is not needed.		\
+	 */									\
+	_Generic(*(s),								\
+		 seqcount_t:		  READ_ONCE(((seqcount_t *)s)->sequence),			\
+		 seqcount_raw_spinlock_t: READ_ONCE(((seqcount_raw_spinlock_t *)s)->seqcount.sequence),	\
+		 seqcount_latch_t:	  READ_ONCE(((seqcount_latch_t *)s)->seqcount.sequence));	\
+})
 
-static inline int raw_read_seqcount_t_latch(seqcount_t *s)
+/**
+ * read_seqcount_latch_retry() - end a seqcount_latch_t read section
+ * @s:		Pointer to seqcount_latch_t
+ * @start:	count, from raw_read_seqcount_latch()
+ *
+ * Return: true if a read section retry is required, else false
+ */
+static inline int
+read_seqcount_latch_retry(const seqcount_latch_t *s, unsigned start)
 {
-	/* Pairs with the first smp_wmb() in raw_write_seqcount_latch() */
-	int seq = READ_ONCE(s->sequence); /* ^^^ */
-	return seq;
+	return read_seqcount_retry(&s->seqcount, start);
 }
 
 /**
- * raw_write_seqcount_latch() - redirect readers to even/odd copy
- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
+ * raw_write_seqcount_latch() - redirect latch readers to even/odd copy
+ * @s: Pointer to seqcount_t, seqcount_raw_spinlock_t, or seqcount_latch_t
  *
  * The latch technique is a multiversion concurrency control method that allows
  * queries during non-atomic modifications. If you can guarantee queries never
@@ -633,7 +675,7 @@ static inline int raw_read_seqcount_t_latch(seqcount_t *s)
  * The basic form is a data structure like::
  *
  *	struct latch_struct {
- *		seqcount_t		seq;
+ *		seqcount_latch_t	seq;
  *		struct data_struct	data[2];
  *	};
  *
@@ -643,13 +685,13 @@ static inline int raw_read_seqcount_t_latch(seqcount_t *s)
  *	void latch_modify(struct latch_struct *latch, ...)
  *	{
  *		smp_wmb();	// Ensure that the last data[1] update is visible
- *		latch->seq++;
+ *		latch->seq.sequence++;
  *		smp_wmb();	// Ensure that the seqcount update is visible
  *
  *		modify(latch->data[0], ...);
  *
  *		smp_wmb();	// Ensure that the data[0] update is visible
- *		latch->seq++;
+ *		latch->seq.sequence++;
  *		smp_wmb();	// Ensure that the seqcount update is visible
  *
  *		modify(latch->data[1], ...);
@@ -668,8 +710,8 @@ static inline int raw_read_seqcount_t_latch(seqcount_t *s)
  *			idx = seq & 0x01;
  *			entry = data_query(latch->data[idx], ...);
  *
- *		// read_seqcount_retry() includes needed smp_rmb()
- *		} while (read_seqcount_retry(&latch->seq, seq));
+ *		// This includes needed smp_rmb()
+ *		} while (read_seqcount_latch_retry(&latch->seq, seq));
  *
  *		return entry;
  *	}
@@ -693,14 +735,14 @@ static inline int raw_read_seqcount_t_latch(seqcount_t *s)
  *	When data is a dynamic data structure; one should use regular RCU
  *	patterns to manage the lifetimes of the objects within.
  */
-#define raw_write_seqcount_latch(s)					\
-	raw_write_seqcount_t_latch(__seqcount_ptr(s))
-
-static inline void raw_write_seqcount_t_latch(seqcount_t *s)
-{
-       smp_wmb();      /* prior stores before incrementing "sequence" */
-       s->sequence++;
-       smp_wmb();      /* increment "sequence" before following stores */
+#define raw_write_seqcount_latch(s)						\
+{										\
+       smp_wmb();      /* prior stores before incrementing "sequence" */	\
+       _Generic(*(s),								\
+		seqcount_t:		((seqcount_t *)s)->sequence++,		\
+		seqcount_raw_spinlock_t:((seqcount_raw_spinlock_t *)s)->seqcount.sequence++, \
+		seqcount_latch_t:	((seqcount_latch_t *)s)->seqcount.sequence++); \
+       smp_wmb();      /* increment "sequence" before following stores */	\
 }
 
 /*
-- 
2.28.0


^ permalink raw reply	[relevance 67%]

* [PATCH v1 0/5] seqlock: Introduce PREEMPT_RT support
      2020-08-27 11:40 91% ` [PATCH v1 0/8] seqlock: Introduce seqcount_latch_t Ahmed S. Darwish
@ 2020-08-28  1:07 95% ` Ahmed S. Darwish
  2020-08-28  1:07 61%   ` [PATCH v1 1/5] seqlock: seqcount_LOCKTYPE_t: Standardize naming convention Ahmed S. Darwish
                     ` (5 more replies)
  2020-09-10 15:08 63% ` [tip: locking/core] seqlock: seqcount_LOCKNAME_t: " tip-bot2 for Ahmed S. Darwish
  3 siblings, 6 replies; 200+ results
From: Ahmed S. Darwish @ 2020-08-28  1:07 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Will Deacon
  Cc: Thomas Gleixner, Sebastian A. Siewior, Paul E. McKenney,
	Steven Rostedt, LKML, Ahmed S. Darwish

Hi,

Preemption must be disabled before entering a sequence counter write
side critical section. Otherwise, the read side section can preempt the
write side section and spin for the entire scheduler tick. If that
reader belongs to a real-time scheduling class, it can spin forever and
the kernel will livelock.

Disabling preemption cannot be done for PREEMPT_RT, though: it can lead
to higher latencies, and the write side sections will not be able to
acquire locks which have become sleeping locks (e.g. spinlock_t).

To solve this dilemma, do not disable preemption for seqcount_LOCKTYPE_t
writers. Rather, detect if a seqcount_LOCKTYPE_t writer is in progress.
If that is the case, acquire and then release the associated LOCKTYPE
writer serialization lock. This allows any preempted writer to make
progress until the end of its writer serialization lock critical
section.

Implement this technique for all of PREEMPT_RT sleeping locks.
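
As a rough sketch (not the patch code itself) of the reader-side
technique just described, specialized for seqcount_spinlock_t -- the
helper name rt_read_begin() is made up for illustration:

	static inline unsigned int rt_read_begin(seqcount_spinlock_t *s)
	{
		unsigned int seq = READ_ONCE(s->seqcount.sequence);

		while (unlikely(seq & 1)) {
			/*
			 * An odd count means a writer was preempted
			 * mid-update. Acquiring and releasing the
			 * associated lock lets it finish (on PREEMPT_RT,
			 * with priority inheritance) before retrying.
			 */
			spin_lock(s->lock);
			spin_unlock(s->lock);
			seq = READ_ONCE(s->seqcount.sequence);
		}

		smp_rmb();
		return seq;
	}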

Thanks,

8<--------------

Ahmed S. Darwish (5):
  seqlock: seqcount_LOCKTYPE_t: Standardize naming convention
  seqlock: Use unique prefix for seqcount_t property accessors
  seqlock: seqcount_t: Implement all read APIs as statement expressions
  seqlock: seqcount_LOCKTYPE_t: Introduce PREEMPT_RT support
  seqlock: PREEMPT_RT: Do not starve seqlock_t writers

 include/linux/seqlock.h | 277 ++++++++++++++++++++++++----------------
 1 file changed, 167 insertions(+), 110 deletions(-)

base-commit: d012a7190fc1fd72ed48911e77ca97ba4521bccd
--
2.28.0

^ permalink raw reply	[relevance 95%]

* [PATCH v1 1/5] seqlock: seqcount_LOCKTYPE_t: Standardize naming convention
  2020-08-28  1:07 95% ` [PATCH v1 0/5] seqlock: Introduce PREEMPT_RT support Ahmed S. Darwish
@ 2020-08-28  1:07 61%   ` Ahmed S. Darwish
    2020-08-28  1:07 91%   ` [PATCH v1 2/5] seqlock: Use unique prefix for seqcount_t property accessors Ahmed S. Darwish
                     ` (4 subsequent siblings)
  5 siblings, 1 reply; 200+ results
From: Ahmed S. Darwish @ 2020-08-28  1:07 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Will Deacon
  Cc: Thomas Gleixner, Sebastian A. Siewior, Paul E. McKenney,
	Steven Rostedt, LKML, Ahmed S. Darwish

In seqlock.h, sequence counters with associated locks are variously
called seqcount_LOCKNAME_t, seqcount_LOCKTYPE_t, or seqcount_locktype_t.

Standardize on "seqcount_LOCKTYPE_t" for all instances in comments,
kernel-doc, and SEQCOUNT_LOCKTYPE() generative macro parameters.

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
---
 include/linux/seqlock.h | 80 +++++++++++++++++++++--------------------
 1 file changed, 41 insertions(+), 39 deletions(-)

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 962d9768945f..598af597f74e 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -138,56 +138,58 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
 #endif
 
 /**
- * typedef seqcount_LOCKNAME_t - sequence counter with LOCKTYPR associated
+ * typedef seqcount_LOCKTYPE_t - sequence counter with associated lock
  * @seqcount:	The real sequence counter
  * @lock:	Pointer to the associated spinlock
  *
- * A plain sequence counter with external writer synchronization by a
- * spinlock. The spinlock is associated to the sequence count in the
+ * A plain sequence counter with external writer synchronization by
+ * LOCKTYPE @lock. The lock is associated to the sequence counter in the
  * static initializer or init function. This enables lockdep to validate
  * that the write side critical section is properly serialized.
+ *
+ * LOCKTYPE:	raw_spinlock, spinlock, rwlock, mutex, or ww_mutex.
  */
 
 /**
- * seqcount_LOCKNAME_init() - runtime initializer for seqcount_LOCKNAME_t
- * @s:		Pointer to the seqcount_LOCKNAME_t instance
+ * seqcount_LOCKTYPE_init() - runtime initializer for seqcount_LOCKTYPE_t
+ * @s:		Pointer to the seqcount_LOCKTYPE_t instance
  * @lock:	Pointer to the associated LOCKTYPE
  */
 
 /*
- * SEQCOUNT_LOCKTYPE() - Instantiate seqcount_LOCKNAME_t and helpers
- * @locktype:		actual typename
- * @lockname:		name
+ * SEQCOUNT_LOCKTYPE() - Instantiate seqcount_LOCKTYPE_t and helpers
+ * @locktype:		"LOCKTYPE" part of seqcount_LOCKTYPE_t
+ * @locktype_t:		canonical/full LOCKTYPE C data type
  * @preemptible:	preemptibility of above locktype
  * @lockmember:		argument for lockdep_assert_held()
  */
-#define SEQCOUNT_LOCKTYPE(locktype, lockname, preemptible, lockmember)	\
-typedef struct seqcount_##lockname {					\
+#define SEQCOUNT_LOCKTYPE(locktype, locktype_t, preemptible, lockmember)	\
+typedef struct seqcount_##locktype {					\
 	seqcount_t		seqcount;				\
-	__SEQ_LOCK(locktype	*lock);					\
-} seqcount_##lockname##_t;						\
+	__SEQ_LOCK(locktype_t	*lock);					\
+} seqcount_##locktype##_t;						\
 									\
 static __always_inline void						\
-seqcount_##lockname##_init(seqcount_##lockname##_t *s, locktype *lock)	\
+seqcount_##locktype##_init(seqcount_##locktype##_t *s, locktype_t *lock)\
 {									\
 	seqcount_init(&s->seqcount);					\
 	__SEQ_LOCK(s->lock = lock);					\
 }									\
 									\
 static __always_inline seqcount_t *					\
-__seqcount_##lockname##_ptr(seqcount_##lockname##_t *s)			\
+__seqcount_##locktype##_ptr(seqcount_##locktype##_t *s)			\
 {									\
 	return &s->seqcount;						\
 }									\
 									\
 static __always_inline bool						\
-__seqcount_##lockname##_preemptible(seqcount_##lockname##_t *s)		\
+__seqcount_##locktype##_preemptible(seqcount_##locktype##_t *s)		\
 {									\
 	return preemptible;						\
 }									\
 									\
 static __always_inline void						\
-__seqcount_##lockname##_assert(seqcount_##lockname##_t *s)		\
+__seqcount_##locktype##_assert(seqcount_##locktype##_t *s)		\
 {									\
 	__SEQ_LOCK(lockdep_assert_held(lockmember));			\
 }
@@ -211,15 +213,15 @@ static inline void __seqcount_assert(seqcount_t *s)
 	lockdep_assert_preemption_disabled();
 }
 
-SEQCOUNT_LOCKTYPE(raw_spinlock_t,	raw_spinlock,	false,	s->lock)
-SEQCOUNT_LOCKTYPE(spinlock_t,		spinlock,	false,	s->lock)
-SEQCOUNT_LOCKTYPE(rwlock_t,		rwlock,		false,	s->lock)
-SEQCOUNT_LOCKTYPE(struct mutex,		mutex,		true,	s->lock)
-SEQCOUNT_LOCKTYPE(struct ww_mutex,	ww_mutex,	true,	&s->lock->base)
+SEQCOUNT_LOCKTYPE(raw_spinlock,	raw_spinlock_t,		false,	s->lock)
+SEQCOUNT_LOCKTYPE(spinlock,	spinlock_t,		false,	s->lock)
+SEQCOUNT_LOCKTYPE(rwlock,	rwlock_t,		false,	s->lock)
+SEQCOUNT_LOCKTYPE(mutex,	struct mutex,		true,	s->lock)
+SEQCOUNT_LOCKTYPE(ww_mutex,	struct ww_mutex,	true,	&s->lock->base)
 
 /**
- * SEQCNT_LOCKNAME_ZERO - static initializer for seqcount_LOCKNAME_t
- * @name:	Name of the seqcount_LOCKNAME_t instance
+ * SEQCNT_LOCKTYPE_ZERO - static initializer for seqcount_LOCKTYPE_t
+ * @name:	Name of the seqcount_LOCKTYPE_t instance
  * @lock:	Pointer to the associated LOCKTYPE
  */
 
@@ -235,8 +237,8 @@ SEQCOUNT_LOCKTYPE(struct ww_mutex,	ww_mutex,	true,	&s->lock->base)
 #define SEQCNT_WW_MUTEX_ZERO(name, lock) 	SEQCOUNT_LOCKTYPE_ZERO(name, lock)
 
 
-#define __seqprop_case(s, lockname, prop)				\
-	seqcount_##lockname##_t: __seqcount_##lockname##_##prop((void *)(s))
+#define __seqprop_case(s, locktype, prop)				\
+	seqcount_##locktype##_t: __seqcount_##locktype##_##prop((void *)(s))
 
 #define __seqprop(s, prop) _Generic(*(s),				\
 	seqcount_t:		__seqcount_##prop((void *)(s)),		\
@@ -252,7 +254,7 @@ SEQCOUNT_LOCKTYPE(struct ww_mutex,	ww_mutex,	true,	&s->lock->base)
 
 /**
  * __read_seqcount_begin() - begin a seqcount_t read section w/o barrier
- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKTYPE_t variants
  *
  * __read_seqcount_begin is like read_seqcount_begin, but has no smp_rmb()
  * barrier. Callers should ensure that smp_rmb() or equivalent ordering is
@@ -283,7 +285,7 @@ static inline unsigned __read_seqcount_t_begin(const seqcount_t *s)
 
 /**
  * raw_read_seqcount_begin() - begin a seqcount_t read section w/o lockdep
- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKTYPE_t variants
  *
  * Return: count to be passed to read_seqcount_retry()
  */
@@ -299,7 +301,7 @@ static inline unsigned raw_read_seqcount_t_begin(const seqcount_t *s)
 
 /**
  * read_seqcount_begin() - begin a seqcount_t read critical section
- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKTYPE_t variants
  *
  * Return: count to be passed to read_seqcount_retry()
  */
@@ -314,7 +316,7 @@ static inline unsigned read_seqcount_t_begin(const seqcount_t *s)
 
 /**
  * raw_read_seqcount() - read the raw seqcount_t counter value
- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKTYPE_t variants
  *
  * raw_read_seqcount opens a read critical section of the given
  * seqcount_t, without any lockdep checking, and without checking or
@@ -337,7 +339,7 @@ static inline unsigned raw_read_seqcount_t(const seqcount_t *s)
 /**
  * raw_seqcount_begin() - begin a seqcount_t read critical section w/o
  *                        lockdep and w/o counter stabilization
- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKTYPE_t variants
  *
  * raw_seqcount_begin opens a read critical section of the given
  * seqcount_t. Unlike read_seqcount_begin(), this function will not wait
@@ -365,7 +367,7 @@ static inline unsigned raw_seqcount_t_begin(const seqcount_t *s)
 
 /**
  * __read_seqcount_retry() - end a seqcount_t read section w/o barrier
- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKTYPE_t variants
  * @start: count, from read_seqcount_begin()
  *
  * __read_seqcount_retry is like read_seqcount_retry, but has no smp_rmb()
@@ -389,7 +391,7 @@ static inline int __read_seqcount_t_retry(const seqcount_t *s, unsigned start)
 
 /**
  * read_seqcount_retry() - end a seqcount_t read critical section
- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKTYPE_t variants
  * @start: count, from read_seqcount_begin()
  *
  * read_seqcount_retry closes the read critical section of given
@@ -409,7 +411,7 @@ static inline int read_seqcount_t_retry(const seqcount_t *s, unsigned start)
 
 /**
  * raw_write_seqcount_begin() - start a seqcount_t write section w/o lockdep
- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKTYPE_t variants
  */
 #define raw_write_seqcount_begin(s)					\
 do {									\
@@ -428,7 +430,7 @@ static inline void raw_write_seqcount_t_begin(seqcount_t *s)
 
 /**
  * raw_write_seqcount_end() - end a seqcount_t write section w/o lockdep
- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKTYPE_t variants
  */
 #define raw_write_seqcount_end(s)					\
 do {									\
@@ -448,7 +450,7 @@ static inline void raw_write_seqcount_t_end(seqcount_t *s)
 /**
  * write_seqcount_begin_nested() - start a seqcount_t write section with
  *                                 custom lockdep nesting level
- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKTYPE_t variants
  * @subclass: lockdep nesting level
  *
  * See Documentation/locking/lockdep-design.rst
@@ -471,7 +473,7 @@ static inline void write_seqcount_t_begin_nested(seqcount_t *s, int subclass)
 
 /**
  * write_seqcount_begin() - start a seqcount_t write side critical section
- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKTYPE_t variants
  *
  * write_seqcount_begin opens a write side critical section of the given
  * seqcount_t.
@@ -497,7 +499,7 @@ static inline void write_seqcount_t_begin(seqcount_t *s)
 
 /**
  * write_seqcount_end() - end a seqcount_t write side critical section
- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKTYPE_t variants
  *
  * The write section must've been opened with write_seqcount_begin().
  */
@@ -517,7 +519,7 @@ static inline void write_seqcount_t_end(seqcount_t *s)
 
 /**
  * raw_write_seqcount_barrier() - do a seqcount_t write barrier
- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKTYPE_t variants
  *
  * This can be used to provide an ordering guarantee instead of the usual
  * consistency guarantee. It is one wmb cheaper, because it can collapse
@@ -571,7 +573,7 @@ static inline void raw_write_seqcount_t_barrier(seqcount_t *s)
 /**
  * write_seqcount_invalidate() - invalidate in-progress seqcount_t read
  *                               side operations
- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKTYPE_t variants
  *
  * After write_seqcount_invalidate, no seqcount_t read side operations
  * will complete successfully and see data older than this.
-- 
2.28.0


^ permalink raw reply	[relevance 61%]

* [PATCH v1 2/5] seqlock: Use unique prefix for seqcount_t property accessors
  2020-08-28  1:07 95% ` [PATCH v1 0/5] seqlock: Introduce PREEMPT_RT support Ahmed S. Darwish
  2020-08-28  1:07 61%   ` [PATCH v1 1/5] seqlock: seqcount_LOCKTYPE_t: Standardize naming convention Ahmed S. Darwish
@ 2020-08-28  1:07 91%   ` Ahmed S. Darwish
    2020-08-28  1:07 80%   ` [PATCH v1 3/5] seqlock: seqcount_t: Implement all read APIs as statement expressions Ahmed S. Darwish
                     ` (3 subsequent siblings)
  5 siblings, 1 reply; 200+ results
From: Ahmed S. Darwish @ 2020-08-28  1:07 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Will Deacon
  Cc: Thomas Gleixner, Sebastian A. Siewior, Paul E. McKenney,
	Steven Rostedt, LKML, Ahmed S. Darwish

In seqlock.h, the following set of functions:

    - __seqcount_ptr()
    - __seqcount_preemptible()
    - __seqcount_assert()

act as plain seqcount_t "property" accessors. Meanwhile, the following
group:

    - __seqcount_ptr()
    - __seqcount_lock_preemptible()
    - __seqcount_assert_lock_held()

act as the equivalent set, but in the generic form, taking either
seqcount_t or any of the seqcount_LOCKTYPE_t variants.

This is quite confusing, especially for the first member, which is
named exactly the same in both groups.

Differentiate the first group by using "__seqcount_t_" as prefix. This
also conforms with the rest of seqlock.h naming conventions.

While at it, also constify the property accessors first parameter where
appropriate.

References: 55f3560df975 ("seqlock: Extend seqcount API with associated locks")
Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
---
 include/linux/seqlock.h | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 598af597f74e..5470e9cd52ce 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -183,13 +183,13 @@ __seqcount_##locktype##_ptr(seqcount_##locktype##_t *s)			\
 }									\
 									\
 static __always_inline bool						\
-__seqcount_##locktype##_preemptible(seqcount_##locktype##_t *s)		\
+__seqcount_##locktype##_preemptible(const seqcount_##locktype##_t *s)	\
 {									\
 	return preemptible;						\
 }									\
 									\
 static __always_inline void						\
-__seqcount_##locktype##_assert(seqcount_##locktype##_t *s)		\
+__seqcount_##locktype##_assert(const seqcount_##locktype##_t *s)	\
 {									\
 	__SEQ_LOCK(lockdep_assert_held(lockmember));			\
 }
@@ -198,17 +198,17 @@ __seqcount_##locktype##_assert(seqcount_##locktype##_t *s)		\
  * __seqprop() for seqcount_t
  */
 
-static inline seqcount_t *__seqcount_ptr(seqcount_t *s)
+static inline seqcount_t *__seqcount_t_ptr(seqcount_t *s)
 {
 	return s;
 }
 
-static inline bool __seqcount_preemptible(seqcount_t *s)
+static inline bool __seqcount_t_preemptible(const seqcount_t *s)
 {
 	return false;
 }
 
-static inline void __seqcount_assert(seqcount_t *s)
+static inline void __seqcount_t_assert(const seqcount_t *s)
 {
 	lockdep_assert_preemption_disabled();
 }
@@ -236,12 +236,11 @@ SEQCOUNT_LOCKTYPE(ww_mutex,	struct ww_mutex,	true,	&s->lock->base)
 #define SEQCNT_MUTEX_ZERO(name, lock)		SEQCOUNT_LOCKTYPE_ZERO(name, lock)
 #define SEQCNT_WW_MUTEX_ZERO(name, lock) 	SEQCOUNT_LOCKTYPE_ZERO(name, lock)
 
-
 #define __seqprop_case(s, locktype, prop)				\
 	seqcount_##locktype##_t: __seqcount_##locktype##_##prop((void *)(s))
 
 #define __seqprop(s, prop) _Generic(*(s),				\
-	seqcount_t:		__seqcount_##prop((void *)(s)),		\
+	seqcount_t:		__seqcount_t_##prop((void *)(s)),	\
 	__seqprop_case((s),	raw_spinlock,	prop),			\
 	__seqprop_case((s),	spinlock,	prop),			\
 	__seqprop_case((s),	rwlock,		prop),			\
-- 
2.28.0


^ permalink raw reply	[relevance 91%]

* [PATCH v1 3/5] seqlock: seqcount_t: Implement all read APIs as statement expressions
  2020-08-28  1:07 95% ` [PATCH v1 0/5] seqlock: Introduce PREEMPT_RT support Ahmed S. Darwish
  2020-08-28  1:07 61%   ` [PATCH v1 1/5] seqlock: seqcount_LOCKTYPE_t: Standardize naming convention Ahmed S. Darwish
  2020-08-28  1:07 91%   ` [PATCH v1 2/5] seqlock: Use unique prefix for seqcount_t property accessors Ahmed S. Darwish
@ 2020-08-28  1:07 80%   ` Ahmed S. Darwish
    2020-08-28  1:07 72%   ` [PATCH v1 4/5] seqlock: seqcount_LOCKTYPE_t: Introduce PREEMPT_RT support Ahmed S. Darwish
                     ` (2 subsequent siblings)
  5 siblings, 1 reply; 200+ results
From: Ahmed S. Darwish @ 2020-08-28  1:07 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Will Deacon
  Cc: Thomas Gleixner, Sebastian A. Siewior, Paul E. McKenney,
	Steven Rostedt, LKML, Ahmed S. Darwish

The sequence counter read APIs are implemented as CPP macros, so they
can take either seqcount_t or any of the seqcount_LOCKTYPE_t variants.
Such macros then expand *directly* into internal C functions that only
take plain seqcount_t.

Further commits need access to the seqcount_LOCKTYPE_t inside the
actual read APIs' code. Thus, transform all of the seqcount read APIs
into pure GCC statement expressions instead.

This will not break type-safety: all of the transformed APIs resolve to
a _Generic() selection that does not have a "default" case.

This will also not affect the transformed APIs' readability: the
previously added kernel-doc above all of the seqlock.h functions makes
the expectations quite clear for call-site developers.
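
As a self-contained illustration (not kernel code) of the two constructs
this patch combines -- a GCC statement expression whose result comes
from a _Generic() selection with no "default" case -- consider:

	#include <stdio.h>

	/* Type-safe "property accessor": compiles only for listed types. */
	#define type_name(x)						\
	({								\
		const char *__n = _Generic((x),				\
			int:    "int",					\
			double: "double");				\
		__n;							\
	})

	int main(void)
	{
		printf("%s\n", type_name(42));		/* prints "int" */
		printf("%s\n", type_name(3.14));	/* prints "double" */
		return 0;
	}

Passing, e.g., a long here would fail to compile, which is exactly the
type-safety property the commit message relies on.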

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
---
 include/linux/seqlock.h | 98 ++++++++++++++++++++---------------------
 1 file changed, 49 insertions(+), 49 deletions(-)

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 5470e9cd52ce..d114a9f4e9d9 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -182,6 +182,12 @@ __seqcount_##locktype##_ptr(seqcount_##locktype##_t *s)			\
 	return &s->seqcount;						\
 }									\
 									\
+static __always_inline unsigned						\
+__seqcount_##locktype##_sequence(const seqcount_##locktype##_t *s)	\
+{									\
+	return READ_ONCE(s->seqcount.sequence);				\
+}									\
+									\
 static __always_inline bool						\
 __seqcount_##locktype##_preemptible(const seqcount_##locktype##_t *s)	\
 {									\
@@ -203,6 +209,11 @@ static inline seqcount_t *__seqcount_t_ptr(seqcount_t *s)
 	return s;
 }
 
+static inline unsigned __seqcount_t_sequence(const seqcount_t *s)
+{
+	return READ_ONCE(s->sequence);
+}
+
 static inline bool __seqcount_t_preemptible(const seqcount_t *s)
 {
 	return false;
@@ -248,6 +259,7 @@ SEQCOUNT_LOCKTYPE(ww_mutex,	struct ww_mutex,	true,	&s->lock->base)
 	__seqprop_case((s),	ww_mutex,	prop))
 
 #define __seqcount_ptr(s)		__seqprop(s, ptr)
+#define __seqcount_sequence(s)		__seqprop(s, sequence)
 #define __seqcount_lock_preemptible(s)	__seqprop(s, preemptible)
 #define __seqcount_assert_lock_held(s)	__seqprop(s, assert)
 
@@ -266,21 +278,19 @@ SEQCOUNT_LOCKTYPE(ww_mutex,	struct ww_mutex,	true,	&s->lock->base)
  * Return: count to be passed to read_seqcount_retry()
  */
 #define __read_seqcount_begin(s)					\
-	__read_seqcount_t_begin(__seqcount_ptr(s))
-
-static inline unsigned __read_seqcount_t_begin(const seqcount_t *s)
-{
-	unsigned ret;
-
-repeat:
-	ret = READ_ONCE(s->sequence);
-	if (unlikely(ret & 1)) {
-		cpu_relax();
-		goto repeat;
-	}
-	kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);
-	return ret;
-}
+({									\
+	unsigned seq;							\
+									\
+	do {								\
+		seq = __seqcount_sequence(s);				\
+		if (likely(! (seq & 1)))				\
+			break;						\
+		cpu_relax();						\
+	} while (true);							\
+									\
+	kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);			\
+	seq;								\
+})
 
 /**
  * raw_read_seqcount_begin() - begin a seqcount_t read section w/o lockdep
@@ -289,14 +299,12 @@ static inline unsigned __read_seqcount_t_begin(const seqcount_t *s)
  * Return: count to be passed to read_seqcount_retry()
  */
 #define raw_read_seqcount_begin(s)					\
-	raw_read_seqcount_t_begin(__seqcount_ptr(s))
-
-static inline unsigned raw_read_seqcount_t_begin(const seqcount_t *s)
-{
-	unsigned ret = __read_seqcount_t_begin(s);
-	smp_rmb();
-	return ret;
-}
+({									\
+	unsigned seq = __read_seqcount_begin(s);			\
+									\
+	smp_rmb();							\
+	seq;								\
+})
 
 /**
  * read_seqcount_begin() - begin a seqcount_t read critical section
@@ -305,13 +313,10 @@ static inline unsigned raw_read_seqcount_t_begin(const seqcount_t *s)
  * Return: count to be passed to read_seqcount_retry()
  */
 #define read_seqcount_begin(s)						\
-	read_seqcount_t_begin(__seqcount_ptr(s))
-
-static inline unsigned read_seqcount_t_begin(const seqcount_t *s)
-{
-	seqcount_lockdep_reader_access(s);
-	return raw_read_seqcount_t_begin(s);
-}
+({									\
+	seqcount_lockdep_reader_access(__seqcount_ptr(s));		\
+	raw_read_seqcount_begin(s);					\
+})
 
 /**
  * raw_read_seqcount() - read the raw seqcount_t counter value
@@ -325,15 +330,13 @@ static inline unsigned read_seqcount_t_begin(const seqcount_t *s)
  * Return: count to be passed to read_seqcount_retry()
  */
 #define raw_read_seqcount(s)						\
-	raw_read_seqcount_t(__seqcount_ptr(s))
-
-static inline unsigned raw_read_seqcount_t(const seqcount_t *s)
-{
-	unsigned ret = READ_ONCE(s->sequence);
-	smp_rmb();
-	kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);
-	return ret;
-}
+({									\
+	unsigned seq = __seqcount_sequence(s);				\
+									\
+	smp_rmb();							\
+	kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);			\
+	seq;								\
+})
 
 /**
  * raw_seqcount_begin() - begin a seqcount_t read critical section w/o
@@ -353,16 +356,13 @@ static inline unsigned raw_read_seqcount_t(const seqcount_t *s)
  * Return: count to be passed to read_seqcount_retry()
  */
 #define raw_seqcount_begin(s)						\
-	raw_seqcount_t_begin(__seqcount_ptr(s))
-
-static inline unsigned raw_seqcount_t_begin(const seqcount_t *s)
-{
-	/*
-	 * If the counter is odd, let read_seqcount_retry() fail
-	 * by decrementing the counter.
-	 */
-	return raw_read_seqcount_t(s) & ~1;
-}
+({									\
+	/*								\
+	 * If the counter is odd, let read_seqcount_retry() fail	\
+	 * by decrementing the counter.					\
+	 */								\
+	raw_read_seqcount(s) & ~1;					\
+})
 
 /**
  * __read_seqcount_retry() - end a seqcount_t read section w/o barrier
-- 
2.28.0


^ permalink raw reply	[relevance 80%]

* [PATCH v1 4/5] seqlock: seqcount_LOCKTYPE_t: Introduce PREEMPT_RT support
  2020-08-28  1:07 95% ` [PATCH v1 0/5] seqlock: Introduce PREEMPT_RT support Ahmed S. Darwish
                     ` (2 preceding siblings ...)
  2020-08-28  1:07 80%   ` [PATCH v1 3/5] seqlock: seqcount_t: Implement all read APIs as statement expressions Ahmed S. Darwish
@ 2020-08-28  1:07 72%   ` Ahmed S. Darwish
    2020-08-28  1:07 84%   ` [PATCH v1 5/5] seqlock: PREEMPT_RT: Do not starve seqlock_t writers Ahmed S. Darwish
    5 siblings, 1 reply; 200+ results
From: Ahmed S. Darwish @ 2020-08-28  1:07 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Will Deacon
  Cc: Thomas Gleixner, Sebastian A. Siewior, Paul E. McKenney,
	Steven Rostedt, LKML, Ahmed S. Darwish

Preemption must be disabled before entering a sequence counter write
side critical section.  Otherwise the read side section can preempt the
write side section and spin for the entire scheduler tick.  If that
reader belongs to a real-time scheduling class, it can spin forever and
the kernel will livelock.

Disabling preemption cannot be done for PREEMPT_RT though: it can lead
to higher latencies, and the write side sections will not be able to
acquire locks which become sleeping locks (e.g. spinlock_t).

To solve this dilemma, do not disable preemption for seqcount_LOCKTYPE_t
writers. Rather, detect if a seqcount_LOCKTYPE_t writer is in progress.
If that is the case, acquire then release the associated LOCKTYPE writer
serialization lock. This will allow any preempted writer to make progress
until the end of its writer serialization lock critical section.

Implement this technique for all of PREEMPT_RT sleeping locks.
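
Condensed sketch of what the generated reader path does for, e.g., a
seqcount_spinlock_t (illustration only; the real code is the
macro-expanded __seqcount_spinlock_sequence() in the diff below):

    static unsigned reader_fetch_seq(seqcount_spinlock_t *s)
    {
	    unsigned seq = READ_ONCE(s->seqcount.sequence);

	    if (IS_ENABLED(CONFIG_PREEMPT_RT) && unlikely(seq & 1)) {
		    /*
		     * Odd count: a writer is in progress, and may have
		     * been preempted by this very reader. Acquiring the
		     * associated lock blocks (and priority-boosts) until
		     * that writer releases it, guaranteeing its progress.
		     */
		    spin_lock(s->lock);
		    spin_unlock(s->lock);

		    seq = READ_ONCE(s->seqcount.sequence);
	    }

	    return seq;
    }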

Link: https://lkml.kernel.org/r/159708609435.2571.13948681727529247231.tglx@nanos
Link: https://lkml.kernel.org/r/20200519214547.352050-1-a.darwish@linutronix.de
References: 55f3560df975 ("seqlock: Extend seqcount API with associated locks")
Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
---
 include/linux/seqlock.h | 72 +++++++++++++++++++++++++++++++++--------
 1 file changed, 59 insertions(+), 13 deletions(-)

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index d114a9f4e9d9..8d4bf12272ba 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -17,6 +17,7 @@
 #include <linux/kcsan-checks.h>
 #include <linux/lockdep.h>
 #include <linux/mutex.h>
+#include <linux/ww_mutex.h>
 #include <linux/preempt.h>
 #include <linux/spinlock.h>
 
@@ -131,7 +132,23 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
  * See Documentation/locking/seqlock.rst
  */
 
-#ifdef CONFIG_LOCKDEP
+/*
+ * For PREEMPT_RT, seqcount_LOCKTYPE_t write side critical sections cannot
+ * disable preemption. It can lead to higher latencies, and the write side
+ * sections will not be able to acquire locks which become sleeping locks
+ * (e.g. spinlock_t).
+ *
+ * To remain preemptible while avoiding a possible livelock caused by the
+ * read side preempting the write side, use a different technique: detect
+ * if a seqcount_LOCKTYPE_t writer is in progress. If that is the case,
+ * acquire then release the associated LOCKTYPE writer serialization
+ * lock. This will allow any preempted writer to make progress until the
+ * end of its writer serialization lock critical section.
+ *
+ * This lock-unlock technique must be implemented for all of PREEMPT_RT
+ * sleeping locks.  See Documentation/locking/locktypes.rst
+ */
+#if defined(CONFIG_LOCKDEP) || defined(CONFIG_PREEMPT_RT)
 #define __SEQ_LOCK(expr)	expr
 #else
 #define __SEQ_LOCK(expr)
@@ -162,8 +179,10 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
  * @locktype_t:		canonical/full LOCKTYPE C data type
  * @preemptible:	preemptibility of above locktype
  * @lockmember:		argument for lockdep_assert_held()
+ * @lockbase:		associated lock release function (prefix only)
+ * @lock_acquire:	associated lock acquisition function (full call)
  */
-#define SEQCOUNT_LOCKTYPE(locktype, locktype_t, preemptible, lockmember)	\
+#define SEQCOUNT_LOCKTYPE(locktype, locktype_t, preemptible, lockmember, lockbase, lock_acquire) \
 typedef struct seqcount_##locktype {					\
 	seqcount_t		seqcount;				\
 	__SEQ_LOCK(locktype_t	*lock);					\
@@ -185,7 +204,23 @@ __seqcount_##locktype##_ptr(seqcount_##locktype##_t *s)			\
 static __always_inline unsigned						\
 __seqcount_##locktype##_sequence(const seqcount_##locktype##_t *s)	\
 {									\
-	return READ_ONCE(s->seqcount.sequence);				\
+	unsigned seq = READ_ONCE(s->seqcount.sequence);			\
+									\
+	if (!IS_ENABLED(CONFIG_PREEMPT_RT))				\
+		return seq;						\
+									\
+	if (preemptible && unlikely(seq & 1)) {				\
+		__SEQ_LOCK(lock_acquire);				\
+		__SEQ_LOCK(lockbase##_unlock(s->lock));			\
+									\
+		/*							\
+		 * Re-read the sequence counter since the (possibly	\
+		 * preempted) writer made progress.			\
+		 */							\
+		seq = READ_ONCE(s->seqcount.sequence);			\
+	}								\
+									\
+	return seq;							\
 }									\
 									\
 static __always_inline bool						\
@@ -224,11 +259,13 @@ static inline void __seqcount_t_assert(const seqcount_t *s)
 	lockdep_assert_preemption_disabled();
 }
 
-SEQCOUNT_LOCKTYPE(raw_spinlock,	raw_spinlock_t,		false,	s->lock)
-SEQCOUNT_LOCKTYPE(spinlock,	spinlock_t,		false,	s->lock)
-SEQCOUNT_LOCKTYPE(rwlock,	rwlock_t,		false,	s->lock)
-SEQCOUNT_LOCKTYPE(mutex,	struct mutex,		true,	s->lock)
-SEQCOUNT_LOCKTYPE(ww_mutex,	struct ww_mutex,	true,	&s->lock->base)
+#define __SEQ_RT	IS_ENABLED(CONFIG_PREEMPT_RT)
+
+SEQCOUNT_LOCKTYPE(raw_spinlock, raw_spinlock_t,  false,    s->lock,        raw_spin, raw_spin_lock(s->lock))
+SEQCOUNT_LOCKTYPE(spinlock,     spinlock_t,      __SEQ_RT, s->lock,        spin,     spin_lock(s->lock))
+SEQCOUNT_LOCKTYPE(rwlock,       rwlock_t,        __SEQ_RT, s->lock,        read,     read_lock(s->lock))
+SEQCOUNT_LOCKTYPE(mutex,        struct mutex,    true,     s->lock,        mutex,    mutex_lock(s->lock))
+SEQCOUNT_LOCKTYPE(ww_mutex,     struct ww_mutex, true,     &s->lock->base, ww_mutex, ww_mutex_lock(s->lock, NULL))
 
 /**
  * SEQCNT_LOCKTYPE_ZERO - static initializer for seqcount_LOCKTYPE_t
@@ -408,13 +445,22 @@ static inline int read_seqcount_t_retry(const seqcount_t *s, unsigned start)
 	return __read_seqcount_t_retry(s, start);
 }
 
+/*
+ * Automatically disable preemption for seqcount_LOCKTYPE_t writers, if the
+ * associated lock does not implicitly disable preemption.
+ *
+ * Don't do it for PREEMPT_RT. Check __SEQ_LOCK().
+ */
+#define __seq_enforce_preemption_protection(s)				\
+	(!IS_ENABLED(CONFIG_PREEMPT_RT) && __seqcount_lock_preemptible(s))
+
 /**
  * raw_write_seqcount_begin() - start a seqcount_t write section w/o lockdep
  * @s: Pointer to seqcount_t or any of the seqcount_LOCKTYPE_t variants
  */
 #define raw_write_seqcount_begin(s)					\
 do {									\
-	if (__seqcount_lock_preemptible(s))				\
+	if (__seq_enforce_preemption_protection(s))			\
 		preempt_disable();					\
 									\
 	raw_write_seqcount_t_begin(__seqcount_ptr(s));			\
@@ -435,7 +481,7 @@ static inline void raw_write_seqcount_t_begin(seqcount_t *s)
 do {									\
 	raw_write_seqcount_t_end(__seqcount_ptr(s));			\
 									\
-	if (__seqcount_lock_preemptible(s))				\
+	if (__seq_enforce_preemption_protection(s))			\
 		preempt_enable();					\
 } while (0)
 
@@ -458,7 +504,7 @@ static inline void raw_write_seqcount_t_end(seqcount_t *s)
 do {									\
 	__seqcount_assert_lock_held(s);					\
 									\
-	if (__seqcount_lock_preemptible(s))				\
+	if (__seq_enforce_preemption_protection(s))			\
 		preempt_disable();					\
 									\
 	write_seqcount_t_begin_nested(__seqcount_ptr(s), subclass);	\
@@ -485,7 +531,7 @@ static inline void write_seqcount_t_begin_nested(seqcount_t *s, int subclass)
 do {									\
 	__seqcount_assert_lock_held(s);					\
 									\
-	if (__seqcount_lock_preemptible(s))				\
+	if (__seq_enforce_preemption_protection(s))			\
 		preempt_disable();					\
 									\
 	write_seqcount_t_begin(__seqcount_ptr(s));			\
@@ -506,7 +552,7 @@ static inline void write_seqcount_t_begin(seqcount_t *s)
 do {									\
 	write_seqcount_t_end(__seqcount_ptr(s));			\
 									\
-	if (__seqcount_lock_preemptible(s))				\
+	if (__seq_enforce_preemption_protection(s))			\
 		preempt_enable();					\
 } while (0)
 
-- 
2.28.0


^ permalink raw reply	[relevance 72%]

* [PATCH v1 5/5] seqlock: PREEMPT_RT: Do not starve seqlock_t writers
  2020-08-28  1:07 95% ` [PATCH v1 0/5] seqlock: Introduce PREEMPT_RT support Ahmed S. Darwish
                     ` (3 preceding siblings ...)
  2020-08-28  1:07 72%   ` [PATCH v1 4/5] seqlock: seqcount_LOCKTYPE_t: Introduce PREEMPT_RT support Ahmed S. Darwish
@ 2020-08-28  1:07 84%   ` Ahmed S. Darwish
    5 siblings, 0 replies; 200+ results
From: Ahmed S. Darwish @ 2020-08-28  1:07 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Will Deacon
  Cc: Thomas Gleixner, Sebastian A. Siewior, Paul E. McKenney,
	Steven Rostedt, LKML, Ahmed S. Darwish

On PREEMPT_RT, seqlock_t is transformed to a sleeping lock that does
not disable preemption. A seqlock_t reader can thus preempt the write
side section and spin for the entire scheduler tick. If that reader
belongs to a real-time scheduling class, it can spin forever and the
kernel will livelock.

To break such a possible livelock on PREEMPT_RT, implement seqlock_t in
terms of "seqcount_spinlock_t" instead of plain "seqcount_t".

Besides the pure annotational value, this will leverage the already
existing seqcount_LOCKTYPE_t anti-livelock mechanisms -- without adding
any extra code.
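
For reference, the classic seqlock_t usage pattern this targets (a
minimal sketch; data_lock, data_a and data_b are made up):

    static seqlock_t data_lock = __SEQLOCK_UNLOCKED(data_lock);
    static u64 data_a, data_b;

    static void writer(u64 a, u64 b)
    {
	    write_seqlock(&data_lock);	/* spin_lock + seqcount begin */
	    data_a = a;
	    data_b = b;
	    write_sequnlock(&data_lock);
    }

    static u64 reader(void)
    {
	    unsigned seq;
	    u64 a, b;

	    do {
		    seq = read_seqbegin(&data_lock);
		    a = data_a;
		    b = data_b;
	    } while (read_seqretry(&data_lock, seq));

	    return a + b;
    }

With the embedded seqcount_spinlock_t, a PREEMPT_RT reader that sees an
odd sequence count acquires and releases the underlying spinlock
instead of spinning, which lets a preempted writer run to completion.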

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
---
 include/linux/seqlock.h | 32 +++++++++++++++++++++-----------
 1 file changed, 21 insertions(+), 11 deletions(-)

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 8d4bf12272ba..151e7a18fd7b 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -761,13 +761,17 @@ static inline void raw_write_seqcount_t_latch(seqcount_t *s)
  *    - Documentation/locking/seqlock.rst
  */
 typedef struct {
-	struct seqcount seqcount;
+	/*
+	 * Make sure that readers don't starve writers on PREEMPT_RT: use
+	 * seqcount_spinlock_t instead of seqcount_t. Check __SEQ_LOCK().
+	 */
+	seqcount_spinlock_t seqcount;
 	spinlock_t lock;
 } seqlock_t;
 
 #define __SEQLOCK_UNLOCKED(lockname)					\
 	{								\
-		.seqcount = SEQCNT_ZERO(lockname),			\
+		.seqcount = SEQCNT_SPINLOCK_ZERO(lockname, &(lockname).lock), \
 		.lock =	__SPIN_LOCK_UNLOCKED(lockname)			\
 	}
 
@@ -777,8 +781,8 @@ typedef struct {
  */
 #define seqlock_init(sl)						\
 	do {								\
-		seqcount_init(&(sl)->seqcount);				\
 		spin_lock_init(&(sl)->lock);				\
+		seqcount_spinlock_init(&(sl)->seqcount, &(sl)->lock);	\
 	} while (0)
 
 /**
@@ -825,6 +829,12 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
 	return read_seqcount_retry(&sl->seqcount, start);
 }
 
+/*
+ * For all seqlock_t write side functions, use write_seqcount_*t*_begin()
+ * instead of the generic write_seqcount_begin(). This way, no redundant
+ * lockdep_assert_held() checks are added.
+ */
+
 /**
  * write_seqlock() - start a seqlock_t write side critical section
  * @sl: Pointer to seqlock_t
@@ -841,7 +851,7 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
 static inline void write_seqlock(seqlock_t *sl)
 {
 	spin_lock(&sl->lock);
-	write_seqcount_t_begin(&sl->seqcount);
+	write_seqcount_t_begin(&sl->seqcount.seqcount);
 }
 
 /**
@@ -853,7 +863,7 @@ static inline void write_seqlock(seqlock_t *sl)
  */
 static inline void write_sequnlock(seqlock_t *sl)
 {
-	write_seqcount_t_end(&sl->seqcount);
+	write_seqcount_t_end(&sl->seqcount.seqcount);
 	spin_unlock(&sl->lock);
 }
 
@@ -867,7 +877,7 @@ static inline void write_sequnlock(seqlock_t *sl)
 static inline void write_seqlock_bh(seqlock_t *sl)
 {
 	spin_lock_bh(&sl->lock);
-	write_seqcount_t_begin(&sl->seqcount);
+	write_seqcount_t_begin(&sl->seqcount.seqcount);
 }
 
 /**
@@ -880,7 +890,7 @@ static inline void write_seqlock_bh(seqlock_t *sl)
  */
 static inline void write_sequnlock_bh(seqlock_t *sl)
 {
-	write_seqcount_t_end(&sl->seqcount);
+	write_seqcount_t_end(&sl->seqcount.seqcount);
 	spin_unlock_bh(&sl->lock);
 }
 
@@ -894,7 +904,7 @@ static inline void write_sequnlock_bh(seqlock_t *sl)
 static inline void write_seqlock_irq(seqlock_t *sl)
 {
 	spin_lock_irq(&sl->lock);
-	write_seqcount_t_begin(&sl->seqcount);
+	write_seqcount_t_begin(&sl->seqcount.seqcount);
 }
 
 /**
@@ -906,7 +916,7 @@ static inline void write_seqlock_irq(seqlock_t *sl)
  */
 static inline void write_sequnlock_irq(seqlock_t *sl)
 {
-	write_seqcount_t_end(&sl->seqcount);
+	write_seqcount_t_end(&sl->seqcount.seqcount);
 	spin_unlock_irq(&sl->lock);
 }
 
@@ -915,7 +925,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl)
 	unsigned long flags;
 
 	spin_lock_irqsave(&sl->lock, flags);
-	write_seqcount_t_begin(&sl->seqcount);
+	write_seqcount_t_begin(&sl->seqcount.seqcount);
 	return flags;
 }
 
@@ -944,7 +954,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl)
 static inline void
 write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags)
 {
-	write_seqcount_t_end(&sl->seqcount);
+	write_seqcount_t_end(&sl->seqcount.seqcount);
 	spin_unlock_irqrestore(&sl->lock, flags);
 }
 
-- 
2.28.0


^ permalink raw reply	[relevance 84%]

* Re: [PATCH v1 1/5] seqlock: seqcount_LOCKTYPE_t: Standardize naming convention
  @ 2020-08-28  8:24 99%       ` Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: Ahmed S. Darwish @ 2020-08-28  8:24 UTC (permalink / raw)
  To: peterz
  Cc: Ingo Molnar, Will Deacon, Thomas Gleixner, Sebastian A. Siewior,
	Paul E. McKenney, Steven Rostedt, LKML

Hi :)

On Fri, Aug 28, 2020 at 10:18:44AM +0200, peterz@infradead.org wrote:
> On Fri, Aug 28, 2020 at 03:07:06AM +0200, Ahmed S. Darwish wrote:
> > At seqlock.h, sequence counters with associated locks are either called
> > seqcount_LOCKNAME_t, seqcount_LOCKTYPE_t, or seqcount_locktype_t.
> >
> > Standardize on "seqcount_LOCKTYPE_t" for all instances in comments,
> > kernel-doc, and SEQCOUNT_LOCKTYPE() generative macro parameters.
>
> > +#define SEQCOUNT_LOCKTYPE(locktype, locktype_t, preemptible, lockmember)	\
> > +typedef struct seqcount_##locktype {					\
> > +	__SEQ_LOCK(locktype_t	*lock);					\
> > +} seqcount_##locktype##_t;						\
>
> Hurmph, so my thinking was that the above 'locktype' is not actually a
> type and therefore a misnomer.
>
> But I see your point about it being a bit of a mess.
>
> Would:
>
>  s/LOCKTYPE/LOCKNAME/g
>  s/seqcount_locktype_t/seqcount_LOCKNAME_t/g
>
> help? Then we're consistently at: seqcount_LOCKNAME_t, which is a type.
>

Sounds good, will do.

Thanks,

--
Ahmed S. Darwish
Linutronix GmbH

^ permalink raw reply	[relevance 99%]

* Re: [PATCH v1 3/5] seqlock: seqcount_t: Implement all read APIs as statement expressions
  @ 2020-08-28  8:37 99%       ` Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: Ahmed S. Darwish @ 2020-08-28  8:37 UTC (permalink / raw)
  To: peterz
  Cc: Ingo Molnar, Will Deacon, Thomas Gleixner, Sebastian A. Siewior,
	Paul E. McKenney, Steven Rostedt, LKML

On Fri, Aug 28, 2020 at 10:30:22AM +0200, peterz@infradead.org wrote:
> On Fri, Aug 28, 2020 at 03:07:08AM +0200, Ahmed S. Darwish wrote:
> >  #define __read_seqcount_begin(s)					\
> > +({									\
> > +	unsigned seq;							\
> > +									\
> > +	do {								\
> > +		seq = __seqcount_sequence(s);				\
> > +		if (likely(! (seq & 1)))				\
> > +			break;						\
> > +		cpu_relax();						\
> > +	} while (true);							\
> > +									\
> > +	kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);			\
> > +	seq;								\
> > +})
>
> Since we're there anyway, does it make sense to (re)write this like:
>
> 	while ((seq = __seqcount_sequence(s)) & 1)
> 		cpu_relax();
>
> ?
>

Yeah, much better of course; will do.

Thanks,

--
Ahmed S. Darwish
Linutronix GmbH

^ permalink raw reply	[relevance 99%]

* Re: [PATCH v1 2/5] seqlock: Use unique prefix for seqcount_t property accessors
  @ 2020-08-28  8:59 99%       ` Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: Ahmed S. Darwish @ 2020-08-28  8:59 UTC (permalink / raw)
  To: peterz
  Cc: Ingo Molnar, Will Deacon, Thomas Gleixner, Sebastian A. Siewior,
	Paul E. McKenney, Steven Rostedt, LKML

On Fri, Aug 28, 2020 at 10:27:54AM +0200, peterz@infradead.org wrote:
> On Fri, Aug 28, 2020 at 03:07:07AM +0200, Ahmed S. Darwish wrote:
> > Differentiate the first group by using "__seqcount_t_" as prefix. This
> > also conforms with the rest of seqlock.h naming conventions.
>
> >  #define __seqprop_case(s, locktype, prop)				\
> >  	seqcount_##locktype##_t: __seqcount_##locktype##_##prop((void *)(s))
> >
> >  #define __seqprop(s, prop) _Generic(*(s),				\
> > -	seqcount_t:		__seqcount_##prop((void *)(s)),		\
> > +	seqcount_t:		__seqcount_t_##prop((void *)(s)),	\
> >  	__seqprop_case((s),	raw_spinlock,	prop),			\
> >  	__seqprop_case((s),	spinlock,	prop),			\
> >  	__seqprop_case((s),	rwlock,		prop),			\
>
> If instead you do:
>
> #define __seqprop_case(s, _lockname, prop) \
> 	seqcount##_lockname##_t: __seqcount##_lockname##_##prop((void *)(s))
>
> You can have:
>
> 	__seqprop_case((s),	,		prop),
> 	__seqprop_case((s),	_raw_spinlock,	prop),
> 	__seqprop_case((s),	_spinlock,	prop),
> 	__seqprop_case((s),	_rwlock,	prop),
> 	__seqprop_case((s),	_mutex,		prop),
> 	__seqprop_case((s),	_ww_mutex,	prop),
>
> And it's all good again.
>
> Although arguably we should do something like s/__seqcount/__seqprop/
> over this lot.
>

ACK.

^ permalink raw reply	[relevance 99%]

* Re: [PATCH v1 4/5] seqlock: seqcount_LOCKTYPE_t: Introduce PREEMPT_RT support
  @ 2020-08-28  9:31 98%       ` Ahmed S. Darwish
  2020-08-28 14:36 99%         ` Ahmed S. Darwish
  0 siblings, 1 reply; 200+ results
From: Ahmed S. Darwish @ 2020-08-28  9:31 UTC (permalink / raw)
  To: peterz
  Cc: Ingo Molnar, Will Deacon, Thomas Gleixner, Sebastian A. Siewior,
	Paul E. McKenney, Steven Rostedt, LKML

On Fri, Aug 28, 2020 at 10:59:38AM +0200, peterz@infradead.org wrote:
> On Fri, Aug 28, 2020 at 03:07:09AM +0200, Ahmed S. Darwish wrote:
> > +#define __SEQ_RT	IS_ENABLED(CONFIG_PREEMPT_RT)
> > +
> > +SEQCOUNT_LOCKTYPE(raw_spinlock, raw_spinlock_t,  false,    s->lock,        raw_spin, raw_spin_lock(s->lock))
> > +SEQCOUNT_LOCKTYPE(spinlock,     spinlock_t,      __SEQ_RT, s->lock,        spin,     spin_lock(s->lock))
> > +SEQCOUNT_LOCKTYPE(rwlock,       rwlock_t,        __SEQ_RT, s->lock,        read,     read_lock(s->lock))
> > +SEQCOUNT_LOCKTYPE(mutex,        struct mutex,    true,     s->lock,        mutex,    mutex_lock(s->lock))
> > +SEQCOUNT_LOCKTYPE(ww_mutex,     struct ww_mutex, true,     &s->lock->base, ww_mutex, ww_mutex_lock(s->lock, NULL))
>
> Ooh, reading is hard, but ^^^^ you already have that.
>

Haha, I was just answering the other mail :)

> > +/*
> > + * Automatically disable preemption for seqcount_LOCKTYPE_t writers, if the
> > + * associated lock does not implicitly disable preemption.
> > + *
> > + * Don't do it for PREEMPT_RT. Check __SEQ_LOCK().
> > + */
> > +#define __seq_enforce_preemption_protection(s)				\
> > +	(!IS_ENABLED(CONFIG_PREEMPT_RT) && __seqcount_lock_preemptible(s))
>
> Then what is this doing ?!? I'm confused now.

There were a number of call sites (at DRM mainly) that protected their
seqcount_t write sections with mutex and ww_mutex. So, they manually
disabled preemption before entering the seqcount_t write sections.

Unfortunately these write sections were big, and spinlocks were also
acquired inside them.  This was all very bad for RT...

So the idea was to relieve call sites from the responsibility of
disabling/enabling preemption upon entering a seqcount_LOCKNAME_t write
section, and let seqlock.h automatically handle it if LOCKNAME_t is
preemptible (the macro's condition, second part).

Having seqlock.h control the preempt disable/enable allows us to never
disable preemption for RT, which is exactly what is needed since we
already handle any possible livelock through the mechanisms described at
__SEQ_LOCK (the macro's condition test, first part).
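
As a hypothetical call site (obj->lock and obj->seq are made up for
illustration), the old pattern was:

	mutex_lock(&obj->lock);
	preempt_disable();			/* manually, at each site */
	write_seqcount_begin(&obj->seq);	/* plain seqcount_t */
	/* ... large write section ... */
	write_seqcount_end(&obj->seq);
	preempt_enable();
	mutex_unlock(&obj->lock);

and with obj->seq converted to seqcount_mutex_t it becomes:

	mutex_lock(&obj->lock);
	write_seqcount_begin(&obj->seq);	/* preemption handled here */
	/* ... large write section ... */
	write_seqcount_end(&obj->seq);
	mutex_unlock(&obj->lock);

with seqlock.h disabling preemption across the write section only for
!PREEMPT_RT.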

Thanks,

--
Ahmed S. Darwish
Linutronix GmbH

^ permalink raw reply	[relevance 98%]

* Re: [PATCH v1 4/5] seqlock: seqcount_LOCKTYPE_t: Introduce PREEMPT_RT support
  2020-08-28  9:31 98%       ` Ahmed S. Darwish
@ 2020-08-28 14:36 99%         ` Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: Ahmed S. Darwish @ 2020-08-28 14:36 UTC (permalink / raw)
  To: peterz
  Cc: Ingo Molnar, Will Deacon, Thomas Gleixner, Sebastian A. Siewior,
	Paul E. McKenney, Steven Rostedt, LKML

On Fri, Aug 28, 2020 at 11:31:32AM +0200, Ahmed S. Darwish wrote:
> On Fri, Aug 28, 2020 at 10:59:38AM +0200, peterz@infradead.org wrote:
> > On Fri, Aug 28, 2020 at 03:07:09AM +0200, Ahmed S. Darwish wrote:
> > > +#define __SEQ_RT	IS_ENABLED(CONFIG_PREEMPT_RT)
> > > +
> > > +SEQCOUNT_LOCKTYPE(raw_spinlock, raw_spinlock_t,  false,    s->lock,        raw_spin, raw_spin_lock(s->lock))
> > > +SEQCOUNT_LOCKTYPE(spinlock,     spinlock_t,      __SEQ_RT, s->lock,        spin,     spin_lock(s->lock))
> > > +SEQCOUNT_LOCKTYPE(rwlock,       rwlock_t,        __SEQ_RT, s->lock,        read,     read_lock(s->lock))
> > > +SEQCOUNT_LOCKTYPE(mutex,        struct mutex,    true,     s->lock,        mutex,    mutex_lock(s->lock))
> > > +SEQCOUNT_LOCKTYPE(ww_mutex,     struct ww_mutex, true,     &s->lock->base, ww_mutex, ww_mutex_lock(s->lock, NULL))
> >
> > Ooh, reading is hard, but ^^^^ you already have that.
> >
>
> Haha, I was just answering the other mail :)
>
> > > +/*
> > > + * Automatically disable preemption for seqcount_LOCKTYPE_t writers, if the
> > > + * associated lock does not implicitly disable preemption.
> > > + *
> > > + * Don't do it for PREEMPT_RT. Check __SEQ_LOCK().
> > > + */
> > > +#define __seq_enforce_preemption_protection(s)				\
> > > +	(!IS_ENABLED(CONFIG_PREEMPT_RT) && __seqcount_lock_preemptible(s))
> >
> > Then what is this doing ?!? I'm confused now.
>
> There were a number of call sites (at DRM mainly) that protected their
> seqcount_t write sections with mutex and ww_mutex. So, they manually
> disabled preemption before entering the seqcount_t write sections.
>
> Unfortunately these write sections were big, and spinlocks were also
> acquired inside them.  This was all very bad for RT...
>
> So the idea was to relieve call sites from the responsibility of
> disabling/enabling preemption upon entering a seqcount_LOCKNAME_t write
> section, and let seqlock.h automatically handle it if LOCKNAME_t is
> preemptible (the macro's condition, second part).
>
> Having seqlock.h control the preempt disable/enable allows us to never
> disable preemption for RT, which is exactly what is needed since we
> already handle any possible livelock through the mechanisms described at
> __SEQ_LOCK (the macro's condition test, first part).
>

So to summarize, __seqcount_lock_preemptible() has two use cases:

    1. For !PREEMPT_RT, to automatically disable preemption on the write
       side when the seqcount associated lock is preemptible.

    2. For PREEMPT_RT, to lock/unlock the seqcount associated lock on
       the read side if it is RT-preemptible (sleeping lock).

Thanks,

--
Ahmed S. Darwish
Linutronix GmbH

^ permalink raw reply	[relevance 99%]

* Re: [PATCH v1 0/5] seqlock: Introduce PREEMPT_RT support
  @ 2020-09-04  7:30 99%     ` Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: Ahmed S. Darwish @ 2020-09-04  7:30 UTC (permalink / raw)
  To: peterz
  Cc: Ingo Molnar, Will Deacon, Thomas Gleixner, Sebastian A. Siewior,
	Paul E. McKenney, Steven Rostedt, LKML

On Fri, Sep 04, 2020 at 08:52:45AM +0200, peterz@infradead.org wrote:
>
> FWIW, can you please start a new thread with every posting? I lost this
> one because it got stuck onto some ancient thread.
>

Yeah, I used an old send-mail script, sorry.

^ permalink raw reply	[relevance 99%]

* [PATCH v2 0/5] seqlock: Introduce PREEMPT_RT support
@ 2020-09-04 15:32 91% Ahmed S. Darwish
  2020-09-04 15:32 58% ` [PATCH v2 1/5] seqlock: seqcount_LOCKNAME_t: Standardize naming convention Ahmed S. Darwish
                   ` (4 more replies)
  0 siblings, 5 replies; 200+ results
From: Ahmed S. Darwish @ 2020-09-04 15:32 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Will Deacon
  Cc: Thomas Gleixner, Sebastian A. Siewior, Paul E. McKenney,
	Steven Rostedt, LKML, Ahmed S. Darwish

Hi,

Changelog-v2
============

  - Standardize on seqcount_LOCKNAME_t as the canonical reference for
    sequence counters with associated locks, instead of v1
    seqcount_LOCKTYPE_t.

  - Use unique prefix "seqprop_*" for all seqcount_t/seqcount_LOCKNAME_t
    property accessors.

  - Touch-up the lock-unlock rationale for more clarity. Enforce writer
    non-preemptibility using "__seq_enforce_writer_non_preemptibility()".

Cover letter (v1)
=================

https://lkml.kernel.org/r/20200828010710.5407-1-a.darwish@linutronix.de

Preemption must be disabled before entering a sequence counter write
side critical section.  Otherwise the read side section can preempt the
write side section and spin for the entire scheduler tick.  If that
reader belongs to a real-time scheduling class, it can spin forever and
the kernel will livelock.

Disabling preemption cannot be done for PREEMPT_RT though: it can lead
to higher latencies, and the write side sections will not be able to
acquire locks which become sleeping locks (e.g. spinlock_t).

To remain preemptible, while avoiding a possible livelock caused by the
reader preempting the writer, use a different technique: let the reader
detect if a seqcount_LOCKNAME_t writer is in progress. If that's the
case, acquire then release the associated LOCKNAME writer serialization
lock. This will allow any possibly-preempted writer to make progress
until the end of its writer serialization lock critical section.

Implement this lock-unlock technique for all seqcount_LOCKNAME_t with
an associated (PREEMPT_RT) sleeping lock, and for seqlock_t.
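
As a minimal sketch of the usage pattern affected (all names below are
hypothetical):

    static DEFINE_SPINLOCK(data_lock);
    static seqcount_spinlock_t data_seq =
	    SEQCNT_SPINLOCK_ZERO(data_seq, &data_lock);
    static int data;

    static void update_data(int v)
    {
	    spin_lock(&data_lock);		/* writer serialization */
	    write_seqcount_begin(&data_seq);
	    data = v;
	    write_seqcount_end(&data_seq);
	    spin_unlock(&data_lock);
    }

    static int read_data(void)
    {
	    unsigned int start;
	    int v;

	    do {
		    start = read_seqcount_begin(&data_seq);
		    v = data;
	    } while (read_seqcount_retry(&data_seq, start));

	    return v;
    }

On PREEMPT_RT, spin_lock() does not disable preemption anymore, so
read_seqcount_begin() above is where the reader applies the lock-unlock
technique when it observes an in-progress writer.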

8<--------------

Ahmed S. Darwish (5):
  seqlock: seqcount_LOCKNAME_t: Standardize naming convention
  seqlock: Use unique prefix for seqcount_t property accessors
  seqlock: seqcount_t: Implement all read APIs as statement expressions
  seqlock: seqcount_LOCKNAME_t: Introduce PREEMPT_RT support
  seqlock: PREEMPT_RT: Do not starve seqlock_t writers

 include/linux/seqlock.h | 281 ++++++++++++++++++++++++----------------
 1 file changed, 167 insertions(+), 114 deletions(-)

base-commit: f75aef392f869018f78cfedf3c320a6b3fcfda6b
--
2.28.0

^ permalink raw reply	[relevance 91%]

* [PATCH v2 1/5] seqlock: seqcount_LOCKNAME_t: Standardize naming convention
  2020-09-04 15:32 91% [PATCH v2 0/5] seqlock: Introduce PREEMPT_RT support Ahmed S. Darwish
@ 2020-09-04 15:32 58% ` Ahmed S. Darwish
  2020-09-10 15:08 49%   ` [tip: locking/core] " tip-bot2 for Ahmed S. Darwish
  2020-09-04 15:32 88% ` [PATCH v2 2/5] seqlock: Use unique prefix for seqcount_t property accessors Ahmed S. Darwish
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 200+ results
From: Ahmed S. Darwish @ 2020-09-04 15:32 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Will Deacon
  Cc: Thomas Gleixner, Sebastian A. Siewior, Paul E. McKenney,
	Steven Rostedt, LKML, Ahmed S. Darwish

At seqlock.h, sequence counters with associated locks are either called
seqcount_LOCKNAME_t, seqcount_LOCKTYPE_t, or seqcount_locktype_t.

Standardize on seqcount_LOCKNAME_t for all instances in comments,
kernel-doc, and SEQCOUNT_LOCKNAME() generative macro parameters.

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
---
 include/linux/seqlock.h | 83 +++++++++++++++++++++--------------------
 1 file changed, 42 insertions(+), 41 deletions(-)

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 962d9768945f..4f219df659b1 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -53,7 +53,7 @@
  *
  * If the write serialization mechanism is one of the common kernel
  * locking primitives, use a sequence counter with associated lock
- * (seqcount_LOCKTYPE_t) instead.
+ * (seqcount_LOCKNAME_t) instead.
  *
  * If it's desired to automatically handle the sequence counter writer
  * serialization and non-preemptibility requirements, use a sequential
@@ -117,7 +117,7 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
 #define SEQCNT_ZERO(name) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(name) }
 
 /*
- * Sequence counters with associated locks (seqcount_LOCKTYPE_t)
+ * Sequence counters with associated locks (seqcount_LOCKNAME_t)
  *
  * A sequence counter which associates the lock used for writer
  * serialization at initialization time. This enables lockdep to validate
@@ -138,30 +138,32 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
 #endif
 
 /**
- * typedef seqcount_LOCKNAME_t - sequence counter with LOCKTYPR associated
+ * typedef seqcount_LOCKNAME_t - sequence counter with LOCKNAME associated
  * @seqcount:	The real sequence counter
- * @lock:	Pointer to the associated spinlock
+ * @lock:	Pointer to the associated lock
  *
- * A plain sequence counter with external writer synchronization by a
- * spinlock. The spinlock is associated to the sequence count in the
+ * A plain sequence counter with external writer synchronization by
+ * LOCKNAME @lock. The lock is associated to the sequence counter in the
  * static initializer or init function. This enables lockdep to validate
  * that the write side critical section is properly serialized.
+ *
+ * LOCKNAME:	raw_spinlock, spinlock, rwlock, mutex, or ww_mutex.
  */
 
 /**
  * seqcount_LOCKNAME_init() - runtime initializer for seqcount_LOCKNAME_t
  * @s:		Pointer to the seqcount_LOCKNAME_t instance
- * @lock:	Pointer to the associated LOCKTYPE
+ * @lock:	Pointer to the associated lock
  */
 
 /*
- * SEQCOUNT_LOCKTYPE() - Instantiate seqcount_LOCKNAME_t and helpers
- * @locktype:		actual typename
- * @lockname:		name
- * @preemptible:	preemptibility of above locktype
+ * SEQCOUNT_LOCKNAME() - Instantiate seqcount_LOCKNAME_t and helpers
+ * @lockname:		"LOCKNAME" part of seqcount_LOCKNAME_t
+ * @locktype:		LOCKNAME canonical C data type
+ * @preemptible:	preemptibility of above lockname
  * @lockmember:		argument for lockdep_assert_held()
  */
-#define SEQCOUNT_LOCKTYPE(locktype, lockname, preemptible, lockmember)	\
+#define SEQCOUNT_LOCKNAME(lockname, locktype, preemptible, lockmember)	\
 typedef struct seqcount_##lockname {					\
 	seqcount_t		seqcount;				\
 	__SEQ_LOCK(locktype	*lock);					\
@@ -211,29 +213,28 @@ static inline void __seqcount_assert(seqcount_t *s)
 	lockdep_assert_preemption_disabled();
 }
 
-SEQCOUNT_LOCKTYPE(raw_spinlock_t,	raw_spinlock,	false,	s->lock)
-SEQCOUNT_LOCKTYPE(spinlock_t,		spinlock,	false,	s->lock)
-SEQCOUNT_LOCKTYPE(rwlock_t,		rwlock,		false,	s->lock)
-SEQCOUNT_LOCKTYPE(struct mutex,		mutex,		true,	s->lock)
-SEQCOUNT_LOCKTYPE(struct ww_mutex,	ww_mutex,	true,	&s->lock->base)
+SEQCOUNT_LOCKNAME(raw_spinlock,	raw_spinlock_t,		false,	s->lock)
+SEQCOUNT_LOCKNAME(spinlock,	spinlock_t,		false,	s->lock)
+SEQCOUNT_LOCKNAME(rwlock,	rwlock_t,		false,	s->lock)
+SEQCOUNT_LOCKNAME(mutex,	struct mutex,		true,	s->lock)
+SEQCOUNT_LOCKNAME(ww_mutex,	struct ww_mutex,	true,	&s->lock->base)
 
 /**
  * SEQCNT_LOCKNAME_ZERO - static initializer for seqcount_LOCKNAME_t
  * @name:	Name of the seqcount_LOCKNAME_t instance
- * @lock:	Pointer to the associated LOCKTYPE
+ * @lock:	Pointer to the associated LOCKNAME
  */
 
-#define SEQCOUNT_LOCKTYPE_ZERO(seq_name, assoc_lock) {			\
+#define SEQCOUNT_LOCKNAME_ZERO(seq_name, assoc_lock) {			\
 	.seqcount		= SEQCNT_ZERO(seq_name.seqcount),	\
 	__SEQ_LOCK(.lock	= (assoc_lock))				\
 }
 
-#define SEQCNT_SPINLOCK_ZERO(name, lock)	SEQCOUNT_LOCKTYPE_ZERO(name, lock)
-#define SEQCNT_RAW_SPINLOCK_ZERO(name, lock)	SEQCOUNT_LOCKTYPE_ZERO(name, lock)
-#define SEQCNT_RWLOCK_ZERO(name, lock)		SEQCOUNT_LOCKTYPE_ZERO(name, lock)
-#define SEQCNT_MUTEX_ZERO(name, lock)		SEQCOUNT_LOCKTYPE_ZERO(name, lock)
-#define SEQCNT_WW_MUTEX_ZERO(name, lock) 	SEQCOUNT_LOCKTYPE_ZERO(name, lock)
-
+#define SEQCNT_SPINLOCK_ZERO(name, lock)	SEQCOUNT_LOCKNAME_ZERO(name, lock)
+#define SEQCNT_RAW_SPINLOCK_ZERO(name, lock)	SEQCOUNT_LOCKNAME_ZERO(name, lock)
+#define SEQCNT_RWLOCK_ZERO(name, lock)		SEQCOUNT_LOCKNAME_ZERO(name, lock)
+#define SEQCNT_MUTEX_ZERO(name, lock)		SEQCOUNT_LOCKNAME_ZERO(name, lock)
+#define SEQCNT_WW_MUTEX_ZERO(name, lock) 	SEQCOUNT_LOCKNAME_ZERO(name, lock)
 
 #define __seqprop_case(s, lockname, prop)				\
 	seqcount_##lockname##_t: __seqcount_##lockname##_##prop((void *)(s))
@@ -252,7 +253,7 @@ SEQCOUNT_LOCKTYPE(struct ww_mutex,	ww_mutex,	true,	&s->lock->base)
 
 /**
  * __read_seqcount_begin() - begin a seqcount_t read section w/o barrier
- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
  *
  * __read_seqcount_begin is like read_seqcount_begin, but has no smp_rmb()
  * barrier. Callers should ensure that smp_rmb() or equivalent ordering is
@@ -283,7 +284,7 @@ static inline unsigned __read_seqcount_t_begin(const seqcount_t *s)
 
 /**
  * raw_read_seqcount_begin() - begin a seqcount_t read section w/o lockdep
- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
  *
  * Return: count to be passed to read_seqcount_retry()
  */
@@ -299,7 +300,7 @@ static inline unsigned raw_read_seqcount_t_begin(const seqcount_t *s)
 
 /**
  * read_seqcount_begin() - begin a seqcount_t read critical section
- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
  *
  * Return: count to be passed to read_seqcount_retry()
  */
@@ -314,7 +315,7 @@ static inline unsigned read_seqcount_t_begin(const seqcount_t *s)
 
 /**
  * raw_read_seqcount() - read the raw seqcount_t counter value
- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
  *
  * raw_read_seqcount opens a read critical section of the given
  * seqcount_t, without any lockdep checking, and without checking or
@@ -337,7 +338,7 @@ static inline unsigned raw_read_seqcount_t(const seqcount_t *s)
 /**
  * raw_seqcount_begin() - begin a seqcount_t read critical section w/o
  *                        lockdep and w/o counter stabilization
- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
  *
  * raw_seqcount_begin opens a read critical section of the given
  * seqcount_t. Unlike read_seqcount_begin(), this function will not wait
@@ -365,7 +366,7 @@ static inline unsigned raw_seqcount_t_begin(const seqcount_t *s)
 
 /**
  * __read_seqcount_retry() - end a seqcount_t read section w/o barrier
- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
  * @start: count, from read_seqcount_begin()
  *
  * __read_seqcount_retry is like read_seqcount_retry, but has no smp_rmb()
@@ -389,7 +390,7 @@ static inline int __read_seqcount_t_retry(const seqcount_t *s, unsigned start)
 
 /**
  * read_seqcount_retry() - end a seqcount_t read critical section
- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
  * @start: count, from read_seqcount_begin()
  *
  * read_seqcount_retry closes the read critical section of given
@@ -409,7 +410,7 @@ static inline int read_seqcount_t_retry(const seqcount_t *s, unsigned start)
 
 /**
  * raw_write_seqcount_begin() - start a seqcount_t write section w/o lockdep
- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
  */
 #define raw_write_seqcount_begin(s)					\
 do {									\
@@ -428,7 +429,7 @@ static inline void raw_write_seqcount_t_begin(seqcount_t *s)
 
 /**
  * raw_write_seqcount_end() - end a seqcount_t write section w/o lockdep
- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
  */
 #define raw_write_seqcount_end(s)					\
 do {									\
@@ -448,7 +449,7 @@ static inline void raw_write_seqcount_t_end(seqcount_t *s)
 /**
  * write_seqcount_begin_nested() - start a seqcount_t write section with
  *                                 custom lockdep nesting level
- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
  * @subclass: lockdep nesting level
  *
  * See Documentation/locking/lockdep-design.rst
@@ -471,7 +472,7 @@ static inline void write_seqcount_t_begin_nested(seqcount_t *s, int subclass)
 
 /**
  * write_seqcount_begin() - start a seqcount_t write side critical section
- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
  *
  * write_seqcount_begin opens a write side critical section of the given
  * seqcount_t.
@@ -497,7 +498,7 @@ static inline void write_seqcount_t_begin(seqcount_t *s)
 
 /**
  * write_seqcount_end() - end a seqcount_t write side critical section
- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
  *
  * The write section must've been opened with write_seqcount_begin().
  */
@@ -517,7 +518,7 @@ static inline void write_seqcount_t_end(seqcount_t *s)
 
 /**
  * raw_write_seqcount_barrier() - do a seqcount_t write barrier
- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
  *
  * This can be used to provide an ordering guarantee instead of the usual
  * consistency guarantee. It is one wmb cheaper, because it can collapse
@@ -571,7 +572,7 @@ static inline void raw_write_seqcount_t_barrier(seqcount_t *s)
 /**
  * write_seqcount_invalidate() - invalidate in-progress seqcount_t read
  *                               side operations
- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
  *
  * After write_seqcount_invalidate, no seqcount_t read side operations
  * will complete successfully and see data older than this.
@@ -589,7 +590,7 @@ static inline void write_seqcount_t_invalidate(seqcount_t *s)
 
 /**
  * raw_read_seqcount_latch() - pick even/odd seqcount_t latch data copy
- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
  *
  * Use seqcount_t latching to switch between two storage places protected
  * by a sequence counter. Doing so allows having interruptible, preemptible,
@@ -614,7 +615,7 @@ static inline int raw_read_seqcount_t_latch(seqcount_t *s)
 
 /**
  * raw_write_seqcount_latch() - redirect readers to even/odd copy
- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
  *
  * The latch technique is a multiversion concurrency control method that allows
  * queries during non-atomic modifications. If you can guarantee queries never
-- 
2.28.0


^ permalink raw reply	[relevance 58%]

* [PATCH v2 2/5] seqlock: Use unique prefix for seqcount_t property accessors
  2020-09-04 15:32 91% [PATCH v2 0/5] seqlock: Introduce PREEMPT_RT support Ahmed S. Darwish
  2020-09-04 15:32 58% ` [PATCH v2 1/5] seqlock: seqcount_LOCKNAME_t: Standardize naming convention Ahmed S. Darwish
@ 2020-09-04 15:32 88% ` Ahmed S. Darwish
  2020-09-10 15:08 75%   ` [tip: locking/core] " tip-bot2 for Ahmed S. Darwish
  2020-09-04 15:32 80% ` [PATCH v2 3/5] seqlock: seqcount_t: Implement all read APIs as statement expressions Ahmed S. Darwish
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 200+ results
From: Ahmed S. Darwish @ 2020-09-04 15:32 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Will Deacon
  Cc: Thomas Gleixner, Sebastian A. Siewior, Paul E. McKenney,
	Steven Rostedt, LKML, Ahmed S. Darwish

At seqlock.h, the following set of functions:

    - __seqcount_ptr()
    - __seqcount_preemptible()
    - __seqcount_assert()

act as plain seqcount_t "property" accessors. Meanwhile, the following
group:

    - __seqcount_ptr()
    - __seqcount_lock_preemptible()
    - __seqcount_assert_lock_held()

act as the equivalent set, but in the generic form, taking either
seqcount_t or any of the seqcount_LOCKNAME_t variants.

This is quite confusing, especially for the first member, which is
named exactly the same in both groups.

Differentiate the first group by using "__seqprop" as prefix, and also
use that same prefix for all of seqcount_LOCKNAME_t property accessors.

While at it, constify the property accessors first parameter when
appropriate.

References: 55f3560df975 ("seqlock: Extend seqcount API with associated locks")
Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
---
 include/linux/seqlock.h | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 4f219df659b1..96198da7debc 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -157,7 +157,9 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
  */
 
 /*
- * SEQCOUNT_LOCKNAME() - Instantiate seqcount_LOCKNAME_t and helpers
+ * SEQCOUNT_LOCKNAME()           - Instantiate seqcount_LOCKNAME_t and helpers
+ * seqprop_seqcount_LOCKNAME_*() - Property accessors for seqcount_LOCKNAME_t
+ *
  * @lockname:		"LOCKNAME" part of seqcount_LOCKNAME_t
  * @locktype:		LOCKNAME canonical C data type
  * @preemptible:	preemptibility of above lockname
@@ -177,19 +179,19 @@ seqcount_##lockname##_init(seqcount_##lockname##_t *s, locktype *lock)	\
 }									\
 									\
 static __always_inline seqcount_t *					\
-__seqcount_##lockname##_ptr(seqcount_##lockname##_t *s)			\
+__seqprop_seqcount_##lockname##_ptr(seqcount_##lockname##_t *s)		\
 {									\
 	return &s->seqcount;						\
 }									\
 									\
 static __always_inline bool						\
-__seqcount_##lockname##_preemptible(seqcount_##lockname##_t *s)		\
+__seqprop_seqcount_##lockname##_preemptible(const seqcount_##lockname##_t *s)\
 {									\
 	return preemptible;						\
 }									\
 									\
 static __always_inline void						\
-__seqcount_##lockname##_assert(seqcount_##lockname##_t *s)		\
+__seqprop_seqcount_##lockname##_assert(const seqcount_##lockname##_t *s)\
 {									\
 	__SEQ_LOCK(lockdep_assert_held(lockmember));			\
 }
@@ -198,17 +200,17 @@ __seqcount_##lockname##_assert(seqcount_##lockname##_t *s)		\
  * __seqprop() for seqcount_t
  */
 
-static inline seqcount_t *__seqcount_ptr(seqcount_t *s)
+static inline seqcount_t *__seqprop_seqcount_ptr(seqcount_t *s)
 {
 	return s;
 }
 
-static inline bool __seqcount_preemptible(seqcount_t *s)
+static inline bool __seqprop_seqcount_preemptible(const seqcount_t *s)
 {
 	return false;
 }
 
-static inline void __seqcount_assert(seqcount_t *s)
+static inline void __seqprop_seqcount_assert(const seqcount_t *s)
 {
 	lockdep_assert_preemption_disabled();
 }
@@ -237,10 +239,10 @@ SEQCOUNT_LOCKNAME(ww_mutex,	struct ww_mutex,	true,	&s->lock->base)
 #define SEQCNT_WW_MUTEX_ZERO(name, lock) 	SEQCOUNT_LOCKNAME_ZERO(name, lock)
 
 #define __seqprop_case(s, lockname, prop)				\
-	seqcount_##lockname##_t: __seqcount_##lockname##_##prop((void *)(s))
+	seqcount_##lockname##_t: __seqprop_seqcount_##lockname##_##prop((void *)(s))
 
 #define __seqprop(s, prop) _Generic(*(s),				\
-	seqcount_t:		__seqcount_##prop((void *)(s)),		\
+	seqcount_t:		__seqprop_seqcount_##prop((void *)(s)),	\
 	__seqprop_case((s),	raw_spinlock,	prop),			\
 	__seqprop_case((s),	spinlock,	prop),			\
 	__seqprop_case((s),	rwlock,		prop),			\
-- 
2.28.0


^ permalink raw reply	[relevance 88%]

* [PATCH v2 3/5] seqlock: seqcount_t: Implement all read APIs as statement expressions
  2020-09-04 15:32 91% [PATCH v2 0/5] seqlock: Introduce PREEMPT_RT support Ahmed S. Darwish
  2020-09-04 15:32 58% ` [PATCH v2 1/5] seqlock: seqcount_LOCKNAME_t: Standardize naming convention Ahmed S. Darwish
  2020-09-04 15:32 88% ` [PATCH v2 2/5] seqlock: Use unique prefix for seqcount_t property accessors Ahmed S. Darwish
@ 2020-09-04 15:32 80% ` Ahmed S. Darwish
  2020-09-10 15:08 68%   ` [tip: locking/core] " tip-bot2 for Ahmed S. Darwish
  2020-09-04 15:32 72% ` [PATCH v2 4/5] seqlock: seqcount_LOCKNAME_t: Introduce PREEMPT_RT support Ahmed S. Darwish
  2020-09-04 15:32 84% ` [PATCH v2 5/5] seqlock: PREEMPT_RT: Do not starve seqlock_t writers Ahmed S. Darwish
  4 siblings, 1 reply; 200+ results
From: Ahmed S. Darwish @ 2020-09-04 15:32 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Will Deacon
  Cc: Thomas Gleixner, Sebastian A. Siewior, Paul E. McKenney,
	Steven Rostedt, LKML, Ahmed S. Darwish

The sequence counters read APIs are implemented as CPP macros, so they
can take either seqcount_t or any of the seqcount_LOCKNAME_t variants.
Such macros then get *directly* transformed to internal C functions that
only take plain seqcount_t.

Further commits need access to seqcount_LOCKNAME_t inside the actual
read APIs code. Thus transform all of the seqcount read APIs to pure GCC
statement expressions instead.

This will not break type-safety: all of the transformed APIs resolve to
a _Generic() selection that does not have a "default" case.

This will also not affect the transformed APIs' readability: the
kernel-doc previously added above all seqlock.h functions makes the
expectations quite clear for call-site developers.

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
---
 include/linux/seqlock.h | 94 ++++++++++++++++++++---------------------
 1 file changed, 45 insertions(+), 49 deletions(-)

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 96198da7debc..ed1c6c0ff8bb 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -184,6 +184,12 @@ __seqprop_seqcount_##lockname##_ptr(seqcount_##lockname##_t *s)		\
 	return &s->seqcount;						\
 }									\
 									\
+static __always_inline unsigned						\
+__seqprop_seqcount_##lockname##_sequence(const seqcount_##lockname##_t *s)\
+{									\
+	return READ_ONCE(s->seqcount.sequence);				\
+}									\
+									\
 static __always_inline bool						\
 __seqprop_seqcount_##lockname##_preemptible(const seqcount_##lockname##_t *s)\
 {									\
@@ -205,6 +211,11 @@ static inline seqcount_t *__seqprop_seqcount_ptr(seqcount_t *s)
 	return s;
 }
 
+static inline unsigned __seqprop_seqcount_sequence(const seqcount_t *s)
+{
+	return READ_ONCE(s->sequence);
+}
+
 static inline bool __seqprop_seqcount_preemptible(const seqcount_t *s)
 {
 	return false;
@@ -250,6 +261,7 @@ SEQCOUNT_LOCKNAME(ww_mutex,	struct ww_mutex,	true,	&s->lock->base)
 	__seqprop_case((s),	ww_mutex,	prop))
 
 #define __seqcount_ptr(s)		__seqprop(s, ptr)
+#define __seqcount_sequence(s)		__seqprop(s, sequence)
 #define __seqcount_lock_preemptible(s)	__seqprop(s, preemptible)
 #define __seqcount_assert_lock_held(s)	__seqprop(s, assert)
 
@@ -268,21 +280,15 @@ SEQCOUNT_LOCKNAME(ww_mutex,	struct ww_mutex,	true,	&s->lock->base)
  * Return: count to be passed to read_seqcount_retry()
  */
 #define __read_seqcount_begin(s)					\
-	__read_seqcount_t_begin(__seqcount_ptr(s))
-
-static inline unsigned __read_seqcount_t_begin(const seqcount_t *s)
-{
-	unsigned ret;
-
-repeat:
-	ret = READ_ONCE(s->sequence);
-	if (unlikely(ret & 1)) {
-		cpu_relax();
-		goto repeat;
-	}
-	kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);
-	return ret;
-}
+({									\
+	unsigned seq;							\
+									\
+	while ((seq = __seqcount_sequence(s)) & 1)			\
+		cpu_relax();						\
+									\
+	kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);			\
+	seq;								\
+})
 
 /**
  * raw_read_seqcount_begin() - begin a seqcount_t read section w/o lockdep
@@ -291,14 +297,12 @@ static inline unsigned __read_seqcount_t_begin(const seqcount_t *s)
  * Return: count to be passed to read_seqcount_retry()
  */
 #define raw_read_seqcount_begin(s)					\
-	raw_read_seqcount_t_begin(__seqcount_ptr(s))
-
-static inline unsigned raw_read_seqcount_t_begin(const seqcount_t *s)
-{
-	unsigned ret = __read_seqcount_t_begin(s);
-	smp_rmb();
-	return ret;
-}
+({									\
+	unsigned seq = __read_seqcount_begin(s);			\
+									\
+	smp_rmb();							\
+	seq;								\
+})
 
 /**
  * read_seqcount_begin() - begin a seqcount_t read critical section
@@ -307,13 +311,10 @@ static inline unsigned raw_read_seqcount_t_begin(const seqcount_t *s)
  * Return: count to be passed to read_seqcount_retry()
  */
 #define read_seqcount_begin(s)						\
-	read_seqcount_t_begin(__seqcount_ptr(s))
-
-static inline unsigned read_seqcount_t_begin(const seqcount_t *s)
-{
-	seqcount_lockdep_reader_access(s);
-	return raw_read_seqcount_t_begin(s);
-}
+({									\
+	seqcount_lockdep_reader_access(__seqcount_ptr(s));		\
+	raw_read_seqcount_begin(s);					\
+})
 
 /**
  * raw_read_seqcount() - read the raw seqcount_t counter value
@@ -327,15 +328,13 @@ static inline unsigned read_seqcount_t_begin(const seqcount_t *s)
  * Return: count to be passed to read_seqcount_retry()
  */
 #define raw_read_seqcount(s)						\
-	raw_read_seqcount_t(__seqcount_ptr(s))
-
-static inline unsigned raw_read_seqcount_t(const seqcount_t *s)
-{
-	unsigned ret = READ_ONCE(s->sequence);
-	smp_rmb();
-	kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);
-	return ret;
-}
+({									\
+	unsigned seq = __seqcount_sequence(s);				\
+									\
+	smp_rmb();							\
+	kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);			\
+	seq;								\
+})
 
 /**
  * raw_seqcount_begin() - begin a seqcount_t read critical section w/o
@@ -355,16 +354,13 @@ static inline unsigned raw_read_seqcount_t(const seqcount_t *s)
  * Return: count to be passed to read_seqcount_retry()
  */
 #define raw_seqcount_begin(s)						\
-	raw_seqcount_t_begin(__seqcount_ptr(s))
-
-static inline unsigned raw_seqcount_t_begin(const seqcount_t *s)
-{
-	/*
-	 * If the counter is odd, let read_seqcount_retry() fail
-	 * by decrementing the counter.
-	 */
-	return raw_read_seqcount_t(s) & ~1;
-}
+({									\
+	/*								\
+	 * If the counter is odd, let read_seqcount_retry() fail	\
+	 * by decrementing the counter.					\
+	 */								\
+	raw_read_seqcount(s) & ~1;					\
+})
 
 /**
  * __read_seqcount_retry() - end a seqcount_t read section w/o barrier
-- 
2.28.0


^ permalink raw reply	[relevance 80%]

* [PATCH v2 4/5] seqlock: seqcount_LOCKNAME_t: Introduce PREEMPT_RT support
  2020-09-04 15:32 91% [PATCH v2 0/5] seqlock: Introduce PREEMPT_RT support Ahmed S. Darwish
                   ` (2 preceding siblings ...)
  2020-09-04 15:32 80% ` [PATCH v2 3/5] seqlock: seqcount_t: Implement all read APIs as statement expressions Ahmed S. Darwish
@ 2020-09-04 15:32 72% ` Ahmed S. Darwish
    2020-09-04 15:32 84% ` [PATCH v2 5/5] seqlock: PREEMPT_RT: Do not starve seqlock_t writers Ahmed S. Darwish
  4 siblings, 1 reply; 200+ results
From: Ahmed S. Darwish @ 2020-09-04 15:32 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Will Deacon
  Cc: Thomas Gleixner, Sebastian A. Siewior, Paul E. McKenney,
	Steven Rostedt, LKML, Ahmed S. Darwish

Preemption must be disabled before entering a sequence counter write
side critical section.  Otherwise the read side section can preempt the
write side section and spin for the entire scheduler tick.  If that
reader belongs to a real-time scheduling class, it can spin forever and
the kernel will livelock.

Disabling preemption cannot be done for PREEMPT_RT though: it can lead
to higher latencies, and the write side sections will not be able to
acquire locks which become sleeping locks (e.g. spinlock_t).

To remain preemptible, while avoiding a possible livelock caused by the
reader preempting the writer, use a different technique: let the reader
detect if a seqcount_LOCKNAME_t writer is in progress. If that's the
case, acquire then release the associated LOCKNAME writer serialization
lock. This will allow any possibly-preempted writer to make progress
until the end of its writer serialization lock critical section.

Implement this lock-unlock technique for all seqcount_LOCKNAME_t with
an associated (PREEMPT_RT) sleeping lock.

Link: https://lkml.kernel.org/r/159708609435.2571.13948681727529247231.tglx@nanos
Link: https://lkml.kernel.org/r/20200519214547.352050-1-a.darwish@linutronix.de
References: 55f3560df975 ("seqlock: Extend seqcount API with associated locks")
Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
---
 include/linux/seqlock.h | 72 +++++++++++++++++++++++++++++++++--------
 1 file changed, 58 insertions(+), 14 deletions(-)

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index ed1c6c0ff8bb..6ac5a63fc536 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -17,6 +17,7 @@
 #include <linux/kcsan-checks.h>
 #include <linux/lockdep.h>
 #include <linux/mutex.h>
+#include <linux/ww_mutex.h>
 #include <linux/preempt.h>
 #include <linux/spinlock.h>
 
@@ -131,7 +132,23 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
  * See Documentation/locking/seqlock.rst
  */
 
-#ifdef CONFIG_LOCKDEP
+/*
+ * For PREEMPT_RT, seqcount_LOCKNAME_t write side critical sections cannot
+ * disable preemption. It can lead to higher latencies, and the write side
+ * sections will not be able to acquire locks which become sleeping locks
+ * (e.g. spinlock_t).
+ *
+ * To remain preemptible while avoiding a possible livelock caused by the
+ * reader preempting the writer, use a different technique: let the reader
+ * detect if a seqcount_LOCKNAME_t writer is in progress. If that is the
+ * case, acquire then release the associated LOCKNAME writer serialization
+ * lock. This will allow any possibly-preempted writer to make progress
+ * until the end of its writer serialization lock critical section.
+ *
+ * This lock-unlock technique must be implemented for all of PREEMPT_RT
+ * sleeping locks.  See Documentation/locking/locktypes.rst
+ */
+#if defined(CONFIG_LOCKDEP) || defined(CONFIG_PREEMPT_RT)
 #define __SEQ_LOCK(expr)	expr
 #else
 #define __SEQ_LOCK(expr)
@@ -162,10 +179,12 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
  *
  * @lockname:		"LOCKNAME" part of seqcount_LOCKNAME_t
  * @locktype:		LOCKNAME canonical C data type
- * @preemptible:	preemptibility of above lockname
+ * @preemptible:	preemptibility of above locktype
  * @lockmember:		argument for lockdep_assert_held()
+ * @lockbase:		associated lock release function (prefix only)
+ * @lock_acquire:	associated lock acquisition function (full call)
  */
-#define SEQCOUNT_LOCKNAME(lockname, locktype, preemptible, lockmember)	\
+#define SEQCOUNT_LOCKNAME(lockname, locktype, preemptible, lockmember, lockbase, lock_acquire) \
 typedef struct seqcount_##lockname {					\
 	seqcount_t		seqcount;				\
 	__SEQ_LOCK(locktype	*lock);					\
@@ -187,7 +206,23 @@ __seqprop_seqcount_##lockname##_ptr(seqcount_##lockname##_t *s)		\
 static __always_inline unsigned						\
 __seqprop_seqcount_##lockname##_sequence(const seqcount_##lockname##_t *s)\
 {									\
-	return READ_ONCE(s->seqcount.sequence);				\
+	unsigned seq = READ_ONCE(s->seqcount.sequence);			\
+									\
+	if (!IS_ENABLED(CONFIG_PREEMPT_RT))				\
+		return seq;						\
+									\
+	if (preemptible && unlikely(seq & 1)) {				\
+		__SEQ_LOCK(lock_acquire);				\
+		__SEQ_LOCK(lockbase##_unlock(s->lock));			\
+									\
+		/*							\
+		 * Re-read the sequence counter since the (possibly	\
+		 * preempted) writer made progress.			\
+		 */							\
+		seq = READ_ONCE(s->seqcount.sequence);			\
+	}								\
+									\
+	return seq;							\
 }									\
 									\
 static __always_inline bool						\
@@ -226,11 +261,13 @@ static inline void __seqprop_seqcount_assert(const seqcount_t *s)
 	lockdep_assert_preemption_disabled();
 }
 
-SEQCOUNT_LOCKNAME(raw_spinlock,	raw_spinlock_t,		false,	s->lock)
-SEQCOUNT_LOCKNAME(spinlock,	spinlock_t,		false,	s->lock)
-SEQCOUNT_LOCKNAME(rwlock,	rwlock_t,		false,	s->lock)
-SEQCOUNT_LOCKNAME(mutex,	struct mutex,		true,	s->lock)
-SEQCOUNT_LOCKNAME(ww_mutex,	struct ww_mutex,	true,	&s->lock->base)
+#define __SEQ_RT	IS_ENABLED(CONFIG_PREEMPT_RT)
+
+SEQCOUNT_LOCKNAME(raw_spinlock, raw_spinlock_t,  false,    s->lock,        raw_spin, raw_spin_lock(s->lock))
+SEQCOUNT_LOCKNAME(spinlock,     spinlock_t,      __SEQ_RT, s->lock,        spin,     spin_lock(s->lock))
+SEQCOUNT_LOCKNAME(rwlock,       rwlock_t,        __SEQ_RT, s->lock,        read,     read_lock(s->lock))
+SEQCOUNT_LOCKNAME(mutex,        struct mutex,    true,     s->lock,        mutex,    mutex_lock(s->lock))
+SEQCOUNT_LOCKNAME(ww_mutex,     struct ww_mutex, true,     &s->lock->base, ww_mutex, ww_mutex_lock(s->lock, NULL))
 
 /**
  * SEQCNT_LOCKNAME_ZERO - static initializer for seqcount_LOCKNAME_t
@@ -406,13 +443,20 @@ static inline int read_seqcount_t_retry(const seqcount_t *s, unsigned start)
 	return __read_seqcount_t_retry(s, start);
 }
 
+/*
+ * Enforce non-preemptibility for all seqcount_LOCKNAME_t writers. Don't
+ * do it for PREEMPT_RT, for the reasons outlined at __SEQ_LOCK().
+ */
+#define __seq_enforce_writer_non_preemptibility(s)			\
+	(!IS_ENABLED(CONFIG_PREEMPT_RT) && __seqcount_lock_preemptible(s))
+
 /**
  * raw_write_seqcount_begin() - start a seqcount_t write section w/o lockdep
  * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
  */
 #define raw_write_seqcount_begin(s)					\
 do {									\
-	if (__seqcount_lock_preemptible(s))				\
+	if (__seq_enforce_writer_non_preemptibility(s))			\
 		preempt_disable();					\
 									\
 	raw_write_seqcount_t_begin(__seqcount_ptr(s));			\
@@ -433,7 +477,7 @@ static inline void raw_write_seqcount_t_begin(seqcount_t *s)
 do {									\
 	raw_write_seqcount_t_end(__seqcount_ptr(s));			\
 									\
-	if (__seqcount_lock_preemptible(s))				\
+	if (__seq_enforce_writer_non_preemptibility(s))			\
 		preempt_enable();					\
 } while (0)
 
@@ -456,7 +500,7 @@ static inline void raw_write_seqcount_t_end(seqcount_t *s)
 do {									\
 	__seqcount_assert_lock_held(s);					\
 									\
-	if (__seqcount_lock_preemptible(s))				\
+	if (__seq_enforce_writer_non_preemptibility(s))			\
 		preempt_disable();					\
 									\
 	write_seqcount_t_begin_nested(__seqcount_ptr(s), subclass);	\
@@ -483,7 +527,7 @@ static inline void write_seqcount_t_begin_nested(seqcount_t *s, int subclass)
 do {									\
 	__seqcount_assert_lock_held(s);					\
 									\
-	if (__seqcount_lock_preemptible(s))				\
+	if (__seq_enforce_writer_non_preemptibility(s))			\
 		preempt_disable();					\
 									\
 	write_seqcount_t_begin(__seqcount_ptr(s));			\
@@ -504,7 +548,7 @@ static inline void write_seqcount_t_begin(seqcount_t *s)
 do {									\
 	write_seqcount_t_end(__seqcount_ptr(s));			\
 									\
-	if (__seqcount_lock_preemptible(s))				\
+	if (__seq_enforce_writer_non_preemptibility(s))			\
 		preempt_enable();					\
 } while (0)
 
-- 
2.28.0


^ permalink raw reply	[relevance 72%]

* [PATCH v2 5/5] seqlock: PREEMPT_RT: Do not starve seqlock_t writers
  2020-09-04 15:32 91% [PATCH v2 0/5] seqlock: Introduce PREEMPT_RT support Ahmed S. Darwish
                   ` (3 preceding siblings ...)
  2020-09-04 15:32 72% ` [PATCH v2 4/5] seqlock: seqcount_LOCKNAME_t: Introduce PREEMPT_RT support Ahmed S. Darwish
@ 2020-09-04 15:32 84% ` Ahmed S. Darwish
  2020-09-10 15:08 71%   ` [tip: locking/core] " tip-bot2 for Ahmed S. Darwish
  4 siblings, 1 reply; 200+ results
From: Ahmed S. Darwish @ 2020-09-04 15:32 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Will Deacon
  Cc: Thomas Gleixner, Sebastian A. Siewior, Paul E. McKenney,
	Steven Rostedt, LKML, Ahmed S. Darwish

On PREEMPT_RT, seqlock_t is transformed to a sleeping lock that does
not disable preemption. A seqlock_t reader can thus preempt its write
side section and spin for the entire scheduler tick. If that reader
belongs to a real-time scheduling class, it can spin forever and the
kernel will livelock.

To break this livelock possibility on PREEMPT_RT, implement seqlock_t in
terms of "seqcount_spinlock_t" instead of plain "seqcount_t".

Besides its pure annotational value, this will leverage the existing
seqcount_LOCKNAME_t PREEMPT_RT anti-livelock mechanisms, without adding
any extra code.
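
For context, a typical seqlock_t reader/writer pair looks like the
below (an illustrative sketch using the standard API; the foo_* names
are hypothetical, and callers need no changes to benefit from this
patch):

static DEFINE_SEQLOCK(foo_lock);
static int foo_a, foo_b;

static void foo_update(int a, int b)
{
	write_seqlock(&foo_lock);	/* serialize writers, enter write section */
	foo_a = a;
	foo_b = b;
	write_sequnlock(&foo_lock);
}

static int foo_read_sum(void)
{
	unsigned seq;
	int sum;

	do {
		seq = read_seqbegin(&foo_lock);
		sum = foo_a + foo_b;
	} while (read_seqretry(&foo_lock, seq));

	return sum;
}

With this patch, a PREEMPT_RT reader that observes an in-progress write
section acquires and releases foo_lock's spinlock instead of spinning.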

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
---
 include/linux/seqlock.h | 32 +++++++++++++++++++++-----------
 1 file changed, 21 insertions(+), 11 deletions(-)

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 6ac5a63fc536..06a339355c3a 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -757,13 +757,17 @@ static inline void raw_write_seqcount_t_latch(seqcount_t *s)
  *    - Documentation/locking/seqlock.rst
  */
 typedef struct {
-	struct seqcount seqcount;
+	/*
+	 * Make sure that readers don't starve writers on PREEMPT_RT: use
+	 * seqcount_spinlock_t instead of seqcount_t. Check __SEQ_LOCK().
+	 */
+	seqcount_spinlock_t seqcount;
 	spinlock_t lock;
 } seqlock_t;
 
 #define __SEQLOCK_UNLOCKED(lockname)					\
 	{								\
-		.seqcount = SEQCNT_ZERO(lockname),			\
+		.seqcount = SEQCNT_SPINLOCK_ZERO(lockname, &(lockname).lock), \
 		.lock =	__SPIN_LOCK_UNLOCKED(lockname)			\
 	}
 
@@ -773,8 +777,8 @@ typedef struct {
  */
 #define seqlock_init(sl)						\
 	do {								\
-		seqcount_init(&(sl)->seqcount);				\
 		spin_lock_init(&(sl)->lock);				\
+		seqcount_spinlock_init(&(sl)->seqcount, &(sl)->lock);	\
 	} while (0)
 
 /**
@@ -821,6 +825,12 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
 	return read_seqcount_retry(&sl->seqcount, start);
 }
 
+/*
+ * For all seqlock_t write side functions, use write_seqcount_*t*_begin()
+ * instead of the generic write_seqcount_begin(). This way, no redundant
+ * lockdep_assert_held() checks are added.
+ */
+
 /**
  * write_seqlock() - start a seqlock_t write side critical section
  * @sl: Pointer to seqlock_t
@@ -837,7 +847,7 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
 static inline void write_seqlock(seqlock_t *sl)
 {
 	spin_lock(&sl->lock);
-	write_seqcount_t_begin(&sl->seqcount);
+	write_seqcount_t_begin(&sl->seqcount.seqcount);
 }
 
 /**
@@ -849,7 +859,7 @@ static inline void write_seqlock(seqlock_t *sl)
  */
 static inline void write_sequnlock(seqlock_t *sl)
 {
-	write_seqcount_t_end(&sl->seqcount);
+	write_seqcount_t_end(&sl->seqcount.seqcount);
 	spin_unlock(&sl->lock);
 }
 
@@ -863,7 +873,7 @@ static inline void write_sequnlock(seqlock_t *sl)
 static inline void write_seqlock_bh(seqlock_t *sl)
 {
 	spin_lock_bh(&sl->lock);
-	write_seqcount_t_begin(&sl->seqcount);
+	write_seqcount_t_begin(&sl->seqcount.seqcount);
 }
 
 /**
@@ -876,7 +886,7 @@ static inline void write_seqlock_bh(seqlock_t *sl)
  */
 static inline void write_sequnlock_bh(seqlock_t *sl)
 {
-	write_seqcount_t_end(&sl->seqcount);
+	write_seqcount_t_end(&sl->seqcount.seqcount);
 	spin_unlock_bh(&sl->lock);
 }
 
@@ -890,7 +900,7 @@ static inline void write_sequnlock_bh(seqlock_t *sl)
 static inline void write_seqlock_irq(seqlock_t *sl)
 {
 	spin_lock_irq(&sl->lock);
-	write_seqcount_t_begin(&sl->seqcount);
+	write_seqcount_t_begin(&sl->seqcount.seqcount);
 }
 
 /**
@@ -902,7 +912,7 @@ static inline void write_seqlock_irq(seqlock_t *sl)
  */
 static inline void write_sequnlock_irq(seqlock_t *sl)
 {
-	write_seqcount_t_end(&sl->seqcount);
+	write_seqcount_t_end(&sl->seqcount.seqcount);
 	spin_unlock_irq(&sl->lock);
 }
 
@@ -911,7 +921,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl)
 	unsigned long flags;
 
 	spin_lock_irqsave(&sl->lock, flags);
-	write_seqcount_t_begin(&sl->seqcount);
+	write_seqcount_t_begin(&sl->seqcount.seqcount);
 	return flags;
 }
 
@@ -940,7 +950,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl)
 static inline void
 write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags)
 {
-	write_seqcount_t_end(&sl->seqcount);
+	write_seqcount_t_end(&sl->seqcount.seqcount);
 	spin_unlock_irqrestore(&sl->lock, flags);
 }
 
-- 
2.28.0


^ permalink raw reply	[relevance 84%]

* Re: [PATCH v1 6/8] x86/tsc: Use seqcount_latch_t
  @ 2020-09-07 16:29 61%         ` Ahmed S. Darwish
    0 siblings, 1 reply; 200+ results
From: Ahmed S. Darwish @ 2020-09-07 16:29 UTC (permalink / raw)
  To: peterz
  Cc: Ingo Molnar, Will Deacon, Thomas Gleixner, Sebastian A. Siewior,
	LKML, Borislav Petkov, x86, H. Peter Anvin

Hi,

On Fri, Sep 04, 2020 at 10:03:12AM +0200, peterz@infradead.org wrote:
> On Fri, Sep 04, 2020 at 09:41:42AM +0200, peterz@infradead.org wrote:
> > On Thu, Aug 27, 2020 at 01:40:42PM +0200, Ahmed S. Darwish wrote:
> >
> > >  __always_inline void cyc2ns_read_begin(struct cyc2ns_data *data)
> > >  {
> > > +	seqcount_latch_t *seqcount;
> > >  	int seq, idx;
> > >
> > >  	preempt_disable_notrace();
> > >
> > > +	seqcount = &this_cpu_ptr(&cyc2ns)->seq;
> > >  	do {
> > > -		seq = this_cpu_read(cyc2ns.seq.sequence);
> > > +		seq = raw_read_seqcount_latch(seqcount);
> > >  		idx = seq & 1;
> > >
> > >  		data->cyc2ns_offset = this_cpu_read(cyc2ns.data[idx].cyc2ns_offset);
> > >  		data->cyc2ns_mul    = this_cpu_read(cyc2ns.data[idx].cyc2ns_mul);
> > >  		data->cyc2ns_shift  = this_cpu_read(cyc2ns.data[idx].cyc2ns_shift);
> > >
> > > -	} while (unlikely(seq != this_cpu_read(cyc2ns.seq.sequence)));
> > > +	} while (read_seqcount_latch_retry(seqcount, seq));
> > >  }
> >
> > So I worried about this change, it obviously generates worse code. But I
> > was not expecting this:
> >
> > Before:
> >
> > 196: 0000000000000110   189 FUNC    GLOBAL DEFAULT    1 native_sched_clock
> >
> > After:
> >
> > 195: 0000000000000110   399 FUNC    GLOBAL DEFAULT    1 native_sched_clock
> >
> > That's _210_ bytes extra!!
> >
> > If you look at the disassembly of the thing after it's a complete
> > trainwreck.
>
> The below delta fixes it again.
>
> ---
> --- a/arch/x86/kernel/tsc.c
> +++ b/arch/x86/kernel/tsc.c
> @@ -68,21 +68,19 @@ early_param("tsc_early_khz", tsc_early_k
>
>  __always_inline void cyc2ns_read_begin(struct cyc2ns_data *data)
>  {
> -	seqcount_latch_t *seqcount;
>  	int seq, idx;
>
>  	preempt_disable_notrace();
>
> -	seqcount = &this_cpu_ptr(&cyc2ns)->seq;
>  	do {
> -		seq = raw_read_seqcount_latch(seqcount);
> +		seq = this_cpu_read(cyc2ns.seq.seqcount.sequence);
>  		idx = seq & 1;
>
>  		data->cyc2ns_offset = this_cpu_read(cyc2ns.data[idx].cyc2ns_offset);
>  		data->cyc2ns_mul    = this_cpu_read(cyc2ns.data[idx].cyc2ns_mul);
>  		data->cyc2ns_shift  = this_cpu_read(cyc2ns.data[idx].cyc2ns_shift);
>
> -	} while (read_seqcount_latch_retry(seqcount, seq));
> +	} while (unlikely(seq != this_cpu_read(cyc2ns.seq.seqcount.sequence)));
>  }
>
>  __always_inline void cyc2ns_read_end(void)

I've been unable to reproduce this huge, 200+ byte, difference.
Can I please get the defconfig and GCC version?

Here are the two competing implementations:

noinline void cyc2ns_read_begin_v1(struct cyc2ns_data *data)
{
	seqcount_latch_t *seqcount;
	int seq, idx;

	preempt_disable_notrace();

	seqcount = &this_cpu_ptr(&cyc2ns)->seq;
	do {
		seq = raw_read_seqcount_latch(seqcount);
		idx = seq & 1;

		data->cyc2ns_offset = this_cpu_read(cyc2ns.data[idx].cyc2ns_offset);
		data->cyc2ns_mul    = this_cpu_read(cyc2ns.data[idx].cyc2ns_mul);
		data->cyc2ns_shift  = this_cpu_read(cyc2ns.data[idx].cyc2ns_shift);

	} while (read_seqcount_latch_retry(seqcount, seq));
}

noinline void cyc2ns_read_begin_v2(struct cyc2ns_data *data)
{
	int seq, idx;

	preempt_disable_notrace();

	do {
		seq = this_cpu_read(cyc2ns.seq.seqcount.sequence);
		idx = seq & 1;

		data->cyc2ns_offset = this_cpu_read(cyc2ns.data[idx].cyc2ns_offset);
		data->cyc2ns_mul    = this_cpu_read(cyc2ns.data[idx].cyc2ns_mul);
		data->cyc2ns_shift  = this_cpu_read(cyc2ns.data[idx].cyc2ns_shift);

	} while (unlikely(seq != this_cpu_read(cyc2ns.seq.seqcount.sequence)));
}

And here is the resulting gcc-7/8/4.9 binary output (DEBUG_PREEMPT=y;
setting DEBUG_KERNEL/DEBUG_PREEMPT=n doesn't show any big _v1 vs. _v2
difference either, and neither does using gcc-10):

gcc-7
=====

ffffffff81028890 <cyc2ns_read_begin_v1>:
ffffffff81028890:	65 ff 05 a9 e6 fe 7e 	incl   %gs:0x7efee6a9(%rip)        # 16f40 <__preempt_count>
			ffffffff81028893: R_X86_64_PC32	__preempt_count-0x4
ffffffff81028897:	48 c7 c6 40 a5 1f 00 	mov    $0x1fa540,%rsi
			ffffffff8102889a: R_X86_64_32S	.data..percpu+0x1fa540
ffffffff8102889e:	48 89 f2             	mov    %rsi,%rdx
ffffffff810288a1:	65 48 03 15 bf 8a fe 	add    %gs:0x7efe8abf(%rip),%rdx        # 11368 <this_cpu_off>
ffffffff810288a8:	7e
			ffffffff810288a5: R_X86_64_PC32	this_cpu_off-0x4
ffffffff810288a9:	8b 4a 20             	mov    0x20(%rdx),%ecx
ffffffff810288ac:	89 c8                	mov    %ecx,%eax
ffffffff810288ae:	83 e0 01             	and    $0x1,%eax
ffffffff810288b1:	48 c1 e0 04          	shl    $0x4,%rax
ffffffff810288b5:	48 01 f0             	add    %rsi,%rax
ffffffff810288b8:	65 4c 8b 40 08       	mov    %gs:0x8(%rax),%r8
ffffffff810288bd:	4c 89 47 08          	mov    %r8,0x8(%rdi)
ffffffff810288c1:	65 44 8b 00          	mov    %gs:(%rax),%r8d
ffffffff810288c5:	44 89 07             	mov    %r8d,(%rdi)
ffffffff810288c8:	65 8b 40 04          	mov    %gs:0x4(%rax),%eax
ffffffff810288cc:	89 47 04             	mov    %eax,0x4(%rdi)
ffffffff810288cf:	8b 42 20             	mov    0x20(%rdx),%eax
ffffffff810288d2:	39 c8                	cmp    %ecx,%eax
ffffffff810288d4:	75 d3                	jne    ffffffff810288a9 <cyc2ns_read_begin_v1+0x19>
ffffffff810288d6:	f3 c3                	repz retq
ffffffff810288d8:	0f 1f 84 00 00 00 00 	nopl   0x0(%rax,%rax,1)
ffffffff810288df:	00

ffffffff810288e0 <cyc2ns_read_begin_v2>:
ffffffff810288e0:	65 ff 05 59 e6 fe 7e 	incl   %gs:0x7efee659(%rip)        # 16f40 <__preempt_count>
			ffffffff810288e3: R_X86_64_PC32	__preempt_count-0x4
ffffffff810288e7:	48 c7 c1 40 a5 1f 00 	mov    $0x1fa540,%rcx
			ffffffff810288ea: R_X86_64_32S	.data..percpu+0x1fa540
ffffffff810288ee:	48 89 c8             	mov    %rcx,%rax
ffffffff810288f1:	65 48 03 05 6f 8a fe 	add    %gs:0x7efe8a6f(%rip),%rax        # 11368 <this_cpu_off>
ffffffff810288f8:	7e
			ffffffff810288f5: R_X86_64_PC32	this_cpu_off-0x4
ffffffff810288f9:	65 8b 15 60 1c 1d 7f 	mov    %gs:0x7f1d1c60(%rip),%edx        # 1fa560 <cyc2ns+0x20>
			ffffffff810288fc: R_X86_64_PC32	.data..percpu+0x1fa55c
ffffffff81028900:	89 d0                	mov    %edx,%eax
ffffffff81028902:	83 e0 01             	and    $0x1,%eax
ffffffff81028905:	48 c1 e0 04          	shl    $0x4,%rax
ffffffff81028909:	48 01 c8             	add    %rcx,%rax
ffffffff8102890c:	65 48 8b 70 08       	mov    %gs:0x8(%rax),%rsi
ffffffff81028911:	48 89 77 08          	mov    %rsi,0x8(%rdi)
ffffffff81028915:	65 8b 30             	mov    %gs:(%rax),%esi
ffffffff81028918:	89 37                	mov    %esi,(%rdi)
ffffffff8102891a:	65 8b 40 04          	mov    %gs:0x4(%rax),%eax
ffffffff8102891e:	89 47 04             	mov    %eax,0x4(%rdi)
ffffffff81028921:	65 8b 05 38 1c 1d 7f 	mov    %gs:0x7f1d1c38(%rip),%eax        # 1fa560 <cyc2ns+0x20>
			ffffffff81028924: R_X86_64_PC32	.data..percpu+0x1fa55c
ffffffff81028928:	39 c2                	cmp    %eax,%edx
ffffffff8102892a:	75 cd                	jne    ffffffff810288f9 <cyc2ns_read_begin_v2+0x19>
ffffffff8102892c:	f3 c3                	repz retq
ffffffff8102892e:	66 90                	xchg   %ax,%ax

gcc-8
=====

ffffffff810281a0 <cyc2ns_read_begin_v1>:
ffffffff810281a0:	65 ff 05 99 ed fe 7e 	incl   %gs:0x7efeed99(%rip)        # 16f40 <__preempt_count>
			ffffffff810281a3: R_X86_64_PC32	__preempt_count-0x4
ffffffff810281a7:	49 c7 c0 40 a5 1f 00 	mov    $0x1fa540,%r8
			ffffffff810281aa: R_X86_64_32S	.data..percpu+0x1fa540
ffffffff810281ae:	4c 89 c1             	mov    %r8,%rcx
ffffffff810281b1:	65 48 03 0d af 91 fe 	add    %gs:0x7efe91af(%rip),%rcx        # 11368 <this_cpu_off>
ffffffff810281b8:	7e
			ffffffff810281b5: R_X86_64_PC32	this_cpu_off-0x4
ffffffff810281b9:	8b 51 20             	mov    0x20(%rcx),%edx
ffffffff810281bc:	89 d0                	mov    %edx,%eax
ffffffff810281be:	83 e0 01             	and    $0x1,%eax
ffffffff810281c1:	48 c1 e0 04          	shl    $0x4,%rax
ffffffff810281c5:	4c 01 c0             	add    %r8,%rax
ffffffff810281c8:	65 48 8b 70 08       	mov    %gs:0x8(%rax),%rsi
ffffffff810281cd:	48 89 77 08          	mov    %rsi,0x8(%rdi)
ffffffff810281d1:	65 8b 30             	mov    %gs:(%rax),%esi
ffffffff810281d4:	89 37                	mov    %esi,(%rdi)
ffffffff810281d6:	65 8b 40 04          	mov    %gs:0x4(%rax),%eax
ffffffff810281da:	89 47 04             	mov    %eax,0x4(%rdi)
ffffffff810281dd:	8b 41 20             	mov    0x20(%rcx),%eax
ffffffff810281e0:	39 d0                	cmp    %edx,%eax
ffffffff810281e2:	75 d5                	jne    ffffffff810281b9 <cyc2ns_read_begin_v1+0x19>
ffffffff810281e4:	c3                   	retq
ffffffff810281e5:	66 66 2e 0f 1f 84 00 	data16 nopw %cs:0x0(%rax,%rax,1)
ffffffff810281ec:	00 00 00 00

ffffffff810281f0 <cyc2ns_read_begin_v2>:
ffffffff810281f0:	65 ff 05 49 ed fe 7e 	incl   %gs:0x7efeed49(%rip)        # 16f40 <__preempt_count>
			ffffffff810281f3: R_X86_64_PC32	__preempt_count-0x4
ffffffff810281f7:	48 c7 c1 40 a5 1f 00 	mov    $0x1fa540,%rcx
			ffffffff810281fa: R_X86_64_32S	.data..percpu+0x1fa540
ffffffff810281fe:	48 89 c8             	mov    %rcx,%rax
ffffffff81028201:	65 48 03 05 5f 91 fe 	add    %gs:0x7efe915f(%rip),%rax        # 11368 <this_cpu_off>
ffffffff81028208:	7e
			ffffffff81028205: R_X86_64_PC32	this_cpu_off-0x4
ffffffff81028209:	65 8b 15 50 23 1d 7f 	mov    %gs:0x7f1d2350(%rip),%edx        # 1fa560 <cyc2ns+0x20>
			ffffffff8102820c: R_X86_64_PC32	.data..percpu+0x1fa55c
ffffffff81028210:	89 d0                	mov    %edx,%eax
ffffffff81028212:	83 e0 01             	and    $0x1,%eax
ffffffff81028215:	48 c1 e0 04          	shl    $0x4,%rax
ffffffff81028219:	48 01 c8             	add    %rcx,%rax
ffffffff8102821c:	65 48 8b 70 08       	mov    %gs:0x8(%rax),%rsi
ffffffff81028221:	48 89 77 08          	mov    %rsi,0x8(%rdi)
ffffffff81028225:	65 8b 30             	mov    %gs:(%rax),%esi
ffffffff81028228:	89 37                	mov    %esi,(%rdi)
ffffffff8102822a:	65 8b 40 04          	mov    %gs:0x4(%rax),%eax
ffffffff8102822e:	89 47 04             	mov    %eax,0x4(%rdi)
ffffffff81028231:	65 8b 05 28 23 1d 7f 	mov    %gs:0x7f1d2328(%rip),%eax        # 1fa560 <cyc2ns+0x20>
			ffffffff81028234: R_X86_64_PC32	.data..percpu+0x1fa55c
ffffffff81028238:	39 c2                	cmp    %eax,%edx
ffffffff8102823a:	75 cd                	jne    ffffffff81028209 <cyc2ns_read_begin_v2+0x19>
ffffffff8102823c:	c3                   	retq
ffffffff8102823d:	0f 1f 00             	nopl   (%rax)

gcc-4.9
=======

ffffffff810ab900 <cyc2ns_read_begin_v1>:
ffffffff810ab900:	55                   	push   %rbp
ffffffff810ab901:	48 89 fd             	mov    %rdi,%rbp
ffffffff810ab904:	53                   	push   %rbx
ffffffff810ab905:	65 ff 05 f4 b6 f6 7e 	incl   %gs:0x7ef6b6f4(%rip)        # 17000 <__preempt_count>
			ffffffff810ab908: R_X86_64_PC32	__preempt_count-0x4
ffffffff810ab90c:	e8 df da c9 00       	callq  ffffffff81d493f0 <debug_smp_processor_id>
			ffffffff810ab90d: R_X86_64_PC32	debug_smp_processor_id-0x4
ffffffff810ab911:	89 c0                	mov    %eax,%eax
ffffffff810ab913:	48 c7 c3 00 ab 02 00 	mov    $0x2ab00,%rbx
			ffffffff810ab916: R_X86_64_32S	.data..percpu+0x2ab00
ffffffff810ab91a:	48 03 1c c5 80 87 65 	add    -0x7d9a7880(,%rax,8),%rbx
ffffffff810ab921:	82
			ffffffff810ab91e: R_X86_64_32S	__per_cpu_offset
ffffffff810ab922:	8b 53 20             	mov    0x20(%rbx),%edx
ffffffff810ab925:	89 d0                	mov    %edx,%eax
ffffffff810ab927:	83 e0 01             	and    $0x1,%eax
ffffffff810ab92a:	48 c1 e0 04          	shl    $0x4,%rax
ffffffff810ab92e:	65 48 8b 88 08 ab 02 	mov    %gs:0x2ab08(%rax),%rcx
ffffffff810ab935:	00
			ffffffff810ab932: R_X86_64_32S	.data..percpu+0x2ab08
ffffffff810ab936:	48 89 4d 08          	mov    %rcx,0x8(%rbp)
ffffffff810ab93a:	65 8b 88 00 ab 02 00 	mov    %gs:0x2ab00(%rax),%ecx
			ffffffff810ab93d: R_X86_64_32S	.data..percpu+0x2ab00
ffffffff810ab941:	89 4d 00             	mov    %ecx,0x0(%rbp)
ffffffff810ab944:	65 8b 80 04 ab 02 00 	mov    %gs:0x2ab04(%rax),%eax
			ffffffff810ab947: R_X86_64_32S	.data..percpu+0x2ab04
ffffffff810ab94b:	89 45 04             	mov    %eax,0x4(%rbp)
ffffffff810ab94e:	8b 43 20             	mov    0x20(%rbx),%eax
ffffffff810ab951:	39 c2                	cmp    %eax,%edx
ffffffff810ab953:	75 cd                	jne    ffffffff810ab922 <cyc2ns_read_begin_v1+0x22>
ffffffff810ab955:	5b                   	pop    %rbx
ffffffff810ab956:	5d                   	pop    %rbp
ffffffff810ab957:	c3                   	retq
ffffffff810ab958:	0f 1f 84 00 00 00 00 	nopl   0x0(%rax,%rax,1)
ffffffff810ab95f:	00

ffffffff810ab960 <cyc2ns_read_begin_v2>:
ffffffff810ab960:	65 ff 05 99 b6 f6 7e 	incl   %gs:0x7ef6b699(%rip)        # 17000 <__preempt_count>
			ffffffff810ab963: R_X86_64_PC32	__preempt_count-0x4
ffffffff810ab967:	65 8b 15 b2 f1 f7 7e 	mov    %gs:0x7ef7f1b2(%rip),%edx        # 2ab20 <cyc2ns+0x20>
			ffffffff810ab96a: R_X86_64_PC32	.data..percpu+0x2ab1c
ffffffff810ab96e:	89 d0                	mov    %edx,%eax
ffffffff810ab970:	83 e0 01             	and    $0x1,%eax
ffffffff810ab973:	48 c1 e0 04          	shl    $0x4,%rax
ffffffff810ab977:	65 48 8b 88 08 ab 02 	mov    %gs:0x2ab08(%rax),%rcx
ffffffff810ab97e:	00
			ffffffff810ab97b: R_X86_64_32S	.data..percpu+0x2ab08
ffffffff810ab97f:	48 89 4f 08          	mov    %rcx,0x8(%rdi)
ffffffff810ab983:	65 8b 88 00 ab 02 00 	mov    %gs:0x2ab00(%rax),%ecx
			ffffffff810ab986: R_X86_64_32S	.data..percpu+0x2ab00
ffffffff810ab98a:	89 0f                	mov    %ecx,(%rdi)
ffffffff810ab98c:	65 8b 80 04 ab 02 00 	mov    %gs:0x2ab04(%rax),%eax
			ffffffff810ab98f: R_X86_64_32S	.data..percpu+0x2ab04
ffffffff810ab993:	89 47 04             	mov    %eax,0x4(%rdi)
ffffffff810ab996:	65 8b 05 83 f1 f7 7e 	mov    %gs:0x7ef7f183(%rip),%eax        # 2ab20 <cyc2ns+0x20>
			ffffffff810ab999: R_X86_64_PC32	.data..percpu+0x2ab1c
ffffffff810ab99d:	39 c2                	cmp    %eax,%edx
ffffffff810ab99f:	75 c6                	jne    ffffffff810ab967 <cyc2ns_read_begin_v2+0x7>
ffffffff810ab9a1:	f3 c3                	repz retq
ffffffff810ab9a3:	66 66 66 66 2e 0f 1f 	data16 data16 data16 nopw %cs:0x0(%rax,%rax,1)
ffffffff810ab9aa:	84 00 00 00 00 00

Thanks,

--
Ahmed S. Darwish
Linutronix GmbH

^ permalink raw reply	[relevance 61%]

* Re: [PATCH v1 6/8] x86/tsc: Use seqcount_latch_t
  @ 2020-09-08  6:23 99%             ` Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: Ahmed S. Darwish @ 2020-09-08  6:23 UTC (permalink / raw)
  To: peterz
  Cc: Ingo Molnar, Will Deacon, Thomas Gleixner, Sebastian A. Siewior,
	LKML, Borislav Petkov, x86, H. Peter Anvin

On Mon, Sep 07, 2020 at 07:30:47PM +0200, peterz@infradead.org wrote:
...
>
> Don't look at this function in isolation, look at native_sched_clock()
> where it's used as a whole.
>
...
> What happened (afaict) is that the change caused it to use more
> registers and ended up spilling crap on the stack.
>
...
>
> Anyway, I frobbed the patch to use the this_cpu variant, and I've queued
> the lot.

Perfect. Thanks a lot ;-)

^ permalink raw reply	[relevance 99%]

* Re: [PATCH v2 4/5] seqlock: seqcount_LOCKNAME_t: Introduce PREEMPT_RT support
  @ 2020-09-08 12:48 99%     ` Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: Ahmed S. Darwish @ 2020-09-08 12:48 UTC (permalink / raw)
  To: peterz
  Cc: Ingo Molnar, Will Deacon, Thomas Gleixner, Sebastian A. Siewior,
	Paul E. McKenney, Steven Rostedt, LKML

On Tue, Sep 08, 2020 at 01:45:20PM +0200, peterz@infradead.org wrote:
> On Fri, Sep 04, 2020 at 05:32:30PM +0200, Ahmed S. Darwish wrote:
> > @@ -406,13 +443,20 @@ static inline int read_seqcount_t_retry(const seqcount_t *s, unsigned start)
> >  	return __read_seqcount_t_retry(s, start);
> >  }
> >
> > +/*
> > + * Enforce non-preemptibility for all seqcount_LOCKNAME_t writers. Don't
> > + * do it for PREEMPT_RT, for the reasons outlined at __SEQ_LOCK().
> > + */
> > +#define __seq_enforce_writer_non_preemptibility(s)			\
> > +	(!IS_ENABLED(CONFIG_PREEMPT_RT) && __seqcount_lock_preemptible(s))
> > +
> >  /**
> >   * raw_write_seqcount_begin() - start a seqcount_t write section w/o lockdep
> >   * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
> >   */
> >  #define raw_write_seqcount_begin(s)					\
> >  do {									\
> > -	if (__seqcount_lock_preemptible(s))				\
> > +	if (__seq_enforce_writer_non_preemptibility(s))			\
> >  		preempt_disable();					\
> >  									\
> >  	raw_write_seqcount_t_begin(__seqcount_ptr(s));			\
> > @@ -433,7 +477,7 @@ static inline void raw_write_seqcount_t_begin(seqcount_t *s)
> >  do {									\
> >  	raw_write_seqcount_t_end(__seqcount_ptr(s));			\
> >  									\
> > -	if (__seqcount_lock_preemptible(s))				\
> > +	if (__seq_enforce_writer_non_preemptibility(s))			\
> >  		preempt_enable();					\
> >  } while (0)
> >
> > @@ -456,7 +500,7 @@ static inline void raw_write_seqcount_t_end(seqcount_t *s)
> >  do {									\
> >  	__seqcount_assert_lock_held(s);					\
> >  									\
> > -	if (__seqcount_lock_preemptible(s))				\
> > +	if (__seq_enforce_writer_non_preemptibility(s))			\
> >  		preempt_disable();					\
> >  									\
> >  	write_seqcount_t_begin_nested(__seqcount_ptr(s), subclass);	\
> > @@ -483,7 +527,7 @@ static inline void write_seqcount_t_begin_nested(seqcount_t *s, int subclass)
> >  do {									\
> >  	__seqcount_assert_lock_held(s);					\
> >  									\
> > -	if (__seqcount_lock_preemptible(s))				\
> > +	if (__seq_enforce_writer_non_preemptibility(s))			\
> >  		preempt_disable();					\
> >  									\
> >  	write_seqcount_t_begin(__seqcount_ptr(s));			\
> > @@ -504,7 +548,7 @@ static inline void write_seqcount_t_begin(seqcount_t *s)
> >  do {									\
> >  	write_seqcount_t_end(__seqcount_ptr(s));			\
> >  									\
> > -	if (__seqcount_lock_preemptible(s))				\
> > +	if (__seq_enforce_writer_non_preemptibility(s))			\
> >  		preempt_enable();					\
> >  } while (0)
>
> I've replaced the above with the below, afaict there were no users of
> __seqcount_lock_preemptible() left.
>
> --- a/include/linux/seqlock.h
> +++ b/include/linux/seqlock.h
> @@ -228,7 +228,11 @@ __seqprop_##lockname##_sequence(const se
>  static __always_inline bool						\
>  __seqprop_##lockname##_preemptible(const seqcount_##lockname##_t *s)	\
>  {									\
> -	return preemptible;						\
> +	if (!IS_ENABLED(CONFIG_PREEMPT_RT))				\
> +		return preemptible;					\
> +									\
> +	/* PREEMPT_RT relies on the above LOCK+UNLOCK */		\
> +	return false;							\
>  }									\
>  									\

Sounds good.

^ permalink raw reply	[relevance 99%]

* [tip: locking/core] seqlock: Introduce seqcount_latch_t
  2020-08-27 11:40 67%   ` [PATCH v1 3/8] seqlock: Introduce seqcount_latch_t Ahmed S. Darwish
@ 2020-09-10 15:08 55%     ` tip-bot2 for Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: tip-bot2 for Ahmed S. Darwish @ 2020-09-10 15:08 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Ahmed S. Darwish, Peter Zijlstra (Intel), x86, LKML

The following commit has been merged into the locking/core branch of tip:

Commit-ID:     80793c3471d90d4dc2b48deadb6413bdfe39500f
Gitweb:        https://git.kernel.org/tip/80793c3471d90d4dc2b48deadb6413bdfe39500f
Author:        Ahmed S. Darwish <a.darwish@linutronix.de>
AuthorDate:    Thu, 27 Aug 2020 13:40:39 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Thu, 10 Sep 2020 11:19:28 +02:00

seqlock: Introduce seqcount_latch_t

Latch sequence counters are a multiversion concurrency control mechanism
where the seqcount_t counter even/odd value is used to switch between
two copies of protected data. This allows the seqcount_t read path to
safely interrupt its write side critical section (e.g. from NMIs).

Initially, latch sequence counters were implemented as a single write
function above plain seqcount_t: raw_write_seqcount_latch(). The read
side was expected to use plain seqcount_t raw_read_seqcount().

A specialized latch read function, raw_read_seqcount_latch(), was later
added. It became the standardized way for latch read paths.  Due to the
dependent load, it has one read memory barrier less than the plain
seqcount_t raw_read_seqcount() API.

Only raw_write_seqcount_latch() and raw_read_seqcount_latch() should be
used with latch sequence counters. Having *unique* read and write path
APIs means that latch sequence counters are actually a data type of
their own -- just inappropriately overloading plain seqcount_t.

Introduce seqcount_latch_t. This adds type-safety and ensures that only
the correct latch-safe APIs are to be used.

To avoid breaking bisection, let the latch APIs also accept plain
seqcount_t or seqcount_raw_spinlock_t for now. Once all call sites are
converted to seqcount_latch_t, only that new data type will be allowed.
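
A minimal usage sketch with the new type (illustrative; the names are
hypothetical, and the kernel-doc in the diff below carries the
authoritative reader/writer example):

struct latched_clock {
	seqcount_latch_t	seq;
	u64			data[2];	/* two copies, switched by seq */
};

static struct latched_clock lc = {
	.seq = SEQCNT_LATCH_ZERO(lc.seq),
};

static u64 latched_read(void)
{
	unsigned seq;
	u64 val;

	do {
		seq = raw_read_seqcount_latch(&lc.seq);
		val = lc.data[seq & 1];		/* pick even/odd copy */
	} while (read_seqcount_latch_retry(&lc.seq, seq));

	return val;
}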

References: 9b0fd802e8c0 ("seqcount: Add raw_write_seqcount_latch()")
References: 7fc26327b756 ("seqlock: Introduce raw_read_seqcount_latch()")
References: aadd6e5caaac ("time/sched_clock: Use raw_read_seqcount_latch()")
Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200827114044.11173-4-a.darwish@linutronix.de
---
 Documentation/locking/seqlock.rst |  18 +++++-
 include/linux/seqlock.h           | 104 ++++++++++++++++++++---------
 2 files changed, 91 insertions(+), 31 deletions(-)

diff --git a/Documentation/locking/seqlock.rst b/Documentation/locking/seqlock.rst
index 62c5ad9..a334b58 100644
--- a/Documentation/locking/seqlock.rst
+++ b/Documentation/locking/seqlock.rst
@@ -139,6 +139,24 @@ with the associated LOCKTYPE lock acquired.
 
 Read path: same as in :ref:`seqcount_t`.
 
+
+.. _seqcount_latch_t:
+
+Latch sequence counters (``seqcount_latch_t``)
+----------------------------------------------
+
+Latch sequence counters are a multiversion concurrency control mechanism
+where the embedded seqcount_t counter even/odd value is used to switch
+between two copies of protected data. This allows the sequence counter
+read path to safely interrupt its own write side critical section.
+
+Use seqcount_latch_t when the write side sections cannot be protected
+from interruption by readers. This is typically the case when the read
+side can be invoked from NMI handlers.
+
+Check `raw_write_seqcount_latch()` for more information.
+
+
 .. _seqlock_t:
 
 Sequential locks (``seqlock_t``)
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 300cbf3..88b917d 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -587,34 +587,76 @@ static inline void write_seqcount_t_invalidate(seqcount_t *s)
 	kcsan_nestable_atomic_end();
 }
 
-/**
- * raw_read_seqcount_latch() - pick even/odd seqcount_t latch data copy
- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
+/*
+ * Latch sequence counters (seqcount_latch_t)
  *
- * Use seqcount_t latching to switch between two storage places protected
- * by a sequence counter. Doing so allows having interruptible, preemptible,
- * seqcount_t write side critical sections.
+ * A sequence counter variant where the counter even/odd value is used to
+ * switch between two copies of protected data. This allows the read path,
+ * typically NMIs, to safely interrupt the write side critical section.
  *
- * Check raw_write_seqcount_latch() for more details and a full reader and
- * writer usage example.
+ * As the write sections are fully preemptible, no special handling for
+ * PREEMPT_RT is needed.
+ */
+typedef struct {
+	seqcount_t seqcount;
+} seqcount_latch_t;
+
+/**
+ * SEQCNT_LATCH_ZERO() - static initializer for seqcount_latch_t
+ * @seq_name: Name of the seqcount_latch_t instance
+ */
+#define SEQCNT_LATCH_ZERO(seq_name) {					\
+	.seqcount		= SEQCNT_ZERO(seq_name.seqcount),	\
+}
+
+/**
+ * seqcount_latch_init() - runtime initializer for seqcount_latch_t
+ * @s: Pointer to the seqcount_latch_t instance
+ */
+static inline void seqcount_latch_init(seqcount_latch_t *s)
+{
+	seqcount_init(&s->seqcount);
+}
+
+/**
+ * raw_read_seqcount_latch() - pick even/odd latch data copy
+ * @s: Pointer to seqcount_t, seqcount_raw_spinlock_t, or seqcount_latch_t
+ *
+ * See raw_write_seqcount_latch() for details and a full reader/writer
+ * usage example.
  *
  * Return: sequence counter raw value. Use the lowest bit as an index for
- * picking which data copy to read. The full counter value must then be
- * checked with read_seqcount_retry().
+ * picking which data copy to read. The full counter must then be checked
+ * with read_seqcount_latch_retry().
  */
-#define raw_read_seqcount_latch(s)					\
-	raw_read_seqcount_t_latch(__seqcount_ptr(s))
+#define raw_read_seqcount_latch(s)						\
+({										\
+	/*									\
+	 * Pairs with the first smp_wmb() in raw_write_seqcount_latch().	\
+	 * Due to the dependent load, a full smp_rmb() is not needed.		\
+	 */									\
+	_Generic(*(s),								\
+		 seqcount_t:		  READ_ONCE(((seqcount_t *)s)->sequence),			\
+		 seqcount_raw_spinlock_t: READ_ONCE(((seqcount_raw_spinlock_t *)s)->seqcount.sequence),	\
+		 seqcount_latch_t:	  READ_ONCE(((seqcount_latch_t *)s)->seqcount.sequence));	\
+})
 
-static inline int raw_read_seqcount_t_latch(seqcount_t *s)
+/**
+ * read_seqcount_latch_retry() - end a seqcount_latch_t read section
+ * @s:		Pointer to seqcount_latch_t
+ * @start:	count, from raw_read_seqcount_latch()
+ *
+ * Return: true if a read section retry is required, else false
+ */
+static inline int
+read_seqcount_latch_retry(const seqcount_latch_t *s, unsigned start)
 {
-	/* Pairs with the first smp_wmb() in raw_write_seqcount_latch() */
-	int seq = READ_ONCE(s->sequence); /* ^^^ */
-	return seq;
+	return read_seqcount_retry(&s->seqcount, start);
 }
 
 /**
- * raw_write_seqcount_latch() - redirect readers to even/odd copy
- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
+ * raw_write_seqcount_latch() - redirect latch readers to even/odd copy
+ * @s: Pointer to seqcount_t, seqcount_raw_spinlock_t, or seqcount_latch_t
  *
  * The latch technique is a multiversion concurrency control method that allows
  * queries during non-atomic modifications. If you can guarantee queries never
@@ -633,7 +675,7 @@ static inline int raw_read_seqcount_t_latch(seqcount_t *s)
  * The basic form is a data structure like::
  *
  *	struct latch_struct {
- *		seqcount_t		seq;
+ *		seqcount_latch_t	seq;
  *		struct data_struct	data[2];
  *	};
  *
@@ -643,13 +685,13 @@ static inline int raw_read_seqcount_t_latch(seqcount_t *s)
  *	void latch_modify(struct latch_struct *latch, ...)
  *	{
  *		smp_wmb();	// Ensure that the last data[1] update is visible
- *		latch->seq++;
+ *		latch->seq.sequence++;
  *		smp_wmb();	// Ensure that the seqcount update is visible
  *
  *		modify(latch->data[0], ...);
  *
  *		smp_wmb();	// Ensure that the data[0] update is visible
- *		latch->seq++;
+ *		latch->seq.sequence++;
  *		smp_wmb();	// Ensure that the seqcount update is visible
  *
  *		modify(latch->data[1], ...);
@@ -668,8 +710,8 @@ static inline int raw_read_seqcount_t_latch(seqcount_t *s)
  *			idx = seq & 0x01;
  *			entry = data_query(latch->data[idx], ...);
  *
- *		// read_seqcount_retry() includes needed smp_rmb()
- *		} while (read_seqcount_retry(&latch->seq, seq));
+ *		// This includes needed smp_rmb()
+ *		} while (read_seqcount_latch_retry(&latch->seq, seq));
  *
  *		return entry;
  *	}
@@ -693,14 +735,14 @@ static inline int raw_read_seqcount_t_latch(seqcount_t *s)
  *	When data is a dynamic data structure; one should use regular RCU
  *	patterns to manage the lifetimes of the objects within.
  */
-#define raw_write_seqcount_latch(s)					\
-	raw_write_seqcount_t_latch(__seqcount_ptr(s))
-
-static inline void raw_write_seqcount_t_latch(seqcount_t *s)
-{
-       smp_wmb();      /* prior stores before incrementing "sequence" */
-       s->sequence++;
-       smp_wmb();      /* increment "sequence" before following stores */
+#define raw_write_seqcount_latch(s)						\
+{										\
+       smp_wmb();      /* prior stores before incrementing "sequence" */	\
+       _Generic(*(s),								\
+		seqcount_t:		((seqcount_t *)s)->sequence++,		\
+		seqcount_raw_spinlock_t:((seqcount_raw_spinlock_t *)s)->seqcount.sequence++, \
+		seqcount_latch_t:	((seqcount_latch_t *)s)->seqcount.sequence++); \
+       smp_wmb();      /* increment "sequence" before following stores */	\
 }
 
 /*

^ permalink raw reply	[relevance 55%]

* [tip: locking/core] timekeeping: Use seqcount_latch_t
  2020-08-27 11:40 88%   ` [PATCH v1 5/8] timekeeping: " Ahmed S. Darwish
@ 2020-09-10 15:08 74%     ` tip-bot2 for Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: tip-bot2 for Ahmed S. Darwish @ 2020-09-10 15:08 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Ahmed S. Darwish, Peter Zijlstra (Intel), x86, LKML

The following commit has been merged into the locking/core branch of tip:

Commit-ID:     249d053835320cb3e7c00066cf085a6ba9b1f126
Gitweb:        https://git.kernel.org/tip/249d053835320cb3e7c00066cf085a6ba9b1f126
Author:        Ahmed S. Darwish <a.darwish@linutronix.de>
AuthorDate:    Thu, 27 Aug 2020 13:40:41 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Thu, 10 Sep 2020 11:19:29 +02:00

timekeeping: Use seqcount_latch_t

Latch sequence counters are a multiversion concurrency control mechanism
where the seqcount_t counter even/odd value is used to switch between
two data storage copies. This allows the seqcount_t read path to safely
interrupt its write side critical section (e.g. from NMIs).

Initially, latch sequence counters were implemented as a single write
function, raw_write_seqcount_latch(), above plain seqcount_t. The read
path was expected to use plain seqcount_t raw_read_seqcount().

A specialized read function was later added, raw_read_seqcount_latch(),
and became the standardized way for latch read paths. Having unique read
and write APIs meant that latch sequence counters are basically a data
type of their own -- just inappropriately overloading plain seqcount_t.
The seqcount_latch_t data type was thus introduced at seqlock.h.

Use that new data type instead of seqcount_raw_spinlock_t. This ensures
that only latch-safe APIs are to be used with the sequence counter.

Note that the use of seqcount_raw_spinlock_t was not very useful in the
first place: only the "raw_" subset of seqcount_t APIs was used at
timekeeping.c. That subset was created for contexts where lockdep cannot
be used, so seqcount_LOCKTYPE_t's raison d'être -- verifying that the
seqcount_t writer serialization lock is held -- cannot be served here.

References: 0c3351d451ae ("seqlock: Use raw_ prefix instead of _no_lockdep")
References: 55f3560df975 ("seqlock: Extend seqcount API with associated locks")
Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200827114044.11173-6-a.darwish@linutronix.de
---
 kernel/time/timekeeping.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 4c47f38..999c981 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -64,7 +64,7 @@ static struct timekeeper shadow_timekeeper;
  * See @update_fast_timekeeper() below.
  */
 struct tk_fast {
-	seqcount_raw_spinlock_t	seq;
+	seqcount_latch_t	seq;
 	struct tk_read_base	base[2];
 };
 
@@ -81,13 +81,13 @@ static struct clocksource dummy_clock = {
 };
 
 static struct tk_fast tk_fast_mono ____cacheline_aligned = {
-	.seq     = SEQCNT_RAW_SPINLOCK_ZERO(tk_fast_mono.seq, &timekeeper_lock),
+	.seq     = SEQCNT_LATCH_ZERO(tk_fast_mono.seq),
 	.base[0] = { .clock = &dummy_clock, },
 	.base[1] = { .clock = &dummy_clock, },
 };
 
 static struct tk_fast tk_fast_raw  ____cacheline_aligned = {
-	.seq     = SEQCNT_RAW_SPINLOCK_ZERO(tk_fast_raw.seq, &timekeeper_lock),
+	.seq     = SEQCNT_LATCH_ZERO(tk_fast_raw.seq),
 	.base[0] = { .clock = &dummy_clock, },
 	.base[1] = { .clock = &dummy_clock, },
 };
@@ -467,7 +467,7 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
 					tk_clock_read(tkr),
 					tkr->cycle_last,
 					tkr->mask));
-	} while (read_seqcount_retry(&tkf->seq, seq));
+	} while (read_seqcount_latch_retry(&tkf->seq, seq));
 
 	return now;
 }
@@ -533,7 +533,7 @@ static __always_inline u64 __ktime_get_real_fast_ns(struct tk_fast *tkf)
 					tk_clock_read(tkr),
 					tkr->cycle_last,
 					tkr->mask));
-	} while (read_seqcount_retry(&tkf->seq, seq));
+	} while (read_seqcount_latch_retry(&tkf->seq, seq));
 
 	return now;
 }

^ permalink raw reply	[relevance 74%]

* [tip: locking/core] seqlock: seqcount latch APIs: Only allow seqcount_latch_t
  2020-08-27 11:40 89%   ` [PATCH v1 8/8] seqlock: seqcount latch APIs: Only allow seqcount_latch_t Ahmed S. Darwish
@ 2020-09-10 15:08 75%     ` tip-bot2 for Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: tip-bot2 for Ahmed S. Darwish @ 2020-09-10 15:08 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Ahmed S. Darwish, Peter Zijlstra (Intel), x86, LKML

The following commit has been merged into the locking/core branch of tip:

Commit-ID:     0c9794c8b6781eb7dad8e19b78c5d4557790597a
Gitweb:        https://git.kernel.org/tip/0c9794c8b6781eb7dad8e19b78c5d4557790597a
Author:        Ahmed S. Darwish <a.darwish@linutronix.de>
AuthorDate:    Thu, 27 Aug 2020 13:40:44 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Thu, 10 Sep 2020 11:19:30 +02:00

seqlock: seqcount latch APIs: Only allow seqcount_latch_t

All latch sequence counter call-sites have now been converted from plain
seqcount_t to the new seqcount_latch_t data type.

Enforce type-safety by modifying seqlock.h latch APIs to only accept
seqcount_latch_t.

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200827114044.11173-9-a.darwish@linutronix.de
---
 include/linux/seqlock.h | 36 +++++++++++++++---------------------
 1 file changed, 15 insertions(+), 21 deletions(-)

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 88b917d..f2a7a46 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -620,7 +620,7 @@ static inline void seqcount_latch_init(seqcount_latch_t *s)
 
 /**
  * raw_read_seqcount_latch() - pick even/odd latch data copy
- * @s: Pointer to seqcount_t, seqcount_raw_spinlock_t, or seqcount_latch_t
+ * @s: Pointer to seqcount_latch_t
  *
  * See raw_write_seqcount_latch() for details and a full reader/writer
  * usage example.
@@ -629,17 +629,14 @@ static inline void seqcount_latch_init(seqcount_latch_t *s)
  * picking which data copy to read. The full counter must then be checked
  * with read_seqcount_latch_retry().
  */
-#define raw_read_seqcount_latch(s)						\
-({										\
-	/*									\
-	 * Pairs with the first smp_wmb() in raw_write_seqcount_latch().	\
-	 * Due to the dependent load, a full smp_rmb() is not needed.		\
-	 */									\
-	_Generic(*(s),								\
-		 seqcount_t:		  READ_ONCE(((seqcount_t *)s)->sequence),			\
-		 seqcount_raw_spinlock_t: READ_ONCE(((seqcount_raw_spinlock_t *)s)->seqcount.sequence),	\
-		 seqcount_latch_t:	  READ_ONCE(((seqcount_latch_t *)s)->seqcount.sequence));	\
-})
+static inline unsigned raw_read_seqcount_latch(const seqcount_latch_t *s)
+{
+	/*
+	 * Pairs with the first smp_wmb() in raw_write_seqcount_latch().
+	 * Due to the dependent load, a full smp_rmb() is not needed.
+	 */
+	return READ_ONCE(s->seqcount.sequence);
+}
 
 /**
  * read_seqcount_latch_retry() - end a seqcount_latch_t read section
@@ -656,7 +653,7 @@ read_seqcount_latch_retry(const seqcount_latch_t *s, unsigned start)
 
 /**
  * raw_write_seqcount_latch() - redirect latch readers to even/odd copy
- * @s: Pointer to seqcount_t, seqcount_raw_spinlock_t, or seqcount_latch_t
+ * @s: Pointer to seqcount_latch_t
  *
  * The latch technique is a multiversion concurrency control method that allows
  * queries during non-atomic modifications. If you can guarantee queries never
@@ -735,14 +732,11 @@ read_seqcount_latch_retry(const seqcount_latch_t *s, unsigned start)
  *	When data is a dynamic data structure; one should use regular RCU
  *	patterns to manage the lifetimes of the objects within.
  */
-#define raw_write_seqcount_latch(s)						\
-{										\
-       smp_wmb();      /* prior stores before incrementing "sequence" */	\
-       _Generic(*(s),								\
-		seqcount_t:		((seqcount_t *)s)->sequence++,		\
-		seqcount_raw_spinlock_t:((seqcount_raw_spinlock_t *)s)->seqcount.sequence++, \
-		seqcount_latch_t:	((seqcount_latch_t *)s)->seqcount.sequence++); \
-       smp_wmb();      /* increment "sequence" before following stores */	\
+static inline void raw_write_seqcount_latch(seqcount_latch_t *s)
+{
+	smp_wmb();	/* prior stores before incrementing "sequence" */
+	s->seqcount.sequence++;
+	smp_wmb();      /* increment "sequence" before following stores */
 }
 
 /*

^ permalink raw reply	[relevance 75%]

* [tip: locking/core] mm/swap: Do not abuse the seqcount_t latching API
  @ 2020-09-10 15:08 62% ` tip-bot2 for Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: tip-bot2 for Ahmed S. Darwish @ 2020-09-10 15:08 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Ahmed S. Darwish, Peter Zijlstra (Intel), x86, LKML

The following commit has been merged into the locking/core branch of tip:

Commit-ID:     6446a5131e24a834606c15a965fa920041581c2c
Gitweb:        https://git.kernel.org/tip/6446a5131e24a834606c15a965fa920041581c2c
Author:        Ahmed S. Darwish <a.darwish@linutronix.de>
AuthorDate:    Thu, 27 Aug 2020 13:40:38 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Thu, 10 Sep 2020 11:19:28 +02:00

mm/swap: Do not abuse the seqcount_t latching API

Commit eef1a429f234 ("mm/swap.c: piggyback lru_add_drain_all() calls")
implemented an optimization mechanism to exit the to-be-started LRU
drain operation (name it A) if another drain operation *started and
finished* while (A) was blocked on the LRU draining mutex.

This was done through a seqcount_t latch, which is an abuse of its
semantics:

  1. seqcount_t latching should be used for the purpose of switching
     between two storage places with sequence protection to allow
     interruptible, preemptible, writer sections. The referenced
     optimization mechanism has absolutely nothing to do with that.

  2. The used raw_write_seqcount_latch() has two SMP write memory
     barriers to ensure one consistent storage place out of the two
     storage places available. A full memory barrier is required
     instead, to guarantee that the pagevec counter stores visible to
     the local CPU are also visible to other CPUs -- before loading the
     current drain generation.

Besides the seqcount_t API abuse, the semantics of a latch sequence
counter were force-fitted into the referenced optimization. What was
meant is to track "generations" of LRU draining operations, where
"global lru draining generation = x" implies that all generations
0 < n <= x are already *scheduled* for draining -- thus nothing needs
to be done if the current generation number n <= x.

Remove the conceptually-inappropriate seqcount_t latch usage. Manually
implement the referenced optimization using a counter and SMP memory
barriers.

Note, while at it, use the non-atomic variant of cpumask_set_cpu(),
__cpumask_set_cpu(), due to the already existing mutex protection.
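
Reduced to its essentials, the replacement pattern is (a hedged sketch,
not the patch itself; see the full diff below):

static unsigned int drain_gen;		/* global generation, see (A) */
static DEFINE_MUTEX(drain_lock);

static void drain_all_sketch(void)
{
	unsigned int this_gen;

	/* Local pagevec stores must be visible before loading the gen. */
	smp_mb();
	this_gen = smp_load_acquire(&drain_gen);	/* (B) */

	mutex_lock(&drain_lock);

	/* (C) A newer drain was already scheduled: nothing left to do. */
	if (this_gen != drain_gen)
		goto done;

	WRITE_ONCE(drain_gen, drain_gen + 1);		/* (D) */
	smp_mb();	/* store new gen before loading pagevec counters */

	/* ... queue per-CPU drain work ... */
done:
	mutex_unlock(&drain_lock);
}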

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/87y2pg9erj.fsf@vostro.fn.ogness.net
---
 mm/swap.c | 65 ++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 54 insertions(+), 11 deletions(-)

diff --git a/mm/swap.c b/mm/swap.c
index d16d65d..a1ec807 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -763,10 +763,20 @@ static void lru_add_drain_per_cpu(struct work_struct *dummy)
  */
 void lru_add_drain_all(void)
 {
-	static seqcount_t seqcount = SEQCNT_ZERO(seqcount);
-	static DEFINE_MUTEX(lock);
+	/*
+	 * lru_drain_gen - Global pages generation number
+	 *
+	 * (A) Definition: global lru_drain_gen = x implies that all generations
+	 *     0 < n <= x are already *scheduled* for draining.
+	 *
+	 * This is an optimization for the highly-contended use case where a
+	 * user space workload keeps constantly generating a flow of pages for
+	 * each CPU.
+	 */
+	static unsigned int lru_drain_gen;
 	static struct cpumask has_work;
-	int cpu, seq;
+	static DEFINE_MUTEX(lock);
+	unsigned cpu, this_gen;
 
 	/*
 	 * Make sure nobody triggers this path before mm_percpu_wq is fully
@@ -775,21 +785,54 @@ void lru_add_drain_all(void)
 	if (WARN_ON(!mm_percpu_wq))
 		return;
 
-	seq = raw_read_seqcount_latch(&seqcount);
+	/*
+	 * Guarantee pagevec counter stores visible by this CPU are visible to
+	 * other CPUs before loading the current drain generation.
+	 */
+	smp_mb();
+
+	/*
+	 * (B) Locally cache global LRU draining generation number
+	 *
+	 * The read barrier ensures that the counter is loaded before the mutex
+	 * is taken. It pairs with smp_mb() inside the mutex critical section
+	 * at (D).
+	 */
+	this_gen = smp_load_acquire(&lru_drain_gen);
 
 	mutex_lock(&lock);
 
 	/*
-	 * Piggyback on drain started and finished while we waited for lock:
-	 * all pages pended at the time of our enter were drained from vectors.
+	 * (C) Exit the draining operation if a newer generation, from another
+	 * lru_add_drain_all(), was already scheduled for draining. Check (A).
 	 */
-	if (__read_seqcount_retry(&seqcount, seq))
+	if (unlikely(this_gen != lru_drain_gen))
 		goto done;
 
-	raw_write_seqcount_latch(&seqcount);
+	/*
+	 * (D) Increment global generation number
+	 *
+	 * Pairs with smp_load_acquire() at (B), outside of the critical
+	 * section. Use a full memory barrier to guarantee that the new global
+	 * drain generation number is stored before loading pagevec counters.
+	 *
+	 * This pairing must be done here, before the for_each_online_cpu loop
+	 * below which drains the page vectors.
+	 *
+	 * Let x, y, and z represent some system CPU numbers, where x < y < z.
+	 * Assume CPU #z is in the middle of the for_each_online_cpu loop
+	 * below and has already reached CPU #y's per-cpu data. CPU #x comes
+	 * along, adds some pages to its per-cpu vectors, then calls
+	 * lru_add_drain_all().
+	 *
+	 * If the paired barrier is done at any later step, e.g. after the
+	 * loop, CPU #x will just exit at (C) and miss flushing out all of its
+	 * added pages.
+	 */
+	WRITE_ONCE(lru_drain_gen, lru_drain_gen + 1);
+	smp_mb();
 
 	cpumask_clear(&has_work);
-
 	for_each_online_cpu(cpu) {
 		struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
 
@@ -801,7 +844,7 @@ void lru_add_drain_all(void)
 		    need_activate_page_drain(cpu)) {
 			INIT_WORK(work, lru_add_drain_per_cpu);
 			queue_work_on(cpu, mm_percpu_wq, work);
-			cpumask_set_cpu(cpu, &has_work);
+			__cpumask_set_cpu(cpu, &has_work);
 		}
 	}
 
@@ -816,7 +859,7 @@ void lru_add_drain_all(void)
 {
 	lru_add_drain();
 }
-#endif
+#endif /* CONFIG_SMP */
 
 /**
  * release_pages - batched put_page()

^ permalink raw reply	[relevance 62%]

* [tip: locking/core] seqlock: Use unique prefix for seqcount_t property accessors
  2020-09-04 15:32 88% ` [PATCH v2 2/5] seqlock: Use unique prefix for seqcount_t property accessors Ahmed S. Darwish
@ 2020-09-10 15:08 75%   ` tip-bot2 for Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: tip-bot2 for Ahmed S. Darwish @ 2020-09-10 15:08 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Ahmed S. Darwish, Peter Zijlstra (Intel), x86, LKML

The following commit has been merged into the locking/core branch of tip:

Commit-ID:     5cdd25572a29e46f932d3e6eedbd07429de66431
Gitweb:        https://git.kernel.org/tip/5cdd25572a29e46f932d3e6eedbd07429de66431
Author:        Ahmed S. Darwish <a.darwish@linutronix.de>
AuthorDate:    Fri, 04 Sep 2020 17:32:28 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Thu, 10 Sep 2020 11:19:30 +02:00

seqlock: Use unique prefix for seqcount_t property accessors

At seqlock.h, the following set of functions:

    - __seqcount_ptr()
    - __seqcount_preemptible()
    - __seqcount_assert()

act as plain seqcount_t "property" accessors. Meanwhile, the following
group:

    - __seqcount_ptr()
    - __seqcount_lock_preemptible()
    - __seqcount_assert_lock_held()

act as the equivalent set, but in the generic form, taking either
seqcount_t or any of the seqcount_LOCKNAME_t variants.

This is quite confusing, especially for the first member, which is
named exactly the same in both groups.

Differentiate the first group by using "__seqprop" as prefix, and also
use that same prefix for all of seqcount_LOCKNAME_t property accessors.

While at it, constify the property accessors' first parameter when
appropriate.
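
For readers less familiar with the dispatch these accessors feed into,
the following standalone model shows how _Generic selects an accessor
by the static type of its argument (illustrative userspace sketch, not
the kernel code itself):

#include <stdio.h>

typedef struct { unsigned sequence; } seqcount_t;
typedef struct { seqcount_t seqcount; } seqcount_spinlock_t;

/* Modeled after __seqprop(): the type of *(s) picks a branch at compile time. */
#define seqprop_ptr(s) _Generic(*(s),					\
	seqcount_t:		(seqcount_t *)(void *)(s),		\
	seqcount_spinlock_t:	&((seqcount_spinlock_t *)(void *)(s))->seqcount)

int main(void)
{
	seqcount_t plain = { .sequence = 1 };
	seqcount_spinlock_t locked = { .seqcount = { .sequence = 2 } };

	/* Both resolve to a plain seqcount_t pointer. Prints: 1 2 */
	printf("%u %u\n", seqprop_ptr(&plain)->sequence,
			  seqprop_ptr(&locked)->sequence);
	return 0;
}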

References: 55f3560df975 ("seqlock: Extend seqcount API with associated locks")
Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200904153231.11994-3-a.darwish@linutronix.de
---
 include/linux/seqlock.h | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 820ace2..0b4a22f 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -157,7 +157,9 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
  */
 
 /*
- * SEQCOUNT_LOCKNAME() - Instantiate seqcount_LOCKNAME_t and helpers
+ * SEQCOUNT_LOCKNAME()	- Instantiate seqcount_LOCKNAME_t and helpers
+ * seqprop_LOCKNAME_*()	- Property accessors for seqcount_LOCKNAME_t
+ *
  * @lockname:		"LOCKNAME" part of seqcount_LOCKNAME_t
  * @locktype:		LOCKNAME canonical C data type
  * @preemptible:	preemptibility of above lockname
@@ -177,19 +179,19 @@ seqcount_##lockname##_init(seqcount_##lockname##_t *s, locktype *lock)	\
 }									\
 									\
 static __always_inline seqcount_t *					\
-__seqcount_##lockname##_ptr(seqcount_##lockname##_t *s)			\
+__seqprop_##lockname##_ptr(seqcount_##lockname##_t *s)			\
 {									\
 	return &s->seqcount;						\
 }									\
 									\
 static __always_inline bool						\
-__seqcount_##lockname##_preemptible(seqcount_##lockname##_t *s)		\
+__seqprop_##lockname##_preemptible(const seqcount_##lockname##_t *s)	\
 {									\
 	return preemptible;						\
 }									\
 									\
 static __always_inline void						\
-__seqcount_##lockname##_assert(seqcount_##lockname##_t *s)		\
+__seqprop_##lockname##_assert(const seqcount_##lockname##_t *s)		\
 {									\
 	__SEQ_LOCK(lockdep_assert_held(lockmember));			\
 }
@@ -198,17 +200,17 @@ __seqcount_##lockname##_assert(seqcount_##lockname##_t *s)		\
  * __seqprop() for seqcount_t
  */
 
-static inline seqcount_t *__seqcount_ptr(seqcount_t *s)
+static inline seqcount_t *__seqprop_ptr(seqcount_t *s)
 {
 	return s;
 }
 
-static inline bool __seqcount_preemptible(seqcount_t *s)
+static inline bool __seqprop_preemptible(const seqcount_t *s)
 {
 	return false;
 }
 
-static inline void __seqcount_assert(seqcount_t *s)
+static inline void __seqprop_assert(const seqcount_t *s)
 {
 	lockdep_assert_preemption_disabled();
 }
@@ -237,10 +239,10 @@ SEQCOUNT_LOCKNAME(ww_mutex,	struct ww_mutex,	true,	&s->lock->base)
 #define SEQCNT_WW_MUTEX_ZERO(name, lock) 	SEQCOUNT_LOCKNAME_ZERO(name, lock)
 
 #define __seqprop_case(s, lockname, prop)				\
-	seqcount_##lockname##_t: __seqcount_##lockname##_##prop((void *)(s))
+	seqcount_##lockname##_t: __seqprop_##lockname##_##prop((void *)(s))
 
 #define __seqprop(s, prop) _Generic(*(s),				\
-	seqcount_t:		__seqcount_##prop((void *)(s)),		\
+	seqcount_t:		__seqprop_##prop((void *)(s)),		\
 	__seqprop_case((s),	raw_spinlock,	prop),			\
 	__seqprop_case((s),	spinlock,	prop),			\
 	__seqprop_case((s),	rwlock,		prop),			\
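
For readers outside seqlock.h, the _Generic() dispatch these accessors
plug into can be shown with a small self-contained sketch. All names
below (my_seqcount_t, my_seqcount_spinlock_t, seqprop_*) are
hypothetical stand-ins mirroring the kernel's naming scheme, not kernel
code:

  #include <stdio.h>

  typedef struct { unsigned sequence; } my_seqcount_t;
  typedef struct { my_seqcount_t seqcount; } my_seqcount_spinlock_t;

  /* One property accessor per type, sharing the "seqprop" prefix */
  static unsigned seqprop_sequence(const my_seqcount_t *s)
  {
          return s->sequence;
  }

  static unsigned seqprop_spinlock_sequence(const my_seqcount_spinlock_t *s)
  {
          return s->seqcount.sequence;
  }

  /* Compile-time dispatch on the pointee type, as __seqprop() does */
  #define seqprop(s, prop) _Generic(*(s),                               \
          my_seqcount_t:          seqprop_##prop((void *)(s)),          \
          my_seqcount_spinlock_t: seqprop_spinlock_##prop((void *)(s)))

  int main(void)
  {
          my_seqcount_t a = { .sequence = 2 };
          my_seqcount_spinlock_t b = { { .sequence = 4 } };

          /* Each call resolves to the matching accessor; no default case */
          printf("%u %u\n", seqprop(&a, sequence), seqprop(&b, sequence));
          return 0;
  }

The (void *) casts keep the not-selected branches type-correct, which
is exactly why the kernel macro does the same.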

^ permalink raw reply	[relevance 75%]

* [tip: locking/core] rbtree_latch: Use seqcount_latch_t
  2020-08-27 11:40 99%   ` [PATCH v1 7/8] rbtree_latch: " Ahmed S. Darwish
@ 2020-09-10 15:08 86%     ` tip-bot2 for Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: tip-bot2 for Ahmed S. Darwish @ 2020-09-10 15:08 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Ahmed S. Darwish, Peter Zijlstra (Intel), x86, LKML

The following commit has been merged into the locking/core branch of tip:

Commit-ID:     24bf401cebfd630cc9e2c3746e43945e836626f9
Gitweb:        https://git.kernel.org/tip/24bf401cebfd630cc9e2c3746e43945e836626f9
Author:        Ahmed S. Darwish <a.darwish@linutronix.de>
AuthorDate:    Thu, 27 Aug 2020 13:40:43 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Thu, 10 Sep 2020 11:19:29 +02:00

rbtree_latch: Use seqcount_latch_t

Latch sequence counters have unique read and write APIs, and thus
seqcount_latch_t was recently introduced in seqlock.h.

Use that new data type instead of plain seqcount_t. This adds the
necessary type-safety and ensures that only latching-safe seqcount APIs
are used.

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200827114044.11173-8-a.darwish@linutronix.de
---
 include/linux/rbtree_latch.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/linux/rbtree_latch.h b/include/linux/rbtree_latch.h
index 7d012fa..3d1a9e7 100644
--- a/include/linux/rbtree_latch.h
+++ b/include/linux/rbtree_latch.h
@@ -42,8 +42,8 @@ struct latch_tree_node {
 };
 
 struct latch_tree_root {
-	seqcount_t	seq;
-	struct rb_root	tree[2];
+	seqcount_latch_t	seq;
+	struct rb_root		tree[2];
 };
 
 /**
@@ -206,7 +206,7 @@ latch_tree_find(void *key, struct latch_tree_root *root,
 	do {
 		seq = raw_read_seqcount_latch(&root->seq);
 		node = __lt_find(key, root, seq & 1, ops->comp);
-	} while (read_seqcount_retry(&root->seq, seq));
+	} while (read_seqcount_latch_retry(&root->seq, seq));
 
 	return node;
 }

^ permalink raw reply	[relevance 86%]

* [tip: locking/core] time/sched_clock: Use raw_read_seqcount_latch() during suspend
  @ 2020-09-10 15:08 84% ` tip-bot2 for Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: tip-bot2 for Ahmed S. Darwish @ 2020-09-10 15:08 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Ahmed S. Darwish, Peter Zijlstra (Intel), x86, LKML

The following commit has been merged into the locking/core branch of tip:

Commit-ID:     58faf20a086bd34f91983609e26eac3d5fe76be3
Gitweb:        https://git.kernel.org/tip/58faf20a086bd34f91983609e26eac3d5fe76be3
Author:        Ahmed S. Darwish <a.darwish@linutronix.de>
AuthorDate:    Thu, 27 Aug 2020 13:40:37 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Thu, 10 Sep 2020 11:19:28 +02:00

time/sched_clock: Use raw_read_seqcount_latch() during suspend

sched_clock uses seqcount_t latching to switch between two storage
places protected by the sequence counter. This allows it to have
interruptible, NMI-safe, seqcount_t write side critical sections.

Since 7fc26327b756 ("seqlock: Introduce raw_read_seqcount_latch()"),
raw_read_seqcount_latch() has been the standardized way for seqcount_t
latch read paths. Thanks to its dependent load, it needs one read
memory barrier fewer than the currently used raw_read_seqcount() API.

Use raw_read_seqcount_latch() for the suspend path.

Commit aadd6e5caaac ("time/sched_clock: Use raw_read_seqcount_latch()")
missed changing that instance of raw_read_seqcount().

References: 1809bfa44e10 ("timers, sched/clock: Avoid deadlock during read from NMI")
Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200715092345.GA231464@debian-buster-darwi.lab.linutronix.de
---
 kernel/time/sched_clock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index 1c03eec..8c6b5fe 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -258,7 +258,7 @@ void __init generic_sched_clock_init(void)
  */
 static u64 notrace suspended_sched_clock_read(void)
 {
-	unsigned int seq = raw_read_seqcount(&cd.seq);
+	unsigned int seq = raw_read_seqcount_latch(&cd.seq);
 
 	return cd.read_data[seq & 1].epoch_cyc;
 }
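
For context, the general latch read pattern that
raw_read_seqcount_latch() serves looks roughly like the sketch below,
adapted from the latch example documented in seqlock.h; the
latch_struct layout and the epoch_cyc field are illustrative:

  struct data { u64 epoch_cyc; };

  struct latch_struct {
          seqcount_latch_t seq;
          struct data      data[2];
  };

  static u64 latch_query(struct latch_struct *latch)
  {
          unsigned int seq;
          u64 val;

          do {
                  /*
                   * Dependent load: the returned sequence selects the
                   * index, so no extra smp_rmb() is needed in between.
                   */
                  seq = raw_read_seqcount_latch(&latch->seq);
                  val = latch->data[seq & 1].epoch_cyc;
          } while (read_seqcount_latch_retry(&latch->seq, seq));

          return val;
  }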

^ permalink raw reply	[relevance 84%]

* [tip: locking/core] time/sched_clock: Use seqcount_latch_t
  2020-08-27 11:40 99%   ` [PATCH v1 4/8] time/sched_clock: Use seqcount_latch_t Ahmed S. Darwish
@ 2020-09-10 15:08 86%     ` tip-bot2 for Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: tip-bot2 for Ahmed S. Darwish @ 2020-09-10 15:08 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Ahmed S. Darwish, Peter Zijlstra (Intel), x86, LKML

The following commit has been merged into the locking/core branch of tip:

Commit-ID:     a690ed07353ec45f056b0a6f87c23a12a59c030d
Gitweb:        https://git.kernel.org/tip/a690ed07353ec45f056b0a6f87c23a12a59c030d
Author:        Ahmed S. Darwish <a.darwish@linutronix.de>
AuthorDate:    Thu, 27 Aug 2020 13:40:40 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Thu, 10 Sep 2020 11:19:29 +02:00

time/sched_clock: Use seqcount_latch_t

Latch sequence counters have unique read and write APIs, and thus
seqcount_latch_t was recently introduced in seqlock.h.

Use that new data type instead of plain seqcount_t. This adds the
necessary type-safety and ensures that only latching-safe seqcount APIs
are used.

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200827114044.11173-5-a.darwish@linutronix.de
---
 kernel/time/sched_clock.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index 8c6b5fe..0642013 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -35,7 +35,7 @@
  * into a single 64-byte cache line.
  */
 struct clock_data {
-	seqcount_t		seq;
+	seqcount_latch_t	seq;
 	struct clock_read_data	read_data[2];
 	ktime_t			wrap_kt;
 	unsigned long		rate;
@@ -76,7 +76,7 @@ struct clock_read_data *sched_clock_read_begin(unsigned int *seq)
 
 int sched_clock_read_retry(unsigned int seq)
 {
-	return read_seqcount_retry(&cd.seq, seq);
+	return read_seqcount_latch_retry(&cd.seq, seq);
 }
 
 unsigned long long notrace sched_clock(void)
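
The matching write side of the latch technique modifies the inactive
copy, flips the latch, then updates the other copy. A sketch following
the example documented in seqlock.h, given an illustrative latch_struct
holding a seqcount_latch_t plus two data copies:

  static void latch_modify(struct latch_struct *latch, u64 val)
  {
          /* seq becomes odd: readers use data[1], data[0] is quiescent */
          raw_write_seqcount_latch(&latch->seq);
          latch->data[0].epoch_cyc = val;

          /* seq becomes even again: readers move back to data[0] */
          raw_write_seqcount_latch(&latch->seq);
          latch->data[1].epoch_cyc = val;
  }

Each raw_write_seqcount_latch() is only a sequence increment fenced by
write barriers, so the write side stays interruptible and NMI-safe,
which is what sched_clock relies on.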

^ permalink raw reply	[relevance 86%]

* [tip: locking/core] x86/tsc: Use seqcount_latch_t
  2020-08-27 11:40 94%   ` [PATCH v1 6/8] x86/tsc: " Ahmed S. Darwish
  @ 2020-09-10 15:08 81%     ` tip-bot2 for Ahmed S. Darwish
  1 sibling, 0 replies; 200+ results
From: tip-bot2 for Ahmed S. Darwish @ 2020-09-10 15:08 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Ahmed S. Darwish, Peter Zijlstra (Intel), x86, LKML

The following commit has been merged into the locking/core branch of tip:

Commit-ID:     a1f1066133d85d5f42217cc72a2490bb7aa889c5
Gitweb:        https://git.kernel.org/tip/a1f1066133d85d5f42217cc72a2490bb7aa889c5
Author:        Ahmed S. Darwish <a.darwish@linutronix.de>
AuthorDate:    Thu, 27 Aug 2020 13:40:42 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Thu, 10 Sep 2020 11:19:29 +02:00

x86/tsc: Use seqcount_latch_t

Latch sequence counters have unique read and write APIs, and thus
seqcount_latch_t was recently introduced in seqlock.h.

Use that new data type instead of plain seqcount_t. This adds the
necessary type-safety and ensures that only latching-safe seqcount APIs
are used.

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
[peterz: unwreck cyc2ns_read_begin()]
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200827114044.11173-7-a.darwish@linutronix.de
---
 arch/x86/kernel/tsc.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 49d9250..f70dffc 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -54,7 +54,7 @@ struct clocksource *art_related_clocksource;
 
 struct cyc2ns {
 	struct cyc2ns_data data[2];	/*  0 + 2*16 = 32 */
-	seqcount_t	   seq;		/* 32 + 4    = 36 */
+	seqcount_latch_t   seq;		/* 32 + 4    = 36 */
 
 }; /* fits one cacheline */
 
@@ -73,14 +73,14 @@ __always_inline void cyc2ns_read_begin(struct cyc2ns_data *data)
 	preempt_disable_notrace();
 
 	do {
-		seq = this_cpu_read(cyc2ns.seq.sequence);
+		seq = this_cpu_read(cyc2ns.seq.seqcount.sequence);
 		idx = seq & 1;
 
 		data->cyc2ns_offset = this_cpu_read(cyc2ns.data[idx].cyc2ns_offset);
 		data->cyc2ns_mul    = this_cpu_read(cyc2ns.data[idx].cyc2ns_mul);
 		data->cyc2ns_shift  = this_cpu_read(cyc2ns.data[idx].cyc2ns_shift);
 
-	} while (unlikely(seq != this_cpu_read(cyc2ns.seq.sequence)));
+	} while (unlikely(seq != this_cpu_read(cyc2ns.seq.seqcount.sequence)));
 }
 
 __always_inline void cyc2ns_read_end(void)
@@ -186,7 +186,7 @@ static void __init cyc2ns_init_boot_cpu(void)
 {
 	struct cyc2ns *c2n = this_cpu_ptr(&cyc2ns);
 
-	seqcount_init(&c2n->seq);
+	seqcount_latch_init(&c2n->seq);
 	__set_cyc2ns_scale(tsc_khz, smp_processor_id(), rdtsc());
 }
 
@@ -203,7 +203,7 @@ static void __init cyc2ns_init_secondary_cpus(void)
 
 	for_each_possible_cpu(cpu) {
 		if (cpu != this_cpu) {
-			seqcount_init(&c2n->seq);
+			seqcount_latch_init(&c2n->seq);
 			c2n = per_cpu_ptr(&cyc2ns, cpu);
 			c2n->data[0] = data[0];
 			c2n->data[1] = data[1];

^ permalink raw reply	[relevance 81%]

* [tip: locking/core] seqlock: PREEMPT_RT: Do not starve seqlock_t writers
  2020-09-04 15:32 84% ` [PATCH v2 5/5] seqlock: PREEMPT_RT: Do not starve seqlock_t writers Ahmed S. Darwish
@ 2020-09-10 15:08 71%   ` tip-bot2 for Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: tip-bot2 for Ahmed S. Darwish @ 2020-09-10 15:08 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Ahmed S. Darwish, Peter Zijlstra (Intel), x86, LKML

The following commit has been merged into the locking/core branch of tip:

Commit-ID:     1909760f5fc3f123e47b4e24e0ccdc0fc8f3f106
Gitweb:        https://git.kernel.org/tip/1909760f5fc3f123e47b4e24e0ccdc0fc8f3f106
Author:        Ahmed S. Darwish <a.darwish@linutronix.de>
AuthorDate:    Fri, 04 Sep 2020 17:32:31 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Thu, 10 Sep 2020 11:19:31 +02:00

seqlock: PREEMPT_RT: Do not starve seqlock_t writers

On PREEMPT_RT, seqlock_t is transformed into a sleeping lock that does
not disable preemption. A seqlock_t reader can thus preempt its write
side section and spin for the entire scheduler tick. If that reader
belongs to a real-time scheduling class, it can spin forever and the
kernel will livelock.

To break this livelock possibility on PREEMPT_RT, implement seqlock_t in
terms of "seqcount_spinlock_t" instead of plain "seqcount_t".

Besides its purely annotational value, this will leverage the existing
seqcount_LOCKNAME_t PREEMPT_RT anti-livelock mechanisms, without adding
any extra code.

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200904153231.11994-6-a.darwish@linutronix.de
---
 include/linux/seqlock.h | 32 +++++++++++++++++++++-----------
 1 file changed, 21 insertions(+), 11 deletions(-)

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 2bc9510..f73c7eb 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -790,13 +790,17 @@ static inline void raw_write_seqcount_latch(seqcount_latch_t *s)
  *    - Documentation/locking/seqlock.rst
  */
 typedef struct {
-	struct seqcount seqcount;
+	/*
+	 * Make sure that readers don't starve writers on PREEMPT_RT: use
+	 * seqcount_spinlock_t instead of seqcount_t. Check __SEQ_LOCK().
+	 */
+	seqcount_spinlock_t seqcount;
 	spinlock_t lock;
 } seqlock_t;
 
 #define __SEQLOCK_UNLOCKED(lockname)					\
 	{								\
-		.seqcount = SEQCNT_ZERO(lockname),			\
+		.seqcount = SEQCNT_SPINLOCK_ZERO(lockname, &(lockname).lock), \
 		.lock =	__SPIN_LOCK_UNLOCKED(lockname)			\
 	}
 
@@ -806,8 +810,8 @@ typedef struct {
  */
 #define seqlock_init(sl)						\
 	do {								\
-		seqcount_init(&(sl)->seqcount);				\
 		spin_lock_init(&(sl)->lock);				\
+		seqcount_spinlock_init(&(sl)->seqcount, &(sl)->lock);	\
 	} while (0)
 
 /**
@@ -854,6 +858,12 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
 	return read_seqcount_retry(&sl->seqcount, start);
 }
 
+/*
+ * For all seqlock_t write side functions, use write_seqcount_*t*_begin()
+ * instead of the generic write_seqcount_begin(). This way, no redundant
+ * lockdep_assert_held() checks are added.
+ */
+
 /**
  * write_seqlock() - start a seqlock_t write side critical section
  * @sl: Pointer to seqlock_t
@@ -870,7 +880,7 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
 static inline void write_seqlock(seqlock_t *sl)
 {
 	spin_lock(&sl->lock);
-	write_seqcount_t_begin(&sl->seqcount);
+	write_seqcount_t_begin(&sl->seqcount.seqcount);
 }
 
 /**
@@ -882,7 +892,7 @@ static inline void write_seqlock(seqlock_t *sl)
  */
 static inline void write_sequnlock(seqlock_t *sl)
 {
-	write_seqcount_t_end(&sl->seqcount);
+	write_seqcount_t_end(&sl->seqcount.seqcount);
 	spin_unlock(&sl->lock);
 }
 
@@ -896,7 +906,7 @@ static inline void write_sequnlock(seqlock_t *sl)
 static inline void write_seqlock_bh(seqlock_t *sl)
 {
 	spin_lock_bh(&sl->lock);
-	write_seqcount_t_begin(&sl->seqcount);
+	write_seqcount_t_begin(&sl->seqcount.seqcount);
 }
 
 /**
@@ -909,7 +919,7 @@ static inline void write_seqlock_bh(seqlock_t *sl)
  */
 static inline void write_sequnlock_bh(seqlock_t *sl)
 {
-	write_seqcount_t_end(&sl->seqcount);
+	write_seqcount_t_end(&sl->seqcount.seqcount);
 	spin_unlock_bh(&sl->lock);
 }
 
@@ -923,7 +933,7 @@ static inline void write_sequnlock_bh(seqlock_t *sl)
 static inline void write_seqlock_irq(seqlock_t *sl)
 {
 	spin_lock_irq(&sl->lock);
-	write_seqcount_t_begin(&sl->seqcount);
+	write_seqcount_t_begin(&sl->seqcount.seqcount);
 }
 
 /**
@@ -935,7 +945,7 @@ static inline void write_seqlock_irq(seqlock_t *sl)
  */
 static inline void write_sequnlock_irq(seqlock_t *sl)
 {
-	write_seqcount_t_end(&sl->seqcount);
+	write_seqcount_t_end(&sl->seqcount.seqcount);
 	spin_unlock_irq(&sl->lock);
 }
 
@@ -944,7 +954,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl)
 	unsigned long flags;
 
 	spin_lock_irqsave(&sl->lock, flags);
-	write_seqcount_t_begin(&sl->seqcount);
+	write_seqcount_t_begin(&sl->seqcount.seqcount);
 	return flags;
 }
 
@@ -973,7 +983,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl)
 static inline void
 write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags)
 {
-	write_seqcount_t_end(&sl->seqcount);
+	write_seqcount_t_end(&sl->seqcount.seqcount);
 	spin_unlock_irqrestore(&sl->lock, flags);
 }
 

^ permalink raw reply	[relevance 71%]

* [tip: locking/core] seqlock: seqcount_LOCKNAME_t: Standardize naming convention
  2020-09-04 15:32 58% ` [PATCH v2 1/5] seqlock: seqcount_LOCKNAME_t: Standardize naming convention Ahmed S. Darwish
@ 2020-09-10 15:08 49%   ` tip-bot2 for Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: tip-bot2 for Ahmed S. Darwish @ 2020-09-10 15:08 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Ahmed S. Darwish, Peter Zijlstra (Intel), x86, LKML

The following commit has been merged into the locking/core branch of tip:

Commit-ID:     6dd699b13d53f26a7603702d8bada3482312df74
Gitweb:        https://git.kernel.org/tip/6dd699b13d53f26a7603702d8bada3482312df74
Author:        Ahmed S. Darwish <a.darwish@linutronix.de>
AuthorDate:    Fri, 04 Sep 2020 17:32:27 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Thu, 10 Sep 2020 11:19:30 +02:00

seqlock: seqcount_LOCKNAME_t: Standardize naming convention

In seqlock.h, sequence counters with associated locks are either called
seqcount_LOCKNAME_t, seqcount_LOCKTYPE_t, or seqcount_locktype_t.

Standardize on seqcount_LOCKNAME_t for all instances in comments,
kernel-doc, and SEQCOUNT_LOCKNAME() generative macro parameters.

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200904153231.11994-2-a.darwish@linutronix.de
---
 include/linux/seqlock.h | 79 ++++++++++++++++++++--------------------
 1 file changed, 40 insertions(+), 39 deletions(-)

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index f2a7a46..820ace2 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -53,7 +53,7 @@
  *
  * If the write serialization mechanism is one of the common kernel
  * locking primitives, use a sequence counter with associated lock
- * (seqcount_LOCKTYPE_t) instead.
+ * (seqcount_LOCKNAME_t) instead.
  *
  * If it's desired to automatically handle the sequence counter writer
  * serialization and non-preemptibility requirements, use a sequential
@@ -117,7 +117,7 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
 #define SEQCNT_ZERO(name) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(name) }
 
 /*
- * Sequence counters with associated locks (seqcount_LOCKTYPE_t)
+ * Sequence counters with associated locks (seqcount_LOCKNAME_t)
  *
  * A sequence counter which associates the lock used for writer
  * serialization at initialization time. This enables lockdep to validate
@@ -138,30 +138,32 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
 #endif
 
 /**
- * typedef seqcount_LOCKNAME_t - sequence counter with LOCKTYPE associated
+ * typedef seqcount_LOCKNAME_t - sequence counter with LOCKNAME associated
  * @seqcount:	The real sequence counter
- * @lock:	Pointer to the associated spinlock
+ * @lock:	Pointer to the associated lock
  *
- * A plain sequence counter with external writer synchronization by a
- * spinlock. The spinlock is associated to the sequence count in the
+ * A plain sequence counter with external writer synchronization by
+ * LOCKNAME @lock. The lock is associated to the sequence counter in the
  * static initializer or init function. This enables lockdep to validate
  * that the write side critical section is properly serialized.
+ *
+ * LOCKNAME:	raw_spinlock, spinlock, rwlock, mutex, or ww_mutex.
  */
 
 /*
  * seqcount_LOCKNAME_init() - runtime initializer for seqcount_LOCKNAME_t
  * @s:		Pointer to the seqcount_LOCKNAME_t instance
- * @lock:	Pointer to the associated LOCKTYPE
+ * @lock:	Pointer to the associated lock
  */
 
 /*
- * SEQCOUNT_LOCKTYPE() - Instantiate seqcount_LOCKNAME_t and helpers
- * @locktype:		actual typename
- * @lockname:		name
- * @preemptible:	preemptibility of above locktype
+ * SEQCOUNT_LOCKNAME() - Instantiate seqcount_LOCKNAME_t and helpers
+ * @lockname:		"LOCKNAME" part of seqcount_LOCKNAME_t
+ * @locktype:		LOCKNAME canonical C data type
+ * @preemptible:	preemptibility of above lockname
  * @lockmember:		argument for lockdep_assert_held()
  */
-#define SEQCOUNT_LOCKTYPE(locktype, lockname, preemptible, lockmember)	\
+#define SEQCOUNT_LOCKNAME(lockname, locktype, preemptible, lockmember)	\
 typedef struct seqcount_##lockname {					\
 	seqcount_t		seqcount;				\
 	__SEQ_LOCK(locktype	*lock);					\
@@ -211,29 +213,28 @@ static inline void __seqcount_assert(seqcount_t *s)
 	lockdep_assert_preemption_disabled();
 }
 
-SEQCOUNT_LOCKTYPE(raw_spinlock_t,	raw_spinlock,	false,	s->lock)
-SEQCOUNT_LOCKTYPE(spinlock_t,		spinlock,	false,	s->lock)
-SEQCOUNT_LOCKTYPE(rwlock_t,		rwlock,		false,	s->lock)
-SEQCOUNT_LOCKTYPE(struct mutex,		mutex,		true,	s->lock)
-SEQCOUNT_LOCKTYPE(struct ww_mutex,	ww_mutex,	true,	&s->lock->base)
+SEQCOUNT_LOCKNAME(raw_spinlock,	raw_spinlock_t,		false,	s->lock)
+SEQCOUNT_LOCKNAME(spinlock,	spinlock_t,		false,	s->lock)
+SEQCOUNT_LOCKNAME(rwlock,	rwlock_t,		false,	s->lock)
+SEQCOUNT_LOCKNAME(mutex,	struct mutex,		true,	s->lock)
+SEQCOUNT_LOCKNAME(ww_mutex,	struct ww_mutex,	true,	&s->lock->base)
 
 /*
  * SEQCNT_LOCKNAME_ZERO - static initializer for seqcount_LOCKNAME_t
  * @name:	Name of the seqcount_LOCKNAME_t instance
- * @lock:	Pointer to the associated LOCKTYPE
+ * @lock:	Pointer to the associated LOCKNAME
  */
 
-#define SEQCOUNT_LOCKTYPE_ZERO(seq_name, assoc_lock) {			\
+#define SEQCOUNT_LOCKNAME_ZERO(seq_name, assoc_lock) {			\
 	.seqcount		= SEQCNT_ZERO(seq_name.seqcount),	\
 	__SEQ_LOCK(.lock	= (assoc_lock))				\
 }
 
-#define SEQCNT_SPINLOCK_ZERO(name, lock)	SEQCOUNT_LOCKTYPE_ZERO(name, lock)
-#define SEQCNT_RAW_SPINLOCK_ZERO(name, lock)	SEQCOUNT_LOCKTYPE_ZERO(name, lock)
-#define SEQCNT_RWLOCK_ZERO(name, lock)		SEQCOUNT_LOCKTYPE_ZERO(name, lock)
-#define SEQCNT_MUTEX_ZERO(name, lock)		SEQCOUNT_LOCKTYPE_ZERO(name, lock)
-#define SEQCNT_WW_MUTEX_ZERO(name, lock) 	SEQCOUNT_LOCKTYPE_ZERO(name, lock)
-
+#define SEQCNT_SPINLOCK_ZERO(name, lock)	SEQCOUNT_LOCKNAME_ZERO(name, lock)
+#define SEQCNT_RAW_SPINLOCK_ZERO(name, lock)	SEQCOUNT_LOCKNAME_ZERO(name, lock)
+#define SEQCNT_RWLOCK_ZERO(name, lock)		SEQCOUNT_LOCKNAME_ZERO(name, lock)
+#define SEQCNT_MUTEX_ZERO(name, lock)		SEQCOUNT_LOCKNAME_ZERO(name, lock)
+#define SEQCNT_WW_MUTEX_ZERO(name, lock) 	SEQCOUNT_LOCKNAME_ZERO(name, lock)
 
 #define __seqprop_case(s, lockname, prop)				\
 	seqcount_##lockname##_t: __seqcount_##lockname##_##prop((void *)(s))
@@ -252,7 +253,7 @@ SEQCOUNT_LOCKTYPE(struct ww_mutex,	ww_mutex,	true,	&s->lock->base)
 
 /**
  * __read_seqcount_begin() - begin a seqcount_t read section w/o barrier
- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
  *
  * __read_seqcount_begin is like read_seqcount_begin, but has no smp_rmb()
  * barrier. Callers should ensure that smp_rmb() or equivalent ordering is
@@ -283,7 +284,7 @@ repeat:
 
 /**
  * raw_read_seqcount_begin() - begin a seqcount_t read section w/o lockdep
- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
  *
  * Return: count to be passed to read_seqcount_retry()
  */
@@ -299,7 +300,7 @@ static inline unsigned raw_read_seqcount_t_begin(const seqcount_t *s)
 
 /**
  * read_seqcount_begin() - begin a seqcount_t read critical section
- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
  *
  * Return: count to be passed to read_seqcount_retry()
  */
@@ -314,7 +315,7 @@ static inline unsigned read_seqcount_t_begin(const seqcount_t *s)
 
 /**
  * raw_read_seqcount() - read the raw seqcount_t counter value
- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
  *
  * raw_read_seqcount opens a read critical section of the given
  * seqcount_t, without any lockdep checking, and without checking or
@@ -337,7 +338,7 @@ static inline unsigned raw_read_seqcount_t(const seqcount_t *s)
 /**
  * raw_seqcount_begin() - begin a seqcount_t read critical section w/o
  *                        lockdep and w/o counter stabilization
- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
  *
  * raw_seqcount_begin opens a read critical section of the given
  * seqcount_t. Unlike read_seqcount_begin(), this function will not wait
@@ -365,7 +366,7 @@ static inline unsigned raw_seqcount_t_begin(const seqcount_t *s)
 
 /**
  * __read_seqcount_retry() - end a seqcount_t read section w/o barrier
- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
  * @start: count, from read_seqcount_begin()
  *
  * __read_seqcount_retry is like read_seqcount_retry, but has no smp_rmb()
@@ -389,7 +390,7 @@ static inline int __read_seqcount_t_retry(const seqcount_t *s, unsigned start)
 
 /**
  * read_seqcount_retry() - end a seqcount_t read critical section
- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
  * @start: count, from read_seqcount_begin()
  *
  * read_seqcount_retry closes the read critical section of given
@@ -409,7 +410,7 @@ static inline int read_seqcount_t_retry(const seqcount_t *s, unsigned start)
 
 /**
  * raw_write_seqcount_begin() - start a seqcount_t write section w/o lockdep
- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
  */
 #define raw_write_seqcount_begin(s)					\
 do {									\
@@ -428,7 +429,7 @@ static inline void raw_write_seqcount_t_begin(seqcount_t *s)
 
 /**
  * raw_write_seqcount_end() - end a seqcount_t write section w/o lockdep
- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
  */
 #define raw_write_seqcount_end(s)					\
 do {									\
@@ -448,7 +449,7 @@ static inline void raw_write_seqcount_t_end(seqcount_t *s)
 /**
  * write_seqcount_begin_nested() - start a seqcount_t write section with
  *                                 custom lockdep nesting level
- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
  * @subclass: lockdep nesting level
  *
  * See Documentation/locking/lockdep-design.rst
@@ -471,7 +472,7 @@ static inline void write_seqcount_t_begin_nested(seqcount_t *s, int subclass)
 
 /**
  * write_seqcount_begin() - start a seqcount_t write side critical section
- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
  *
  * write_seqcount_begin opens a write side critical section of the given
  * seqcount_t.
@@ -497,7 +498,7 @@ static inline void write_seqcount_t_begin(seqcount_t *s)
 
 /**
  * write_seqcount_end() - end a seqcount_t write side critical section
- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
  *
  * The write section must've been opened with write_seqcount_begin().
  */
@@ -517,7 +518,7 @@ static inline void write_seqcount_t_end(seqcount_t *s)
 
 /**
  * raw_write_seqcount_barrier() - do a seqcount_t write barrier
- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
  *
  * This can be used to provide an ordering guarantee instead of the usual
  * consistency guarantee. It is one wmb cheaper, because it can collapse
@@ -571,7 +572,7 @@ static inline void raw_write_seqcount_t_barrier(seqcount_t *s)
 /**
  * write_seqcount_invalidate() - invalidate in-progress seqcount_t read
  *                               side operations
- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
  *
  * After write_seqcount_invalidate, no seqcount_t read side operations
  * will complete successfully and see data older than this.
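
To make the standardized naming concrete, a minimal declaration and
usage sketch for one of these associated-lock counters (the foo_*
names are hypothetical):

  static DEFINE_SPINLOCK(foo_lock);
  static seqcount_spinlock_t foo_seq =
          SEQCNT_SPINLOCK_ZERO(foo_seq, &foo_lock);
  static u64 foo_data;

  /* Write side: lockdep verifies that foo_lock is held */
  static void foo_set(u64 val)
  {
          spin_lock(&foo_lock);
          write_seqcount_begin(&foo_seq);
          foo_data = val;
          write_seqcount_end(&foo_seq);
          spin_unlock(&foo_lock);
  }

  /* Read side: plain lockless retry loop */
  static u64 foo_get(void)
  {
          unsigned int seq;
          u64 val;

          do {
                  seq = read_seqcount_begin(&foo_seq);
                  val = foo_data;
          } while (read_seqcount_retry(&foo_seq, seq));

          return val;
  }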

^ permalink raw reply	[relevance 49%]

* [tip: locking/core] seqlock: seqcount_LOCKNAME_t: Introduce PREEMPT_RT support
                     ` (2 preceding siblings ...)
  2020-08-28  1:07 95% ` [PATCH v1 0/5] seqlock: Introduce PREEMPT_RT support Ahmed S. Darwish
@ 2020-09-10 15:08 63% ` tip-bot2 for Ahmed S. Darwish
  3 siblings, 0 replies; 200+ results
From: tip-bot2 for Ahmed S. Darwish @ 2020-09-10 15:08 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Ahmed S. Darwish, Peter Zijlstra (Intel), x86, LKML

The following commit has been merged into the locking/core branch of tip:

Commit-ID:     8117ab508f9c476e0a10b9db7f4818f784cf3176
Gitweb:        https://git.kernel.org/tip/8117ab508f9c476e0a10b9db7f4818f784cf3176
Author:        Ahmed S. Darwish <a.darwish@linutronix.de>
AuthorDate:    Fri, 04 Sep 2020 17:32:30 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Thu, 10 Sep 2020 11:19:31 +02:00

seqlock: seqcount_LOCKNAME_t: Introduce PREEMPT_RT support

Preemption must be disabled before entering a sequence counter write
side critical section.  Otherwise the read side section can preempt the
write side section and spin for the entire scheduler tick.  If that
reader belongs to a real-time scheduling class, it can spin forever and
the kernel will livelock.

Disabling preemption cannot be done for PREEMPT_RT though: it can lead
to higher latencies, and the write side sections will not be able to
acquire locks which become sleeping locks (e.g. spinlock_t).

To remain preemptible, while avoiding a possible livelock caused by the
reader preempting the writer, use a different technique: let the reader
detect if a seqcount_LOCKNAME_t writer is in progress. If that's the
case, acquire then release the associated LOCKNAME writer serialization
lock. This will allow any possibly-preempted writer to make progress
until the end of its writer serialization lock critical section.

Implement this lock-unlock technique for all seqcount_LOCKNAME_t with
an associated (PREEMPT_RT) sleeping lock.

References: 55f3560df975 ("seqlock: Extend seqcount API with associated locks")
Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200519214547.352050-1-a.darwish@linutronix.de
---
 include/linux/seqlock.h | 61 +++++++++++++++++++++++++++++++++-------
 1 file changed, 51 insertions(+), 10 deletions(-)

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index f3b7827..2bc9510 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -17,6 +17,7 @@
 #include <linux/kcsan-checks.h>
 #include <linux/lockdep.h>
 #include <linux/mutex.h>
+#include <linux/ww_mutex.h>
 #include <linux/preempt.h>
 #include <linux/spinlock.h>
 
@@ -131,7 +132,23 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
  * See Documentation/locking/seqlock.rst
  */
 
-#ifdef CONFIG_LOCKDEP
+/*
+ * For PREEMPT_RT, seqcount_LOCKNAME_t write side critical sections cannot
+ * disable preemption. It can lead to higher latencies, and the write side
+ * sections will not be able to acquire locks which become sleeping locks
+ * (e.g. spinlock_t).
+ *
+ * To remain preemptible while avoiding a possible livelock caused by the
+ * reader preempting the writer, use a different technique: let the reader
+ * detect if a seqcount_LOCKNAME_t writer is in progress. If that is the
+ * case, acquire then release the associated LOCKNAME writer serialization
+ * lock. This will allow any possibly-preempted writer to make progress
+ * until the end of its writer serialization lock critical section.
+ *
+ * This lock-unlock technique must be implemented for all of PREEMPT_RT
+ * sleeping locks.  See Documentation/locking/locktypes.rst
+ */
+#if defined(CONFIG_LOCKDEP) || defined(CONFIG_PREEMPT_RT)
 #define __SEQ_LOCK(expr)	expr
 #else
 #define __SEQ_LOCK(expr)
@@ -162,10 +179,12 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
  *
  * @lockname:		"LOCKNAME" part of seqcount_LOCKNAME_t
  * @locktype:		LOCKNAME canonical C data type
- * @preemptible:	preemptibility of above lockname
+ * @preemptible:	preemptibility of above locktype
  * @lockmember:		argument for lockdep_assert_held()
+ * @lockbase:		associated lock release function (prefix only)
+ * @lock_acquire:	associated lock acquisition function (full call)
  */
-#define SEQCOUNT_LOCKNAME(lockname, locktype, preemptible, lockmember)	\
+#define SEQCOUNT_LOCKNAME(lockname, locktype, preemptible, lockmember, lockbase, lock_acquire) \
 typedef struct seqcount_##lockname {					\
 	seqcount_t		seqcount;				\
 	__SEQ_LOCK(locktype	*lock);					\
@@ -187,13 +206,33 @@ __seqprop_##lockname##_ptr(seqcount_##lockname##_t *s)			\
 static __always_inline unsigned						\
 __seqprop_##lockname##_sequence(const seqcount_##lockname##_t *s)	\
 {									\
-	return READ_ONCE(s->seqcount.sequence);				\
+	unsigned seq = READ_ONCE(s->seqcount.sequence);			\
+									\
+	if (!IS_ENABLED(CONFIG_PREEMPT_RT))				\
+		return seq;						\
+									\
+	if (preemptible && unlikely(seq & 1)) {				\
+		__SEQ_LOCK(lock_acquire);				\
+		__SEQ_LOCK(lockbase##_unlock(s->lock));			\
+									\
+		/*							\
+		 * Re-read the sequence counter since the (possibly	\
+		 * preempted) writer made progress.			\
+		 */							\
+		seq = READ_ONCE(s->seqcount.sequence);			\
+	}								\
+									\
+	return seq;							\
 }									\
 									\
 static __always_inline bool						\
 __seqprop_##lockname##_preemptible(const seqcount_##lockname##_t *s)	\
 {									\
-	return preemptible;						\
+	if (!IS_ENABLED(CONFIG_PREEMPT_RT))				\
+		return preemptible;					\
+									\
+	/* PREEMPT_RT relies on the above LOCK+UNLOCK */		\
+	return false;							\
 }									\
 									\
 static __always_inline void						\
@@ -226,11 +265,13 @@ static inline void __seqprop_assert(const seqcount_t *s)
 	lockdep_assert_preemption_disabled();
 }
 
-SEQCOUNT_LOCKNAME(raw_spinlock,	raw_spinlock_t,		false,	s->lock)
-SEQCOUNT_LOCKNAME(spinlock,	spinlock_t,		false,	s->lock)
-SEQCOUNT_LOCKNAME(rwlock,	rwlock_t,		false,	s->lock)
-SEQCOUNT_LOCKNAME(mutex,	struct mutex,		true,	s->lock)
-SEQCOUNT_LOCKNAME(ww_mutex,	struct ww_mutex,	true,	&s->lock->base)
+#define __SEQ_RT	IS_ENABLED(CONFIG_PREEMPT_RT)
+
+SEQCOUNT_LOCKNAME(raw_spinlock, raw_spinlock_t,  false,    s->lock,        raw_spin, raw_spin_lock(s->lock))
+SEQCOUNT_LOCKNAME(spinlock,     spinlock_t,      __SEQ_RT, s->lock,        spin,     spin_lock(s->lock))
+SEQCOUNT_LOCKNAME(rwlock,       rwlock_t,        __SEQ_RT, s->lock,        read,     read_lock(s->lock))
+SEQCOUNT_LOCKNAME(mutex,        struct mutex,    true,     s->lock,        mutex,    mutex_lock(s->lock))
+SEQCOUNT_LOCKNAME(ww_mutex,     struct ww_mutex, true,     &s->lock->base, ww_mutex, ww_mutex_lock(s->lock, NULL))
 
 /*
  * SEQCNT_LOCKNAME_ZERO - static initializer for seqcount_LOCKNAME_t
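
Hand-expanded for the spinlock case with CONFIG_PREEMPT_RT=y (so
preemptible == __SEQ_RT is true and the IS_ENABLED() test vanishes at
compile time), the generated reader helper behaves roughly as follows;
an illustrative expansion, not literal kernel code:

  static __always_inline unsigned
  __seqprop_spinlock_sequence(const seqcount_spinlock_t *s)
  {
          unsigned seq = READ_ONCE(s->seqcount.sequence);

          /* Odd: a writer is in progress and may have been preempted */
          if (unlikely(seq & 1)) {
                  /*
                   * Lock then unlock the associated lock. On RT this
                   * blocks on, and priority-boosts, the preempted
                   * writer until it leaves its critical section.
                   */
                  spin_lock(s->lock);
                  spin_unlock(s->lock);

                  /* Re-read: the writer has made progress meanwhile */
                  seq = READ_ONCE(s->seqcount.sequence);
          }

          return seq;
  }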

^ permalink raw reply	[relevance 63%]

* [tip: locking/core] seqlock: seqcount_t: Implement all read APIs as statement expressions
  2020-09-04 15:32 80% ` [PATCH v2 3/5] seqlock: seqcount_t: Implement all read APIs as statement expressions Ahmed S. Darwish
@ 2020-09-10 15:08 68%   ` tip-bot2 for Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: tip-bot2 for Ahmed S. Darwish @ 2020-09-10 15:08 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Ahmed S. Darwish, Peter Zijlstra (Intel), x86, LKML

The following commit has been merged into the locking/core branch of tip:

Commit-ID:     52ac39e5db5148f70392edb654ad882ac8da88a8
Gitweb:        https://git.kernel.org/tip/52ac39e5db5148f70392edb654ad882ac8da88a8
Author:        Ahmed S. Darwish <a.darwish@linutronix.de>
AuthorDate:    Fri, 04 Sep 2020 17:32:29 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Thu, 10 Sep 2020 11:19:31 +02:00

seqlock: seqcount_t: Implement all read APIs as statement expressions

The sequence counter read APIs are implemented as CPP macros, so they
can take either seqcount_t or any of the seqcount_LOCKNAME_t variants.
Such macros then get *directly* transformed to internal C functions that
only take plain seqcount_t.

Further commits need access to seqcount_LOCKNAME_t inside the actual
read APIs' code. Thus, transform all of the seqcount read APIs into
pure GCC statement expressions instead.

This will not break type-safety: all of the transformed APIs resolve to
a _Generic() selection that does not have a "default" case.

This will also not affect the transformed APIs' readability: the
previously added kernel-doc above all of the seqlock.h functions makes
the expectations quite clear for call-site developers.

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200904153231.11994-4-a.darwish@linutronix.de
---
 include/linux/seqlock.h | 94 +++++++++++++++++++---------------------
 1 file changed, 45 insertions(+), 49 deletions(-)

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 0b4a22f..f3b7827 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -184,6 +184,12 @@ __seqprop_##lockname##_ptr(seqcount_##lockname##_t *s)			\
 	return &s->seqcount;						\
 }									\
 									\
+static __always_inline unsigned						\
+__seqprop_##lockname##_sequence(const seqcount_##lockname##_t *s)	\
+{									\
+	return READ_ONCE(s->seqcount.sequence);				\
+}									\
+									\
 static __always_inline bool						\
 __seqprop_##lockname##_preemptible(const seqcount_##lockname##_t *s)	\
 {									\
@@ -205,6 +211,11 @@ static inline seqcount_t *__seqprop_ptr(seqcount_t *s)
 	return s;
 }
 
+static inline unsigned __seqprop_sequence(const seqcount_t *s)
+{
+	return READ_ONCE(s->sequence);
+}
+
 static inline bool __seqprop_preemptible(const seqcount_t *s)
 {
 	return false;
@@ -250,6 +261,7 @@ SEQCOUNT_LOCKNAME(ww_mutex,	struct ww_mutex,	true,	&s->lock->base)
 	__seqprop_case((s),	ww_mutex,	prop))
 
 #define __seqcount_ptr(s)		__seqprop(s, ptr)
+#define __seqcount_sequence(s)		__seqprop(s, sequence)
 #define __seqcount_lock_preemptible(s)	__seqprop(s, preemptible)
 #define __seqcount_assert_lock_held(s)	__seqprop(s, assert)
 
@@ -268,21 +280,15 @@ SEQCOUNT_LOCKNAME(ww_mutex,	struct ww_mutex,	true,	&s->lock->base)
  * Return: count to be passed to read_seqcount_retry()
  */
 #define __read_seqcount_begin(s)					\
-	__read_seqcount_t_begin(__seqcount_ptr(s))
-
-static inline unsigned __read_seqcount_t_begin(const seqcount_t *s)
-{
-	unsigned ret;
-
-repeat:
-	ret = READ_ONCE(s->sequence);
-	if (unlikely(ret & 1)) {
-		cpu_relax();
-		goto repeat;
-	}
-	kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);
-	return ret;
-}
+({									\
+	unsigned seq;							\
+									\
+	while ((seq = __seqcount_sequence(s)) & 1)			\
+		cpu_relax();						\
+									\
+	kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);			\
+	seq;								\
+})
 
 /**
  * raw_read_seqcount_begin() - begin a seqcount_t read section w/o lockdep
@@ -291,14 +297,12 @@ repeat:
  * Return: count to be passed to read_seqcount_retry()
  */
 #define raw_read_seqcount_begin(s)					\
-	raw_read_seqcount_t_begin(__seqcount_ptr(s))
-
-static inline unsigned raw_read_seqcount_t_begin(const seqcount_t *s)
-{
-	unsigned ret = __read_seqcount_t_begin(s);
-	smp_rmb();
-	return ret;
-}
+({									\
+	unsigned seq = __read_seqcount_begin(s);			\
+									\
+	smp_rmb();							\
+	seq;								\
+})
 
 /**
  * read_seqcount_begin() - begin a seqcount_t read critical section
@@ -307,13 +311,10 @@ static inline unsigned raw_read_seqcount_t_begin(const seqcount_t *s)
  * Return: count to be passed to read_seqcount_retry()
  */
 #define read_seqcount_begin(s)						\
-	read_seqcount_t_begin(__seqcount_ptr(s))
-
-static inline unsigned read_seqcount_t_begin(const seqcount_t *s)
-{
-	seqcount_lockdep_reader_access(s);
-	return raw_read_seqcount_t_begin(s);
-}
+({									\
+	seqcount_lockdep_reader_access(__seqcount_ptr(s));		\
+	raw_read_seqcount_begin(s);					\
+})
 
 /**
  * raw_read_seqcount() - read the raw seqcount_t counter value
@@ -327,15 +328,13 @@ static inline unsigned read_seqcount_t_begin(const seqcount_t *s)
  * Return: count to be passed to read_seqcount_retry()
  */
 #define raw_read_seqcount(s)						\
-	raw_read_seqcount_t(__seqcount_ptr(s))
-
-static inline unsigned raw_read_seqcount_t(const seqcount_t *s)
-{
-	unsigned ret = READ_ONCE(s->sequence);
-	smp_rmb();
-	kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);
-	return ret;
-}
+({									\
+	unsigned seq = __seqcount_sequence(s);				\
+									\
+	smp_rmb();							\
+	kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);			\
+	seq;								\
+})
 
 /**
  * raw_seqcount_begin() - begin a seqcount_t read critical section w/o
@@ -355,16 +354,13 @@ static inline unsigned raw_read_seqcount_t(const seqcount_t *s)
  * Return: count to be passed to read_seqcount_retry()
  */
 #define raw_seqcount_begin(s)						\
-	raw_seqcount_t_begin(__seqcount_ptr(s))
-
-static inline unsigned raw_seqcount_t_begin(const seqcount_t *s)
-{
-	/*
-	 * If the counter is odd, let read_seqcount_retry() fail
-	 * by decrementing the counter.
-	 */
-	return raw_read_seqcount_t(s) & ~1;
-}
+({									\
+	/*								\
+	 * If the counter is odd, let read_seqcount_retry() fail	\
+	 * by decrementing the counter.					\
+	 */								\
+	raw_read_seqcount(s) & ~1;					\
+})
 
 /**
  * __read_seqcount_retry() - end a seqcount_t read section w/o barrier
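
As a reminder of the GCC extension involved: a ({ ... }) statement
expression evaluates to its final expression, which is what lets these
macros declare locals and loop while still yielding a value. A tiny
standalone illustration (the wait_until_even() name is made up):

  #define wait_until_even(ptr)                                    \
  ({                                                              \
          unsigned int __seq;                                     \
                                                                  \
          /* Spin while the counter is odd (writer active) */     \
          while ((__seq = READ_ONCE(*(ptr))) & 1)                 \
                  cpu_relax();                                    \
                                                                  \
          __seq;  /* the whole ({ ... }) evaluates to this */     \
  })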

^ permalink raw reply	[relevance 68%]

* Re: [RFC 2/2] printk: Add more information about the printk caller
  @ 2020-09-24  4:24 99%   ` Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: Ahmed S. Darwish @ 2020-09-24  4:24 UTC (permalink / raw)
  To: Petr Mladek
  Cc: Sergey Senozhatsky, Steven Rostedt, John Ogness, Linus Torvalds,
	Thomas Gleixner, Prarit Bhargava, Mark Salyzyn, Chunyan Zhang,
	Orson Zhai, Changki Kim, Sergey Senozhatsky, linux-kernel

On Wed, Sep 23, 2020 at 03:56:17PM +0200, Petr Mladek wrote:
...
>
> -static inline u32 printk_caller_id(void)
> +static enum printk_caller_ctx get_printk_caller_ctx(void)
> +{
> +	if (in_nmi())
> +		return printk_ctx_nmi;
> +
> +	if (in_irq())
> +		return printk_ctx_hardirq;
> +
> +	if (in_softirq())
> +		return printk_ctx_softirq;
> +
> +	return printk_ctx_task;
> +}
> +

in_softirq() here will be true for both softirq contexts *and*
BH-disabled regions. Did you mean in_serving_softirq() instead?
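
For reference, the relevant accessors in include/linux/preempt.h are
roughly:

  #define in_softirq()            (softirq_count())
  #define in_serving_softirq()    (softirq_count() & SOFTIRQ_OFFSET)

  /*
   * local_bh_disable() raises softirq_count() by
   * SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET), so in_softirq()
   * is also true in BH-disabled task context. Only actually
   * serving a softirq sets the SOFTIRQ_OFFSET bit itself.
   */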

Thanks,

--
Ahmed S. Darwish
Linutronix GmbH

^ permalink raw reply	[relevance 99%]

* Re: [PATCH v2 0/2] Add a seqcount between gup_fast and copy_page_range()
       [not found]     <0-v2-dfe9ecdb6c74+2066-gup_fork_jgg@nvidia.com>
@ 2020-11-02 22:19 90% ` Ahmed S. Darwish
         [not found]     ` <2-v2-dfe9ecdb6c74+2066-gup_fork_jgg@nvidia.com>
  1 sibling, 1 reply; 200+ results
From: Ahmed S. Darwish @ 2020-11-02 22:19 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: linux-kernel, Peter Xu, Linus Torvalds, Andrea Arcangeli,
	Andrew Morton, Aneesh Kumar K.V, Christoph Hellwig, Hugh Dickins,
	Jan Kara, Jann Horn, John Hubbard, Kirill Shutemov, Kirill Tkhai,
	Leon Romanovsky, Linux-MM, Michal Hocko, Oleg Nesterov,
	Peter Zijlstra, Ingo Molnar, Will Deacon, Thomas Gleixner,
	Sebastian Siewior

Hello Jason,

Thanks for keeping me in the loop on this.

I've also added the locking maintainers in Cc. IMHO there are some
seqlock.h API violations in this series, and they should have the final
say on this.

On Fri, Oct 30, 2020 at 11:46:19AM -0300, Jason Gunthorpe wrote:
>
> As discussed and suggested by Linus use a seqcount to close the small race
> between gup_fast and copy_page_range().
>
> Unfortunately the good suggestion to just use write_seqcount_begin() blows
> up lockdep immediately due to the (new?) requirement that the write side
> of seqcount be in a preempt disabled region.

Disabling preemption for seqcount_t write-side critical sections was
never a new requirement. It has always been this way, for the reasons
explained in Documentation/locking/seqlock.rst, "Introduction" section.

The recent seqcount_t changes did not mandate any new rules. This was
done explicitly and on purpose, so as not to break any of the *large*
set of existing seqcount_t call sites. They added multiple lockdep
asserts though, to catch a number of already-buggy users, and those
were fixed beforehand.

It seems you have a special case here, so I'll continue discussing this
at patch #2 where the code resides. Just wanted to answer the "(new?)"
part above.

>                                               For this application it does
> not seem like a good idea, nor is it necessary as we don't spin on retry.
> This is solved by being the first place to use raw_write_seqcount_t_begin()
>

Regardless of this series' write side preemptibility requirements, the
"_write_seqcount_*t*_()" interfaces are internal to seqlock.h and should
_never_ be used outside of it.

For plain seqcount_t, raw_write_seqcount_begin() is equivalent to
raw_write_seqcount_*t*_begin() anyway, and should already satisfy your
needs.

/me jumps to patch #2 now...

Thanks,

--
Ahmed S. Darwish
Linutronix GmbH

^ permalink raw reply	[relevance 90%]

* Re: [PATCH v2 0/2] Add a seqcount between gup_fast and copy_page_range()
  @ 2020-11-02 23:18 97%     ` Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: Ahmed S. Darwish @ 2020-11-02 23:18 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Jason Gunthorpe, Linux Kernel Mailing List, Peter Xu,
	Andrea Arcangeli, Andrew Morton, Aneesh Kumar K.V,
	Christoph Hellwig, Hugh Dickins, Jan Kara, Jann Horn,
	John Hubbard, Kirill Shutemov, Kirill Tkhai, Leon Romanovsky,
	Linux-MM, Michal Hocko, Oleg Nesterov, Peter Zijlstra,
	Ingo Molnar, Will Deacon, Thomas Gleixner, Sebastian Siewior

On Mon, Nov 02, 2020 at 02:39:49PM -0800, Linus Torvalds wrote:
> On Mon, Nov 2, 2020 at 2:19 PM Ahmed S. Darwish <a.darwish@linutronix.de> wrote:
> >
> > Disabling preemption for seqcount_t write-side critical sections was
> > never a new requirement. It has always been this way, for the reasons
> > explained at Documentation/locking/seqlock.rst, "Introduction" section.
>
> Note that that is only true if you spin on the reading side (either of
> the two kinds of spinning: (a) spinning to wait for it to become even,
> or (b) repeating if they don't match)
>
> Which this code doesn't do, it just fails.
>
> I'm not sure how to perhaps document that.
>

Sure, and this is one of the reasons the lockdep non-preemptibility
check is only added to the non-raw variants of the seqcount write APIs.

Presumably, users of the raw_*() part of the API know what they're
doing, and they don't need to read seqlock.rst :)

(I'm in the process of replying to patch #2, which touches a bit on
 this and other points.)

>              Linus

Thanks,

--
Ahmed S. Darwish
Linutronix GmbH

^ permalink raw reply	[relevance 97%]

* Re: [PATCH v2 2/2] mm: prevent gup_fast from racing with COW during fork
       [not found]     ` <2-v2-dfe9ecdb6c74+2066-gup_fork_jgg@nvidia.com>
  @ 2020-11-02 23:58 96%   ` Ahmed S. Darwish
  1 sibling, 0 replies; 200+ results
From: Ahmed S. Darwish @ 2020-11-02 23:58 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: linux-kernel, Peter Xu, Linus Torvalds, Andrea Arcangeli,
	Andrew Morton, Aneesh Kumar K.V, Christoph Hellwig, Hugh Dickins,
	Jan Kara, Jann Horn, John Hubbard, Kirill Shutemov, Kirill Tkhai,
	Leon Romanovsky, Linux-MM, Michal Hocko, Oleg Nesterov,
	Peter Zijlstra, Ingo Molnar, Will Deacon, Thomas Gleixner,
	Sebastian Siewior

On Fri, Oct 30, 2020 at 11:46:21AM -0300, Jason Gunthorpe wrote:

...

> diff --git a/mm/memory.c b/mm/memory.c
> index c48f8df6e50268..294c2c3c4fe00d 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -1171,6 +1171,12 @@ copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma)
>  		mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE,
>  					0, src_vma, src_mm, addr, end);
>  		mmu_notifier_invalidate_range_start(&range);

> +		/*
> +		 * The read side doesn't spin, it goes to the mmap_lock, so the
> +		 * raw version is used to avoid disabling preemption here
> +		 */
> +		mmap_assert_write_locked(src_mm);
> +		raw_write_seqcount_t_begin(&src_mm->write_protect_seq);
>  	}
>

Please, s/raw_write_seqcount_t_begin()/raw_write_seqcount_begin()/g. For
plain seqcount_t, it's the same, while still respecting the seqlock.h
API boundaries.

Let's also make the comment a bit clearer (IMHO, "lockdep" needs to be
mentioned somewhere):

		/*
		 * Disabling preemption is not needed for the write side, as
		 * the read side doesn't spin, but goes to the mmap_lock.
		 *
		 * Use the raw variant of the seqcount_t write API to avoid
		 * lockdep complaining about preemptibility.
		 */
		mmap_assert_write_locked(src_mm);
		raw_write_seqcount_t_begin(&src_mm->write_protect_seq);

>  	ret = 0;
> @@ -1187,8 +1193,10 @@ copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma)
>  		}
>  	} while (dst_pgd++, src_pgd++, addr = next, addr != end);
>
> -	if (is_cow)
> +	if (is_cow) {
> +		raw_write_seqcount_t_end(&src_mm->write_protect_seq);

ditto.

s/raw_write_seqcount_t_end()/raw_write_seqcount_end()/g

>  		mmu_notifier_invalidate_range_end(&range);
> +	}
>  	return ret;
>  }
>

Thanks,

--
Ahmed S. Darwish
Linutronix GmbH

^ permalink raw reply	[relevance 96%]

* Re: [PATCH v2 2/2] mm: prevent gup_fast from racing with COW during fork
       [not found]         ` <20201030235121.GQ2620339@nvidia.com>
@ 2020-11-03  0:17 97%       ` Ahmed S. Darwish
       [not found]             ` <20201103002532.GL2620339@nvidia.com>
  0 siblings, 1 reply; 200+ results
From: Ahmed S. Darwish @ 2020-11-03  0:17 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Peter Xu, linux-kernel, Linus Torvalds, Andrea Arcangeli,
	Andrew Morton, Aneesh Kumar K.V, Christoph Hellwig, Hugh Dickins,
	Jan Kara, Jann Horn, John Hubbard, Kirill Shutemov, Kirill Tkhai,
	Leon Romanovsky, Linux-MM, Michal Hocko, Oleg Nesterov,
	Peter Zijlstra, Ingo Molnar, Will Deacon, Thomas Gleixner,
	Sebastian Siewior

On Fri, Oct 30, 2020 at 08:51:21PM -0300, Jason Gunthorpe wrote:
> On Fri, Oct 30, 2020 at 06:52:50PM -0400, Peter Xu wrote:

...

>
> > > diff --git a/mm/memory.c b/mm/memory.c
> > > index c48f8df6e50268..294c2c3c4fe00d 100644
> > > +++ b/mm/memory.c
> > > @@ -1171,6 +1171,12 @@ copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma)
> > >  		mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE,
> > >  					0, src_vma, src_mm, addr, end);
> > >  		mmu_notifier_invalidate_range_start(&range);
> > > +		/*
> > > +		 * The read side doesn't spin, it goes to the mmap_lock, so the
> > > +		 * raw version is used to avoid disabling preemption here
> > > +		 */
> > > +		mmap_assert_write_locked(src_mm);
> > > +		raw_write_seqcount_t_begin(&src_mm->write_protect_seq);
> >
> > Would raw_write_seqcount_begin() be better here?
>
> Hum..
>
> I felt no because it had the preempt stuff added into it, however it
> would work - __seqcount_lock_preemptible() == false for the seqcount_t
> case (see below)
>
> Looking more closely, maybe the right API to pick is
> write_seqcount_t_begin() and write_seqcount_t_end() ??
>

No, that's not the right API: it is also internal to seqlock.h.

Please stick with the official exported API: raw_write_seqcount_begin().

It should satisfy your needs; the raw_*() variants exist exactly for
contexts that must avoid the lockdep checks (e.g. NMI handlers cannot
invoke lockdep).

> However, no idea what the intention of the '*_seqcount_t_*' family is
> - it only seems to be used to implement the seqlock..
>

Exactly. '*_seqcount_t_*' is a seqlock.h implementation detail, and it
actually has _zero_ relevance to what is discussed in this thread.

...

> Ahmed explained in commit 8117ab508f the reason the seqcount_t write
> side has preemption disabled is because it can livelock RT kernels if
> the read side is spinning after preempting the write side. eg look at
> how __read_seqcount_begin() is implemented:
>
> 	while ((seq = __seqcount_sequence(s)) & 1)			\
> 		cpu_relax();						\
>
> However, in this patch, we don't spin on the read side.
>
> If the read side collides with a writer it immediately goes to the
> mmap_lock, which is sleeping, and so it will sort itself out properly,
> even if it was preempted.
>

Correct.

Thanks,

--
Ahmed Darwish
Linutronix GmbH

^ permalink raw reply	[relevance 97%]

* Re: [PATCH v2 2/2] mm: prevent gup_fast from racing with COW during fork
  @ 2020-11-03  0:33 97% ` Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: Ahmed S. Darwish @ 2020-11-03  0:33 UTC (permalink / raw)
  To: Peter Xu
  Cc: Jason Gunthorpe, linux-kernel, Linus Torvalds, Andrea Arcangeli,
	Andrew Morton, Aneesh Kumar K.V, Christoph Hellwig, Hugh Dickins,
	Jan Kara, Jann Horn, John Hubbard, Kirill Shutemov, Kirill Tkhai,
	Leon Romanovsky, Linux-MM, Michal Hocko, Oleg Nesterov,
	Peter Zijlstra, Ingo Molnar, Will Deacon, Thomas Gleixner,
	Sebastian Siewior

On Sat, Oct 31, 2020 at 11:26:05AM -0400, Peter Xu wrote:

...

> Shall we document this explicitly (if this patch still needs a repost)?

Yes, this patch series needs a v3 :)

> Seems not straightforward since that seems not the usual way to use seqcount,
> not sure whether I'm the only one that feels this way, though.

Yes, this usage is correct but not common. I've proposed a more explicit
comment above the write section code, in my reply to patch #2.

...

> The other thing is, considering this use of seqcount seems to be quite special
> as explained below, I'm just not sure whether this would confuse lockdep or
> kcsan, etc., if we decide to use write_seqcount_t_begin().
>

Lockdep won't be confused, as it is not used in the raw_*() variants
of the seqcount APIs.

AFAIK KCSAN also has some margin to protect itself from this: see
seqlock.h KCSAN_SEQLOCK_REGION_MAX.

Thanks,

> Peter Xu

Ahmed Darwish
Linutronix GmbH

^ permalink raw reply	[relevance 97%]

* Re: [PATCH v2 2/2] mm: prevent gup_fast from racing with COW during fork
       [not found]             ` <20201103002532.GL2620339@nvidia.com>
@ 2020-11-03  0:41 97%           ` Ahmed S. Darwish
    0 siblings, 1 reply; 200+ results
From: Ahmed S. Darwish @ 2020-11-03  0:41 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Peter Xu, linux-kernel, Linus Torvalds, Andrea Arcangeli,
	Andrew Morton, Aneesh Kumar K.V, Christoph Hellwig, Hugh Dickins,
	Jan Kara, Jann Horn, John Hubbard, Kirill Shutemov, Kirill Tkhai,
	Leon Romanovsky, Linux-MM, Michal Hocko, Oleg Nesterov,
	Peter Zijlstra, Ingo Molnar, Will Deacon, Thomas Gleixner,
	Sebastian Siewior

On Mon, Nov 02, 2020 at 08:25:32PM -0400, Jason Gunthorpe wrote:
> On Tue, Nov 03, 2020 at 01:17:12AM +0100, Ahmed S. Darwish wrote:
>
> > Please stick with the official exported API: raw_write_seqcount_begin().
>
> How did you know this was 'offical exported API' ??
>

All the official exported seqlock.h APIs are marked with verbose
kernel-doc annotations on top. The rest are internal...

> > Exactly. '*_seqcount_t_*' is a seqlock.h implementation detail, and it
> > has _zero_ relevance to what is discussed in this thread actually.
>
> Add some leading __'s to them?
>

It's a bit more complicated than just adding some "__" prefixes, due to
the massive compile-time polymorphism done through _Generic().

The '*_seqcount_t_*' format was the best we could come up with to
distinguish (again, for internal seqlock.h code) between macros taking
all seqcount_LOCKNAME_t types, and macros/functions taking only plain
seqcount_t.

Thanks,

--
Ahmed S. Darwish
Linutronix GmbH

^ permalink raw reply	[relevance 97%]

* Re: [PATCH v2 2/2] mm: prevent gup_fast from racing with COW during fork
  @ 2020-11-03  6:52 83%               ` Ahmed S. Darwish
    0 siblings, 1 reply; 200+ results
From: Ahmed S. Darwish @ 2020-11-03  6:52 UTC (permalink / raw)
  To: John Hubbard
  Cc: Jason Gunthorpe, Peter Xu, linux-kernel, Linus Torvalds,
	Andrea Arcangeli, Andrew Morton, Aneesh Kumar K.V,
	Christoph Hellwig, Hugh Dickins, Jan Kara, Jann Horn,
	Kirill Shutemov, Kirill Tkhai, Leon Romanovsky, Linux-MM,
	Michal Hocko, Oleg Nesterov, Peter Zijlstra, Ingo Molnar,
	Will Deacon, Thomas Gleixner, Sebastian Siewior

On Mon, Nov 02, 2020 at 06:20:45PM -0800, John Hubbard wrote:
> On 11/2/20 4:41 PM, Ahmed S. Darwish wrote:
> > On Mon, Nov 02, 2020 at 08:25:32PM -0400, Jason Gunthorpe wrote:
> > > On Tue, Nov 03, 2020 at 01:17:12AM +0100, Ahmed S. Darwish wrote:
> > >
> > > > Please stick with the official exported API: raw_write_seqcount_begin().
> > >
> > > How did you know this was 'official exported API' ??
> > >
> >
> > All the official exported seqlock.h APIs are marked with verbose
> > kernel-doc annotations on top. The rest are internal...
> >
>
> OK, but no one here was able to deduce that, probably because there is not
> enough consistency throughout the kernel to be able to assume such things--even
> though your seqlock project is internally consistent. It's just not *quite*
> enough communication.
>
> I think if we added the following it would be very nice:
>

The problem is, I've already documented seqlock.h to death.... There are
more comments than code in there, and there is "seqlock.rst" under
Documentation/ to further describe the big picture.

There comes a point where you decide what level of documentation to add,
and what level to skip.

Because in the end, you don't want to confuse "Joe, the general driver
developer" with too many details that aren't relevant to their task at
hand.  (I work in the Embedded domain, and I've seen so much ugly code
from embedded drivers/SoC developers already, sorry)

See for example my reply to Linus, where the lockdep-free and
barrier-free parts of the API were deliberately left out of
seqlock.rst. This was done on purpose: 1) you want to keep the generic
case simple, but the special case doable, 2) you want to encourage
people to use the standard entry/exit points as much as possible.

> a) Short comments to the "unofficial and internal" routines, identifying them as
> such, and
>
> b) Short comments to the "official API for general use", also identifying
> those as such.
>

I really think the already added kernel-doc is sufficient...

See for example __read_seqcount_begin() and __read_seqcount_retry().
They begin with "__", but they are semi-external seqlock.h APIs that
are used by VFS to avoid barriers. And these APIs are then
polymorphised according to the write serialization lock type, and so on.
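
Schematically, such a barrier-free reader looks like this (just a
sketch, not actual fs/ code):

	unsigned seq;

	do {
		seq = __read_seqcount_begin(&s);  /* no smp_rmb() here... */

		/*
		 * ...lockless reads whose ordering is already guaranteed
		 * by other means, e.g. RCU address dependencies...
		 */

	} while (__read_seqcount_retry(&s, seq)); /* ...and none here */

The non-"__" variants are basically the same thing plus the smp_rmb()
calls.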

So the most consistent way for seqlock.h was to use kernel-doc as *the*
marker for exported functions.

This is not unique to seqlock.h by the way. The same pattern is heavily
used by the DRM folks.

Yes, of course, we can add even more comments to seqlock.h, but then I
honestly think it would be so much that people might just skip reading
the whole thing altogether...

> c) A comment about what makes "raw" actually raw, for seqlock.
>

That's already documented.

What more can really be written than what's in seqlock.h below??

  /**
   * raw_read_seqcount_begin() - begin a seqcount_t read section w/o lockdep

  /**
   * raw_seqcount_begin() - begin a seqcount_t read critical section w/o
   *                        lockdep and w/o counter stabilization

  /**
   * raw_write_seqcount_begin() - start a seqcount_t write section w/o lockdep

  /**
   * raw_write_seqcount_end() - end a seqcount_t write section w/o lockdep

>
> Since I'm proposing new work, I'll also offer to help, perhaps by putting together
> a small patch to get it kicked off, if you approve of the idea.
>

Patches are always welcome of course, and please put me in Cc. I don't
approve or deny anything though; that's the locking maintainers' job :)

Kind regards,

> John Hubbard
> NVIDIA

--
Ahmed S. Darwish
Linutronix GmbH


* Re: [PATCH v2 2/2] mm: prevent gup_fast from racing with COW during fork
  @ 2020-11-04  1:32 59%                   ` Ahmed S. Darwish
    0 siblings, 1 reply; 200+ results
From: Ahmed S. Darwish @ 2020-11-04  1:32 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: John Hubbard, Jason Gunthorpe, Peter Xu,
	Linux Kernel Mailing List, Andrea Arcangeli, Andrew Morton,
	Aneesh Kumar K.V, Christoph Hellwig, Hugh Dickins, Jan Kara,
	Jann Horn, Kirill Shutemov, Kirill Tkhai, Leon Romanovsky,
	Linux-MM, Michal Hocko, Oleg Nesterov, Peter Zijlstra,
	Ingo Molnar, Will Deacon, Thomas Gleixner, Sebastian Siewior

On Tue, Nov 03, 2020 at 09:40:22AM -0800, Linus Torvalds wrote:
> On Mon, Nov 2, 2020 at 10:52 PM Ahmed S. Darwish
> <a.darwish@linutronix.de> wrote:
> >
> > The problem is, I've already documented seqlock.h to death.... There are
> > more comments than code in there, and there is "seqlock.rst" under
> > Documentation/ to further describe the big picture.
>
> Well, honestly, I think the correct thing to do is to get rid of the
> *_seqcount_t_*() functions entirely.
>
> They add nothing but confusion, and they are entirely misnamed. That's
> not the pattern we use for "internal use only" functions, and they are
> *very* confusing.
>

I see. Would the enclosed patch #1 be OK? It basically uses the "__do_"
prefix instead, with some rationale.

>
> They have other issues too: like raw_write_seqcount_end() not being
> usable on its own when preemptibility isn't an issue like here. You
> basically _have_ to use raw_write_seqcount_t_end(), because otherwise
> it tries to re-enable preemption that was never there.
>

Hmmm, raw_write_seqcount_{begin,end}() *never* disable/enable preemption
for plain seqcount_t. This is why I kept recommending those for this
patch series instead of internal raw_write_seqcount_*t*_{begin,end}().

But... given that multiple people have made the exact same remark by
now, I guess that's due to:

#define raw_write_seqcount_begin(s)		\
do {						\
	if (__seqcount_lock_preemptible(s))	\
		preempt_disable();		\
						\
	...					\
} while (0)

#define raw_write_seqcount_end(s)		\
do {						\
	...					\
						\
	if (__seqcount_lock_preemptible(s))	\
		preempt_enable();		\
} while (0)

The tricky part is that __seqcount_lock_preemptible() is always false
for plain "seqcount_t".  With that data type, the _Generic() selection
makes it resolve to __seqprop_preemptible(), which just returns false.

Originally, __seqcount_lock_preemptible() was called:

  __seqcount_associated_lock_exists_and_is_preemptible()

but it got transformed to its current short form in the process of some
pre-mainline refactorings. Looking at it now, after all the dust has
settled, maybe the verbose form was much clearer.

Please see the enclosed patch #2... Would that be OK too?

(I will submit these two patches in their own thread after some common
 ground is reached.)

Patches
-------

====>
====> patch #1:
====>

Subject: [RFC][PATCH 1/2] seqlock: Use __do_ prefix instead of non-standard
 _seqcount_t_ marker

The use of "*_seqcount_t_*" as a marker to denote internal seqlock.h
functions taking only plain seqcount_t instead of the whole
seqcount_LOCKNAME_t family is confusing users, as it's also not the
standard kernel pattern for denoting header file internal functions.

Use the __do_ prefix instead.

Note, a plain "__" prefix is not used since seqlock.h already uses it
for some of its exported functions; e.g. __read_seqcount_begin() and
__read_seqcount_retry().

Reported-by: Jason Gunthorpe <jgg@nvidia.com>
Reported-by: John Hubbard <jhubbard@nvidia.com>
Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lkml.kernel.org/r/CAHk-=wgB8nyOQufpn0o6a5BpJCJPnXvH+kRxApujhsgG+7qAwQ@mail.gmail.com
Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
---
 include/linux/seqlock.h | 62 ++++++++++++++++++++---------------------
 1 file changed, 31 insertions(+), 31 deletions(-)

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index cbfc78b92b65..5de043841d33 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -425,9 +425,9 @@ SEQCOUNT_LOCKNAME(ww_mutex,     struct ww_mutex, true,     &s->lock->base, ww_mu
  * Return: true if a read section retry is required, else false
  */
 #define __read_seqcount_retry(s, start)					\
-	__read_seqcount_t_retry(__seqcount_ptr(s), start)
+	__do___read_seqcount_retry(__seqcount_ptr(s), start)

-static inline int __read_seqcount_t_retry(const seqcount_t *s, unsigned start)
+static inline int __do___read_seqcount_retry(const seqcount_t *s, unsigned start)
 {
 	kcsan_atomic_next(0);
 	return unlikely(READ_ONCE(s->sequence) != start);
@@ -445,12 +445,12 @@ static inline int __read_seqcount_t_retry(const seqcount_t *s, unsigned start)
  * Return: true if a read section retry is required, else false
  */
 #define read_seqcount_retry(s, start)					\
-	read_seqcount_t_retry(__seqcount_ptr(s), start)
+	__do_read_seqcount_retry(__seqcount_ptr(s), start)

-static inline int read_seqcount_t_retry(const seqcount_t *s, unsigned start)
+static inline int __do_read_seqcount_retry(const seqcount_t *s, unsigned start)
 {
 	smp_rmb();
-	return __read_seqcount_t_retry(s, start);
+	return __do___read_seqcount_retry(s, start);
 }

 /**
@@ -462,10 +462,10 @@ do {									\
 	if (__seqcount_lock_preemptible(s))				\
 		preempt_disable();					\
 									\
-	raw_write_seqcount_t_begin(__seqcount_ptr(s));			\
+	__do_raw_write_seqcount_begin(__seqcount_ptr(s));		\
 } while (0)

-static inline void raw_write_seqcount_t_begin(seqcount_t *s)
+static inline void __do_raw_write_seqcount_begin(seqcount_t *s)
 {
 	kcsan_nestable_atomic_begin();
 	s->sequence++;
@@ -478,13 +478,13 @@ static inline void raw_write_seqcount_t_begin(seqcount_t *s)
  */
 #define raw_write_seqcount_end(s)					\
 do {									\
-	raw_write_seqcount_t_end(__seqcount_ptr(s));			\
+	__do_raw_write_seqcount_end(__seqcount_ptr(s));			\
 									\
 	if (__seqcount_lock_preemptible(s))				\
 		preempt_enable();					\
 } while (0)

-static inline void raw_write_seqcount_t_end(seqcount_t *s)
+static inline void __do_raw_write_seqcount_end(seqcount_t *s)
 {
 	smp_wmb();
 	s->sequence++;
@@ -506,12 +506,12 @@ do {									\
 	if (__seqcount_lock_preemptible(s))				\
 		preempt_disable();					\
 									\
-	write_seqcount_t_begin_nested(__seqcount_ptr(s), subclass);	\
+	__do_write_seqcount_begin_nested(__seqcount_ptr(s), subclass);	\
 } while (0)

-static inline void write_seqcount_t_begin_nested(seqcount_t *s, int subclass)
+static inline void __do_write_seqcount_begin_nested(seqcount_t *s, int subclass)
 {
-	raw_write_seqcount_t_begin(s);
+	__do_raw_write_seqcount_begin(s);
 	seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_);
 }

@@ -533,12 +533,12 @@ do {									\
 	if (__seqcount_lock_preemptible(s))				\
 		preempt_disable();					\
 									\
-	write_seqcount_t_begin(__seqcount_ptr(s));			\
+	__do_write_seqcount_begin(__seqcount_ptr(s));			\
 } while (0)

-static inline void write_seqcount_t_begin(seqcount_t *s)
+static inline void __do_write_seqcount_begin(seqcount_t *s)
 {
-	write_seqcount_t_begin_nested(s, 0);
+	__do_write_seqcount_begin_nested(s, 0);
 }

 /**
@@ -549,16 +549,16 @@ static inline void write_seqcount_t_begin(seqcount_t *s)
  */
 #define write_seqcount_end(s)						\
 do {									\
-	write_seqcount_t_end(__seqcount_ptr(s));			\
+	__do_write_seqcount_end(__seqcount_ptr(s));			\
 									\
 	if (__seqcount_lock_preemptible(s))				\
 		preempt_enable();					\
 } while (0)

-static inline void write_seqcount_t_end(seqcount_t *s)
+static inline void __do_write_seqcount_end(seqcount_t *s)
 {
 	seqcount_release(&s->dep_map, _RET_IP_);
-	raw_write_seqcount_t_end(s);
+	__do_raw_write_seqcount_end(s);
 }

 /**
@@ -603,9 +603,9 @@ static inline void write_seqcount_t_end(seqcount_t *s)
  *      }
  */
 #define raw_write_seqcount_barrier(s)					\
-	raw_write_seqcount_t_barrier(__seqcount_ptr(s))
+	__do_raw_write_seqcount_barrier(__seqcount_ptr(s))

-static inline void raw_write_seqcount_t_barrier(seqcount_t *s)
+static inline void __do_raw_write_seqcount_barrier(seqcount_t *s)
 {
 	kcsan_nestable_atomic_begin();
 	s->sequence++;
@@ -623,9 +623,9 @@ static inline void raw_write_seqcount_t_barrier(seqcount_t *s)
  * will complete successfully and see data older than this.
  */
 #define write_seqcount_invalidate(s)					\
-	write_seqcount_t_invalidate(__seqcount_ptr(s))
+	__do_write_seqcount_invalidate(__seqcount_ptr(s))

-static inline void write_seqcount_t_invalidate(seqcount_t *s)
+static inline void __do_write_seqcount_invalidate(seqcount_t *s)
 {
 	smp_wmb();
 	kcsan_nestable_atomic_begin();
@@ -865,7 +865,7 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
 }

 /*
- * For all seqlock_t write side functions, use write_seqcount_*t*_begin()
+ * For all seqlock_t write side functions, use __do_write_seqcount_begin()
  * instead of the generic write_seqcount_begin(). This way, no redundant
  * lockdep_assert_held() checks are added.
  */
@@ -886,7 +886,7 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
 static inline void write_seqlock(seqlock_t *sl)
 {
 	spin_lock(&sl->lock);
-	write_seqcount_t_begin(&sl->seqcount.seqcount);
+	__do_write_seqcount_begin(&sl->seqcount.seqcount);
 }

 /**
@@ -898,7 +898,7 @@ static inline void write_seqlock(seqlock_t *sl)
  */
 static inline void write_sequnlock(seqlock_t *sl)
 {
-	write_seqcount_t_end(&sl->seqcount.seqcount);
+	__do_write_seqcount_end(&sl->seqcount.seqcount);
 	spin_unlock(&sl->lock);
 }

@@ -912,7 +912,7 @@ static inline void write_sequnlock(seqlock_t *sl)
 static inline void write_seqlock_bh(seqlock_t *sl)
 {
 	spin_lock_bh(&sl->lock);
-	write_seqcount_t_begin(&sl->seqcount.seqcount);
+	__do_write_seqcount_begin(&sl->seqcount.seqcount);
 }

 /**
@@ -925,7 +925,7 @@ static inline void write_seqlock_bh(seqlock_t *sl)
  */
 static inline void write_sequnlock_bh(seqlock_t *sl)
 {
-	write_seqcount_t_end(&sl->seqcount.seqcount);
+	__do_write_seqcount_end(&sl->seqcount.seqcount);
 	spin_unlock_bh(&sl->lock);
 }

@@ -939,7 +939,7 @@ static inline void write_sequnlock_bh(seqlock_t *sl)
 static inline void write_seqlock_irq(seqlock_t *sl)
 {
 	spin_lock_irq(&sl->lock);
-	write_seqcount_t_begin(&sl->seqcount.seqcount);
+	__do_write_seqcount_begin(&sl->seqcount.seqcount);
 }

 /**
@@ -951,7 +951,7 @@ static inline void write_seqlock_irq(seqlock_t *sl)
  */
 static inline void write_sequnlock_irq(seqlock_t *sl)
 {
-	write_seqcount_t_end(&sl->seqcount.seqcount);
+	__do_write_seqcount_end(&sl->seqcount.seqcount);
 	spin_unlock_irq(&sl->lock);
 }

@@ -960,7 +960,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl)
 	unsigned long flags;

 	spin_lock_irqsave(&sl->lock, flags);
-	write_seqcount_t_begin(&sl->seqcount.seqcount);
+	__do_write_seqcount_begin(&sl->seqcount.seqcount);
 	return flags;
 }

@@ -989,7 +989,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl)
 static inline void
 write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags)
 {
-	write_seqcount_t_end(&sl->seqcount.seqcount);
+	__do_write_seqcount_end(&sl->seqcount.seqcount);
 	spin_unlock_irqrestore(&sl->lock, flags);
 }

====>
====> patch #2:
====>

Subject: [PATCH 2/2] seqlock: seqcount_LOCKNAME_t: Use more verbose macro names

As evidenced by multiple discussions on LKML so far, it's not obvious
that __seqcount_lock_preemptible() is always false for plain
seqcount_t. For that data type, the _Generic() selection resolves to
__seqprop_preemptible(), which just returns false.

Use __seqcount_associated_lock_exists_and_is_preemptible() instead,
which hints that "preemptibility" is for the associated write
serialization lock (if any), not for the seqcount itself.

Similarly, rename __seqcount_assert_lock_held() to
__seqcount_assert_associated_lock_held().

Link: https://lkml.kernel.org/r/CAHk-=wgB8nyOQufpn0o6a5BpJCJPnXvH+kRxApujhsgG+7qAwQ@mail.gmail.com
Link: https://lkml.kernel.org/r/20201030235121.GQ2620339@nvidia.com
Link: https://lkml.kernel.org/r/20201103170327.GJ20600@xz-x1
Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
---
 include/linux/seqlock.h | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 5de043841d33..eb1e5a822e44 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -307,10 +307,10 @@ SEQCOUNT_LOCKNAME(ww_mutex,     struct ww_mutex, true,     &s->lock->base, ww_mu
 	__seqprop_case((s),	mutex,		prop),			\
 	__seqprop_case((s),	ww_mutex,	prop))

-#define __seqcount_ptr(s)		__seqprop(s, ptr)
-#define __seqcount_sequence(s)		__seqprop(s, sequence)
-#define __seqcount_lock_preemptible(s)	__seqprop(s, preemptible)
-#define __seqcount_assert_lock_held(s)	__seqprop(s, assert)
+#define __seqcount_ptr(s)					__seqprop(s, ptr)
+#define __seqcount_sequence(s)					__seqprop(s, sequence)
+#define __seqcount_associated_lock_exists_and_is_preemptible(s)	__seqprop(s, preemptible)
+#define __seqcount_assert_associated_lock_held(s)		__seqprop(s, assert)

 /**
  * __read_seqcount_begin() - begin a seqcount_t read section w/o barrier
@@ -459,7 +459,7 @@ static inline int __do_read_seqcount_retry(const seqcount_t *s, unsigned start)
  */
 #define raw_write_seqcount_begin(s)					\
 do {									\
-	if (__seqcount_lock_preemptible(s))				\
+	if (__seqcount_associated_lock_exists_and_is_preemptible(s))	\
 		preempt_disable();					\
 									\
 	__do_raw_write_seqcount_begin(__seqcount_ptr(s));		\
@@ -480,7 +480,7 @@ static inline void __do_raw_write_seqcount_begin(seqcount_t *s)
 do {									\
 	__do_raw_write_seqcount_end(__seqcount_ptr(s));			\
 									\
-	if (__seqcount_lock_preemptible(s))				\
+	if (__seqcount_associated_lock_exists_and_is_preemptible(s))	\
 		preempt_enable();					\
 } while (0)

@@ -501,9 +501,9 @@ static inline void __do_raw_write_seqcount_end(seqcount_t *s)
  */
 #define write_seqcount_begin_nested(s, subclass)			\
 do {									\
-	__seqcount_assert_lock_held(s);					\
+	__seqcount_assert_associated_lock_held(s);			\
 									\
-	if (__seqcount_lock_preemptible(s))				\
+	if (__seqcount_associated_lock_exists_and_is_preemptible(s))	\
 		preempt_disable();					\
 									\
 	__do_write_seqcount_begin_nested(__seqcount_ptr(s), subclass);	\
@@ -528,9 +528,9 @@ static inline void __do_write_seqcount_begin_nested(seqcount_t *s, int subclass)
  */
 #define write_seqcount_begin(s)						\
 do {									\
-	__seqcount_assert_lock_held(s);					\
+	__seqcount_assert_associated_lock_held(s);			\
 									\
-	if (__seqcount_lock_preemptible(s))				\
+	if (__seqcount_associated_lock_exists_and_is_preemptible(s))	\
 		preempt_disable();					\
 									\
 	__do_write_seqcount_begin(__seqcount_ptr(s));			\
@@ -551,7 +551,7 @@ static inline void __do_write_seqcount_begin(seqcount_t *s)
 do {									\
 	__do_write_seqcount_end(__seqcount_ptr(s));			\
 									\
-	if (__seqcount_lock_preemptible(s))				\
+	if (__seqcount_associated_lock_exists_and_is_preemptible(s))	\
 		preempt_enable();					\
 } while (0)

>                    Linus

Thanks,

--
Ahmed S. Darwish
Linutronix GmbH


* Re: [PATCH v2 2/2] mm: prevent gup_fast from racing with COW during fork
  @ 2020-11-04  3:17 94%                       ` Ahmed S. Darwish
    0 siblings, 1 reply; 200+ results
From: Ahmed S. Darwish @ 2020-11-04  3:17 UTC (permalink / raw)
  To: John Hubbard
  Cc: Linus Torvalds, Jason Gunthorpe, Peter Xu,
	Linux Kernel Mailing List, Andrea Arcangeli, Andrew Morton,
	Aneesh Kumar K.V, Christoph Hellwig, Hugh Dickins, Jan Kara,
	Jann Horn, Kirill Shutemov, Kirill Tkhai, Leon Romanovsky,
	Linux-MM, Michal Hocko, Oleg Nesterov, Peter Zijlstra,
	Ingo Molnar, Will Deacon, Thomas Gleixner, Sebastian Siewior

On Tue, Nov 03, 2020 at 06:01:30PM -0800, John Hubbard wrote:
> On 11/3/20 5:32 PM, Ahmed S. Darwish wrote:
...
> >   #define __read_seqcount_retry(s, start)					\
> > -	__read_seqcount_t_retry(__seqcount_ptr(s), start)
> > +	__do___read_seqcount_retry(__seqcount_ptr(s), start)
>
...
> A nit: while various numbers of leading underscores are sometimes used, it's a lot
> less common to use, say, 3 consecutive underscores (as above) *within* the name. And
> I don't think you need it for uniqueness, at least from a quick look around here.
>
...
> But again, either way, I think "do" is helping a *lot* here (as is getting rid
> of the _t_ idea).

The three underscores are needed because there's a do_ version for
read_seqcount_retry(), and another for __read_seqcount_retry().

Similarly for {__,}read_seqcount_begin(). You want to be very careful
with this, and never mistakenly mix the two, because it affects some
VFS hot paths.

Nonetheless, as you mentioned in the later (dropped) part of your
message, I think do_ is better than __do_, so the final result will be:

  do___read_seqcount_retry()
  do_read_seqcount_retry()
  do_raw_write_seqcount_begin()
  do_raw_write_seqcount_end()
  do_write_seqcount_begin()
  ...

and so on.

I'll wait for some further feedback on the two patches (possibly from
Linus or PeterZ), then send a mini patch series.

(This shouldn't block a v3 of Jason's mm patch series though, as it will
 be using the external seqlock.h APIs anyway...).

Thanks,

--
Ahmed S. Darwish
Linutronix GmbH


* Re: [PATCH v2 2/2] mm: prevent gup_fast from racing with COW during fork
  @ 2020-11-04 19:54 99%                           ` Ahmed S. Darwish
  2020-12-09 18:38 60%                           ` [tip: locking/core] seqlock: Prefix internal seqcount_t-only macros with a "do_" tip-bot2 for Ahmed S. Darwish
  2020-12-09 18:38 75%                           ` [tip: locking/core] seqlock: kernel-doc: Specify when preemption is automatically altered tip-bot2 for Ahmed S. Darwish
  2 siblings, 0 replies; 200+ results
From: Ahmed S. Darwish @ 2020-11-04 19:54 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: John Hubbard, Jason Gunthorpe, Peter Xu,
	Linux Kernel Mailing List, Andrea Arcangeli, Andrew Morton,
	Aneesh Kumar K.V, Christoph Hellwig, Hugh Dickins, Jan Kara,
	Jann Horn, Kirill Shutemov, Kirill Tkhai, Leon Romanovsky,
	Linux-MM, Michal Hocko, Oleg Nesterov, Peter Zijlstra,
	Ingo Molnar, Will Deacon, Thomas Gleixner, Sebastian Siewior

On Wed, Nov 04, 2020 at 10:38:57AM -0800, Linus Torvalds wrote:
...
>
> Looks reasonable to me.
>
> And can you add a few comments to the magic type macros, so that it's
> a lot more obvious what the end result was.
...
>
> I can see it when I really look, but when looking at the actual use,
> it's very non-obvious indeed.
>

ACK, will do.

>                  Linus

Thanks,

--
Ahmed S. Darwish
Linutronix GmbH


* Re: [PATCH v3 2/2] mm: prevent gup_fast from racing with COW during fork
       [not found]     ` <2-v3-7358966cab09+14e9-gup_fork_jgg@nvidia.com>
@ 2020-11-06 17:17 97%   ` Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: Ahmed S. Darwish @ 2020-11-06 17:17 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: linux-kernel, Peter Xu, Linus Torvalds, Andrea Arcangeli,
	Andrew Morton, Aneesh Kumar K.V, Christoph Hellwig, Hugh Dickins,
	Jan Kara, Jann Horn, John Hubbard, Kirill Shutemov, Kirill Tkhai,
	Leon Romanovsky, Linux-MM, Michal Hocko, Oleg Nesterov,
	Peter Zijlstra, Sebastian A. Siewior, Thomas Gleixner

Hi Jason,

On Fri, Nov 06, 2020 at 11:55:14AM -0400, Jason Gunthorpe wrote:
...
> +	if (gup_flags & FOLL_PIN) {
> +		seq = raw_read_seqcount(&current->mm->write_protect_seq);
> +		if (seq & 1)
> +			return 0;
> +	}
> +
...
> +	if (gup_flags & FOLL_PIN) {
> +		if (read_seqcount_t_retry(&current->mm->write_protect_seq,
> +					  seq)) {
> +			unpin_user_pages(pages, nr_pinned);
> +			return 0;
> +		}
> +	}

From seqlock.h:

    /**
     * raw_read_seqcount() - read the raw seqcount_t counter value
     * ...
     * Return: count to be passed to read_seqcount_retry()
     */
    #define raw_read_seqcount(s)

Please avoid using the internal API (read_seqcount_*t*_retry) and just
use read_seqcount_retry() as the documentation suggests.

(I guess you just missed changing that last one... I'm in the process
 of changing all this "*_seqcount_t_*" stuff to "do_*", as we discussed
 on the v2 thread. Hopefully there will be no more confusion after
 that.)
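
I.e., for the hunk quoted above, just:

	if (gup_flags & FOLL_PIN) {
		if (read_seqcount_retry(&current->mm->write_protect_seq,
					seq)) {
			unpin_user_pages(pages, nr_pinned);
			return 0;
		}
	}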

Kind regards,

--
Ahmed S. Darwish
Linutronix GmbH


* Re: [PATCH v3 0/2] Add a seqcount between gup_fast and copy_page_range()
       [not found]     <0-v3-7358966cab09+14e9-gup_fork_jgg@nvidia.com>
       [not found]     ` <2-v3-7358966cab09+14e9-gup_fork_jgg@nvidia.com>
@ 2020-11-06 18:52 99% ` Ahmed S. Darwish
  1 sibling, 0 replies; 200+ results
From: Ahmed S. Darwish @ 2020-11-06 18:52 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: linux-kernel, Peter Xu, Linus Torvalds, Andrea Arcangeli,
	Andrew Morton, Aneesh Kumar K.V, Christoph Hellwig, Hugh Dickins,
	Jan Kara, Jann Horn, John Hubbard, Kirill Shutemov, Kirill Tkhai,
	Leon Romanovsky, Linux-MM, Michal Hocko, Oleg Nesterov,
	Konstantin Ryabitsev, Sebastian A. Siewior, Thomas Gleixner

On Fri, Nov 06, 2020 at 11:55:12AM -0400, Jason Gunthorpe wrote:
...
>
>  arch/x86/kernel/tboot.c    |   1 +
>  drivers/firmware/efi/efi.c |   1 +
>  include/linux/mm_types.h   |   8 +++
>  kernel/fork.c              |   1 +
>  mm/gup.c                   | 118 +++++++++++++++++++++++--------------
>  mm/init-mm.c               |   1 +
>  mm/memory.c                |  13 +++-
>  7 files changed, 97 insertions(+), 46 deletions(-)
>

Nitpick: Please also use the "--base" option of git format-patch. This
will produce a nice "base-commit: " tag below the diffstat.

Konstantin's amazing "b4" tool gets much happier with that ;-)
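
For example, something like this should do it for a two-patch series
("--base=auto" needs the branch to track its upstream; an explicit
"--base=<commit>" works as well):

  $ git format-patch --base=auto -v4 -2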

All the best,

--
Ahmed S. Darwish
Linutronix GmbH


* Re: [PATCH v4 2/2] mm: prevent gup_fast from racing with COW during fork
       [not found]     ` <2-v4-908497cf359a+4782-gup_fork_jgg@nvidia.com>
@ 2020-11-12  7:41 99%   ` Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: Ahmed S. Darwish @ 2020-11-12  7:41 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: linux-kernel, Peter Xu, Linus Torvalds, Andrea Arcangeli,
	Andrew Morton, Aneesh Kumar K.V, Christoph Hellwig, Hugh Dickins,
	Jan Kara, Jann Horn, John Hubbard, Kirill Shutemov, Kirill Tkhai,
	Leon Romanovsky, Linux-MM, Michal Hocko, Oleg Nesterov

On Tue, Nov 10, 2020 at 07:44:09PM -0400, Jason Gunthorpe wrote:
...
>
> Fixes: f3c64eda3e50 ("mm: avoid early COW write protect games during fork()")
> Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
> Link: https://lore.kernel.org/r/CAHk-=wi=iCnYCARbPGjkVJu9eyYeZ13N64tZYLdOB8CP5Q_PLw@mail.gmail.com
> Reviewed-by: John Hubbard <jhubbard@nvidia.com>
> Reviewed-by: Jan Kara <jack@suse.cz>
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> ---

Thanks for the v3 and v4 updates.

For the seqcount_t parts:

  Acked-by: "Ahmed S. Darwish" <a.darwish@linutronix.de>


* [PATCH v2] scsi: NCR5380: Remove context check
    @ 2020-12-04 15:32 49% ` Ahmed S. Darwish
  1 sibling, 0 replies; 200+ results
From: Ahmed S. Darwish @ 2020-12-04 15:32 UTC (permalink / raw)
  To: Finn Thain
  Cc: Michael Schmitz, Geert Uytterhoeven, James E.J. Bottomley,
	Martin K. Petersen, Sebastian A. Siewior, Thomas Gleixner,
	linux-scsi, linux-kernel, Ahmed S. Darwish

NCR5380_poll_politely2() uses in_interrupt() and irqs_disabled() to
check if it is safe to sleep.

Such usage in drivers is being phased out; Linus clearly requested that
code which changes behaviour depending on context should either be
separated, or that the context be explicitly conveyed in an argument
passed by the caller.

Below is a context analysis of NCR5380_poll_politely2()'s uppermost
callers:

  - NCR5380_maybe_reset_bus(), task, invoked during device probe.
    -> NCR5380_poll_politely()
    -> do_abort()

  - NCR5380_select(), task, but can only sleep in the "release, then
    re-acquire" regions of the spinlock held by its caller.
    Sleeping invocations (lock released):
    -> NCR5380_poll_politely2()

    Atomic invocations (lock acquired):
    -> NCR5380_reselect()
       -> NCR5380_poll_politely()
       -> do_abort()
       -> NCR5380_transfer_pio()

  - NCR5380_intr(), interrupt handler
    -> NCR5380_dma_complete()
       -> NCR5380_transfer_pio()
	  -> NCR5380_poll_politely()
    -> NCR5380_reselect() (see above)

  - NCR5380_information_transfer(), task, but can only sleep in the
    "release, then re-acquire" regions of the caller-held spinlock.
    Sleeping invocations (lock released):
      - NCR5380_transfer_pio() -> NCR5380_poll_politely()
      - NCR5380_poll_politely()

    Atomic invocations (lock acquired):
      - NCR5380_transfer_dma()
	-> NCR5380_dma_recv_setup()
           => generic_NCR5380_precv() -> NCR5380_poll_politely()
	   => macscsi_pread() -> NCR5380_poll_politely()

	-> NCR5380_dma_send_setup()
 	   => generic_NCR5380_psend() -> NCR5380_poll_politely2()
	   => macscsi_pwrite() -> NCR5380_poll_politely()

	-> NCR5380_poll_politely2()
        -> NCR5380_dma_complete()
           -> NCR5380_transfer_pio()
	      -> NCR5380_poll_politely()
      - NCR5380_transfer_pio() -> NCR5380_poll_politely()

  - NCR5380_reselect(), atomic, always called with hostdata spinlock
    held.

Since NCR5380_poll_politely2() already takes a "wait" argument in
jiffies, use it to determine if the function can sleep. Modify atomic
callers, which passed an unused wait value in terms of HZ, to pass zero.

Suggested-by: Finn Thain <fthain@telegraphics.com.au>
Co-developed-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Cc: Michael Schmitz <schmitzmic@gmail.com>
Cc: <linux-m68k@lists.linux-m68k.org>
---
 drivers/scsi/NCR5380.c   | 77 ++++++++++++++++++++++------------------
 drivers/scsi/NCR5380.h   |  3 +-
 drivers/scsi/g_NCR5380.c | 12 +++----
 drivers/scsi/mac_scsi.c  | 10 +++---
 4 files changed, 55 insertions(+), 47 deletions(-)

diff --git a/drivers/scsi/NCR5380.c b/drivers/scsi/NCR5380.c
index d654a6cc4162..60200f61592e 100644
--- a/drivers/scsi/NCR5380.c
+++ b/drivers/scsi/NCR5380.c
@@ -132,7 +132,7 @@
 static unsigned int disconnect_mask = ~0;
 module_param(disconnect_mask, int, 0444);
 
-static int do_abort(struct Scsi_Host *);
+static int do_abort(struct Scsi_Host *, unsigned int);
 static void do_reset(struct Scsi_Host *);
 static void bus_reset_cleanup(struct Scsi_Host *);
 
@@ -197,7 +197,7 @@ static inline void set_resid_from_SCp(struct scsi_cmnd *cmd)
  * @reg2: Second 5380 register to poll
  * @bit2: Second bitmask to check
  * @val2: Second expected value
- * @wait: Time-out in jiffies
+ * @wait: Time-out in jiffies, 0 if sleeping is not allowed
  *
  * Polls the chip in a reasonably efficient manner waiting for an
  * event to occur. After a short quick poll we begin to yield the CPU
@@ -213,7 +213,7 @@ static int NCR5380_poll_politely2(struct NCR5380_hostdata *hostdata,
                                   unsigned long wait)
 {
 	unsigned long n = hostdata->poll_loops;
-	unsigned long deadline = jiffies + wait;
+	unsigned long deadline;
 
 	do {
 		if ((NCR5380_read(reg1) & bit1) == val1)
@@ -223,10 +223,11 @@ static int NCR5380_poll_politely2(struct NCR5380_hostdata *hostdata,
 		cpu_relax();
 	} while (n--);
 
-	if (irqs_disabled() || in_interrupt())
+	if (!wait)
 		return -ETIMEDOUT;
 
 	/* Repeatedly sleep for 1 ms until deadline */
+	deadline = jiffies + wait;
 	while (time_is_after_jiffies(deadline)) {
 		schedule_timeout_uninterruptible(1);
 		if ((NCR5380_read(reg1) & bit1) == val1)
@@ -486,7 +487,7 @@ static int NCR5380_maybe_reset_bus(struct Scsi_Host *instance)
 			break;
 		case 2:
 			shost_printk(KERN_ERR, instance, "bus busy, attempting abort\n");
-			do_abort(instance);
+			do_abort(instance, 1);
 			break;
 		case 4:
 			shost_printk(KERN_ERR, instance, "bus busy, attempting reset\n");
@@ -818,7 +819,7 @@ static void NCR5380_dma_complete(struct Scsi_Host *instance)
 			if (toPIO > 0) {
 				dsprintk(NDEBUG_DMA, instance,
 				         "Doing %d byte PIO to 0x%p\n", cnt, *data);
-				NCR5380_transfer_pio(instance, &p, &cnt, data);
+				NCR5380_transfer_pio(instance, &p, &cnt, data, 0);
 				*count -= toPIO - cnt;
 			}
 		}
@@ -1185,7 +1186,7 @@ static bool NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
 		goto out;
 	}
 	if (!hostdata->selecting) {
-		do_abort(instance);
+		do_abort(instance, 0);
 		return false;
 	}
 
@@ -1196,7 +1197,7 @@ static bool NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
 	len = 1;
 	data = tmp;
 	phase = PHASE_MSGOUT;
-	NCR5380_transfer_pio(instance, &phase, &len, &data);
+	NCR5380_transfer_pio(instance, &phase, &len, &data, 0);
 	if (len) {
 		NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
 		cmd->result = DID_ERROR << 16;
@@ -1234,7 +1235,8 @@ static bool NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
  *
  * Inputs : instance - instance of driver, *phase - pointer to
  * what phase is expected, *count - pointer to number of
- * bytes to transfer, **data - pointer to data pointer.
+ * bytes to transfer, **data - pointer to data pointer,
+ * can_sleep - 1 or 0 when sleeping is permitted or not, respectively.
  *
  * Returns : -1 when different phase is entered without transferring
  * maximum number of bytes, 0 if all bytes are transferred or exit
@@ -1253,7 +1255,7 @@ static bool NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
 
 static int NCR5380_transfer_pio(struct Scsi_Host *instance,
 				unsigned char *phase, int *count,
-				unsigned char **data)
+				unsigned char **data, unsigned int can_sleep)
 {
 	struct NCR5380_hostdata *hostdata = shost_priv(instance);
 	unsigned char p = *phase, tmp;
@@ -1274,7 +1276,8 @@ static int NCR5380_transfer_pio(struct Scsi_Host *instance,
 		 * valid
 		 */
 
-		if (NCR5380_poll_politely(hostdata, STATUS_REG, SR_REQ, SR_REQ, HZ) < 0)
+		if (NCR5380_poll_politely(hostdata, STATUS_REG, SR_REQ, SR_REQ,
+					  HZ * can_sleep) < 0)
 			break;
 
 		dsprintk(NDEBUG_HANDSHAKE, instance, "REQ asserted\n");
@@ -1320,7 +1323,7 @@ static int NCR5380_transfer_pio(struct Scsi_Host *instance,
 		}
 
 		if (NCR5380_poll_politely(hostdata,
-		                          STATUS_REG, SR_REQ, 0, 5 * HZ) < 0)
+		                          STATUS_REG, SR_REQ, 0, 5 * HZ * can_sleep) < 0)
 			break;
 
 		dsprintk(NDEBUG_HANDSHAKE, instance, "REQ negated, handshake complete\n");
@@ -1395,11 +1398,12 @@ static void do_reset(struct Scsi_Host *instance)
  * do_abort - abort the currently established nexus by going to
  * MESSAGE OUT phase and sending an ABORT message.
  * @instance: relevant scsi host instance
+ * @can_sleep: 1 or 0 when sleeping is permitted or not, respectively
  *
  * Returns 0 on success, negative error code on failure.
  */
 
-static int do_abort(struct Scsi_Host *instance)
+static int do_abort(struct Scsi_Host *instance, unsigned int can_sleep)
 {
 	struct NCR5380_hostdata *hostdata = shost_priv(instance);
 	unsigned char *msgptr, phase, tmp;
@@ -1419,7 +1423,8 @@ static int do_abort(struct Scsi_Host *instance)
 	 * the target sees, so we just handshake.
 	 */
 
-	rc = NCR5380_poll_politely(hostdata, STATUS_REG, SR_REQ, SR_REQ, 10 * HZ);
+	rc = NCR5380_poll_politely(hostdata, STATUS_REG, SR_REQ, SR_REQ,
+				   10 * HZ * can_sleep);
 	if (rc < 0)
 		goto out;
 
@@ -1430,7 +1435,8 @@ static int do_abort(struct Scsi_Host *instance)
 	if (tmp != PHASE_MSGOUT) {
 		NCR5380_write(INITIATOR_COMMAND_REG,
 		              ICR_BASE | ICR_ASSERT_ATN | ICR_ASSERT_ACK);
-		rc = NCR5380_poll_politely(hostdata, STATUS_REG, SR_REQ, 0, 3 * HZ);
+		rc = NCR5380_poll_politely(hostdata, STATUS_REG, SR_REQ, 0,
+					   3 * HZ * can_sleep);
 		if (rc < 0)
 			goto out;
 		NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
@@ -1440,7 +1446,7 @@ static int do_abort(struct Scsi_Host *instance)
 	msgptr = &tmp;
 	len = 1;
 	phase = PHASE_MSGOUT;
-	NCR5380_transfer_pio(instance, &phase, &len, &msgptr);
+	NCR5380_transfer_pio(instance, &phase, &len, &msgptr, can_sleep);
 	if (len)
 		rc = -ENXIO;
 
@@ -1619,12 +1625,12 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance,
 			 */
 
 			if (NCR5380_poll_politely(hostdata, BUS_AND_STATUS_REG,
-			                          BASR_DRQ, BASR_DRQ, HZ) < 0) {
+			                          BASR_DRQ, BASR_DRQ, 0) < 0) {
 				result = -1;
 				shost_printk(KERN_ERR, instance, "PDMA read: DRQ timeout\n");
 			}
 			if (NCR5380_poll_politely(hostdata, STATUS_REG,
-			                          SR_REQ, 0, HZ) < 0) {
+			                          SR_REQ, 0, 0) < 0) {
 				result = -1;
 				shost_printk(KERN_ERR, instance, "PDMA read: !REQ timeout\n");
 			}
@@ -1636,7 +1642,7 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance,
 			 */
 			if (NCR5380_poll_politely2(hostdata,
 			     BUS_AND_STATUS_REG, BASR_DRQ, BASR_DRQ,
-			     BUS_AND_STATUS_REG, BASR_PHASE_MATCH, 0, HZ) < 0) {
+			     BUS_AND_STATUS_REG, BASR_PHASE_MATCH, 0, 0) < 0) {
 				result = -1;
 				shost_printk(KERN_ERR, instance, "PDMA write: DRQ and phase timeout\n");
 			}
@@ -1733,7 +1739,7 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
 #if (NDEBUG & NDEBUG_NO_DATAOUT)
 				shost_printk(KERN_DEBUG, instance, "NDEBUG_NO_DATAOUT set, attempted DATAOUT aborted\n");
 				sink = 1;
-				do_abort(instance);
+				do_abort(instance, 0);
 				cmd->result = DID_ERROR << 16;
 				complete_cmd(instance, cmd);
 				hostdata->connected = NULL;
@@ -1789,7 +1795,8 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
 							   NCR5380_PIO_CHUNK_SIZE);
 					len = transfersize;
 					NCR5380_transfer_pio(instance, &phase, &len,
-					                     (unsigned char **)&cmd->SCp.ptr);
+					                     (unsigned char **)&cmd->SCp.ptr,
+							     0);
 					cmd->SCp.this_residual -= transfersize - len;
 				}
 #ifdef CONFIG_SUN3
@@ -1800,7 +1807,7 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
 			case PHASE_MSGIN:
 				len = 1;
 				data = &tmp;
-				NCR5380_transfer_pio(instance, &phase, &len, &data);
+				NCR5380_transfer_pio(instance, &phase, &len, &data, 0);
 				cmd->SCp.Message = tmp;
 
 				switch (tmp) {
@@ -1907,7 +1914,7 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
 					len = 2;
 					data = extended_msg + 1;
 					phase = PHASE_MSGIN;
-					NCR5380_transfer_pio(instance, &phase, &len, &data);
+					NCR5380_transfer_pio(instance, &phase, &len, &data, 1);
 					dsprintk(NDEBUG_EXTENDED, instance, "length %d, code 0x%02x\n",
 					         (int)extended_msg[1],
 					         (int)extended_msg[2]);
@@ -1920,7 +1927,7 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
 						data = extended_msg + 3;
 						phase = PHASE_MSGIN;
 
-						NCR5380_transfer_pio(instance, &phase, &len, &data);
+						NCR5380_transfer_pio(instance, &phase, &len, &data, 1);
 						dsprintk(NDEBUG_EXTENDED, instance, "message received, residual %d\n",
 						         len);
 
@@ -1967,7 +1974,7 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
 				len = 1;
 				data = &msgout;
 				hostdata->last_message = msgout;
-				NCR5380_transfer_pio(instance, &phase, &len, &data);
+				NCR5380_transfer_pio(instance, &phase, &len, &data, 0);
 				if (msgout == ABORT) {
 					hostdata->connected = NULL;
 					hostdata->busy[scmd_id(cmd)] &= ~(1 << cmd->device->lun);
@@ -1986,12 +1993,12 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
 				 * PSEUDO-DMA architecture we should probably
 				 * use the dma transfer function.
 				 */
-				NCR5380_transfer_pio(instance, &phase, &len, &data);
+				NCR5380_transfer_pio(instance, &phase, &len, &data, 0);
 				break;
 			case PHASE_STATIN:
 				len = 1;
 				data = &tmp;
-				NCR5380_transfer_pio(instance, &phase, &len, &data);
+				NCR5380_transfer_pio(instance, &phase, &len, &data, 0);
 				cmd->SCp.Status = tmp;
 				break;
 			default:
@@ -2050,7 +2057,7 @@ static void NCR5380_reselect(struct Scsi_Host *instance)
 
 	NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_BSY);
 	if (NCR5380_poll_politely(hostdata,
-	                          STATUS_REG, SR_SEL, 0, 2 * HZ) < 0) {
+	                          STATUS_REG, SR_SEL, 0, 0) < 0) {
 		shost_printk(KERN_ERR, instance, "reselect: !SEL timeout\n");
 		NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
 		return;
@@ -2062,12 +2069,12 @@ static void NCR5380_reselect(struct Scsi_Host *instance)
 	 */
 
 	if (NCR5380_poll_politely(hostdata,
-	                          STATUS_REG, SR_REQ, SR_REQ, 2 * HZ) < 0) {
+	                          STATUS_REG, SR_REQ, SR_REQ, 0) < 0) {
 		if ((NCR5380_read(STATUS_REG) & (SR_BSY | SR_SEL)) == 0)
 			/* BUS FREE phase */
 			return;
 		shost_printk(KERN_ERR, instance, "reselect: REQ timeout\n");
-		do_abort(instance);
+		do_abort(instance, 0);
 		return;
 	}
 
@@ -2083,10 +2090,10 @@ static void NCR5380_reselect(struct Scsi_Host *instance)
 		unsigned char *data = msg;
 		unsigned char phase = PHASE_MSGIN;
 
-		NCR5380_transfer_pio(instance, &phase, &len, &data);
+		NCR5380_transfer_pio(instance, &phase, &len, &data, 0);
 
 		if (len) {
-			do_abort(instance);
+			do_abort(instance, 0);
 			return;
 		}
 	}
@@ -2096,7 +2103,7 @@ static void NCR5380_reselect(struct Scsi_Host *instance)
 		shost_printk(KERN_ERR, instance, "expecting IDENTIFY message, got ");
 		spi_print_msg(msg);
 		printk("\n");
-		do_abort(instance);
+		do_abort(instance, 0);
 		return;
 	}
 	lun = msg[0] & 0x07;
@@ -2136,7 +2143,7 @@ static void NCR5380_reselect(struct Scsi_Host *instance)
 		 * Since we have an established nexus that we can't do anything
 		 * with, we must abort it.
 		 */
-		if (do_abort(instance) == 0)
+		if (do_abort(instance, 0) == 0)
 			hostdata->busy[target] &= ~(1 << lun);
 		return;
 	}
@@ -2283,7 +2290,7 @@ static int NCR5380_abort(struct scsi_cmnd *cmd)
 		dsprintk(NDEBUG_ABORT, instance, "abort: cmd %p is connected\n", cmd);
 		hostdata->connected = NULL;
 		hostdata->dma_len = 0;
-		if (do_abort(instance) < 0) {
+		if (do_abort(instance, 0) < 0) {
 			set_host_byte(cmd, DID_ERROR);
 			complete_cmd(instance, cmd);
 			result = FAILED;
diff --git a/drivers/scsi/NCR5380.h b/drivers/scsi/NCR5380.h
index 5935fd6d1a05..8a3b41932288 100644
--- a/drivers/scsi/NCR5380.h
+++ b/drivers/scsi/NCR5380.h
@@ -277,7 +277,8 @@ static const char *NCR5380_info(struct Scsi_Host *instance);
 static void NCR5380_reselect(struct Scsi_Host *instance);
 static bool NCR5380_select(struct Scsi_Host *, struct scsi_cmnd *);
 static int NCR5380_transfer_dma(struct Scsi_Host *instance, unsigned char *phase, int *count, unsigned char **data);
-static int NCR5380_transfer_pio(struct Scsi_Host *instance, unsigned char *phase, int *count, unsigned char **data);
+static int NCR5380_transfer_pio(struct Scsi_Host *instance, unsigned char *phase, int *count, unsigned char **data,
+				unsigned int can_sleep);
 static int NCR5380_poll_politely2(struct NCR5380_hostdata *,
                                   unsigned int, u8, u8,
                                   unsigned int, u8, u8, unsigned long);
diff --git a/drivers/scsi/g_NCR5380.c b/drivers/scsi/g_NCR5380.c
index 29e4cdcade72..2df2f38a9b12 100644
--- a/drivers/scsi/g_NCR5380.c
+++ b/drivers/scsi/g_NCR5380.c
@@ -529,14 +529,14 @@ static inline int generic_NCR5380_precv(struct NCR5380_hostdata *hostdata,
 		if (start == len - 128) {
 			/* Ignore End of DMA interrupt for the final buffer */
 			if (NCR5380_poll_politely(hostdata, hostdata->c400_ctl_status,
-			                          CSR_HOST_BUF_NOT_RDY, 0, HZ / 64) < 0)
+			                          CSR_HOST_BUF_NOT_RDY, 0, 0) < 0)
 				break;
 		} else {
 			if (NCR5380_poll_politely2(hostdata, hostdata->c400_ctl_status,
 			                           CSR_HOST_BUF_NOT_RDY, 0,
 			                           hostdata->c400_ctl_status,
 			                           CSR_GATED_53C80_IRQ,
-			                           CSR_GATED_53C80_IRQ, HZ / 64) < 0 ||
+			                           CSR_GATED_53C80_IRQ, 0) < 0 ||
 			    NCR5380_read(hostdata->c400_ctl_status) & CSR_HOST_BUF_NOT_RDY)
 				break;
 		}
@@ -565,7 +565,7 @@ static inline int generic_NCR5380_precv(struct NCR5380_hostdata *hostdata,
 	if (residual == 0 && NCR5380_poll_politely(hostdata, BUS_AND_STATUS_REG,
 	                                           BASR_END_DMA_TRANSFER,
 	                                           BASR_END_DMA_TRANSFER,
-	                                           HZ / 64) < 0)
+						   0) < 0)
 		scmd_printk(KERN_ERR, hostdata->connected, "%s: End of DMA timeout\n",
 		            __func__);
 
@@ -597,7 +597,7 @@ static inline int generic_NCR5380_psend(struct NCR5380_hostdata *hostdata,
 		                           CSR_HOST_BUF_NOT_RDY, 0,
 		                           hostdata->c400_ctl_status,
 		                           CSR_GATED_53C80_IRQ,
-		                           CSR_GATED_53C80_IRQ, HZ / 64) < 0 ||
+		                           CSR_GATED_53C80_IRQ, 0) < 0 ||
 		    NCR5380_read(hostdata->c400_ctl_status) & CSR_HOST_BUF_NOT_RDY) {
 			/* Both 128 B buffers are in use */
 			if (start >= 128)
@@ -644,13 +644,13 @@ static inline int generic_NCR5380_psend(struct NCR5380_hostdata *hostdata,
 	if (residual == 0) {
 		if (NCR5380_poll_politely(hostdata, TARGET_COMMAND_REG,
 		                          TCR_LAST_BYTE_SENT, TCR_LAST_BYTE_SENT,
-		                          HZ / 64) < 0)
+					  0) < 0)
 			scmd_printk(KERN_ERR, hostdata->connected,
 			            "%s: Last Byte Sent timeout\n", __func__);
 
 		if (NCR5380_poll_politely(hostdata, BUS_AND_STATUS_REG,
 		                          BASR_END_DMA_TRANSFER, BASR_END_DMA_TRANSFER,
-		                          HZ / 64) < 0)
+					  0) < 0)
 			scmd_printk(KERN_ERR, hostdata->connected, "%s: End of DMA timeout\n",
 			            __func__);
 	}
diff --git a/drivers/scsi/mac_scsi.c b/drivers/scsi/mac_scsi.c
index b5dde9d0d054..5c808fbc6ce2 100644
--- a/drivers/scsi/mac_scsi.c
+++ b/drivers/scsi/mac_scsi.c
@@ -285,7 +285,7 @@ static inline int macscsi_pread(struct NCR5380_hostdata *hostdata,
 
 	while (!NCR5380_poll_politely(hostdata, BUS_AND_STATUS_REG,
 	                              BASR_DRQ | BASR_PHASE_MATCH,
-	                              BASR_DRQ | BASR_PHASE_MATCH, HZ / 64)) {
+	                              BASR_DRQ | BASR_PHASE_MATCH, 0)) {
 		int bytes;
 
 		if (macintosh_config->ident == MAC_MODEL_IIFX)
@@ -304,7 +304,7 @@ static inline int macscsi_pread(struct NCR5380_hostdata *hostdata,
 
 		if (NCR5380_poll_politely2(hostdata, STATUS_REG, SR_REQ, SR_REQ,
 		                           BUS_AND_STATUS_REG, BASR_ACK,
-		                           BASR_ACK, HZ / 64) < 0)
+		                           BASR_ACK, 0) < 0)
 			scmd_printk(KERN_DEBUG, hostdata->connected,
 			            "%s: !REQ and !ACK\n", __func__);
 		if (!(NCR5380_read(BUS_AND_STATUS_REG) & BASR_PHASE_MATCH))
@@ -344,7 +344,7 @@ static inline int macscsi_pwrite(struct NCR5380_hostdata *hostdata,
 
 	while (!NCR5380_poll_politely(hostdata, BUS_AND_STATUS_REG,
 	                              BASR_DRQ | BASR_PHASE_MATCH,
-	                              BASR_DRQ | BASR_PHASE_MATCH, HZ / 64)) {
+	                              BASR_DRQ | BASR_PHASE_MATCH, 0)) {
 		int bytes;
 
 		if (macintosh_config->ident == MAC_MODEL_IIFX)
@@ -362,7 +362,7 @@ static inline int macscsi_pwrite(struct NCR5380_hostdata *hostdata,
 			if (NCR5380_poll_politely(hostdata, TARGET_COMMAND_REG,
 			                          TCR_LAST_BYTE_SENT,
 			                          TCR_LAST_BYTE_SENT,
-			                          HZ / 64) < 0) {
+			                          0) < 0) {
 				scmd_printk(KERN_ERR, hostdata->connected,
 				            "%s: Last Byte Sent timeout\n", __func__);
 				result = -1;
@@ -372,7 +372,7 @@ static inline int macscsi_pwrite(struct NCR5380_hostdata *hostdata,
 
 		if (NCR5380_poll_politely2(hostdata, STATUS_REG, SR_REQ, SR_REQ,
 		                           BUS_AND_STATUS_REG, BASR_ACK,
-		                           BASR_ACK, HZ / 64) < 0)
+		                           BASR_ACK, 0) < 0)
 			scmd_printk(KERN_DEBUG, hostdata->connected,
 			            "%s: !REQ and !ACK\n", __func__);
 		if (!(NCR5380_read(BUS_AND_STATUS_REG) & BASR_PHASE_MATCH))
-- 
2.29.2



* Re: [PATCH] scsi/NCR5380: Remove in_interrupt() test
  @ 2020-12-04 16:08 99%     ` Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: Ahmed S. Darwish @ 2020-12-04 16:08 UTC (permalink / raw)
  To: Finn Thain
  Cc: Sebastian Andrzej Siewior, Michael Schmitz, James E.J. Bottomley,
	Martin K. Petersen, Thomas Gleixner, linux-scsi, linux-kernel

On Fri, Dec 04, 2020 at 10:08:08AM +1100, Finn Thain wrote:
...
>
> You've put your finger on a known problem with certain
> NCR5380_poll_politely() call sites. That is, the nominal timeout, HZ / 64,
> is meaningless because it is ignored in atomic context. So you may as well
> specify 0 jiffies at these call sites. (There will be a 1 jiffy timeout
> applied regardless.)
...
>
> However, I can see the value in your approach, i.e. passing a zero timeout
> to NCR5380_poll_politely() whenever that argument is unused. And I agree
> that this could then be used to inhibit sleeping, rather than testing
> irqs_disabled().
>
> So if you really don't like irqs_disabled(), perhaps you can just keep the
> better parts of your two attempts, i.e. passing 0 to
> NCR5380_poll_politely() where appropriate and facilitating that by adding
> a new can_sleep parameter to do_abort() and NCR5380_transfer_pio(), as in,
...
>
> Does that sound like a reasonable compromise?
>

Yes, of course. Thanks a lot.

I've sent a v2.

--
Ahmed S. Darwish
Linutronix GmbH


* [PATCH v3] scsi: NCR5380: Remove context check
  @ 2020-12-06  7:51 50% ` Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: Ahmed S. Darwish @ 2020-12-06  7:51 UTC (permalink / raw)
  To: Finn Thain
  Cc: Michael Schmitz, Geert Uytterhoeven, James E.J. Bottomley,
	Martin K. Petersen, Sebastian A. Siewior, Thomas Gleixner,
	linux-scsi, linux-kernel, Ahmed S. Darwish

NCR5380_poll_politely2() uses in_interrupt() and irqs_disabled() to
check if it is safe to sleep.

Such usage in drivers is being phased out; Linus clearly requested that
code which changes behaviour depending on context should either be
separated, or that the context be explicitly conveyed in an argument
passed by the caller.

Below is a context analysis of NCR5380_poll_politely2()'s uppermost
callers:

  - NCR5380_maybe_reset_bus(), task, invoked during device probe.
    -> NCR5380_poll_politely()
    -> do_abort()

  - NCR5380_select(), task, but can only sleep in the "release, then
    re-acquire" regions of the spinlock held by its caller.
    Sleeping invocations (lock released):
    -> NCR5380_poll_politely2()

    Atomic invocations (lock acquired):
    -> NCR5380_reselect()
       -> NCR5380_poll_politely()
       -> do_abort()
       -> NCR5380_transfer_pio()

  - NCR5380_intr(), interrupt handler
    -> NCR5380_dma_complete()
       -> NCR5380_transfer_pio()
	  -> NCR5380_poll_politely()
    -> NCR5380_reselect() (see above)

  - NCR5380_information_transfer(), task, but can only sleep in the
    "release, then re-acquire" regions of the caller-held spinlock.
    Sleeping invocations (lock released):
      - NCR5380_transfer_pio() -> NCR5380_poll_politely()
      - NCR5380_poll_politely()

    Atomic invocations (lock acquired):
      - NCR5380_transfer_dma()
	-> NCR5380_dma_recv_setup()
           => generic_NCR5380_precv() -> NCR5380_poll_politely()
	   => macscsi_pread() -> NCR5380_poll_politely()

	-> NCR5380_dma_send_setup()
 	   => generic_NCR5380_psend() -> NCR5380_poll_politely2()
	   => macscsi_pwrite() -> NCR5380_poll_politely()

	-> NCR5380_poll_politely2()
        -> NCR5380_dma_complete()
           -> NCR5380_transfer_pio()
	      -> NCR5380_poll_politely()
      - NCR5380_transfer_pio() -> NCR5380_poll_politely()

  - NCR5380_reselect(), atomic, always called with hostdata spinlock
    held.

Since NCR5380_poll_politely2() already takes a "wait" argument in
jiffies, use it to determine if the function can sleep. Modify atomic
callers, which passed an unused wait value in terms of HZ, to pass zero.

Suggested-by: Finn Thain <fthain@telegraphics.com.au>
Co-developed-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Cc: Michael Schmitz <schmitzmic@gmail.com>
Cc: <linux-m68k@lists.linux-m68k.org>
---
 drivers/scsi/NCR5380.c   | 74 ++++++++++++++++++++++------------------
 drivers/scsi/NCR5380.h   |  3 +-
 drivers/scsi/g_NCR5380.c | 12 +++----
 drivers/scsi/mac_scsi.c  | 10 +++---
 4 files changed, 53 insertions(+), 46 deletions(-)

diff --git a/drivers/scsi/NCR5380.c b/drivers/scsi/NCR5380.c
index d654a6cc4162..448cd22214c0 100644
--- a/drivers/scsi/NCR5380.c
+++ b/drivers/scsi/NCR5380.c
@@ -132,7 +132,7 @@
 static unsigned int disconnect_mask = ~0;
 module_param(disconnect_mask, int, 0444);
 
-static int do_abort(struct Scsi_Host *);
+static int do_abort(struct Scsi_Host *, unsigned int);
 static void do_reset(struct Scsi_Host *);
 static void bus_reset_cleanup(struct Scsi_Host *);
 
@@ -197,7 +197,7 @@ static inline void set_resid_from_SCp(struct scsi_cmnd *cmd)
  * @reg2: Second 5380 register to poll
  * @bit2: Second bitmask to check
  * @val2: Second expected value
- * @wait: Time-out in jiffies
+ * @wait: Time-out in jiffies, 0 if sleeping is not allowed
  *
  * Polls the chip in a reasonably efficient manner waiting for an
  * event to occur. After a short quick poll we begin to yield the CPU
@@ -223,7 +223,7 @@ static int NCR5380_poll_politely2(struct NCR5380_hostdata *hostdata,
 		cpu_relax();
 	} while (n--);
 
-	if (irqs_disabled() || in_interrupt())
+	if (!wait)
 		return -ETIMEDOUT;
 
 	/* Repeatedly sleep for 1 ms until deadline */
@@ -486,7 +486,7 @@ static int NCR5380_maybe_reset_bus(struct Scsi_Host *instance)
 			break;
 		case 2:
 			shost_printk(KERN_ERR, instance, "bus busy, attempting abort\n");
-			do_abort(instance);
+			do_abort(instance, 1);
 			break;
 		case 4:
 			shost_printk(KERN_ERR, instance, "bus busy, attempting reset\n");
@@ -818,7 +818,7 @@ static void NCR5380_dma_complete(struct Scsi_Host *instance)
 			if (toPIO > 0) {
 				dsprintk(NDEBUG_DMA, instance,
 				         "Doing %d byte PIO to 0x%p\n", cnt, *data);
-				NCR5380_transfer_pio(instance, &p, &cnt, data);
+				NCR5380_transfer_pio(instance, &p, &cnt, data, 0);
 				*count -= toPIO - cnt;
 			}
 		}
@@ -1185,7 +1185,7 @@ static bool NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
 		goto out;
 	}
 	if (!hostdata->selecting) {
-		do_abort(instance);
+		do_abort(instance, 0);
 		return false;
 	}
 
@@ -1196,7 +1196,7 @@ static bool NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
 	len = 1;
 	data = tmp;
 	phase = PHASE_MSGOUT;
-	NCR5380_transfer_pio(instance, &phase, &len, &data);
+	NCR5380_transfer_pio(instance, &phase, &len, &data, 0);
 	if (len) {
 		NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
 		cmd->result = DID_ERROR << 16;
@@ -1234,7 +1234,8 @@ static bool NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
  *
  * Inputs : instance - instance of driver, *phase - pointer to
  * what phase is expected, *count - pointer to number of
- * bytes to transfer, **data - pointer to data pointer.
+ * bytes to transfer, **data - pointer to data pointer,
+ * can_sleep - 1 or 0 when sleeping is permitted or not, respectively.
  *
  * Returns : -1 when different phase is entered without transferring
  * maximum number of bytes, 0 if all bytes are transferred or exit
@@ -1253,7 +1254,7 @@ static bool NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
 
 static int NCR5380_transfer_pio(struct Scsi_Host *instance,
 				unsigned char *phase, int *count,
-				unsigned char **data)
+				unsigned char **data, unsigned int can_sleep)
 {
 	struct NCR5380_hostdata *hostdata = shost_priv(instance);
 	unsigned char p = *phase, tmp;
@@ -1274,7 +1275,8 @@ static int NCR5380_transfer_pio(struct Scsi_Host *instance,
 		 * valid
 		 */
 
-		if (NCR5380_poll_politely(hostdata, STATUS_REG, SR_REQ, SR_REQ, HZ) < 0)
+		if (NCR5380_poll_politely(hostdata, STATUS_REG, SR_REQ, SR_REQ,
+					  HZ * can_sleep) < 0)
 			break;
 
 		dsprintk(NDEBUG_HANDSHAKE, instance, "REQ asserted\n");
@@ -1320,7 +1322,7 @@ static int NCR5380_transfer_pio(struct Scsi_Host *instance,
 		}
 
 		if (NCR5380_poll_politely(hostdata,
-		                          STATUS_REG, SR_REQ, 0, 5 * HZ) < 0)
+		                          STATUS_REG, SR_REQ, 0, 5 * HZ * can_sleep) < 0)
 			break;
 
 		dsprintk(NDEBUG_HANDSHAKE, instance, "REQ negated, handshake complete\n");
@@ -1395,11 +1397,12 @@ static void do_reset(struct Scsi_Host *instance)
  * do_abort - abort the currently established nexus by going to
  * MESSAGE OUT phase and sending an ABORT message.
  * @instance: relevant scsi host instance
+ * @can_sleep: 1 or 0 when sleeping is permitted or not, respectively
  *
  * Returns 0 on success, negative error code on failure.
  */
 
-static int do_abort(struct Scsi_Host *instance)
+static int do_abort(struct Scsi_Host *instance, unsigned int can_sleep)
 {
 	struct NCR5380_hostdata *hostdata = shost_priv(instance);
 	unsigned char *msgptr, phase, tmp;
@@ -1419,7 +1422,8 @@ static int do_abort(struct Scsi_Host *instance)
 	 * the target sees, so we just handshake.
 	 */
 
-	rc = NCR5380_poll_politely(hostdata, STATUS_REG, SR_REQ, SR_REQ, 10 * HZ);
+	rc = NCR5380_poll_politely(hostdata, STATUS_REG, SR_REQ, SR_REQ,
+				   10 * HZ * can_sleep);
 	if (rc < 0)
 		goto out;
 
@@ -1430,7 +1434,8 @@ static int do_abort(struct Scsi_Host *instance)
 	if (tmp != PHASE_MSGOUT) {
 		NCR5380_write(INITIATOR_COMMAND_REG,
 		              ICR_BASE | ICR_ASSERT_ATN | ICR_ASSERT_ACK);
-		rc = NCR5380_poll_politely(hostdata, STATUS_REG, SR_REQ, 0, 3 * HZ);
+		rc = NCR5380_poll_politely(hostdata, STATUS_REG, SR_REQ, 0,
+					   3 * HZ * can_sleep);
 		if (rc < 0)
 			goto out;
 		NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
@@ -1440,7 +1445,7 @@ static int do_abort(struct Scsi_Host *instance)
 	msgptr = &tmp;
 	len = 1;
 	phase = PHASE_MSGOUT;
-	NCR5380_transfer_pio(instance, &phase, &len, &msgptr);
+	NCR5380_transfer_pio(instance, &phase, &len, &msgptr, can_sleep);
 	if (len)
 		rc = -ENXIO;
 
@@ -1619,12 +1624,12 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance,
 			 */
 
 			if (NCR5380_poll_politely(hostdata, BUS_AND_STATUS_REG,
-			                          BASR_DRQ, BASR_DRQ, HZ) < 0) {
+			                          BASR_DRQ, BASR_DRQ, 0) < 0) {
 				result = -1;
 				shost_printk(KERN_ERR, instance, "PDMA read: DRQ timeout\n");
 			}
 			if (NCR5380_poll_politely(hostdata, STATUS_REG,
-			                          SR_REQ, 0, HZ) < 0) {
+			                          SR_REQ, 0, 0) < 0) {
 				result = -1;
 				shost_printk(KERN_ERR, instance, "PDMA read: !REQ timeout\n");
 			}
@@ -1636,7 +1641,7 @@ static int NCR5380_transfer_dma(struct Scsi_Host *instance,
 			 */
 			if (NCR5380_poll_politely2(hostdata,
 			     BUS_AND_STATUS_REG, BASR_DRQ, BASR_DRQ,
-			     BUS_AND_STATUS_REG, BASR_PHASE_MATCH, 0, HZ) < 0) {
+			     BUS_AND_STATUS_REG, BASR_PHASE_MATCH, 0, 0) < 0) {
 				result = -1;
 				shost_printk(KERN_ERR, instance, "PDMA write: DRQ and phase timeout\n");
 			}
@@ -1733,7 +1738,7 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
 #if (NDEBUG & NDEBUG_NO_DATAOUT)
 				shost_printk(KERN_DEBUG, instance, "NDEBUG_NO_DATAOUT set, attempted DATAOUT aborted\n");
 				sink = 1;
-				do_abort(instance);
+				do_abort(instance, 0);
 				cmd->result = DID_ERROR << 16;
 				complete_cmd(instance, cmd);
 				hostdata->connected = NULL;
@@ -1789,7 +1794,8 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
 							   NCR5380_PIO_CHUNK_SIZE);
 					len = transfersize;
 					NCR5380_transfer_pio(instance, &phase, &len,
-					                     (unsigned char **)&cmd->SCp.ptr);
+					                     (unsigned char **)&cmd->SCp.ptr,
+							     0);
 					cmd->SCp.this_residual -= transfersize - len;
 				}
 #ifdef CONFIG_SUN3
@@ -1800,7 +1806,7 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
 			case PHASE_MSGIN:
 				len = 1;
 				data = &tmp;
-				NCR5380_transfer_pio(instance, &phase, &len, &data);
+				NCR5380_transfer_pio(instance, &phase, &len, &data, 0);
 				cmd->SCp.Message = tmp;
 
 				switch (tmp) {
@@ -1907,7 +1913,7 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
 					len = 2;
 					data = extended_msg + 1;
 					phase = PHASE_MSGIN;
-					NCR5380_transfer_pio(instance, &phase, &len, &data);
+					NCR5380_transfer_pio(instance, &phase, &len, &data, 1);
 					dsprintk(NDEBUG_EXTENDED, instance, "length %d, code 0x%02x\n",
 					         (int)extended_msg[1],
 					         (int)extended_msg[2]);
@@ -1920,7 +1926,7 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
 						data = extended_msg + 3;
 						phase = PHASE_MSGIN;
 
-						NCR5380_transfer_pio(instance, &phase, &len, &data);
+						NCR5380_transfer_pio(instance, &phase, &len, &data, 1);
 						dsprintk(NDEBUG_EXTENDED, instance, "message received, residual %d\n",
 						         len);
 
@@ -1967,7 +1973,7 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
 				len = 1;
 				data = &msgout;
 				hostdata->last_message = msgout;
-				NCR5380_transfer_pio(instance, &phase, &len, &data);
+				NCR5380_transfer_pio(instance, &phase, &len, &data, 0);
 				if (msgout == ABORT) {
 					hostdata->connected = NULL;
 					hostdata->busy[scmd_id(cmd)] &= ~(1 << cmd->device->lun);
@@ -1986,12 +1992,12 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
 				 * PSEUDO-DMA architecture we should probably
 				 * use the dma transfer function.
 				 */
-				NCR5380_transfer_pio(instance, &phase, &len, &data);
+				NCR5380_transfer_pio(instance, &phase, &len, &data, 0);
 				break;
 			case PHASE_STATIN:
 				len = 1;
 				data = &tmp;
-				NCR5380_transfer_pio(instance, &phase, &len, &data);
+				NCR5380_transfer_pio(instance, &phase, &len, &data, 0);
 				cmd->SCp.Status = tmp;
 				break;
 			default:
@@ -2050,7 +2056,7 @@ static void NCR5380_reselect(struct Scsi_Host *instance)
 
 	NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_BSY);
 	if (NCR5380_poll_politely(hostdata,
-	                          STATUS_REG, SR_SEL, 0, 2 * HZ) < 0) {
+	                          STATUS_REG, SR_SEL, 0, 0) < 0) {
 		shost_printk(KERN_ERR, instance, "reselect: !SEL timeout\n");
 		NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
 		return;
@@ -2062,12 +2068,12 @@ static void NCR5380_reselect(struct Scsi_Host *instance)
 	 */
 
 	if (NCR5380_poll_politely(hostdata,
-	                          STATUS_REG, SR_REQ, SR_REQ, 2 * HZ) < 0) {
+	                          STATUS_REG, SR_REQ, SR_REQ, 0) < 0) {
 		if ((NCR5380_read(STATUS_REG) & (SR_BSY | SR_SEL)) == 0)
 			/* BUS FREE phase */
 			return;
 		shost_printk(KERN_ERR, instance, "reselect: REQ timeout\n");
-		do_abort(instance);
+		do_abort(instance, 0);
 		return;
 	}
 
@@ -2083,10 +2089,10 @@ static void NCR5380_reselect(struct Scsi_Host *instance)
 		unsigned char *data = msg;
 		unsigned char phase = PHASE_MSGIN;
 
-		NCR5380_transfer_pio(instance, &phase, &len, &data);
+		NCR5380_transfer_pio(instance, &phase, &len, &data, 0);
 
 		if (len) {
-			do_abort(instance);
+			do_abort(instance, 0);
 			return;
 		}
 	}
@@ -2096,7 +2102,7 @@ static void NCR5380_reselect(struct Scsi_Host *instance)
 		shost_printk(KERN_ERR, instance, "expecting IDENTIFY message, got ");
 		spi_print_msg(msg);
 		printk("\n");
-		do_abort(instance);
+		do_abort(instance, 0);
 		return;
 	}
 	lun = msg[0] & 0x07;
@@ -2136,7 +2142,7 @@ static void NCR5380_reselect(struct Scsi_Host *instance)
 		 * Since we have an established nexus that we can't do anything
 		 * with, we must abort it.
 		 */
-		if (do_abort(instance) == 0)
+		if (do_abort(instance, 0) == 0)
 			hostdata->busy[target] &= ~(1 << lun);
 		return;
 	}
@@ -2283,7 +2289,7 @@ static int NCR5380_abort(struct scsi_cmnd *cmd)
 		dsprintk(NDEBUG_ABORT, instance, "abort: cmd %p is connected\n", cmd);
 		hostdata->connected = NULL;
 		hostdata->dma_len = 0;
-		if (do_abort(instance) < 0) {
+		if (do_abort(instance, 0) < 0) {
 			set_host_byte(cmd, DID_ERROR);
 			complete_cmd(instance, cmd);
 			result = FAILED;
diff --git a/drivers/scsi/NCR5380.h b/drivers/scsi/NCR5380.h
index 5935fd6d1a05..8a3b41932288 100644
--- a/drivers/scsi/NCR5380.h
+++ b/drivers/scsi/NCR5380.h
@@ -277,7 +277,8 @@ static const char *NCR5380_info(struct Scsi_Host *instance);
 static void NCR5380_reselect(struct Scsi_Host *instance);
 static bool NCR5380_select(struct Scsi_Host *, struct scsi_cmnd *);
 static int NCR5380_transfer_dma(struct Scsi_Host *instance, unsigned char *phase, int *count, unsigned char **data);
-static int NCR5380_transfer_pio(struct Scsi_Host *instance, unsigned char *phase, int *count, unsigned char **data);
+static int NCR5380_transfer_pio(struct Scsi_Host *instance, unsigned char *phase, int *count, unsigned char **data,
+				unsigned int can_sleep);
 static int NCR5380_poll_politely2(struct NCR5380_hostdata *,
                                   unsigned int, u8, u8,
                                   unsigned int, u8, u8, unsigned long);
diff --git a/drivers/scsi/g_NCR5380.c b/drivers/scsi/g_NCR5380.c
index 29e4cdcade72..2df2f38a9b12 100644
--- a/drivers/scsi/g_NCR5380.c
+++ b/drivers/scsi/g_NCR5380.c
@@ -529,14 +529,14 @@ static inline int generic_NCR5380_precv(struct NCR5380_hostdata *hostdata,
 		if (start == len - 128) {
 			/* Ignore End of DMA interrupt for the final buffer */
 			if (NCR5380_poll_politely(hostdata, hostdata->c400_ctl_status,
-			                          CSR_HOST_BUF_NOT_RDY, 0, HZ / 64) < 0)
+			                          CSR_HOST_BUF_NOT_RDY, 0, 0) < 0)
 				break;
 		} else {
 			if (NCR5380_poll_politely2(hostdata, hostdata->c400_ctl_status,
 			                           CSR_HOST_BUF_NOT_RDY, 0,
 			                           hostdata->c400_ctl_status,
 			                           CSR_GATED_53C80_IRQ,
-			                           CSR_GATED_53C80_IRQ, HZ / 64) < 0 ||
+			                           CSR_GATED_53C80_IRQ, 0) < 0 ||
 			    NCR5380_read(hostdata->c400_ctl_status) & CSR_HOST_BUF_NOT_RDY)
 				break;
 		}
@@ -565,7 +565,7 @@ static inline int generic_NCR5380_precv(struct NCR5380_hostdata *hostdata,
 	if (residual == 0 && NCR5380_poll_politely(hostdata, BUS_AND_STATUS_REG,
 	                                           BASR_END_DMA_TRANSFER,
 	                                           BASR_END_DMA_TRANSFER,
-	                                           HZ / 64) < 0)
+						   0) < 0)
 		scmd_printk(KERN_ERR, hostdata->connected, "%s: End of DMA timeout\n",
 		            __func__);
 
@@ -597,7 +597,7 @@ static inline int generic_NCR5380_psend(struct NCR5380_hostdata *hostdata,
 		                           CSR_HOST_BUF_NOT_RDY, 0,
 		                           hostdata->c400_ctl_status,
 		                           CSR_GATED_53C80_IRQ,
-		                           CSR_GATED_53C80_IRQ, HZ / 64) < 0 ||
+		                           CSR_GATED_53C80_IRQ, 0) < 0 ||
 		    NCR5380_read(hostdata->c400_ctl_status) & CSR_HOST_BUF_NOT_RDY) {
 			/* Both 128 B buffers are in use */
 			if (start >= 128)
@@ -644,13 +644,13 @@ static inline int generic_NCR5380_psend(struct NCR5380_hostdata *hostdata,
 	if (residual == 0) {
 		if (NCR5380_poll_politely(hostdata, TARGET_COMMAND_REG,
 		                          TCR_LAST_BYTE_SENT, TCR_LAST_BYTE_SENT,
-		                          HZ / 64) < 0)
+					  0) < 0)
 			scmd_printk(KERN_ERR, hostdata->connected,
 			            "%s: Last Byte Sent timeout\n", __func__);
 
 		if (NCR5380_poll_politely(hostdata, BUS_AND_STATUS_REG,
 		                          BASR_END_DMA_TRANSFER, BASR_END_DMA_TRANSFER,
-		                          HZ / 64) < 0)
+					  0) < 0)
 			scmd_printk(KERN_ERR, hostdata->connected, "%s: End of DMA timeout\n",
 			            __func__);
 	}
diff --git a/drivers/scsi/mac_scsi.c b/drivers/scsi/mac_scsi.c
index b5dde9d0d054..5c808fbc6ce2 100644
--- a/drivers/scsi/mac_scsi.c
+++ b/drivers/scsi/mac_scsi.c
@@ -285,7 +285,7 @@ static inline int macscsi_pread(struct NCR5380_hostdata *hostdata,
 
 	while (!NCR5380_poll_politely(hostdata, BUS_AND_STATUS_REG,
 	                              BASR_DRQ | BASR_PHASE_MATCH,
-	                              BASR_DRQ | BASR_PHASE_MATCH, HZ / 64)) {
+	                              BASR_DRQ | BASR_PHASE_MATCH, 0)) {
 		int bytes;
 
 		if (macintosh_config->ident == MAC_MODEL_IIFX)
@@ -304,7 +304,7 @@ static inline int macscsi_pread(struct NCR5380_hostdata *hostdata,
 
 		if (NCR5380_poll_politely2(hostdata, STATUS_REG, SR_REQ, SR_REQ,
 		                           BUS_AND_STATUS_REG, BASR_ACK,
-		                           BASR_ACK, HZ / 64) < 0)
+		                           BASR_ACK, 0) < 0)
 			scmd_printk(KERN_DEBUG, hostdata->connected,
 			            "%s: !REQ and !ACK\n", __func__);
 		if (!(NCR5380_read(BUS_AND_STATUS_REG) & BASR_PHASE_MATCH))
@@ -344,7 +344,7 @@ static inline int macscsi_pwrite(struct NCR5380_hostdata *hostdata,
 
 	while (!NCR5380_poll_politely(hostdata, BUS_AND_STATUS_REG,
 	                              BASR_DRQ | BASR_PHASE_MATCH,
-	                              BASR_DRQ | BASR_PHASE_MATCH, HZ / 64)) {
+	                              BASR_DRQ | BASR_PHASE_MATCH, 0)) {
 		int bytes;
 
 		if (macintosh_config->ident == MAC_MODEL_IIFX)
@@ -362,7 +362,7 @@ static inline int macscsi_pwrite(struct NCR5380_hostdata *hostdata,
 			if (NCR5380_poll_politely(hostdata, TARGET_COMMAND_REG,
 			                          TCR_LAST_BYTE_SENT,
 			                          TCR_LAST_BYTE_SENT,
-			                          HZ / 64) < 0) {
+			                          0) < 0) {
 				scmd_printk(KERN_ERR, hostdata->connected,
 				            "%s: Last Byte Sent timeout\n", __func__);
 				result = -1;
@@ -372,7 +372,7 @@ static inline int macscsi_pwrite(struct NCR5380_hostdata *hostdata,
 
 		if (NCR5380_poll_politely2(hostdata, STATUS_REG, SR_REQ, SR_REQ,
 		                           BUS_AND_STATUS_REG, BASR_ACK,
-		                           BASR_ACK, HZ / 64) < 0)
+		                           BASR_ACK, 0) < 0)
 			scmd_printk(KERN_DEBUG, hostdata->connected,
 			            "%s: !REQ and !ACK\n", __func__);
 		if (!(NCR5380_read(BUS_AND_STATUS_REG) & BASR_PHASE_MATCH))
-- 
2.29.2


^ permalink raw reply	[relevance 50%]

* [PATCH -tip v1 2/3] seqlock: Prefix internal seqcount_t-only macros with a "do_"
  2020-12-06 16:21 89% [PATCH -tip v1 0/3] seqlock: assorted cleanups Ahmed S. Darwish
  2020-12-06 16:21 91% ` [PATCH -tip v1 1/3] Documentation: seqlock: s/LOCKTYPE/LOCKNAME/g Ahmed S. Darwish
@ 2020-12-06 16:21 71% ` Ahmed S. Darwish
  2020-12-06 16:21 86% ` [PATCH -tip v1 3/3] seqlock: kernel-doc: Specify when preemption is automatically altered Ahmed S. Darwish
  2 siblings, 0 replies; 200+ results
From: Ahmed S. Darwish @ 2020-12-06 16:21 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Will Deacon
  Cc: Linus Torvalds, Thomas Gleixner, Paul E. McKenney,
	Steven Rostedt, Jonathan Corbet, Jason Gunthorpe, John Hubbard,
	LKML, Sebastian A. Siewior, Ahmed S. Darwish

When the seqcount_LOCKNAME_t group of data types were introduced, two
classes of seqlock.h sequence counter macros were added:

  - An external public API which can either take a plain seqcount_t or
    any of the seqcount_LOCKNAME_t variants.

  - An internal API which takes only a plain seqcount_t.

To distinguish between the two groups, the "*_seqcount_t_*" pattern was
used for the latter. This confused a number of mm/ call-site developers,
and Linus also commented that it was not a standard practice for marking
seqlock.h internal APIs.

Distinguish the latter group of macros by prefixing a "do_".

Link: https://lkml.kernel.org/r/CAHk-=wgB8nyOQufpn0o6a5BpJCJPnXvH+kRxApujhsgG+7qAwQ@mail.gmail.com
Link: https://lkml.kernel.org/r/CAHk-=wikhGExmprXgaW+MVXG1zsGpztBbVwOb23vetk41EtTBQ@mail.gmail.com
Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
---
 include/linux/seqlock.h | 66 ++++++++++++++++++++---------------------
 1 file changed, 33 insertions(+), 33 deletions(-)

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index d89134c74fba..235cbc65fd71 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -425,9 +425,9 @@ SEQCOUNT_LOCKNAME(ww_mutex,     struct ww_mutex, true,     &s->lock->base, ww_mu
  * Return: true if a read section retry is required, else false
  */
 #define __read_seqcount_retry(s, start)					\
-	__read_seqcount_t_retry(seqprop_ptr(s), start)
+	do___read_seqcount_retry(seqprop_ptr(s), start)
 
-static inline int __read_seqcount_t_retry(const seqcount_t *s, unsigned start)
+static inline int do___read_seqcount_retry(const seqcount_t *s, unsigned start)
 {
 	kcsan_atomic_next(0);
 	return unlikely(READ_ONCE(s->sequence) != start);
@@ -445,12 +445,12 @@ static inline int __read_seqcount_t_retry(const seqcount_t *s, unsigned start)
  * Return: true if a read section retry is required, else false
  */
 #define read_seqcount_retry(s, start)					\
-	read_seqcount_t_retry(seqprop_ptr(s), start)
+	do_read_seqcount_retry(seqprop_ptr(s), start)
 
-static inline int read_seqcount_t_retry(const seqcount_t *s, unsigned start)
+static inline int do_read_seqcount_retry(const seqcount_t *s, unsigned start)
 {
 	smp_rmb();
-	return __read_seqcount_t_retry(s, start);
+	return do___read_seqcount_retry(s, start);
 }
 
 /**
@@ -462,10 +462,10 @@ do {									\
 	if (seqprop_preemptible(s))					\
 		preempt_disable();					\
 									\
-	raw_write_seqcount_t_begin(seqprop_ptr(s));			\
+	do_raw_write_seqcount_begin(seqprop_ptr(s));			\
 } while (0)
 
-static inline void raw_write_seqcount_t_begin(seqcount_t *s)
+static inline void do_raw_write_seqcount_begin(seqcount_t *s)
 {
 	kcsan_nestable_atomic_begin();
 	s->sequence++;
@@ -478,13 +478,13 @@ static inline void raw_write_seqcount_t_begin(seqcount_t *s)
  */
 #define raw_write_seqcount_end(s)					\
 do {									\
-	raw_write_seqcount_t_end(seqprop_ptr(s));			\
+	do_raw_write_seqcount_end(seqprop_ptr(s));			\
 									\
 	if (seqprop_preemptible(s))					\
 		preempt_enable();					\
 } while (0)
 
-static inline void raw_write_seqcount_t_end(seqcount_t *s)
+static inline void do_raw_write_seqcount_end(seqcount_t *s)
 {
 	smp_wmb();
 	s->sequence++;
@@ -506,12 +506,12 @@ do {									\
 	if (seqprop_preemptible(s))					\
 		preempt_disable();					\
 									\
-	write_seqcount_t_begin_nested(seqprop_ptr(s), subclass);	\
+	do_write_seqcount_begin_nested(seqprop_ptr(s), subclass);	\
 } while (0)
 
-static inline void write_seqcount_t_begin_nested(seqcount_t *s, int subclass)
+static inline void do_write_seqcount_begin_nested(seqcount_t *s, int subclass)
 {
-	raw_write_seqcount_t_begin(s);
+	do_raw_write_seqcount_begin(s);
 	seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_);
 }
 
@@ -533,12 +533,12 @@ do {									\
 	if (seqprop_preemptible(s))					\
 		preempt_disable();					\
 									\
-	write_seqcount_t_begin(seqprop_ptr(s));				\
+	do_write_seqcount_begin(seqprop_ptr(s));			\
 } while (0)
 
-static inline void write_seqcount_t_begin(seqcount_t *s)
+static inline void do_write_seqcount_begin(seqcount_t *s)
 {
-	write_seqcount_t_begin_nested(s, 0);
+	do_write_seqcount_begin_nested(s, 0);
 }
 
 /**
@@ -549,16 +549,16 @@ static inline void write_seqcount_t_begin(seqcount_t *s)
  */
 #define write_seqcount_end(s)						\
 do {									\
-	write_seqcount_t_end(seqprop_ptr(s));				\
+	do_write_seqcount_end(seqprop_ptr(s));				\
 									\
 	if (seqprop_preemptible(s))					\
 		preempt_enable();					\
 } while (0)
 
-static inline void write_seqcount_t_end(seqcount_t *s)
+static inline void do_write_seqcount_end(seqcount_t *s)
 {
 	seqcount_release(&s->dep_map, _RET_IP_);
-	raw_write_seqcount_t_end(s);
+	do_raw_write_seqcount_end(s);
 }
 
 /**
@@ -603,9 +603,9 @@ static inline void write_seqcount_t_end(seqcount_t *s)
  *      }
  */
 #define raw_write_seqcount_barrier(s)					\
-	raw_write_seqcount_t_barrier(seqprop_ptr(s))
+	do_raw_write_seqcount_barrier(seqprop_ptr(s))
 
-static inline void raw_write_seqcount_t_barrier(seqcount_t *s)
+static inline void do_raw_write_seqcount_barrier(seqcount_t *s)
 {
 	kcsan_nestable_atomic_begin();
 	s->sequence++;
@@ -623,9 +623,9 @@ static inline void raw_write_seqcount_t_barrier(seqcount_t *s)
  * will complete successfully and see data older than this.
  */
 #define write_seqcount_invalidate(s)					\
-	write_seqcount_t_invalidate(seqprop_ptr(s))
+	do_write_seqcount_invalidate(seqprop_ptr(s))
 
-static inline void write_seqcount_t_invalidate(seqcount_t *s)
+static inline void do_write_seqcount_invalidate(seqcount_t *s)
 {
 	smp_wmb();
 	kcsan_nestable_atomic_begin();
@@ -865,9 +865,9 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
 }
 
 /*
- * For all seqlock_t write side functions, use write_seqcount_*t*_begin()
- * instead of the generic write_seqcount_begin(). This way, no redundant
- * lockdep_assert_held() checks are added.
+ * For all seqlock_t write side functions, use the internal
+ * do_write_seqcount_begin() instead of the generic write_seqcount_begin().
+ * This way, no redundant lockdep_assert_held() checks are added.
  */
 
 /**
@@ -886,7 +886,7 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
 static inline void write_seqlock(seqlock_t *sl)
 {
 	spin_lock(&sl->lock);
-	write_seqcount_t_begin(&sl->seqcount.seqcount);
+	do_write_seqcount_begin(&sl->seqcount.seqcount);
 }
 
 /**
@@ -898,7 +898,7 @@ static inline void write_seqlock(seqlock_t *sl)
  */
 static inline void write_sequnlock(seqlock_t *sl)
 {
-	write_seqcount_t_end(&sl->seqcount.seqcount);
+	do_write_seqcount_end(&sl->seqcount.seqcount);
 	spin_unlock(&sl->lock);
 }
 
@@ -912,7 +912,7 @@ static inline void write_sequnlock(seqlock_t *sl)
 static inline void write_seqlock_bh(seqlock_t *sl)
 {
 	spin_lock_bh(&sl->lock);
-	write_seqcount_t_begin(&sl->seqcount.seqcount);
+	do_write_seqcount_begin(&sl->seqcount.seqcount);
 }
 
 /**
@@ -925,7 +925,7 @@ static inline void write_seqlock_bh(seqlock_t *sl)
  */
 static inline void write_sequnlock_bh(seqlock_t *sl)
 {
-	write_seqcount_t_end(&sl->seqcount.seqcount);
+	do_write_seqcount_end(&sl->seqcount.seqcount);
 	spin_unlock_bh(&sl->lock);
 }
 
@@ -939,7 +939,7 @@ static inline void write_sequnlock_bh(seqlock_t *sl)
 static inline void write_seqlock_irq(seqlock_t *sl)
 {
 	spin_lock_irq(&sl->lock);
-	write_seqcount_t_begin(&sl->seqcount.seqcount);
+	do_write_seqcount_begin(&sl->seqcount.seqcount);
 }
 
 /**
@@ -951,7 +951,7 @@ static inline void write_seqlock_irq(seqlock_t *sl)
  */
 static inline void write_sequnlock_irq(seqlock_t *sl)
 {
-	write_seqcount_t_end(&sl->seqcount.seqcount);
+	do_write_seqcount_end(&sl->seqcount.seqcount);
 	spin_unlock_irq(&sl->lock);
 }
 
@@ -960,7 +960,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl)
 	unsigned long flags;
 
 	spin_lock_irqsave(&sl->lock, flags);
-	write_seqcount_t_begin(&sl->seqcount.seqcount);
+	do_write_seqcount_begin(&sl->seqcount.seqcount);
 	return flags;
 }
 
@@ -989,7 +989,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl)
 static inline void
 write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags)
 {
-	write_seqcount_t_end(&sl->seqcount.seqcount);
+	do_write_seqcount_end(&sl->seqcount.seqcount);
 	spin_unlock_irqrestore(&sl->lock, flags);
 }
 
-- 
2.29.2


^ permalink raw reply	[relevance 71%]

* [PATCH -tip v1 3/3] seqlock: kernel-doc: Specify when preemption is automatically altered
  2020-12-06 16:21 89% [PATCH -tip v1 0/3] seqlock: assorted cleanups Ahmed S. Darwish
  2020-12-06 16:21 91% ` [PATCH -tip v1 1/3] Documentation: seqlock: s/LOCKTYPE/LOCKNAME/g Ahmed S. Darwish
  2020-12-06 16:21 71% ` [PATCH -tip v1 2/3] seqlock: Prefix internal seqcount_t-only macros with a "do_" Ahmed S. Darwish
@ 2020-12-06 16:21 86% ` Ahmed S. Darwish
    2 siblings, 1 reply; 200+ results
From: Ahmed S. Darwish @ 2020-12-06 16:21 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Will Deacon
  Cc: Linus Torvalds, Thomas Gleixner, Paul E. McKenney,
	Steven Rostedt, Jonathan Corbet, Jason Gunthorpe, John Hubbard,
	LKML, Sebastian A. Siewior, Ahmed S. Darwish

The kernel-doc annotations for sequence counters write side functions
are incomplete: they do not specify when preemption is automatically
disabled and re-enabled.

This has confused a number of call-site developers. Fix it.

Link: https://lkml.kernel.org/r/20201030235121.GQ2620339@nvidia.com
Link: https://lkml.kernel.org/r/CAHk-=wikhGExmprXgaW+MVXG1zsGpztBbVwOb23vetk41EtTBQ@mail.gmail.com
Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Cc: Jonathan Corbet <corbet@lwn.net>
---
 include/linux/seqlock.h | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 235cbc65fd71..2f7bb92b4c9e 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -456,6 +456,8 @@ static inline int do_read_seqcount_retry(const seqcount_t *s, unsigned start)
 /**
  * raw_write_seqcount_begin() - start a seqcount_t write section w/o lockdep
  * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
+ *
+ * Context: check write_seqcount_begin()
  */
 #define raw_write_seqcount_begin(s)					\
 do {									\
@@ -475,6 +477,8 @@ static inline void do_raw_write_seqcount_begin(seqcount_t *s)
 /**
  * raw_write_seqcount_end() - end a seqcount_t write section w/o lockdep
  * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
+ *
+ * Context: check write_seqcount_end()
  */
 #define raw_write_seqcount_end(s)					\
 do {									\
@@ -498,6 +502,7 @@ static inline void do_raw_write_seqcount_end(seqcount_t *s)
  * @subclass: lockdep nesting level
  *
  * See Documentation/locking/lockdep-design.rst
+ * Context: check write_seqcount_begin()
  */
 #define write_seqcount_begin_nested(s, subclass)			\
 do {									\
@@ -519,11 +524,10 @@ static inline void do_write_seqcount_begin_nested(seqcount_t *s, int subclass)
  * write_seqcount_begin() - start a seqcount_t write side critical section
  * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
  *
- * write_seqcount_begin opens a write side critical section of the given
- * seqcount_t.
- *
- * Context: seqcount_t write side critical sections must be serialized and
- * non-preemptible. If readers can be invoked from hardirq or softirq
+ * Context: sequence counter write side sections must be serialized and
+ * non-preemptible. Preemption will be automatically disabled if and
+ * only if the seqcount write serialization lock is associated, and
+ * preemptible.  If readers can be invoked from hardirq or softirq
  * context, interrupts or bottom halves must be respectively disabled.
  */
 #define write_seqcount_begin(s)						\
@@ -545,7 +549,8 @@ static inline void do_write_seqcount_begin(seqcount_t *s)
  * write_seqcount_end() - end a seqcount_t write side critical section
  * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
  *
- * The write section must've been opened with write_seqcount_begin().
+ * Context: Preemption will be automatically re-enabled if and only if
+ * the seqcount write serialization lock is associated, and preemptible.
  */
 #define write_seqcount_end(s)						\
 do {									\
-- 
2.29.2


^ permalink raw reply	[relevance 86%]

* [PATCH -tip v1 0/3] seqlock: assorted cleanups
@ 2020-12-06 16:21 89% Ahmed S. Darwish
  2020-12-06 16:21 91% ` [PATCH -tip v1 1/3] Documentation: seqlock: s/LOCKTYPE/LOCKNAME/g Ahmed S. Darwish
                   ` (2 more replies)
  0 siblings, 3 replies; 200+ results
From: Ahmed S. Darwish @ 2020-12-06 16:21 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Will Deacon
  Cc: Linus Torvalds, Thomas Gleixner, Paul E. McKenney,
	Steven Rostedt, Jonathan Corbet, Jason Gunthorpe, John Hubbard,
	LKML, Sebastian A. Siewior, Ahmed S. Darwish

Hi,

When the seqcount_LOCKNAME_t group of data types were introduced, two
classes of seqlock.h sequence counter macros were added:

  - An external public API which can either take a plain seqcount_t or
    any of the seqcount_LOCKNAME_t variants.

  - An internal API which takes only a plain seqcount_t.

To distinguish between the two groups, the "*_seqcount_t_*" pattern was
used for the latter. This confused a number of mm/ call-site developers,
and Linus also commented that this was not the standard practice for
marking kernel internal APIs. [1]

Distinguish the latter group of macros by prefixing a "do_".
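
For instance, taking the internal read-retry helper (a before/after
sketch, not the complete seqlock.h hunk):

  /* old: internal API marked with the "*_seqcount_t_*" pattern */
  static inline int __read_seqcount_t_retry(const seqcount_t *s, unsigned start);

  /* new: internal API marked with a "do_" prefix */
  static inline int do___read_seqcount_retry(const seqcount_t *s, unsigned start);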

A number of call-site developers also complained that the automatic
preemption disable/enable for the write side macros was not obvious, or
documented. [2] Linus also suggested adding a few comments explaining that
behavior. [3] Fix it by completing the seqcount write side kernel-doc
annotations.
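
The centerpiece of the added annotations, quoting patch 3/3:

   * Context: sequence counter write side sections must be serialized and
   * non-preemptible. Preemption will be automatically disabled if and
   * only if the seqcount write serialization lock is associated, and
   * preemptible.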

Finally, fix a minor naming inconsistency w.r.t. seqlock.h
vs. Documentation/locking/seqlock.rst.

This series does not change the output "allyesconfig" kernel binary:

    text         data            bss      ...        filename
  247616963    289662125       81498728   ...  ../build-x86-64/vmlinux.old
  247616963    289662125       81498728   ...  ../build-x86-64/vmlinux
  145054028    78270273        18435468   ...  ../build-arm/vmlinux.old
  145054028    78270273        18435468   ...  ../build-arm/vmlinux

Note: based on -tip locking/core, instead of the latest -rc, due to -tip
      ab440b2c604b ("seqlock: Rename __seqprop() users").

References:

[1] https://lkml.kernel.org/r/CAHk-=wgB8nyOQufpn0o6a5BpJCJPnXvH+kRxApujhsgG+7qAwQ@mail.gmail.com
[2] https://lkml.kernel.org/r/20201030235121.GQ2620339@nvidia.com
[3] https://lkml.kernel.org/r/CAHk-=wikhGExmprXgaW+MVXG1zsGpztBbVwOb23vetk41EtTBQ@mail.gmail.com

8<--------------

Ahmed S. Darwish (3):
  Documentation: seqlock: s/LOCKTYPE/LOCKNAME/g
  seqlock: Prefix internal seqcount_t-only macros with a "do_"
  seqlock: kernel-doc: Specify when preemption is automatically altered

 Documentation/locking/seqlock.rst | 21 ++++----
 include/linux/seqlock.h           | 83 ++++++++++++++++---------------
 2 files changed, 54 insertions(+), 50 deletions(-)

base-commit: 97d62caa32d6d79dadae3f8d19af5c92ea9a589a
--
2.29.2

^ permalink raw reply	[relevance 89%]

* [PATCH -tip v1 1/3] Documentation: seqlock: s/LOCKTYPE/LOCKNAME/g
  2020-12-06 16:21 89% [PATCH -tip v1 0/3] seqlock: assorted cleanups Ahmed S. Darwish
@ 2020-12-06 16:21 91% ` Ahmed S. Darwish
  2020-12-09 18:38 79%   ` [tip: locking/core] " tip-bot2 for Ahmed S. Darwish
  2020-12-06 16:21 71% ` [PATCH -tip v1 2/3] seqlock: Prefix internal seqcount_t-only macros with a "do_" Ahmed S. Darwish
  2020-12-06 16:21 86% ` [PATCH -tip v1 3/3] seqlock: kernel-doc: Specify when preemption is automatically altered Ahmed S. Darwish
  2 siblings, 1 reply; 200+ results
From: Ahmed S. Darwish @ 2020-12-06 16:21 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Will Deacon
  Cc: Linus Torvalds, Thomas Gleixner, Paul E. McKenney,
	Steven Rostedt, Jonathan Corbet, Jason Gunthorpe, John Hubbard,
	LKML, Sebastian A. Siewior, Ahmed S. Darwish

Sequence counters with an associated write serialization lock are called
seqcount_LOCKNAME_t. Fix the documentation accordingly.

While at it, remove a paragraph that inappropriately discussed a
seqlock.h implementation detail.

Fixes: 6dd699b13d53 ("seqlock: seqcount_LOCKNAME_t: Standardize naming convention")
Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Cc: stable@vger.kernel.org
Cc: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/locking/seqlock.rst | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/Documentation/locking/seqlock.rst b/Documentation/locking/seqlock.rst
index a334b584f2b3..64405e5da63e 100644
--- a/Documentation/locking/seqlock.rst
+++ b/Documentation/locking/seqlock.rst
@@ -89,7 +89,7 @@ Read path::
 
 .. _seqcount_locktype_t:
 
-Sequence counters with associated locks (``seqcount_LOCKTYPE_t``)
+Sequence counters with associated locks (``seqcount_LOCKNAME_t``)
 -----------------------------------------------------------------
 
 As discussed at :ref:`seqcount_t`, sequence count write side critical
@@ -115,27 +115,26 @@ The following sequence counters with associated locks are defined:
   - ``seqcount_mutex_t``
   - ``seqcount_ww_mutex_t``
 
-The plain seqcount read and write APIs branch out to the specific
-seqcount_LOCKTYPE_t implementation at compile-time. This avoids kernel
-API explosion per each new seqcount LOCKTYPE.
+The sequence counter read and write APIs can take either a plain
+seqcount_t or any of the seqcount_LOCKNAME_t variants above.
 
-Initialization (replace "LOCKTYPE" with one of the supported locks)::
+Initialization (replace "LOCKNAME" with one of the supported locks)::
 
 	/* dynamic */
-	seqcount_LOCKTYPE_t foo_seqcount;
-	seqcount_LOCKTYPE_init(&foo_seqcount, &lock);
+	seqcount_LOCKNAME_t foo_seqcount;
+	seqcount_LOCKNAME_init(&foo_seqcount, &lock);
 
 	/* static */
-	static seqcount_LOCKTYPE_t foo_seqcount =
-		SEQCNT_LOCKTYPE_ZERO(foo_seqcount, &lock);
+	static seqcount_LOCKNAME_t foo_seqcount =
+		SEQCNT_LOCKNAME_ZERO(foo_seqcount, &lock);
 
 	/* C99 struct init */
 	struct {
-		.seq   = SEQCNT_LOCKTYPE_ZERO(foo.seq, &lock),
+		.seq   = SEQCNT_LOCKNAME_ZERO(foo.seq, &lock),
 	} foo;
 
 Write path: same as in :ref:`seqcount_t`, while running from a context
-with the associated LOCKTYPE lock acquired.
+with the associated write serialization lock acquired.
 
 Read path: same as in :ref:`seqcount_t`.
 
-- 
2.29.2


^ permalink raw reply	[relevance 91%]

* Re: [PATCH -tip v1 3/3] seqlock: kernel-doc: Specify when preemption is automatically altered
  @ 2020-12-08 14:31 85%     ` Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: Ahmed S. Darwish @ 2020-12-08 14:31 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Peter Zijlstra, Ingo Molnar, Will Deacon, Linus Torvalds,
	Thomas Gleixner, Paul E. McKenney, Steven Rostedt,
	Jonathan Corbet, John Hubbard, LKML, Sebastian A. Siewior

Hi Jason,

On Mon, Dec 07, 2020 at 04:43:16PM -0400, Jason Gunthorpe wrote:
...
>
> The thing that was confusing is if it was appropriate to use a
> seqcount in case where write side preemption was not disabled - which
> is safe only if the read side doesn't spin.
>

No, that's not correct.

What was confusing was that *everyone* in that mm/ thread, including
yourself, thought that write_seqcount_begin() would automatically disable
preemption for plain seqcount_t:

  https://lkml.kernel.org/r/20201030235121.GQ2620339@nvidia.com

Quoting Peter Xu: "My understanding is that we used
raw_write_seqcount_t_begin() because we're with spin lock so assuming we
disabled preemption already."

Quoting you: "write_seqcount_begin - Enforces preemption off".

And that's why this patch explicitly adds to the kernel-doc: "Preemption
will be automatically disabled if and only if the seqcount write
serialization lock is associated, and preemptible."
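
To make that "if and only if" concrete, here is a minimal sketch with a
mutex-associated seqcount, i.e. a write serialization lock that is
preemptible:

  static DEFINE_MUTEX(lock);
  static seqcount_mutex_t seq = SEQCNT_MUTEX_ZERO(seq, &lock);

  mutex_lock(&lock);           /* writer serialization; lock is preemptible */
  write_seqcount_begin(&seq);  /* ==> preemption automatically disabled */
  /* ... update the protected data ... */
  write_seqcount_end(&seq);    /* ==> preemption automatically re-enabled */
  mutex_unlock(&lock);

For a plain seqcount_t, nothing is disabled automatically: the write
side section is expected to be serialized and non-preemptible already.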

Honestly, I should've added that statement anyway in my original
"sequence counters with associated locks" submission. I take the blame
for the resulting confusion, and I'm sorry...

~~~~~~~~~~~~~~~~~~~~

Now regarding the other point, where the seqcount_t write side does not
need to disable preemption if the reader does not spin. We've already
discussed that (at length) last time.
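
For reference, the shape of that special case (hypothetical names; the
point is that the reader falls back to a lock instead of spinning):

  seq = raw_read_seqcount(&vma_seqcount);
  if ((seq & 1) || !lockless_lookup() ||
      read_seqcount_retry(&vma_seqcount, seq)) {
          /* Reader does not spin: take the heavyweight lock instead */
          mmap_read_lock(mm);
          locked_lookup();
          mmap_read_unlock(mm);
  }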

There comes a point where you decide what level of documentation to add,
and what level to skip.

Because in the end, you don't want to confuse "Joe, the generic driver
developer" with too much details that's not relevant to their task at
hand.

You want to keep the general case simple, but the special case do-able.
And you also want to encourage people to use the standard write side
critical section entry and exit functions, as much as possible.
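
That is, for the general case, just the standard pattern below (a
sketch; plain seqcount_t, writers serialized by a spinlock which, on
!PREEMPT_RT, also disables preemption):

  spin_lock(&lock);
  write_seqcount_begin(&seq);
  /* ... update the protected data ... */
  write_seqcount_end(&seq);
  spin_unlock(&lock);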

Because, oh, look at that:

  [PATCH v2 0/6] seqlock: seqcount_t call sites bugfixes
  https://lkml.kernel.org/r/20200603144949.1122421-1-a.darwish@linutronix.de

Sequence counters are already hard to get right. And driver developers
get things wrong, a lot -- you don't want to confuse them even further.

Developers who're advanced enough to know the difference don't need
the kernel-doc anyway. And that's why I've kindly asked to add the
following to your mm/ patch (which you did, thanks):

	/*
	 * Disabling preemption is not needed for the write side, as
	 * the read side does not spin, but goes to mmap_lock.
	 * ...
	 */

And IMHO, that should be enough. Developers of such special cases are
already assumed to know what they're doing.

Thanks a lot,

--
Ahmed S. Darwish
Linutronix GmbH

^ permalink raw reply	[relevance 85%]

* [tip: locking/core] Documentation: seqlock: s/LOCKTYPE/LOCKNAME/g
  2020-12-06 16:21 91% ` [PATCH -tip v1 1/3] Documentation: seqlock: s/LOCKTYPE/LOCKNAME/g Ahmed S. Darwish
@ 2020-12-09 18:38 79%   ` tip-bot2 for Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: tip-bot2 for Ahmed S. Darwish @ 2020-12-09 18:38 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Ahmed S. Darwish, Peter Zijlstra (Intel), stable, x86, linux-kernel

The following commit has been merged into the locking/core branch of tip:

Commit-ID:     cf48647243cc28d15280600292db5777592606c5
Gitweb:        https://git.kernel.org/tip/cf48647243cc28d15280600292db5777592606c5
Author:        Ahmed S. Darwish <a.darwish@linutronix.de>
AuthorDate:    Sun, 06 Dec 2020 17:21:41 +01:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Wed, 09 Dec 2020 17:08:49 +01:00

Documentation: seqlock: s/LOCKTYPE/LOCKNAME/g

Sequence counters with an associated write serialization lock are called
seqcount_LOCKNAME_t. Fix the documentation accordingly.

While at it, remove a paragraph that inappropriately discussed a
seqlock.h implementation detail.

Fixes: 6dd699b13d53 ("seqlock: seqcount_LOCKNAME_t: Standardize naming convention")
Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20201206162143.14387-2-a.darwish@linutronix.de
---
 Documentation/locking/seqlock.rst | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/Documentation/locking/seqlock.rst b/Documentation/locking/seqlock.rst
index a334b58..64405e5 100644
--- a/Documentation/locking/seqlock.rst
+++ b/Documentation/locking/seqlock.rst
@@ -89,7 +89,7 @@ Read path::
 
 .. _seqcount_locktype_t:
 
-Sequence counters with associated locks (``seqcount_LOCKTYPE_t``)
+Sequence counters with associated locks (``seqcount_LOCKNAME_t``)
 -----------------------------------------------------------------
 
 As discussed at :ref:`seqcount_t`, sequence count write side critical
@@ -115,27 +115,26 @@ The following sequence counters with associated locks are defined:
   - ``seqcount_mutex_t``
   - ``seqcount_ww_mutex_t``
 
-The plain seqcount read and write APIs branch out to the specific
-seqcount_LOCKTYPE_t implementation at compile-time. This avoids kernel
-API explosion per each new seqcount LOCKTYPE.
+The sequence counter read and write APIs can take either a plain
+seqcount_t or any of the seqcount_LOCKNAME_t variants above.
 
-Initialization (replace "LOCKTYPE" with one of the supported locks)::
+Initialization (replace "LOCKNAME" with one of the supported locks)::
 
 	/* dynamic */
-	seqcount_LOCKTYPE_t foo_seqcount;
-	seqcount_LOCKTYPE_init(&foo_seqcount, &lock);
+	seqcount_LOCKNAME_t foo_seqcount;
+	seqcount_LOCKNAME_init(&foo_seqcount, &lock);
 
 	/* static */
-	static seqcount_LOCKTYPE_t foo_seqcount =
-		SEQCNT_LOCKTYPE_ZERO(foo_seqcount, &lock);
+	static seqcount_LOCKNAME_t foo_seqcount =
+		SEQCNT_LOCKNAME_ZERO(foo_seqcount, &lock);
 
 	/* C99 struct init */
 	struct {
-		.seq   = SEQCNT_LOCKTYPE_ZERO(foo.seq, &lock),
+		.seq   = SEQCNT_LOCKNAME_ZERO(foo.seq, &lock),
 	} foo;
 
 Write path: same as in :ref:`seqcount_t`, while running from a context
-with the associated LOCKTYPE lock acquired.
+with the associated write serialization lock acquired.
 
 Read path: same as in :ref:`seqcount_t`.
 

^ permalink raw reply	[relevance 79%]

* [tip: locking/core] seqlock: Prefix internal seqcount_t-only macros with a "do_"
    2020-11-04 19:54 99%                           ` Ahmed S. Darwish
@ 2020-12-09 18:38 60%                           ` tip-bot2 for Ahmed S. Darwish
  2020-12-09 18:38 75%                           ` [tip: locking/core] seqlock: kernel-doc: Specify when preemption is automatically altered tip-bot2 for Ahmed S. Darwish
  2 siblings, 0 replies; 200+ results
From: tip-bot2 for Ahmed S. Darwish @ 2020-12-09 18:38 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Ahmed S. Darwish, Peter Zijlstra (Intel), x86, linux-kernel

The following commit has been merged into the locking/core branch of tip:

Commit-ID:     66bcfcdf89d00f2409f4b5da0f8c20c08318dc72
Gitweb:        https://git.kernel.org/tip/66bcfcdf89d00f2409f4b5da0f8c20c08318dc72
Author:        Ahmed S. Darwish <a.darwish@linutronix.de>
AuthorDate:    Sun, 06 Dec 2020 17:21:42 +01:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Wed, 09 Dec 2020 17:08:49 +01:00

seqlock: Prefix internal seqcount_t-only macros with a "do_"

When the seqcount_LOCKNAME_t group of data types were introduced, two
classes of seqlock.h sequence counter macros were added:

  - An external public API which can either take a plain seqcount_t or
    any of the seqcount_LOCKNAME_t variants.

  - An internal API which takes only a plain seqcount_t.

To distinguish between the two groups, the "*_seqcount_t_*" pattern was
used for the latter. This confused a number of mm/ call-site developers,
and Linus also commented that it was not a standard practice for marking
seqlock.h internal APIs.

Distinguish the latter group of macros by prefixing a "do_".

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/CAHk-=wikhGExmprXgaW+MVXG1zsGpztBbVwOb23vetk41EtTBQ@mail.gmail.com
---
 include/linux/seqlock.h | 66 ++++++++++++++++++++--------------------
 1 file changed, 33 insertions(+), 33 deletions(-)

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index d89134c..235cbc6 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -425,9 +425,9 @@ SEQCOUNT_LOCKNAME(ww_mutex,     struct ww_mutex, true,     &s->lock->base, ww_mu
  * Return: true if a read section retry is required, else false
  */
 #define __read_seqcount_retry(s, start)					\
-	__read_seqcount_t_retry(seqprop_ptr(s), start)
+	do___read_seqcount_retry(seqprop_ptr(s), start)
 
-static inline int __read_seqcount_t_retry(const seqcount_t *s, unsigned start)
+static inline int do___read_seqcount_retry(const seqcount_t *s, unsigned start)
 {
 	kcsan_atomic_next(0);
 	return unlikely(READ_ONCE(s->sequence) != start);
@@ -445,12 +445,12 @@ static inline int __read_seqcount_t_retry(const seqcount_t *s, unsigned start)
  * Return: true if a read section retry is required, else false
  */
 #define read_seqcount_retry(s, start)					\
-	read_seqcount_t_retry(seqprop_ptr(s), start)
+	do_read_seqcount_retry(seqprop_ptr(s), start)
 
-static inline int read_seqcount_t_retry(const seqcount_t *s, unsigned start)
+static inline int do_read_seqcount_retry(const seqcount_t *s, unsigned start)
 {
 	smp_rmb();
-	return __read_seqcount_t_retry(s, start);
+	return do___read_seqcount_retry(s, start);
 }
 
 /**
@@ -462,10 +462,10 @@ do {									\
 	if (seqprop_preemptible(s))					\
 		preempt_disable();					\
 									\
-	raw_write_seqcount_t_begin(seqprop_ptr(s));			\
+	do_raw_write_seqcount_begin(seqprop_ptr(s));			\
 } while (0)
 
-static inline void raw_write_seqcount_t_begin(seqcount_t *s)
+static inline void do_raw_write_seqcount_begin(seqcount_t *s)
 {
 	kcsan_nestable_atomic_begin();
 	s->sequence++;
@@ -478,13 +478,13 @@ static inline void raw_write_seqcount_t_begin(seqcount_t *s)
  */
 #define raw_write_seqcount_end(s)					\
 do {									\
-	raw_write_seqcount_t_end(seqprop_ptr(s));			\
+	do_raw_write_seqcount_end(seqprop_ptr(s));			\
 									\
 	if (seqprop_preemptible(s))					\
 		preempt_enable();					\
 } while (0)
 
-static inline void raw_write_seqcount_t_end(seqcount_t *s)
+static inline void do_raw_write_seqcount_end(seqcount_t *s)
 {
 	smp_wmb();
 	s->sequence++;
@@ -506,12 +506,12 @@ do {									\
 	if (seqprop_preemptible(s))					\
 		preempt_disable();					\
 									\
-	write_seqcount_t_begin_nested(seqprop_ptr(s), subclass);	\
+	do_write_seqcount_begin_nested(seqprop_ptr(s), subclass);	\
 } while (0)
 
-static inline void write_seqcount_t_begin_nested(seqcount_t *s, int subclass)
+static inline void do_write_seqcount_begin_nested(seqcount_t *s, int subclass)
 {
-	raw_write_seqcount_t_begin(s);
+	do_raw_write_seqcount_begin(s);
 	seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_);
 }
 
@@ -533,12 +533,12 @@ do {									\
 	if (seqprop_preemptible(s))					\
 		preempt_disable();					\
 									\
-	write_seqcount_t_begin(seqprop_ptr(s));				\
+	do_write_seqcount_begin(seqprop_ptr(s));			\
 } while (0)
 
-static inline void write_seqcount_t_begin(seqcount_t *s)
+static inline void do_write_seqcount_begin(seqcount_t *s)
 {
-	write_seqcount_t_begin_nested(s, 0);
+	do_write_seqcount_begin_nested(s, 0);
 }
 
 /**
@@ -549,16 +549,16 @@ static inline void write_seqcount_t_begin(seqcount_t *s)
  */
 #define write_seqcount_end(s)						\
 do {									\
-	write_seqcount_t_end(seqprop_ptr(s));				\
+	do_write_seqcount_end(seqprop_ptr(s));				\
 									\
 	if (seqprop_preemptible(s))					\
 		preempt_enable();					\
 } while (0)
 
-static inline void write_seqcount_t_end(seqcount_t *s)
+static inline void do_write_seqcount_end(seqcount_t *s)
 {
 	seqcount_release(&s->dep_map, _RET_IP_);
-	raw_write_seqcount_t_end(s);
+	do_raw_write_seqcount_end(s);
 }
 
 /**
@@ -603,9 +603,9 @@ static inline void write_seqcount_t_end(seqcount_t *s)
  *      }
  */
 #define raw_write_seqcount_barrier(s)					\
-	raw_write_seqcount_t_barrier(seqprop_ptr(s))
+	do_raw_write_seqcount_barrier(seqprop_ptr(s))
 
-static inline void raw_write_seqcount_t_barrier(seqcount_t *s)
+static inline void do_raw_write_seqcount_barrier(seqcount_t *s)
 {
 	kcsan_nestable_atomic_begin();
 	s->sequence++;
@@ -623,9 +623,9 @@ static inline void raw_write_seqcount_t_barrier(seqcount_t *s)
  * will complete successfully and see data older than this.
  */
 #define write_seqcount_invalidate(s)					\
-	write_seqcount_t_invalidate(seqprop_ptr(s))
+	do_write_seqcount_invalidate(seqprop_ptr(s))
 
-static inline void write_seqcount_t_invalidate(seqcount_t *s)
+static inline void do_write_seqcount_invalidate(seqcount_t *s)
 {
 	smp_wmb();
 	kcsan_nestable_atomic_begin();
@@ -865,9 +865,9 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
 }
 
 /*
- * For all seqlock_t write side functions, use write_seqcount_*t*_begin()
- * instead of the generic write_seqcount_begin(). This way, no redundant
- * lockdep_assert_held() checks are added.
+ * For all seqlock_t write side functions, use the internal
+ * do_write_seqcount_begin() instead of the generic write_seqcount_begin().
+ * This way, no redundant lockdep_assert_held() checks are added.
  */
 
 /**
@@ -886,7 +886,7 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
 static inline void write_seqlock(seqlock_t *sl)
 {
 	spin_lock(&sl->lock);
-	write_seqcount_t_begin(&sl->seqcount.seqcount);
+	do_write_seqcount_begin(&sl->seqcount.seqcount);
 }
 
 /**
@@ -898,7 +898,7 @@ static inline void write_seqlock(seqlock_t *sl)
  */
 static inline void write_sequnlock(seqlock_t *sl)
 {
-	write_seqcount_t_end(&sl->seqcount.seqcount);
+	do_write_seqcount_end(&sl->seqcount.seqcount);
 	spin_unlock(&sl->lock);
 }
 
@@ -912,7 +912,7 @@ static inline void write_sequnlock(seqlock_t *sl)
 static inline void write_seqlock_bh(seqlock_t *sl)
 {
 	spin_lock_bh(&sl->lock);
-	write_seqcount_t_begin(&sl->seqcount.seqcount);
+	do_write_seqcount_begin(&sl->seqcount.seqcount);
 }
 
 /**
@@ -925,7 +925,7 @@ static inline void write_seqlock_bh(seqlock_t *sl)
  */
 static inline void write_sequnlock_bh(seqlock_t *sl)
 {
-	write_seqcount_t_end(&sl->seqcount.seqcount);
+	do_write_seqcount_end(&sl->seqcount.seqcount);
 	spin_unlock_bh(&sl->lock);
 }
 
@@ -939,7 +939,7 @@ static inline void write_sequnlock_bh(seqlock_t *sl)
 static inline void write_seqlock_irq(seqlock_t *sl)
 {
 	spin_lock_irq(&sl->lock);
-	write_seqcount_t_begin(&sl->seqcount.seqcount);
+	do_write_seqcount_begin(&sl->seqcount.seqcount);
 }
 
 /**
@@ -951,7 +951,7 @@ static inline void write_seqlock_irq(seqlock_t *sl)
  */
 static inline void write_sequnlock_irq(seqlock_t *sl)
 {
-	write_seqcount_t_end(&sl->seqcount.seqcount);
+	do_write_seqcount_end(&sl->seqcount.seqcount);
 	spin_unlock_irq(&sl->lock);
 }
 
@@ -960,7 +960,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl)
 	unsigned long flags;
 
 	spin_lock_irqsave(&sl->lock, flags);
-	write_seqcount_t_begin(&sl->seqcount.seqcount);
+	do_write_seqcount_begin(&sl->seqcount.seqcount);
 	return flags;
 }
 
@@ -989,7 +989,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl)
 static inline void
 write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags)
 {
-	write_seqcount_t_end(&sl->seqcount.seqcount);
+	do_write_seqcount_end(&sl->seqcount.seqcount);
 	spin_unlock_irqrestore(&sl->lock, flags);
 }
 

^ permalink raw reply	[relevance 60%]

* [tip: locking/core] seqlock: kernel-doc: Specify when preemption is automatically altered
    2020-11-04 19:54 99%                           ` Ahmed S. Darwish
  2020-12-09 18:38 60%                           ` [tip: locking/core] seqlock: Prefix internal seqcount_t-only macros with a "do_" tip-bot2 for Ahmed S. Darwish
@ 2020-12-09 18:38 75%                           ` tip-bot2 for Ahmed S. Darwish
  2 siblings, 0 replies; 200+ results
From: tip-bot2 for Ahmed S. Darwish @ 2020-12-09 18:38 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Ahmed S. Darwish, Peter Zijlstra (Intel), x86, linux-kernel

The following commit has been merged into the locking/core branch of tip:

Commit-ID:     cb262935a166bdef0ccfe6e2adffa00c0f2d038a
Gitweb:        https://git.kernel.org/tip/cb262935a166bdef0ccfe6e2adffa00c0f2d038a
Author:        Ahmed S. Darwish <a.darwish@linutronix.de>
AuthorDate:    Sun, 06 Dec 2020 17:21:43 +01:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Wed, 09 Dec 2020 17:08:49 +01:00

seqlock: kernel-doc: Specify when preemption is automatically altered

The kernel-doc annotations for sequence counters write side functions
are incomplete: they do not specify when preemption is automatically
disabled and re-enabled.

This has confused a number of call-site developers. Fix it.

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/CAHk-=wikhGExmprXgaW+MVXG1zsGpztBbVwOb23vetk41EtTBQ@mail.gmail.com
---
 include/linux/seqlock.h | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 235cbc6..2f7bb92 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -456,6 +456,8 @@ static inline int do_read_seqcount_retry(const seqcount_t *s, unsigned start)
 /**
  * raw_write_seqcount_begin() - start a seqcount_t write section w/o lockdep
  * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
+ *
+ * Context: check write_seqcount_begin()
  */
 #define raw_write_seqcount_begin(s)					\
 do {									\
@@ -475,6 +477,8 @@ static inline void do_raw_write_seqcount_begin(seqcount_t *s)
 /**
  * raw_write_seqcount_end() - end a seqcount_t write section w/o lockdep
  * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
+ *
+ * Context: check write_seqcount_end()
  */
 #define raw_write_seqcount_end(s)					\
 do {									\
@@ -498,6 +502,7 @@ static inline void do_raw_write_seqcount_end(seqcount_t *s)
  * @subclass: lockdep nesting level
  *
  * See Documentation/locking/lockdep-design.rst
+ * Context: check write_seqcount_begin()
  */
 #define write_seqcount_begin_nested(s, subclass)			\
 do {									\
@@ -519,11 +524,10 @@ static inline void do_write_seqcount_begin_nested(seqcount_t *s, int subclass)
  * write_seqcount_begin() - start a seqcount_t write side critical section
  * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
  *
- * write_seqcount_begin opens a write side critical section of the given
- * seqcount_t.
- *
- * Context: seqcount_t write side critical sections must be serialized and
- * non-preemptible. If readers can be invoked from hardirq or softirq
+ * Context: sequence counter write side sections must be serialized and
+ * non-preemptible. Preemption will be automatically disabled if and
+ * only if the seqcount write serialization lock is associated, and
+ * preemptible.  If readers can be invoked from hardirq or softirq
  * context, interrupts or bottom halves must be respectively disabled.
  */
 #define write_seqcount_begin(s)						\
@@ -545,7 +549,8 @@ static inline void do_write_seqcount_begin(seqcount_t *s)
  * write_seqcount_end() - end a seqcount_t write side critical section
  * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
  *
- * The write section must've been opened with write_seqcount_begin().
+ * Context: Preemption will be automatically re-enabled if and only if
+ * the seqcount write serialization lock is associated, and preemptible.
  */
 #define write_seqcount_end(s)						\
 do {									\

^ permalink raw reply	[relevance 75%]

* [PATCH 00/11] scsi: libsas: Remove in_interrupt() check
@ 2020-12-18 20:43 88% Ahmed S. Darwish
  2020-12-18 20:43 99% ` [PATCH 01/11] Documentation: scsi: libsas: Remove notify_ha_event() Ahmed S. Darwish
                   ` (10 more replies)
  0 siblings, 11 replies; 200+ results
From: Ahmed S. Darwish @ 2020-12-18 20:43 UTC (permalink / raw)
  To: James E.J. Bottomley, Martin K. Petersen, Daniel Wagner,
	Jason Yan, John Garry, Artur Paszkiewicz, Jack Wang
  Cc: linux-scsi, LKML, Thomas Gleixner, Sebastian A. Siewior,
	Ahmed S. Darwish

Folks,

In the discussion about preempt count consistency across kernel
configurations:

  https://lkml.kernel.org/r/20200914204209.256266093@linutronix.de

it was concluded that the usage of in_interrupt() and related context
checks should be removed from non-core code.

This includes memory allocation mode decisions (GFP_*). In the long run,
usage of in_interrupt() and its siblings should be banned from driver
code completely.

This series addresses SCSI libsas. Basically, the function:

  => drivers/scsi/libsas/sas_init.c:
  struct asd_sas_event *sas_alloc_event(struct asd_sas_phy *phy)
  {
        ...
        gfp_t flags = in_interrupt() ? GFP_ATOMIC : GFP_KERNEL;
        event = kmem_cache_zalloc(sas_event_cache, flags);
        ...
  }

is transformed so that callers explicitly pass the gfp_t memory
allocation flags. Affected libsas clients are modified accordingly.
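
That is, roughly (a sketch of the series' end result; the exact
signatures are in the patches below):

  struct asd_sas_event *sas_alloc_event(struct asd_sas_phy *phy,
                                        gfp_t gfp_flags)
  {
        ...
        event = kmem_cache_zalloc(sas_event_cache, gfp_flags);
        ...
  }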

The first six patches have "Fixes: " tags and address bugs that were
noticed during the context analysis.

Thanks!

8<--------------

Ahmed S. Darwish (11):
  Documentation: scsi: libsas: Remove notify_ha_event()
  scsi: libsas: Introduce a _gfp() variant of event notifiers
  scsi: mvsas: Pass gfp_t flags to libsas event notifiers
  scsi: isci: port: link down: Pass gfp_t flags
  scsi: isci: port: link up: Pass gfp_t flags
  scsi: isci: port: broadcast change: Pass gfp_t flags
  scsi: libsas: Pass gfp_t flags to event notifiers
  scsi: pm80xx: Pass gfp_t flags to libsas event notifiers
  scsi: aic94xx: Pass gfp_t flags to libsas event notifiers
  scsi: hisi_sas: Pass gfp_t flags to libsas event notifiers
  scsi: libsas: event notifiers: Remove non _gfp() variants

 Documentation/scsi/libsas.rst          |  5 ++--
 drivers/scsi/aic94xx/aic94xx_scb.c     | 18 ++++++------
 drivers/scsi/hisi_sas/hisi_sas.h       |  3 +-
 drivers/scsi/hisi_sas/hisi_sas_main.c  | 26 ++++++++++--------
 drivers/scsi/hisi_sas/hisi_sas_v1_hw.c |  5 ++--
 drivers/scsi/hisi_sas/hisi_sas_v2_hw.c |  5 ++--
 drivers/scsi/hisi_sas/hisi_sas_v3_hw.c |  5 ++--
 drivers/scsi/isci/port.c               | 14 ++++++----
 drivers/scsi/libsas/sas_event.c        | 21 ++++++++------
 drivers/scsi/libsas/sas_init.c         | 11 ++++----
 drivers/scsi/libsas/sas_internal.h     |  4 +--
 drivers/scsi/mvsas/mv_sas.c            | 22 +++++++--------
 drivers/scsi/pm8001/pm8001_hwi.c       | 38 +++++++++++++-------------
 drivers/scsi/pm8001/pm8001_sas.c       |  8 +++---
 drivers/scsi/pm8001/pm80xx_hwi.c       | 30 ++++++++++----------
 include/scsi/libsas.h                  |  4 +--
 16 files changed, 116 insertions(+), 103 deletions(-)

base-commit: 2c85ebc57b3e1817b6ce1a6b703928e113a90442
--
2.29.2

^ permalink raw reply	[relevance 88%]

* [PATCH 01/11] Documentation: scsi: libsas: Remove notify_ha_event()
  2020-12-18 20:43 88% [PATCH 00/11] scsi: libsas: Remove in_interrupt() check Ahmed S. Darwish
@ 2020-12-18 20:43 99% ` Ahmed S. Darwish
  2020-12-18 20:43 68% ` [PATCH 02/11] scsi: libsas: Introduce a _gfp() variant of event notifiers Ahmed S. Darwish
                   ` (9 subsequent siblings)
  10 siblings, 0 replies; 200+ results
From: Ahmed S. Darwish @ 2020-12-18 20:43 UTC (permalink / raw)
  To: James E.J. Bottomley, Martin K. Petersen, Daniel Wagner,
	Jason Yan, John Garry, Artur Paszkiewicz, Jack Wang
  Cc: linux-scsi, LKML, Thomas Gleixner, Sebastian A. Siewior,
	Ahmed S. Darwish

The ->notify_ha_event() hook has long been removed from the libsas event
interface.

Remove it from the documentation.

Fixes: 042ebd293b86 ("scsi: libsas: kill useless ha_event and do some cleanup")
Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Cc: stable@vger.kernel.org
Cc: John Garry <john.garry@huawei.com>
Cc: Jason Yan <yanaijie@huawei.com>
---
 Documentation/scsi/libsas.rst | 1 -
 1 file changed, 1 deletion(-)

diff --git a/Documentation/scsi/libsas.rst b/Documentation/scsi/libsas.rst
index 7216b5d25800..f9b77c7879db 100644
--- a/Documentation/scsi/libsas.rst
+++ b/Documentation/scsi/libsas.rst
@@ -189,7 +189,6 @@ num_phys
 The event interface::
 
 	/* LLDD calls these to notify the class of an event. */
-	void (*notify_ha_event)(struct sas_ha_struct *, enum ha_event);
 	void (*notify_port_event)(struct sas_phy *, enum port_event);
 	void (*notify_phy_event)(struct sas_phy *, enum phy_event);
 
-- 
2.29.2


^ permalink raw reply	[relevance 99%]

* [PATCH 02/11] scsi: libsas: Introduce a _gfp() variant of event notifiers
  2020-12-18 20:43 88% [PATCH 00/11] scsi: libsas: Remove in_interrupt() check Ahmed S. Darwish
  2020-12-18 20:43 99% ` [PATCH 01/11] Documentation: scsi: libsas: Remove notify_ha_event() Ahmed S. Darwish
@ 2020-12-18 20:43 68% ` Ahmed S. Darwish
  2020-12-18 20:43 81% ` [PATCH 03/11] scsi: mvsas: Pass gfp_t flags to libsas " Ahmed S. Darwish
                   ` (8 subsequent siblings)
  10 siblings, 0 replies; 200+ results
From: Ahmed S. Darwish @ 2020-12-18 20:43 UTC (permalink / raw)
  To: James E.J. Bottomley, Martin K. Petersen, Daniel Wagner,
	Jason Yan, John Garry, Artur Paszkiewicz, Jack Wang
  Cc: linux-scsi, LKML, Thomas Gleixner, Sebastian A. Siewior,
	Ahmed S. Darwish

sas_alloc_event() uses in_interrupt() to decide which allocation should
be used.

The usage of in_interrupt() in drivers is being phased out, and Linus clearly
requested that code which changes behaviour depending on context should
either be separated or the context be conveyed in an argument passed by
the caller, which usually knows the context.

The in_interrupt() check is also only partially correct, because it
fails to choose the correct code path when just preemption or interrupts
are disabled. For example, consider the following call chain:

  drivers/scsi/mvsas/mv_sas.c :: mvs_work_queue() [process context]
  spin_lock_irqsave()
    -> sas_ha->notify_phy_event()
    == drivers/scsi/libsas/sas_event.c :: sas_notify_phy_event()
      -> sas_alloc_event()
        -> in_interrupt() = false
          -> invalid GFP_KERNEL allocation
    -> sas_ha->notify_port_event()
    == drivers/scsi/libsas/sas_event.c :: sas_notify_port_event()
      -> sas_alloc_event()
        -> in_interrupt() = false
          -> invalid GFP_KERNEL allocation

Introduce sas_alloc_event_gfp(), sas_ha_struct::notify_port_event_gfp(),
and sas_ha_struct::notify_phy_event_gfp(), which all behave like the non
_gfp() variants but use a caller-passed GFP mask for allocations.

After all callers are converted to pass the GFP context, the non _gfp()
variants will be removed.
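
A hypothetical LLDD call site then conveys its context explicitly:

  /* Atomic context, e.g. an interrupt handler or under a spinlock: */
  sas_ha->notify_phy_event_gfp(sas_phy, PHYE_OOB_DONE, GFP_ATOMIC);

  /* Process context that is allowed to sleep: */
  sas_ha->notify_phy_event_gfp(sas_phy, PHYE_OOB_DONE, GFP_KERNEL);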

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Cc: stable@vger.kernel.org
Cc: John Garry <john.garry@huawei.com>
Cc: Jason Yan <yanaijie@huawei.com>
---
 Documentation/scsi/libsas.rst      |  2 +
 drivers/scsi/libsas/sas_event.c    | 65 +++++++++++++++++++++++++-----
 drivers/scsi/libsas/sas_init.c     | 20 ++++++---
 drivers/scsi/libsas/sas_internal.h |  2 +
 include/scsi/libsas.h              |  2 +
 5 files changed, 75 insertions(+), 16 deletions(-)

diff --git a/Documentation/scsi/libsas.rst b/Documentation/scsi/libsas.rst
index f9b77c7879db..dc85d0e4c107 100644
--- a/Documentation/scsi/libsas.rst
+++ b/Documentation/scsi/libsas.rst
@@ -191,6 +191,8 @@ The event interface::
 	/* LLDD calls these to notify the class of an event. */
 	void (*notify_port_event)(struct sas_phy *, enum port_event);
 	void (*notify_phy_event)(struct sas_phy *, enum phy_event);
+	void (*notify_port_event_gfp)(struct sas_phy *, enum port_event, gfp_t);
+	void (*notify_phy_event_gfp)(struct sas_phy *, enum phy_event, gfp_t);
 
 When sas_register_ha() returns, those are set and can be
 called by the LLDD to notify the SAS layer of such events
diff --git a/drivers/scsi/libsas/sas_event.c b/drivers/scsi/libsas/sas_event.c
index a1852f6c042b..ed5a8325dca7 100644
--- a/drivers/scsi/libsas/sas_event.c
+++ b/drivers/scsi/libsas/sas_event.c
@@ -131,18 +131,14 @@ static void sas_phy_event_worker(struct work_struct *work)
 	sas_free_event(ev);
 }
 
-static int sas_notify_port_event(struct asd_sas_phy *phy, enum port_event event)
+static int __sas_notify_port_event(struct asd_sas_phy *phy, enum port_event event,
+				   struct asd_sas_event *ev)
 {
-	struct asd_sas_event *ev;
 	struct sas_ha_struct *ha = phy->ha;
 	int ret;
 
 	BUG_ON(event >= PORT_NUM_EVENTS);
 
-	ev = sas_alloc_event(phy);
-	if (!ev)
-		return -ENOMEM;
-
 	INIT_SAS_EVENT(ev, sas_port_event_worker, phy, event);
 
 	ret = sas_queue_event(event, &ev->work, ha);
@@ -152,18 +148,39 @@ static int sas_notify_port_event(struct asd_sas_phy *phy, enum port_event event)
 	return ret;
 }
 
-int sas_notify_phy_event(struct asd_sas_phy *phy, enum phy_event event)
+static int sas_notify_port_event_gfp(struct asd_sas_phy *phy,
+				     enum port_event event,
+				     gfp_t gfp_flags)
 {
 	struct asd_sas_event *ev;
+
+	ev = sas_alloc_event_gfp(phy, gfp_flags);
+	if (!ev)
+		return -ENOMEM;
+
+	return __sas_notify_port_event(phy, event, ev);
+}
+
+static int sas_notify_port_event(struct asd_sas_phy *phy, enum port_event event)
+{
+	struct asd_sas_event *ev;
+
+	ev = sas_alloc_event(phy);
+	if (!ev)
+		return -ENOMEM;
+
+	return __sas_notify_port_event(phy, event, ev);
+}
+
+static inline int __sas_notify_phy_event(struct asd_sas_phy *phy,
+					 enum phy_event event,
+					 struct asd_sas_event *ev)
+{
 	struct sas_ha_struct *ha = phy->ha;
 	int ret;
 
 	BUG_ON(event >= PHY_NUM_EVENTS);
 
-	ev = sas_alloc_event(phy);
-	if (!ev)
-		return -ENOMEM;
-
 	INIT_SAS_EVENT(ev, sas_phy_event_worker, phy, event);
 
 	ret = sas_queue_event(event, &ev->work, ha);
@@ -173,10 +190,36 @@ int sas_notify_phy_event(struct asd_sas_phy *phy, enum phy_event event)
 	return ret;
 }
 
+int sas_notify_phy_event_gfp(struct asd_sas_phy *phy, enum phy_event event,
+			     gfp_t gfp_flags)
+{
+	struct asd_sas_event *ev;
+
+	ev = sas_alloc_event_gfp(phy, gfp_flags);
+	if (!ev)
+		return -ENOMEM;
+
+	return __sas_notify_phy_event(phy, event, ev);
+}
+
+int sas_notify_phy_event(struct asd_sas_phy *phy, enum phy_event event)
+{
+	struct asd_sas_event *ev;
+
+	ev = sas_alloc_event(phy);
+	if (!ev)
+		return -ENOMEM;
+
+	return __sas_notify_phy_event(phy, event, ev);
+}
+
 int sas_init_events(struct sas_ha_struct *sas_ha)
 {
 	sas_ha->notify_port_event = sas_notify_port_event;
 	sas_ha->notify_phy_event = sas_notify_phy_event;
 
+	sas_ha->notify_port_event_gfp = sas_notify_port_event_gfp;
+	sas_ha->notify_phy_event_gfp = sas_notify_phy_event_gfp;
+
 	return 0;
 }
diff --git a/drivers/scsi/libsas/sas_init.c b/drivers/scsi/libsas/sas_init.c
index 21c43b18d5d5..9269d524158f 100644
--- a/drivers/scsi/libsas/sas_init.c
+++ b/drivers/scsi/libsas/sas_init.c
@@ -590,16 +590,15 @@ sas_domain_attach_transport(struct sas_domain_function_template *dft)
 }
 EXPORT_SYMBOL_GPL(sas_domain_attach_transport);
 
-
-struct asd_sas_event *sas_alloc_event(struct asd_sas_phy *phy)
+static struct asd_sas_event * __sas_alloc_event(struct asd_sas_phy *phy,
+						gfp_t gfp_flags)
 {
 	struct asd_sas_event *event;
-	gfp_t flags = in_interrupt() ? GFP_ATOMIC : GFP_KERNEL;
 	struct sas_ha_struct *sas_ha = phy->ha;
 	struct sas_internal *i =
 		to_sas_internal(sas_ha->core.shost->transportt);
 
-	event = kmem_cache_zalloc(sas_event_cache, flags);
+	event = kmem_cache_zalloc(sas_event_cache, gfp_flags);
 	if (!event)
 		return NULL;
 
@@ -610,7 +609,7 @@ struct asd_sas_event *sas_alloc_event(struct asd_sas_phy *phy)
 			if (cmpxchg(&phy->in_shutdown, 0, 1) == 0) {
 				pr_notice("The phy%d bursting events, shut it down.\n",
 					  phy->id);
-				sas_notify_phy_event(phy, PHYE_SHUTDOWN);
+				sas_notify_phy_event_gfp(phy, PHYE_SHUTDOWN, gfp_flags);
 			}
 		} else {
 			/* Do not support PHY control, stop allocating events */
@@ -624,6 +623,17 @@ struct asd_sas_event *sas_alloc_event(struct asd_sas_phy *phy)
 	return event;
 }
 
+struct asd_sas_event *sas_alloc_event(struct asd_sas_phy *phy)
+{
+	return __sas_alloc_event(phy, in_interrupt() ? GFP_ATOMIC : GFP_KERNEL);
+}
+
+struct asd_sas_event *sas_alloc_event_gfp(struct asd_sas_phy *phy,
+					  gfp_t gfp_flags)
+{
+	return __sas_alloc_event(phy, gfp_flags);
+}
+
 void sas_free_event(struct asd_sas_event *event)
 {
 	struct asd_sas_phy *phy = event->phy;
diff --git a/drivers/scsi/libsas/sas_internal.h b/drivers/scsi/libsas/sas_internal.h
index 1f1d01901978..437a697b6f73 100644
--- a/drivers/scsi/libsas/sas_internal.h
+++ b/drivers/scsi/libsas/sas_internal.h
@@ -49,6 +49,7 @@ int  sas_register_phys(struct sas_ha_struct *sas_ha);
 void sas_unregister_phys(struct sas_ha_struct *sas_ha);
 
 struct asd_sas_event *sas_alloc_event(struct asd_sas_phy *phy);
+struct asd_sas_event *sas_alloc_event_gfp(struct asd_sas_phy *phy, gfp_t gfp_flags);
 void sas_free_event(struct asd_sas_event *event);
 
 int  sas_register_ports(struct sas_ha_struct *sas_ha);
@@ -78,6 +79,7 @@ int sas_smp_phy_control(struct domain_device *dev, int phy_id,
 int sas_smp_get_phy_events(struct sas_phy *phy);
 
 int sas_notify_phy_event(struct asd_sas_phy *phy, enum phy_event event);
+int sas_notify_phy_event_gfp(struct asd_sas_phy *phy, enum phy_event event, gfp_t flags);
 void sas_device_set_phy(struct domain_device *dev, struct sas_port *port);
 struct domain_device *sas_find_dev_by_rphy(struct sas_rphy *rphy);
 struct domain_device *sas_ex_to_ata(struct domain_device *ex_dev, int phy_id);
diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
index 4e2d61e8fb1e..f7c2530bbd9d 100644
--- a/include/scsi/libsas.h
+++ b/include/scsi/libsas.h
@@ -394,6 +394,8 @@ struct sas_ha_struct {
 	/* LLDD calls these to notify the class of an event. */
 	int (*notify_port_event)(struct asd_sas_phy *, enum port_event);
 	int (*notify_phy_event)(struct asd_sas_phy *, enum phy_event);
+	int (*notify_port_event_gfp)(struct asd_sas_phy *, enum port_event, gfp_t);
+	int (*notify_phy_event_gfp)(struct asd_sas_phy *, enum phy_event, gfp_t);
 
 	void *lldd_ha;		  /* not touched by sas class code */
 
-- 
2.29.2


^ permalink raw reply	[relevance 68%]

* [PATCH 03/11] scsi: mvsas: Pass gfp_t flags to libsas event notifiers
  2020-12-18 20:43 88% [PATCH 00/11] scsi: libsas: Remove in_interrupt() check Ahmed S. Darwish
  2020-12-18 20:43 99% ` [PATCH 01/11] Documentation: scsi: libsas: Remove notify_ha_event() Ahmed S. Darwish
  2020-12-18 20:43 68% ` [PATCH 02/11] scsi: libsas: Introduce a _gfp() variant of event notifiers Ahmed S. Darwish
@ 2020-12-18 20:43 81% ` Ahmed S. Darwish
  2020-12-18 20:43 73% ` [PATCH 04/11] scsi: isci: port: link down: Pass gfp_t flags Ahmed S. Darwish
                   ` (7 subsequent siblings)
  10 siblings, 0 replies; 200+ results
From: Ahmed S. Darwish @ 2020-12-18 20:43 UTC (permalink / raw)
  To: James E.J. Bottomley, Martin K. Petersen, Daniel Wagner,
	Jason Yan, John Garry, Artur Paszkiewicz, Jack Wang
  Cc: linux-scsi, LKML, Thomas Gleixner, Sebastian A. Siewior,
	Ahmed S. Darwish

mvsas calls the non _gfp version of the libsas event notifiers API,
leading to the buggy call chains below:

  mvsas/mv_sas.c :: mvs_work_queue() [process context]
  spin_lock_irqsave(mvs_info::lock, )
    -> sas_ha->notify_phy_event()
    == libsas/sas_event.c :: sas_notify_phy_event()
      -> sas_alloc_event()
        -> in_interrupt() = false
          -> invalid GFP_KERNEL allocation
    -> sas_ha->notify_port_event()
    == libsas/sas_event.c :: sas_notify_port_event()
      -> sas_alloc_event()
        -> in_interrupt() = false
          -> invalid GFP_KERNEL allocation

Use the new event notifiers API instead, which requires callers to
explicitly pass the gfp_t memory allocation flags.

Below is the context analysis for the modified functions:

=> mvs_bytes_dmaed():

Since it is invoked from both process and atomic contexts, let its
callers pass the gfp_t flags. Call chains:

  scsi_scan.c: do_scsi_scan_host() [has msleep()]
    -> shost->hostt->scan_start()
    -> [mvsas/mv_init.c: Scsi_Host::scsi_host_template .scan_start = mvs_scan_start()]
    -> mvsas/mv_sas.c: mvs_scan_start()
      -> mvs_bytes_dmaed(..., GFP_KERNEL)

  mvsas/mv_sas.c: mvs_work_queue()
  spin_lock_irqsave(mvs_info::lock,)
    -> mvs_bytes_dmaed(..., GFP_ATOMIC)

  mvsas/mv_64xx.c: mvs_64xx_isr() || mvsas/mv_94xx.c: mvs_94xx_isr()
    -> mvsas/mv_chips.h: mvs_int_full()
      -> mvsas/mv_sas.c: mvs_int_port()
        -> mvs_bytes_dmaed(..., GFP_ATOMIC);

=> mvs_work_queue():

Invoked from process context, but it calls all the libsas event notifier
APIs under a spin_lock_irqsave(). Pass GFP_ATOMIC.
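
A condensed sketch of that pattern (field names as in the call chain
above; not a literal excerpt):

  => mvsas/mv_sas.c: mvs_work_queue(), sketch:
  spin_lock_irqsave(&mvi->lock, flags);
  /* Process context, but sleeping is forbidden under the lock: */
  sas_ha->notify_phy_event_gfp(sas_phy, PHYE_LOSS_OF_SIGNAL, GFP_ATOMIC);
  spin_unlock_irqrestore(&mvi->lock, flags);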

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Cc: stable@vger.kernel.org
Cc: John Garry <john.garry@huawei.com>
Cc: Jason Yan <yanaijie@huawei.com>
---
 drivers/scsi/mvsas/mv_sas.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/scsi/mvsas/mv_sas.c b/drivers/scsi/mvsas/mv_sas.c
index a920eced92ec..30a34c5bdf6b 100644
--- a/drivers/scsi/mvsas/mv_sas.c
+++ b/drivers/scsi/mvsas/mv_sas.c
@@ -216,7 +216,7 @@ void mvs_set_sas_addr(struct mvs_info *mvi, int port_id, u32 off_lo,
 	MVS_CHIP_DISP->write_port_cfg_data(mvi, port_id, hi);
 }
 
-static void mvs_bytes_dmaed(struct mvs_info *mvi, int i)
+static void mvs_bytes_dmaed(struct mvs_info *mvi, int i, gfp_t gfp_flags)
 {
 	struct mvs_phy *phy = &mvi->phy[i];
 	struct asd_sas_phy *sas_phy = &phy->sas_phy;
@@ -230,7 +230,7 @@ static void mvs_bytes_dmaed(struct mvs_info *mvi, int i)
 	}
 
 	sas_ha = mvi->sas;
-	sas_ha->notify_phy_event(sas_phy, PHYE_OOB_DONE);
+	sas_ha->notify_phy_event_gfp(sas_phy, PHYE_OOB_DONE, gfp_flags);
 
 	if (sas_phy->phy) {
 		struct sas_phy *sphy = sas_phy->phy;
@@ -262,8 +262,8 @@ static void mvs_bytes_dmaed(struct mvs_info *mvi, int i)
 
 	sas_phy->frame_rcvd_size = phy->frame_rcvd_size;
 
-	mvi->sas->notify_port_event(sas_phy,
-				   PORTE_BYTES_DMAED);
+	mvi->sas->notify_port_event_gfp(sas_phy,
+					PORTE_BYTES_DMAED, gfp_flags);
 }
 
 void mvs_scan_start(struct Scsi_Host *shost)
@@ -279,7 +279,7 @@ void mvs_scan_start(struct Scsi_Host *shost)
 	for (j = 0; j < core_nr; j++) {
 		mvi = ((struct mvs_prv_info *)sha->lldd_ha)->mvi[j];
 		for (i = 0; i < mvi->chip->n_phy; ++i)
-			mvs_bytes_dmaed(mvi, i);
+			mvs_bytes_dmaed(mvi, i, GFP_KERNEL);
 	}
 	mvs_prv->scan_finished = 1;
 }
@@ -1895,21 +1895,21 @@ static void mvs_work_queue(struct work_struct *work)
 			if (!(tmp & PHY_READY_MASK)) {
 				sas_phy_disconnected(sas_phy);
 				mvs_phy_disconnected(phy);
-				sas_ha->notify_phy_event(sas_phy,
-					PHYE_LOSS_OF_SIGNAL);
+				sas_ha->notify_phy_event_gfp(sas_phy,
+							     PHYE_LOSS_OF_SIGNAL, GFP_ATOMIC);
 				mv_dprintk("phy%d Removed Device\n", phy_no);
 			} else {
 				MVS_CHIP_DISP->detect_porttype(mvi, phy_no);
 				mvs_update_phyinfo(mvi, phy_no, 1);
-				mvs_bytes_dmaed(mvi, phy_no);
+				mvs_bytes_dmaed(mvi, phy_no, GFP_ATOMIC);
 				mvs_port_notify_formed(sas_phy, 0);
 				mv_dprintk("phy%d Attached Device\n", phy_no);
 			}
 		}
 	} else if (mwq->handler & EXP_BRCT_CHG) {
 		phy->phy_event &= ~EXP_BRCT_CHG;
-		sas_ha->notify_port_event(sas_phy,
-				PORTE_BROADCAST_RCVD);
+		sas_ha->notify_port_event_gfp(sas_phy,
+					      PORTE_BROADCAST_RCVD, GFP_ATOMIC);
 		mv_dprintk("phy%d Got Broadcast Change\n", phy_no);
 	}
 	list_del(&mwq->entry);
@@ -2026,7 +2026,7 @@ void mvs_int_port(struct mvs_info *mvi, int phy_no, u32 events)
 				mdelay(10);
 			}
 
-			mvs_bytes_dmaed(mvi, phy_no);
+			mvs_bytes_dmaed(mvi, phy_no, GFP_ATOMIC);
 			/* whether driver is going to handle hot plug */
 			if (phy->phy_event & PHY_PLUG_OUT) {
 				mvs_port_notify_formed(&phy->sas_phy, 0);
-- 
2.29.2


^ permalink raw reply	[relevance 81%]

* [PATCH 04/11] scsi: isci: port: link down: Pass gfp_t flags
  2020-12-18 20:43 88% [PATCH 00/11] scsi: libsas: Remove in_interrupt() check Ahmed S. Darwish
                   ` (2 preceding siblings ...)
  2020-12-18 20:43 81% ` [PATCH 03/11] scsi: mvsas: Pass gfp_t flags to libsas " Ahmed S. Darwish
@ 2020-12-18 20:43 73% ` Ahmed S. Darwish
  2020-12-18 20:43 85% ` [PATCH 05/11] scsi: isci: port: link up: " Ahmed S. Darwish
                   ` (6 subsequent siblings)
  10 siblings, 0 replies; 200+ results
From: Ahmed S. Darwish @ 2020-12-18 20:43 UTC (permalink / raw)
  To: James E.J. Bottomley, Martin K. Petersen, Daniel Wagner,
	Jason Yan, John Garry, Artur Paszkiewicz, Jack Wang
  Cc: linux-scsi, LKML, Thomas Gleixner, Sebastian A. Siewior,
	Ahmed S. Darwish

Use the new libsas event notifiers API, which requires callers to
explicitly pass the gfp_t memory allocation flags.

The libsas ->notify_phy_event() hook is exclusively called from
isci_port_link_down(). Below is the context analysis for all of its
call chains:

port.c: port_timeout(), atomic, timer callback                  (*)
spin_lock_irqsave(isci_host::scic_lock, )
  -> port_state_machine_change(..., SCI_PORT_FAILED)
    -> enter SCI port state: *SCI_PORT_FAILED*
      -> sci_port_failed_state_enter()
        -> isci_port_hard_reset_complete()
          -> isci_port_link_down()

port.c: isci_port_perform_hard_reset()
spin_lock_irqsave(isci_host::scic_lock, )
  -> port.c: sci_port_hard_reset(), atomic                      (*)
    -> phy.c: sci_phy_reset()
      -> sci_change_state(SCI_PHY_RESETTING)
        -> enter SCI PHY state: *SCI_PHY_RESETTING*
          -> sci_phy_resetting_state_enter()
            -> port.c: sci_port_deactivate_phy()
              -> isci_port_link_down()

port.c: enter SCI port state: *SCI_PORT_READY*                  # Cont. from [1]
  -> sci_port_ready_state_enter()
    -> isci_port_hard_reset_complete()
      -> isci_port_link_down()

phy.c: enter SCI state: *SCI_PHY_STOPPED*                       # Cont. from [2]
  -> sci_phy_stopped_state_enter()
    -> host.c: sci_controller_link_down()
      -> ->link_down_handler()
      == port_config.c: sci_apc_agent_link_down()
        -> port.c: sci_port_remove_phy()
          -> sci_port_deactivate_phy()
            -> isci_port_link_down()
      == port_config.c: sci_mpc_agent_link_down()
        -> port.c: sci_port_link_down()
          -> sci_port_deactivate_phy()
            -> isci_port_link_down()

phy.c: enter SCI state: *SCI_PHY_STARTING*                      # Cont. from [3]
  -> sci_phy_starting_state_enter()
    -> host.c: sci_controller_link_down()
      -> ->link_down_handler()
      == port_config.c: sci_apc_agent_link_down()
        -> port.c: sci_port_remove_phy()
          -> isci_port_link_down()
      == port_config.c: sci_mpc_agent_link_down()
        -> port.c: sci_port_link_down()
          -> sci_port_deactivate_phy()
            -> isci_port_link_down()

[1] Call chains for 'enter SCI port state: *SCI_PORT_READY*'
------------------------------------------------------------

host.c: isci_host_init()                                        (@)
spin_lock_irq(isci_host::scic_lock)
  -> sci_controller_initialize(), atomic                        (*)
    -> port_config.c: sci_port_configuration_agent_initialize()
      -> sci_mpc_agent_validate_phy_configuration()
        -> port.c: sci_port_add_phy()
          -> sci_port_general_link_up_handler()
            -> port_state_machine_change(, SCI_PORT_READY)
              -> enter port state *SCI_PORT_READY*

host.c: isci_host_start()                                       (@)
spin_lock_irq(isci_host::scic_lock)
  -> host.c: sci_controller_start(), atomic                     (*)
    -> host.c: sci_port_start()
      -> port.c: port_state_machine_change(, SCI_PORT_READY)
        -> enter port state *SCI_PORT_READY*

port_config.c: apc_agent_timeout(), atomic, timer callback      (*)
  -> sci_apc_agent_configure_ports()
    -> port.c: sci_port_add_phy()
      -> sci_port_general_link_up_handler()
        -> port_state_machine_change(, SCI_PORT_READY)
          -> enter port state *SCI_PORT_READY*

port_config.c: mpc_agent_timeout(), atomic, timer callback      (*)
spin_lock_irqsave(isci_host::scic_lock, )
  -> ->link_up_handler()
  == port.c: sci_apc_agent_link_up()
    -> sci_port_general_link_up_handler()
      -> port_state_machine_change(, SCI_PORT_READY)
        -> enter port state *SCI_PORT_READY*
  == port.c: sci_mpc_agent_link_up()
    -> port.c: sci_port_link_up()
      -> sci_port_general_link_up_handler()
        -> port_state_machine_change(, SCI_PORT_READY)
          -> enter port state *SCI_PORT_READY*

phy.c: enter SCI state: SCI_PHY_SUB_FINAL                       # Cont. from [1A]
  -> sci_phy_starting_final_substate_enter()
    -> sci_change_state(SCI_PHY_READY)
      -> enter SCI state: *SCI_PHY_READY*
        -> sci_phy_ready_state_enter()
          -> host.c: sci_controller_link_up()
            -> port_agent.link_up_handler()
              -> port_config.c: [->link_up_handler = sci_apc_agent_link_up]
                 sci_apc_agent_link_up()
                  -> port.c: sci_port_link_up()
                    -> sci_port_general_link_up_handler()
                      -> port_state_machine_change(, SCI_PORT_READY)
                        -> enter port state *SCI_PORT_READY*
              -> port_config.c: [->link_up_handler = sci_mpc_agent_link_up]
                 sci_mpc_agent_link_up()
                  -> port.c: sci_port_link_up()
                    -> sci_port_general_link_up_handler()
                      -> port_state_machine_change(, SCI_PORT_READY)
                        -> enter port state *SCI_PORT_READY*

[1A] Call chains for entering SCI state: *SCI_PHY_SUB_FINAL*
------------------------------------------------------------

host.c: power_control_timeout(), atomic, timer callback         (*)
spin_lock_irqsave(isci_host::scic_lock, )
  -> phy.c: sci_phy_consume_power_handler()
    -> phy.c: sci_change_state(SCI_PHY_SUB_FINAL)

host.c: sci_controller_error_handler(): atomic, irq handler     (*)
OR host.c: sci_controller_completion_handler(), atomic, tasklet (*)
  -> sci_controller_process_completions()
    -> sci_controller_unsolicited_frame()
      -> phy.c: sci_phy_frame_handler()
        -> sci_change_state(SCI_PHY_SUB_AWAIT_SAS_POWER)
          -> sci_phy_starting_await_sas_power_substate_enter()
            -> host.c: sci_controller_power_control_queue_insert()
              -> phy.c: sci_phy_consume_power_handler()
                -> sci_change_state(SCI_PHY_SUB_FINAL)
        -> sci_change_state(SCI_PHY_SUB_FINAL)
    -> sci_controller_event_completion()
      -> phy.c: sci_phy_event_handler()
        -> sci_phy_start_sata_link_training()
          -> sci_change_state(SCI_PHY_SUB_AWAIT_SATA_POWER)
            -> sci_phy_starting_await_sata_power_substate_enter
              -> host.c: sci_controller_power_control_queue_insert()
                -> phy.c: sci_phy_consume_power_handler()
                  -> sci_change_state(SCI_PHY_SUB_FINAL)

[2] Call chains for entering state: *SCI_PHY_STOPPED*
-----------------------------------------------------

host.c: isci_host_init()                                        (@)
spin_lock_irq(isci_host::scic_lock)
  -> sci_controller_initialize(), atomic                        (*)
      -> phy.c: sci_phy_initialize()
        -> phy.c: sci_phy_link_layer_initialization()
          -> phy.c: sci_change_state(SCI_PHY_STOPPED)

init.c: PCI ->remove() || PM_OPS ->suspend,  process context    (+)
  -> host.c: isci_host_deinit()
    -> sci_controller_stop_phys()
      -> phy.c: sci_phy_stop()
        -> sci_change_state(SCI_PHY_STOPPED)

phy.c: isci_phy_control()
spin_lock_irqsave(isci_host::scic_lock, )
  -> sci_phy_stop(), atomic                                     (*)
    -> sci_change_state(SCI_PHY_STOPPED)

[3] Call chains for entering state: *SCI_PHY_STARTING*
------------------------------------------------------

phy.c: phy_sata_timeout(), atomic, timer callback               (*)
spin_lock_irqsave(isci_host::scic_lock, )
  -> sci_change_state(SCI_PHY_STARTING)

host.c: phy_startup_timeout(), atomic, timer callback           (*)
spin_lock_irqsave(isci_host::scic_lock, )
  -> sci_controller_start_next_phy()
    -> sci_phy_start()
      -> sci_change_state(SCI_PHY_STARTING)

host.c: isci_host_start()                                       (@)
spin_lock_irq(isci_host::scic_lock)
  -> sci_controller_start(), atomic                             (*)
    -> sci_controller_start_next_phy()
      -> sci_phy_start()
        -> sci_change_state(SCI_PHY_STARTING)

phy.c: Enter SCI state *SCI_PHY_SUB_FINAL*, atomic, check above (*)
  -> sci_change_state(SCI_PHY_SUB_FINAL)
    -> sci_phy_starting_final_substate_enter()
      -> sci_change_state(SCI_PHY_READY)
        -> Enter SCI state: *SCI_PHY_READY*
          -> sci_phy_ready_state_enter()
            -> host.c: sci_controller_link_up()
              -> sci_controller_start_next_phy()
                -> sci_phy_start()
                  -> sci_change_state(SCI_PHY_STARTING)

phy.c: sci_phy_event_handler(), atomic, discussed earlier       (*)
  -> sci_change_state(SCI_PHY_STARTING), 11 instances

phy.c: enter SCI state: *SCI_PHY_RESETTING*, atomic, discussed  (*)
  -> sci_phy_resetting_state_enter()
    -> sci_change_state(SCI_PHY_STARTING)

As can be seen from the "(*)" markers above, almost all the call-chains
are atomic. The only exception, marked with "(+)", is a PCI ->remove()
and PM_OPS ->suspend() cold path. Thus, pass GFP_ATOMIC to the libsas
phy event notifier.

Note, the now-replaced libsas APIs used in_interrupt() to implicitly
decide which memory allocation type to use.  This was only partially
correct, as it fails to choose the correct GFP flags when just
preemption or interrupts are disabled. Such buggy code paths are marked
with "(@)" in the call chains above.

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Cc: stable@vger.kernel.org
Cc: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
---
 drivers/scsi/isci/port.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/isci/port.c b/drivers/scsi/isci/port.c
index 1df45f028ea7..c3a8c84b19a2 100644
--- a/drivers/scsi/isci/port.c
+++ b/drivers/scsi/isci/port.c
@@ -270,8 +270,9 @@ static void isci_port_link_down(struct isci_host *isci_host,
 	 * isci_port_deformed and isci_dev_gone functions.
 	 */
 	sas_phy_disconnected(&isci_phy->sas_phy);
-	isci_host->sas_ha.notify_phy_event(&isci_phy->sas_phy,
-					   PHYE_LOSS_OF_SIGNAL);
+	isci_host->sas_ha.notify_phy_event_gfp(&isci_phy->sas_phy,
+					       PHYE_LOSS_OF_SIGNAL,
+					       GFP_ATOMIC);
 
 	dev_dbg(&isci_host->pdev->dev,
 		"%s: isci_port = %p - Done\n", __func__, isci_port);
-- 
2.29.2


^ permalink raw reply	[relevance 73%]

* [PATCH 06/11] scsi: isci: port: broadcast change: Pass gfp_t flags
  2020-12-18 20:43 88% [PATCH 00/11] scsi: libsas: Remove in_interrupt() check Ahmed S. Darwish
                   ` (4 preceding siblings ...)
  2020-12-18 20:43 85% ` [PATCH 05/11] scsi: isci: port: link up: " Ahmed S. Darwish
@ 2020-12-18 20:43 78% ` Ahmed S. Darwish
  2020-12-18 20:43 97% ` [PATCH 07/11] scsi: libsas: Pass gfp_t flags to event notifiers Ahmed S. Darwish
                   ` (4 subsequent siblings)
  10 siblings, 0 replies; 200+ results
From: Ahmed S. Darwish @ 2020-12-18 20:43 UTC (permalink / raw)
  To: James E.J. Bottomley, Martin K. Petersen, Daniel Wagner,
	Jason Yan, John Garry, Artur Paszkiewicz, Jack Wang
  Cc: linux-scsi, LKML, Thomas Gleixner, Sebastian A. Siewior,
	Ahmed S. Darwish

Use the new libsas event notifiers API, which requires callers to
explicitly pass the gfp_t memory allocation flags.

The libsas ->notify_port_event() hook is called from
isci_port_bc_change_received(). Below is the context analysis for all of
its call chains:

host.c: sci_controller_error_handler(): atomic, irq handler     (*)
OR host.c: sci_controller_completion_handler(), atomic, tasklet (*)
  -> sci_controller_process_completions()
    -> sci_controller_event_completion()
      -> phy.c: sci_phy_event_handler()
        -> port.c: sci_port_broadcast_change_received()
          -> isci_port_bc_change_received()

host.c: isci_host_init()                                        (@)
spin_lock_irq(isci_host::scic_lock)
  -> sci_controller_initialize(), atomic                        (*)
    -> port_config.c: sci_port_configuration_agent_initialize()
      -> sci_mpc_agent_validate_phy_configuration()
        -> port.c: sci_port_add_phy()
          -> sci_port_set_phy()
            -> phy.c: sci_phy_set_port()
              -> port.c: sci_port_broadcast_change_received()
                -> isci_port_bc_change_received()

port_config.c: apc_agent_timeout(), atomic, timer callback      (*)
  -> sci_apc_agent_configure_ports()
    -> port.c: sci_port_add_phy()
      -> sci_port_set_phy()
        -> phy.c: sci_phy_set_port()
          -> port.c: sci_port_broadcast_change_received()
            -> isci_port_bc_change_received()

phy.c: enter SCI state: *SCI_PHY_STOPPED*                       # Cont. from [1]
  -> sci_phy_stopped_state_enter()
    -> host.c: sci_controller_link_down()
      -> ->link_down_handler()
      == port_config.c: sci_apc_agent_link_down()
        -> port.c: sci_port_remove_phy()
          -> sci_port_clear_phy()
            -> phy.c: sci_phy_set_port()
              -> port.c: sci_port_broadcast_change_received()
                -> isci_port_bc_change_received()

phy.c: enter SCI state: *SCI_PHY_STARTING*                      # Cont. from [2]
  -> sci_phy_starting_state_enter()
    -> host.c: sci_controller_link_down()
      -> ->link_down_handler()
      == port_config.c: sci_apc_agent_link_down()
        -> port.c: sci_port_remove_phy()
          -> sci_port_clear_phy()
            -> phy.c: sci_phy_set_port()
              -> port.c: sci_port_broadcast_change_received()
                -> isci_port_bc_change_received()

[1] Call chains for entering state: *SCI_PHY_STOPPED*
-----------------------------------------------------

host.c: isci_host_init()                                        (@)
spin_lock_irq(isci_host::scic_lock)
  -> sci_controller_initialize(), atomic                        (*)
      -> phy.c: sci_phy_initialize()
        -> phy.c: sci_phy_link_layer_initialization()
          -> phy.c: sci_change_state(SCI_PHY_STOPPED)

init.c: PCI ->remove() || PM_OPS ->suspend,  process context    (+)
  -> host.c: isci_host_deinit()
    -> sci_controller_stop_phys()
      -> phy.c: sci_phy_stop()
        -> sci_change_state(SCI_PHY_STOPPED)

phy.c: isci_phy_control()
spin_lock_irqsave(isci_host::scic_lock, )
  -> sci_phy_stop(), atomic                                     (*)
    -> sci_change_state(SCI_PHY_STOPPED)

[2] Call chains for entering state: *SCI_PHY_STARTING*
------------------------------------------------------

phy.c: phy_sata_timeout(), atomic, timer callback               (*)
spin_lock_irqsave(isci_host::scic_lock, )
  -> sci_change_state(SCI_PHY_STARTING)

host.c: phy_startup_timeout(), atomic, timer callback           (*)
spin_lock_irqsave(isci_host::scic_lock, )
  -> sci_controller_start_next_phy()
    -> sci_phy_start()
      -> sci_change_state(SCI_PHY_STARTING)

host.c: isci_host_start()                                       (@)
spin_lock_irq(isci_host::scic_lock)
  -> sci_controller_start(), atomic                             (*)
    -> sci_controller_start_next_phy()
      -> sci_phy_start()
        -> sci_change_state(SCI_PHY_STARTING)

phy.c: Enter SCI state *SCI_PHY_SUB_FINAL*                      # Cont. from [2A]
  -> sci_change_state(SCI_PHY_SUB_FINAL)
    -> sci_phy_starting_final_substate_enter()
      -> sci_change_state(SCI_PHY_READY)
        -> Enter SCI state: *SCI_PHY_READY*
          -> sci_phy_ready_state_enter()
            -> host.c: sci_controller_link_up()
              -> sci_controller_start_next_phy()
                -> sci_phy_start()
                  -> sci_change_state(SCI_PHY_STARTING)

phy.c: sci_phy_event_handler(), atomic, discussed earlier       (*)
  -> sci_change_state(SCI_PHY_STARTING), 11 instances

port.c: isci_port_perform_hard_reset()
spin_lock_irqsave(isci_host::scic_lock, )
  -> port.c: sci_port_hard_reset(), atomic                      (*)
    -> phy.c: sci_phy_reset()
      -> sci_change_state(SCI_PHY_RESETTING)
        -> enter SCI PHY state: *SCI_PHY_RESETTING*
          -> sci_phy_resetting_state_enter()
            -> sci_change_state(SCI_PHY_STARTING)

[2A] Call chains for entering SCI state: *SCI_PHY_SUB_FINAL*
------------------------------------------------------------

host.c: power_control_timeout(), atomic, timer callback         (*)
spin_lock_irqsave(isci_host::scic_lock, )
  -> phy.c: sci_phy_consume_power_handler()
    -> phy.c: sci_change_state(SCI_PHY_SUB_FINAL)

host.c: sci_controller_error_handler(): atomic, irq handler     (*)
OR host.c: sci_controller_completion_handler(), atomic, tasklet (*)
  -> sci_controller_process_completions()
    -> sci_controller_unsolicited_frame()
      -> phy.c: sci_phy_frame_handler()
        -> sci_change_state(SCI_PHY_SUB_AWAIT_SAS_POWER)
          -> sci_phy_starting_await_sas_power_substate_enter()
            -> host.c: sci_controller_power_control_queue_insert()
              -> phy.c: sci_phy_consume_power_handler()
                -> sci_change_state(SCI_PHY_SUB_FINAL)
        -> sci_change_state(SCI_PHY_SUB_FINAL)
    -> sci_controller_event_completion()
      -> phy.c: sci_phy_event_handler()
        -> sci_phy_start_sata_link_training()
          -> sci_change_state(SCI_PHY_SUB_AWAIT_SATA_POWER)
            -> sci_phy_starting_await_sata_power_substate_enter
              -> host.c: sci_controller_power_control_queue_insert()
                -> phy.c: sci_phy_consume_power_handler()
                  -> sci_change_state(SCI_PHY_SUB_FINAL)

As can be seen from the "(*)" markers above, almost all the call-chains
are atomic. The only exception, marked with "(+)", is a PCI ->remove()
and PM_OPS ->suspend() cold path. Thus, pass GFP_ATOMIC to the libsas
port event notifier.

Note, the now-replaced libsas APIs used in_interrupt() to implicitly
decide which memory allocation type to use.  This was only partially
correct, as it fails to choose the correct GFP flags when just
preemption or interrupts are disabled. Such buggy code paths are marked
with "(@)" in the call chains above.

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Cc: stable@vger.kernel.org
Cc: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
---
 drivers/scsi/isci/port.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/isci/port.c b/drivers/scsi/isci/port.c
index 69684c80c407..6244981a3ebf 100644
--- a/drivers/scsi/isci/port.c
+++ b/drivers/scsi/isci/port.c
@@ -164,7 +164,9 @@ static void isci_port_bc_change_received(struct isci_host *ihost,
 		"%s: isci_phy = %p, sas_phy = %p\n",
 		__func__, iphy, &iphy->sas_phy);
 
-	ihost->sas_ha.notify_port_event(&iphy->sas_phy, PORTE_BROADCAST_RCVD);
+	ihost->sas_ha.notify_port_event_gfp(&iphy->sas_phy,
+					    PORTE_BROADCAST_RCVD,
+					    GFP_ATOMIC);
 	sci_port_bcn_enable(iport);
 }
 
-- 
2.29.2


^ permalink raw reply	[relevance 78%]

* [PATCH 05/11] scsi: isci: port: link up: Pass gfp_t flags
  2020-12-18 20:43 88% [PATCH 00/11] scsi: libsas: Remove in_interrupt() check Ahmed S. Darwish
                   ` (3 preceding siblings ...)
  2020-12-18 20:43 73% ` [PATCH 04/11] scsi: isci: port: link down: Pass gfp_t flags Ahmed S. Darwish
@ 2020-12-18 20:43 85% ` Ahmed S. Darwish
  2020-12-18 20:43 78% ` [PATCH 06/11] scsi: isci: port: broadcast change: " Ahmed S. Darwish
                   ` (5 subsequent siblings)
  10 siblings, 0 replies; 200+ results
From: Ahmed S. Darwish @ 2020-12-18 20:43 UTC (permalink / raw)
  To: James E.J. Bottomley, Martin K. Petersen, Daniel Wagner,
	Jason Yan, John Garry, Artur Paszkiewicz, Jack Wang
  Cc: linux-scsi, LKML, Thomas Gleixner, Sebastian A. Siewior,
	Ahmed S. Darwish

Use the new libsas event notifiers API, which requires callers to
explicitly pass the gfp_t memory allocation flags.

The libsas ->notify_port_event() hook is called from
isci_port_link_up().  Below is the context analysis for all of its call
chains:

host.c: isci_host_init()                                        (@)
spin_lock_irq(isci_host::scic_lock)
  -> sci_controller_initialize(), atomic                        (*)
    -> port_config.c: sci_port_configuration_agent_initialize()
      -> sci_mpc_agent_validate_phy_configuration()
        -> port.c: sci_port_add_phy()
          -> sci_port_general_link_up_handler()
            -> sci_port_activate_phy()
              -> isci_port_link_up()

port_config.c: apc_agent_timeout(), atomic, timer callback      (*)
  -> sci_apc_agent_configure_ports()
    -> port.c: sci_port_add_phy()
      -> sci_port_general_link_up_handler()
        -> sci_port_activate_phy()
          -> isci_port_link_up()

phy.c: enter SCI state: *SCI_PHY_SUB_FINAL*                     # Cont. from [1]
  -> phy.c: sci_phy_starting_final_substate_enter()
    -> phy.c: sci_change_state(SCI_PHY_READY)
      -> enter SCI state: *SCI_PHY_READY*
        -> phy.c: sci_phy_ready_state_enter()
          -> host.c: sci_controller_link_up()
            -> .link_up_handler()
            == port_config.c: sci_apc_agent_link_up()
              -> port.c: sci_port_link_up()
                -> (continue at [A])
            == port_config.c: sci_mpc_agent_link_up()
              -> port.c: sci_port_link_up()
                -> (continue at [A])

port_config.c: mpc_agent_timeout(), atomic, timer callback      (*)
spin_lock_irqsave(isci_host::scic_lock, )
  -> ->link_up_handler()
  == port_config.c: sci_apc_agent_link_up()
    -> port.c: sci_port_link_up()
      -> (continue at [A])
  == port_config.c: sci_mpc_agent_link_up()
    -> port.c: sci_port_link_up()
      -> (continue at [A])

[A] port.c: sci_port_link_up()
  -> sci_port_activate_phy()
    -> isci_port_link_up()
  -> sci_port_general_link_up_handler()
    -> sci_port_activate_phy()
      -> isci_port_link_up()

[1] Call chains for entering SCI state: *SCI_PHY_SUB_FINAL*
-----------------------------------------------------------

host.c: power_control_timeout(), atomic, timer callback         (*)
spin_lock_irqsave(isci_host::scic_lock, )
  -> phy.c: sci_phy_consume_power_handler()
    -> phy.c: sci_change_state(SCI_PHY_SUB_FINAL)

host.c: sci_controller_error_handler(): atomic, irq handler     (*)
OR host.c: sci_controller_completion_handler(), atomic, tasklet (*)
  -> sci_controller_process_completions()
    -> sci_controller_unsolicited_frame()
      -> phy.c: sci_phy_frame_handler()
        -> sci_change_state(SCI_PHY_SUB_AWAIT_SAS_POWER)
          -> sci_phy_starting_await_sas_power_substate_enter()
            -> host.c: sci_controller_power_control_queue_insert()
              -> phy.c: sci_phy_consume_power_handler()
                -> sci_change_state(SCI_PHY_SUB_FINAL)
        -> sci_change_state(SCI_PHY_SUB_FINAL)
    -> sci_controller_event_completion()
      -> phy.c: sci_phy_event_handler()
        -> sci_phy_start_sata_link_training()
          -> sci_change_state(SCI_PHY_SUB_AWAIT_SATA_POWER)
            -> sci_phy_starting_await_sata_power_substate_enter
              -> host.c: sci_controller_power_control_queue_insert()
                -> phy.c: sci_phy_consume_power_handler()
                  -> sci_change_state(SCI_PHY_SUB_FINAL)

As can be seen from the "(*)" markers above, all the call-chains are
atomic.  Pass GFP_ATOMIC to the libsas port event notifier.

Note, the now-replaced libsas APIs used in_interrupt() to implicitly
decide which memory allocation type to use.  This was only partially
correct, as it fails to choose the correct GFP flags when just
preemption or interrupts are disabled. Such buggy code paths are marked
with "(@)" in the call chains above.

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Cc: stable@vger.kernel.org
Cc: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
---
 drivers/scsi/isci/port.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/isci/port.c b/drivers/scsi/isci/port.c
index c3a8c84b19a2..69684c80c407 100644
--- a/drivers/scsi/isci/port.c
+++ b/drivers/scsi/isci/port.c
@@ -223,8 +223,9 @@ static void isci_port_link_up(struct isci_host *isci_host,
 	/* Notify libsas that we have an address frame, if indeed
 	 * we've found an SSP, SMP, or STP target */
 	if (success)
-		isci_host->sas_ha.notify_port_event(&iphy->sas_phy,
-						    PORTE_BYTES_DMAED);
+		isci_host->sas_ha.notify_port_event_gfp(&iphy->sas_phy,
+							PORTE_BYTES_DMAED,
+							GFP_ATOMIC);
 }
 
 
-- 
2.29.2


^ permalink raw reply	[relevance 85%]

* [PATCH 07/11] scsi: libsas: Pass gfp_t flags to event notifiers
  2020-12-18 20:43 88% [PATCH 00/11] scsi: libsas: Remove in_interrupt() check Ahmed S. Darwish
                   ` (5 preceding siblings ...)
  2020-12-18 20:43 78% ` [PATCH 06/11] scsi: isci: port: broadcast change: " Ahmed S. Darwish
@ 2020-12-18 20:43 97% ` Ahmed S. Darwish
  2020-12-18 20:43 58% ` [PATCH 08/11] scsi: pm80xx: Pass gfp_t flags to libsas " Ahmed S. Darwish
                   ` (3 subsequent siblings)
  10 siblings, 0 replies; 200+ results
From: Ahmed S. Darwish @ 2020-12-18 20:43 UTC (permalink / raw)
  To: James E.J. Bottomley, Martin K. Petersen, Daniel Wagner,
	Jason Yan, John Garry, Artur Paszkiewicz, Jack Wang
  Cc: linux-scsi, LKML, Thomas Gleixner, Sebastian A. Siewior,
	Ahmed S. Darwish

Use the new libsas event notifiers API, which requires callers to
explicitly pass the gfp_t memory allocation flags.

Context analysis:

  - sas_enable_revalidation(): process, acquires mutex
  - sas_resume_ha(): process, calls wait_event_timeout()
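
Both paths may sleep, hence GFP_KERNEL. A condensed sketch, matching
the sas_enable_revalidation() hunk below:

  mutex_lock(&ha->disco_mutex);
  ...
  ha->notify_port_event_gfp(sas_phy, PORTE_BROADCAST_RCVD, GFP_KERNEL);
  ...
  mutex_unlock(&ha->disco_mutex);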

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Cc: John Garry <john.garry@huawei.com>
Cc: Jason Yan <yanaijie@huawei.com>
---
 drivers/scsi/libsas/sas_event.c | 2 +-
 drivers/scsi/libsas/sas_init.c  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/libsas/sas_event.c b/drivers/scsi/libsas/sas_event.c
index ed5a8325dca7..31b733eeabf6 100644
--- a/drivers/scsi/libsas/sas_event.c
+++ b/drivers/scsi/libsas/sas_event.c
@@ -109,7 +109,7 @@ void sas_enable_revalidation(struct sas_ha_struct *ha)
 
 		sas_phy = container_of(port->phy_list.next, struct asd_sas_phy,
 				port_phy_el);
-		ha->notify_port_event(sas_phy, PORTE_BROADCAST_RCVD);
+		ha->notify_port_event_gfp(sas_phy, PORTE_BROADCAST_RCVD, GFP_KERNEL);
 	}
 	mutex_unlock(&ha->disco_mutex);
 }
diff --git a/drivers/scsi/libsas/sas_init.c b/drivers/scsi/libsas/sas_init.c
index 9269d524158f..2d2116f827c6 100644
--- a/drivers/scsi/libsas/sas_init.c
+++ b/drivers/scsi/libsas/sas_init.c
@@ -410,7 +410,7 @@ void sas_resume_ha(struct sas_ha_struct *ha)
 
 		if (phy->suspended) {
 			dev_warn(&phy->phy->dev, "resume timeout\n");
-			sas_notify_phy_event(phy, PHYE_RESUME_TIMEOUT);
+			sas_notify_phy_event_gfp(phy, PHYE_RESUME_TIMEOUT, GFP_KERNEL);
 		}
 	}
 
-- 
2.29.2


^ permalink raw reply	[relevance 97%]

* [PATCH 08/11] scsi: pm80xx: Pass gfp_t flags to libsas event notifiers
  2020-12-18 20:43 88% [PATCH 00/11] scsi: libsas: Remove in_interrupt() check Ahmed S. Darwish
                   ` (6 preceding siblings ...)
  2020-12-18 20:43 97% ` [PATCH 07/11] scsi: libsas: Pass gfp_t flags to event notifiers Ahmed S. Darwish
@ 2020-12-18 20:43 58% ` Ahmed S. Darwish
  2020-12-18 20:43 85% ` [PATCH 09/11] scsi: aic94xx: " Ahmed S. Darwish
                   ` (2 subsequent siblings)
  10 siblings, 0 replies; 200+ results
From: Ahmed S. Darwish @ 2020-12-18 20:43 UTC (permalink / raw)
  To: James E.J. Bottomley, Martin K. Petersen, Daniel Wagner,
	Jason Yan, John Garry, Artur Paszkiewicz, Jack Wang
  Cc: linux-scsi, LKML, Thomas Gleixner, Sebastian A. Siewior,
	Ahmed S. Darwish

Use the new libsas event notifiers API, which requires callers to
explicitly pass the gfp_t memory allocation flags.

Call chain analysis, pm8001_hwi.c:

  pm8001_interrupt_handler_msix() || pm8001_interrupt_handler_intx() || pm8001_tasklet()
    -> PM8001_CHIP_DISP->isr() = pm80xx_chip_isr()
      -> process_oq [spin_lock_irqsave(&pm8001_ha->lock, ...)]
        -> process_one_iomb()
          -> mpi_hw_event()
            -> hw_event_sas_phy_up()
              -> pm8001_bytes_dmaed()
            -> hw_event_sata_phy_up
              -> pm8001_bytes_dmaed()

All functions are invoked by process_one_iomb(), which is invoked by the
interrupt service routine and the tasklet handler. A similar call chain
is also found in pm80xx_hwi.c. Pass GFP_ATOMIC.

For pm8001_sas.c, pm8001_phy_control() runs in task context as it calls
wait_for_completion() and msleep().  Pass GFP_KERNEL.
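
A condensed sketch of the two modes in this driver (not a literal
excerpt; both calls appear in the hunks below):

  /* pm8001_hwi.c / pm80xx_hwi.c: ISR/tasklet path, atomic: */
  sas_ha->notify_phy_event_gfp(&phy->sas_phy, PHYE_OOB_DONE, GFP_ATOMIC);

  /* pm8001_sas.c: pm8001_phy_control() may sleep, process context: */
  sas_ha->notify_phy_event_gfp(&phy->sas_phy, PHYE_LOSS_OF_SIGNAL,
                               GFP_KERNEL);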

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Cc: Jack Wang <jinpu.wang@cloud.ionos.com>
---
 drivers/scsi/pm8001/pm8001_hwi.c | 38 ++++++++++++++++----------------
 drivers/scsi/pm8001/pm8001_sas.c |  8 +++----
 drivers/scsi/pm8001/pm80xx_hwi.c | 30 ++++++++++++-------------
 3 files changed, 38 insertions(+), 38 deletions(-)

diff --git a/drivers/scsi/pm8001/pm8001_hwi.c b/drivers/scsi/pm8001/pm8001_hwi.c
index 2b7b2954ec31..36eb5cc6f2fa 100644
--- a/drivers/scsi/pm8001/pm8001_hwi.c
+++ b/drivers/scsi/pm8001/pm8001_hwi.c
@@ -3306,7 +3306,7 @@ void pm8001_bytes_dmaed(struct pm8001_hba_info *pm8001_ha, int i)
 	PM8001_MSG_DBG(pm8001_ha, pm8001_printk("phy %d byte dmaded.\n", i));
 
 	sas_phy->frame_rcvd_size = phy->frame_rcvd_size;
-	pm8001_ha->sas->notify_port_event(sas_phy, PORTE_BYTES_DMAED);
+	pm8001_ha->sas->notify_port_event_gfp(sas_phy, PORTE_BYTES_DMAED, GFP_ATOMIC);
 }
 
 /* Get the link rate speed  */
@@ -3467,7 +3467,7 @@ hw_event_sas_phy_up(struct pm8001_hba_info *pm8001_ha, void *piomb)
 	else if (phy->identify.device_type != SAS_PHY_UNUSED)
 		phy->identify.target_port_protocols = SAS_PROTOCOL_SMP;
 	phy->sas_phy.oob_mode = SAS_OOB_MODE;
-	sas_ha->notify_phy_event(&phy->sas_phy, PHYE_OOB_DONE);
+	sas_ha->notify_phy_event_gfp(&phy->sas_phy, PHYE_OOB_DONE, GFP_ATOMIC);
 	spin_lock_irqsave(&phy->sas_phy.frame_rcvd_lock, flags);
 	memcpy(phy->frame_rcvd, &pPayload->sas_identify,
 		sizeof(struct sas_identify_frame)-4);
@@ -3512,7 +3512,7 @@ hw_event_sata_phy_up(struct pm8001_hba_info *pm8001_ha, void *piomb)
 	phy->phy_type |= PORT_TYPE_SATA;
 	phy->phy_attached = 1;
 	phy->sas_phy.oob_mode = SATA_OOB_MODE;
-	sas_ha->notify_phy_event(&phy->sas_phy, PHYE_OOB_DONE);
+	sas_ha->notify_phy_event_gfp(&phy->sas_phy, PHYE_OOB_DONE, GFP_ATOMIC);
 	spin_lock_irqsave(&phy->sas_phy.frame_rcvd_lock, flags);
 	memcpy(phy->frame_rcvd, ((u8 *)&pPayload->sata_fis - 4),
 		sizeof(struct dev_to_host_fis));
@@ -3879,12 +3879,12 @@ static int mpi_hw_event(struct pm8001_hba_info *pm8001_ha, void* piomb)
 	case HW_EVENT_SATA_SPINUP_HOLD:
 		PM8001_MSG_DBG(pm8001_ha,
 			pm8001_printk("HW_EVENT_SATA_SPINUP_HOLD\n"));
-		sas_ha->notify_phy_event(&phy->sas_phy, PHYE_SPINUP_HOLD);
+		sas_ha->notify_phy_event_gfp(&phy->sas_phy, PHYE_SPINUP_HOLD, GFP_ATOMIC);
 		break;
 	case HW_EVENT_PHY_DOWN:
 		PM8001_MSG_DBG(pm8001_ha,
 			pm8001_printk("HW_EVENT_PHY_DOWN\n"));
-		sas_ha->notify_phy_event(&phy->sas_phy, PHYE_LOSS_OF_SIGNAL);
+		sas_ha->notify_phy_event_gfp(&phy->sas_phy, PHYE_LOSS_OF_SIGNAL, GFP_ATOMIC);
 		phy->phy_attached = 0;
 		phy->phy_state = 0;
 		hw_event_phy_down(pm8001_ha, piomb);
@@ -3894,7 +3894,7 @@ static int mpi_hw_event(struct pm8001_hba_info *pm8001_ha, void* piomb)
 			pm8001_printk("HW_EVENT_PORT_INVALID\n"));
 		sas_phy_disconnected(sas_phy);
 		phy->phy_attached = 0;
-		sas_ha->notify_port_event(sas_phy, PORTE_LINK_RESET_ERR);
+		sas_ha->notify_port_event_gfp(sas_phy, PORTE_LINK_RESET_ERR, GFP_ATOMIC);
 		break;
 	/* the broadcast change primitive received, tell the LIBSAS this event
 	to revalidate the sas domain*/
@@ -3906,14 +3906,14 @@ static int mpi_hw_event(struct pm8001_hba_info *pm8001_ha, void* piomb)
 		spin_lock_irqsave(&sas_phy->sas_prim_lock, flags);
 		sas_phy->sas_prim = HW_EVENT_BROADCAST_CHANGE;
 		spin_unlock_irqrestore(&sas_phy->sas_prim_lock, flags);
-		sas_ha->notify_port_event(sas_phy, PORTE_BROADCAST_RCVD);
+		sas_ha->notify_port_event_gfp(sas_phy, PORTE_BROADCAST_RCVD, GFP_ATOMIC);
 		break;
 	case HW_EVENT_PHY_ERROR:
 		PM8001_MSG_DBG(pm8001_ha,
 			pm8001_printk("HW_EVENT_PHY_ERROR\n"));
 		sas_phy_disconnected(&phy->sas_phy);
 		phy->phy_attached = 0;
-		sas_ha->notify_phy_event(&phy->sas_phy, PHYE_OOB_ERROR);
+		sas_ha->notify_phy_event_gfp(&phy->sas_phy, PHYE_OOB_ERROR, GFP_ATOMIC);
 		break;
 	case HW_EVENT_BROADCAST_EXP:
 		PM8001_MSG_DBG(pm8001_ha,
@@ -3921,7 +3921,7 @@ static int mpi_hw_event(struct pm8001_hba_info *pm8001_ha, void* piomb)
 		spin_lock_irqsave(&sas_phy->sas_prim_lock, flags);
 		sas_phy->sas_prim = HW_EVENT_BROADCAST_EXP;
 		spin_unlock_irqrestore(&sas_phy->sas_prim_lock, flags);
-		sas_ha->notify_port_event(sas_phy, PORTE_BROADCAST_RCVD);
+		sas_ha->notify_port_event_gfp(sas_phy, PORTE_BROADCAST_RCVD, GFP_ATOMIC);
 		break;
 	case HW_EVENT_LINK_ERR_INVALID_DWORD:
 		PM8001_MSG_DBG(pm8001_ha,
@@ -3930,7 +3930,7 @@ static int mpi_hw_event(struct pm8001_hba_info *pm8001_ha, void* piomb)
 			HW_EVENT_LINK_ERR_INVALID_DWORD, port_id, phy_id, 0, 0);
 		sas_phy_disconnected(sas_phy);
 		phy->phy_attached = 0;
-		sas_ha->notify_port_event(sas_phy, PORTE_LINK_RESET_ERR);
+		sas_ha->notify_port_event_gfp(sas_phy, PORTE_LINK_RESET_ERR, GFP_ATOMIC);
 		break;
 	case HW_EVENT_LINK_ERR_DISPARITY_ERROR:
 		PM8001_MSG_DBG(pm8001_ha,
@@ -3940,7 +3940,7 @@ static int mpi_hw_event(struct pm8001_hba_info *pm8001_ha, void* piomb)
 			port_id, phy_id, 0, 0);
 		sas_phy_disconnected(sas_phy);
 		phy->phy_attached = 0;
-		sas_ha->notify_port_event(sas_phy, PORTE_LINK_RESET_ERR);
+		sas_ha->notify_port_event_gfp(sas_phy, PORTE_LINK_RESET_ERR, GFP_ATOMIC);
 		break;
 	case HW_EVENT_LINK_ERR_CODE_VIOLATION:
 		PM8001_MSG_DBG(pm8001_ha,
@@ -3950,7 +3950,7 @@ static int mpi_hw_event(struct pm8001_hba_info *pm8001_ha, void* piomb)
 			port_id, phy_id, 0, 0);
 		sas_phy_disconnected(sas_phy);
 		phy->phy_attached = 0;
-		sas_ha->notify_port_event(sas_phy, PORTE_LINK_RESET_ERR);
+		sas_ha->notify_port_event_gfp(sas_phy, PORTE_LINK_RESET_ERR, GFP_ATOMIC);
 		break;
 	case HW_EVENT_LINK_ERR_LOSS_OF_DWORD_SYNCH:
 		PM8001_MSG_DBG(pm8001_ha,
@@ -3960,7 +3960,7 @@ static int mpi_hw_event(struct pm8001_hba_info *pm8001_ha, void* piomb)
 			port_id, phy_id, 0, 0);
 		sas_phy_disconnected(sas_phy);
 		phy->phy_attached = 0;
-		sas_ha->notify_port_event(sas_phy, PORTE_LINK_RESET_ERR);
+		sas_ha->notify_port_event_gfp(sas_phy, PORTE_LINK_RESET_ERR, GFP_ATOMIC);
 		break;
 	case HW_EVENT_MALFUNCTION:
 		PM8001_MSG_DBG(pm8001_ha,
@@ -3972,7 +3972,7 @@ static int mpi_hw_event(struct pm8001_hba_info *pm8001_ha, void* piomb)
 		spin_lock_irqsave(&sas_phy->sas_prim_lock, flags);
 		sas_phy->sas_prim = HW_EVENT_BROADCAST_SES;
 		spin_unlock_irqrestore(&sas_phy->sas_prim_lock, flags);
-		sas_ha->notify_port_event(sas_phy, PORTE_BROADCAST_RCVD);
+		sas_ha->notify_port_event_gfp(sas_phy, PORTE_BROADCAST_RCVD, GFP_ATOMIC);
 		break;
 	case HW_EVENT_INBOUND_CRC_ERROR:
 		PM8001_MSG_DBG(pm8001_ha,
@@ -3984,14 +3984,14 @@ static int mpi_hw_event(struct pm8001_hba_info *pm8001_ha, void* piomb)
 	case HW_EVENT_HARD_RESET_RECEIVED:
 		PM8001_MSG_DBG(pm8001_ha,
 			pm8001_printk("HW_EVENT_HARD_RESET_RECEIVED\n"));
-		sas_ha->notify_port_event(sas_phy, PORTE_HARD_RESET);
+		sas_ha->notify_port_event_gfp(sas_phy, PORTE_HARD_RESET, GFP_ATOMIC);
 		break;
 	case HW_EVENT_ID_FRAME_TIMEOUT:
 		PM8001_MSG_DBG(pm8001_ha,
 			pm8001_printk("HW_EVENT_ID_FRAME_TIMEOUT\n"));
 		sas_phy_disconnected(sas_phy);
 		phy->phy_attached = 0;
-		sas_ha->notify_port_event(sas_phy, PORTE_LINK_RESET_ERR);
+		sas_ha->notify_port_event_gfp(sas_phy, PORTE_LINK_RESET_ERR, GFP_ATOMIC);
 		break;
 	case HW_EVENT_LINK_ERR_PHY_RESET_FAILED:
 		PM8001_MSG_DBG(pm8001_ha,
@@ -4001,21 +4001,21 @@ static int mpi_hw_event(struct pm8001_hba_info *pm8001_ha, void* piomb)
 			port_id, phy_id, 0, 0);
 		sas_phy_disconnected(sas_phy);
 		phy->phy_attached = 0;
-		sas_ha->notify_port_event(sas_phy, PORTE_LINK_RESET_ERR);
+		sas_ha->notify_port_event_gfp(sas_phy, PORTE_LINK_RESET_ERR, GFP_ATOMIC);
 		break;
 	case HW_EVENT_PORT_RESET_TIMER_TMO:
 		PM8001_MSG_DBG(pm8001_ha,
 			pm8001_printk("HW_EVENT_PORT_RESET_TIMER_TMO\n"));
 		sas_phy_disconnected(sas_phy);
 		phy->phy_attached = 0;
-		sas_ha->notify_port_event(sas_phy, PORTE_LINK_RESET_ERR);
+		sas_ha->notify_port_event_gfp(sas_phy, PORTE_LINK_RESET_ERR, GFP_ATOMIC);
 		break;
 	case HW_EVENT_PORT_RECOVERY_TIMER_TMO:
 		PM8001_MSG_DBG(pm8001_ha,
 			pm8001_printk("HW_EVENT_PORT_RECOVERY_TIMER_TMO\n"));
 		sas_phy_disconnected(sas_phy);
 		phy->phy_attached = 0;
-		sas_ha->notify_port_event(sas_phy, PORTE_LINK_RESET_ERR);
+		sas_ha->notify_port_event_gfp(sas_phy, PORTE_LINK_RESET_ERR, GFP_ATOMIC);
 		break;
 	case HW_EVENT_PORT_RECOVER:
 		PM8001_MSG_DBG(pm8001_ha,
diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c
index 9889bab7d31c..8850e62550a3 100644
--- a/drivers/scsi/pm8001/pm8001_sas.c
+++ b/drivers/scsi/pm8001/pm8001_sas.c
@@ -209,8 +209,8 @@ int pm8001_phy_control(struct asd_sas_phy *sas_phy, enum phy_func func,
 				PHY_STATE_LINK_UP_SPCV) {
 				sas_ha = pm8001_ha->sas;
 				sas_phy_disconnected(&phy->sas_phy);
-				sas_ha->notify_phy_event(&phy->sas_phy,
-					PHYE_LOSS_OF_SIGNAL);
+				sas_ha->notify_phy_event_gfp(&phy->sas_phy,
+							     PHYE_LOSS_OF_SIGNAL, GFP_KERNEL);
 				phy->phy_attached = 0;
 			}
 		} else {
@@ -218,8 +218,8 @@ int pm8001_phy_control(struct asd_sas_phy *sas_phy, enum phy_func func,
 				PHY_STATE_LINK_UP_SPC) {
 				sas_ha = pm8001_ha->sas;
 				sas_phy_disconnected(&phy->sas_phy);
-				sas_ha->notify_phy_event(&phy->sas_phy,
-					PHYE_LOSS_OF_SIGNAL);
+				sas_ha->notify_phy_event_gfp(&phy->sas_phy,
+							     PHYE_LOSS_OF_SIGNAL, GFP_KERNEL);
 				phy->phy_attached = 0;
 			}
 		}
diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c
index 7593f248afb2..a6fd08ae4402 100644
--- a/drivers/scsi/pm8001/pm80xx_hwi.c
+++ b/drivers/scsi/pm8001/pm80xx_hwi.c
@@ -3355,7 +3355,7 @@ hw_event_sas_phy_up(struct pm8001_hba_info *pm8001_ha, void *piomb)
 	else if (phy->identify.device_type != SAS_PHY_UNUSED)
 		phy->identify.target_port_protocols = SAS_PROTOCOL_SMP;
 	phy->sas_phy.oob_mode = SAS_OOB_MODE;
-	sas_ha->notify_phy_event(&phy->sas_phy, PHYE_OOB_DONE);
+	sas_ha->notify_phy_event_gfp(&phy->sas_phy, PHYE_OOB_DONE, GFP_ATOMIC);
 	spin_lock_irqsave(&phy->sas_phy.frame_rcvd_lock, flags);
 	memcpy(phy->frame_rcvd, &pPayload->sas_identify,
 		sizeof(struct sas_identify_frame)-4);
@@ -3403,7 +3403,7 @@ hw_event_sata_phy_up(struct pm8001_hba_info *pm8001_ha, void *piomb)
 	phy->phy_type |= PORT_TYPE_SATA;
 	phy->phy_attached = 1;
 	phy->sas_phy.oob_mode = SATA_OOB_MODE;
-	sas_ha->notify_phy_event(&phy->sas_phy, PHYE_OOB_DONE);
+	sas_ha->notify_phy_event_gfp(&phy->sas_phy, PHYE_OOB_DONE, GFP_ATOMIC);
 	spin_lock_irqsave(&phy->sas_phy.frame_rcvd_lock, flags);
 	memcpy(phy->frame_rcvd, ((u8 *)&pPayload->sata_fis - 4),
 		sizeof(struct dev_to_host_fis));
@@ -3489,7 +3489,7 @@ hw_event_phy_down(struct pm8001_hba_info *pm8001_ha, void *piomb)
 	if (port_sata && (portstate != PORT_IN_RESET)) {
 		struct sas_ha_struct *sas_ha = pm8001_ha->sas;
 
-		sas_ha->notify_phy_event(&phy->sas_phy, PHYE_LOSS_OF_SIGNAL);
+		sas_ha->notify_phy_event_gfp(&phy->sas_phy, PHYE_LOSS_OF_SIGNAL, GFP_ATOMIC);
 	}
 }
 
@@ -3591,7 +3591,7 @@ static int mpi_hw_event(struct pm8001_hba_info *pm8001_ha, void *piomb)
 	case HW_EVENT_SATA_SPINUP_HOLD:
 		PM8001_MSG_DBG(pm8001_ha,
 			pm8001_printk("HW_EVENT_SATA_SPINUP_HOLD\n"));
-		sas_ha->notify_phy_event(&phy->sas_phy, PHYE_SPINUP_HOLD);
+		sas_ha->notify_phy_event_gfp(&phy->sas_phy, PHYE_SPINUP_HOLD, GFP_ATOMIC);
 		break;
 	case HW_EVENT_PHY_DOWN:
 		PM8001_MSG_DBG(pm8001_ha,
@@ -3610,7 +3610,7 @@ static int mpi_hw_event(struct pm8001_hba_info *pm8001_ha, void *piomb)
 			pm8001_printk("HW_EVENT_PORT_INVALID\n"));
 		sas_phy_disconnected(sas_phy);
 		phy->phy_attached = 0;
-		sas_ha->notify_port_event(sas_phy, PORTE_LINK_RESET_ERR);
+		sas_ha->notify_port_event_gfp(sas_phy, PORTE_LINK_RESET_ERR, GFP_ATOMIC);
 		break;
 	/* the broadcast change primitive received, tell the LIBSAS this event
 	to revalidate the sas domain*/
@@ -3622,14 +3622,14 @@ static int mpi_hw_event(struct pm8001_hba_info *pm8001_ha, void *piomb)
 		spin_lock_irqsave(&sas_phy->sas_prim_lock, flags);
 		sas_phy->sas_prim = HW_EVENT_BROADCAST_CHANGE;
 		spin_unlock_irqrestore(&sas_phy->sas_prim_lock, flags);
-		sas_ha->notify_port_event(sas_phy, PORTE_BROADCAST_RCVD);
+		sas_ha->notify_port_event_gfp(sas_phy, PORTE_BROADCAST_RCVD, GFP_ATOMIC);
 		break;
 	case HW_EVENT_PHY_ERROR:
 		PM8001_MSG_DBG(pm8001_ha,
 			pm8001_printk("HW_EVENT_PHY_ERROR\n"));
 		sas_phy_disconnected(&phy->sas_phy);
 		phy->phy_attached = 0;
-		sas_ha->notify_phy_event(&phy->sas_phy, PHYE_OOB_ERROR);
+		sas_ha->notify_phy_event_gfp(&phy->sas_phy, PHYE_OOB_ERROR, GFP_ATOMIC);
 		break;
 	case HW_EVENT_BROADCAST_EXP:
 		PM8001_MSG_DBG(pm8001_ha,
@@ -3637,7 +3637,7 @@ static int mpi_hw_event(struct pm8001_hba_info *pm8001_ha, void *piomb)
 		spin_lock_irqsave(&sas_phy->sas_prim_lock, flags);
 		sas_phy->sas_prim = HW_EVENT_BROADCAST_EXP;
 		spin_unlock_irqrestore(&sas_phy->sas_prim_lock, flags);
-		sas_ha->notify_port_event(sas_phy, PORTE_BROADCAST_RCVD);
+		sas_ha->notify_port_event_gfp(sas_phy, PORTE_BROADCAST_RCVD, GFP_ATOMIC);
 		break;
 	case HW_EVENT_LINK_ERR_INVALID_DWORD:
 		PM8001_MSG_DBG(pm8001_ha,
@@ -3676,7 +3676,7 @@ static int mpi_hw_event(struct pm8001_hba_info *pm8001_ha, void *piomb)
 		spin_lock_irqsave(&sas_phy->sas_prim_lock, flags);
 		sas_phy->sas_prim = HW_EVENT_BROADCAST_SES;
 		spin_unlock_irqrestore(&sas_phy->sas_prim_lock, flags);
-		sas_ha->notify_port_event(sas_phy, PORTE_BROADCAST_RCVD);
+		sas_ha->notify_port_event_gfp(sas_phy, PORTE_BROADCAST_RCVD, GFP_ATOMIC);
 		break;
 	case HW_EVENT_INBOUND_CRC_ERROR:
 		PM8001_MSG_DBG(pm8001_ha,
@@ -3688,14 +3688,14 @@ static int mpi_hw_event(struct pm8001_hba_info *pm8001_ha, void *piomb)
 	case HW_EVENT_HARD_RESET_RECEIVED:
 		PM8001_MSG_DBG(pm8001_ha,
 			pm8001_printk("HW_EVENT_HARD_RESET_RECEIVED\n"));
-		sas_ha->notify_port_event(sas_phy, PORTE_HARD_RESET);
+		sas_ha->notify_port_event_gfp(sas_phy, PORTE_HARD_RESET, GFP_ATOMIC);
 		break;
 	case HW_EVENT_ID_FRAME_TIMEOUT:
 		PM8001_MSG_DBG(pm8001_ha,
 			pm8001_printk("HW_EVENT_ID_FRAME_TIMEOUT\n"));
 		sas_phy_disconnected(sas_phy);
 		phy->phy_attached = 0;
-		sas_ha->notify_port_event(sas_phy, PORTE_LINK_RESET_ERR);
+		sas_ha->notify_port_event_gfp(sas_phy, PORTE_LINK_RESET_ERR, GFP_ATOMIC);
 		break;
 	case HW_EVENT_LINK_ERR_PHY_RESET_FAILED:
 		PM8001_MSG_DBG(pm8001_ha,
@@ -3705,7 +3705,7 @@ static int mpi_hw_event(struct pm8001_hba_info *pm8001_ha, void *piomb)
 			port_id, phy_id, 0, 0);
 		sas_phy_disconnected(sas_phy);
 		phy->phy_attached = 0;
-		sas_ha->notify_port_event(sas_phy, PORTE_LINK_RESET_ERR);
+		sas_ha->notify_port_event_gfp(sas_phy, PORTE_LINK_RESET_ERR, GFP_ATOMIC);
 		break;
 	case HW_EVENT_PORT_RESET_TIMER_TMO:
 		PM8001_MSG_DBG(pm8001_ha,
@@ -3714,7 +3714,7 @@ static int mpi_hw_event(struct pm8001_hba_info *pm8001_ha, void *piomb)
 			port_id, phy_id, 0, 0);
 		sas_phy_disconnected(sas_phy);
 		phy->phy_attached = 0;
-		sas_ha->notify_port_event(sas_phy, PORTE_LINK_RESET_ERR);
+		sas_ha->notify_port_event_gfp(sas_phy, PORTE_LINK_RESET_ERR, GFP_ATOMIC);
 		if (pm8001_ha->phy[phy_id].reset_completion) {
 			pm8001_ha->phy[phy_id].port_reset_status =
 					PORT_RESET_TMO;
@@ -3731,8 +3731,8 @@ static int mpi_hw_event(struct pm8001_hba_info *pm8001_ha, void *piomb)
 		for (i = 0; i < pm8001_ha->chip->n_phy; i++) {
 			if (port->wide_port_phymap & (1 << i)) {
 				phy = &pm8001_ha->phy[i];
-				sas_ha->notify_phy_event(&phy->sas_phy,
-						PHYE_LOSS_OF_SIGNAL);
+				sas_ha->notify_phy_event_gfp(&phy->sas_phy,
+							     PHYE_LOSS_OF_SIGNAL, GFP_ATOMIC);
 				port->wide_port_phymap &= ~(1 << i);
 			}
 		}
-- 
2.29.2


^ permalink raw reply	[relevance 58%]

* [PATCH 10/11] scsi: hisi_sas: Pass gfp_t flags to libsas event notifiers
  2020-12-18 20:43 88% [PATCH 00/11] scsi: libsas: Remove in_interrupt() check Ahmed S. Darwish
                   ` (8 preceding siblings ...)
  2020-12-18 20:43 85% ` [PATCH 09/11] scsi: aic94xx: " Ahmed S. Darwish
@ 2020-12-18 20:43 70% ` Ahmed S. Darwish
  2020-12-18 20:43 75% ` [PATCH 11/11] scsi: libsas: event notifiers: Remove non _gfp() variants Ahmed S. Darwish
  10 siblings, 0 replies; 200+ results
From: Ahmed S. Darwish @ 2020-12-18 20:43 UTC (permalink / raw)
  To: James E.J. Bottomley, Martin K. Petersen, Daniel Wagner,
	Jason Yan, John Garry, Artur Paszkiewicz, Jack Wang
  Cc: linux-scsi, LKML, Thomas Gleixner, Sebastian A. Siewior,
	Ahmed S. Darwish

Use the new libsas event notifiers API, which requires callers to
explicitly pass the gfp_t memory allocation flags.

Below is the context analysis for the modified functions:

=> hisi_sas_bytes_dmaed():

Since it is invoked from both process and atomic contexts, let its
callers pass the gfp_t flags:

  * hisi_sas_main.c:
  ------------------

    hisi_sas_phyup_work(): workqueue context
      -> hisi_sas_bytes_dmaed(..., GFP_KERNEL)

    hisi_sas_controller_reset_done(): has an msleep()
      -> hisi_sas_rescan_topology()
        -> hisi_sas_phy_down()
          -> hisi_sas_bytes_dmaed(..., GFP_KERNEL)

    hisi_sas_debug_I_T_nexus_reset(): calls wait_for_completion_timeout()
      -> hisi_sas_phy_down()
        -> hisi_sas_bytes_dmaed(..., GFP_KERNEL)

  * hisi_sas_v1_hw.c:
  -------------------

    int_abnormal_v1_hw(): irq handler
      -> hisi_sas_phy_down()
        -> hisi_sas_bytes_dmaed(..., GFP_ATOMIC)

  * hisi_sas_v[23]_hw.c:
  ----------------------

    int_phy_updown_v[23]_hw(): irq handler
      -> phy_down_v[23]_hw()
        -> hisi_sas_phy_down()
          -> hisi_sas_bytes_dmaed(..., GFP_ATOMIC)

=> int_bcast_v1_hw() and phy_bcast_v3_hw():

Both are invoked exclusively from irq handlers. Pass GFP_ATOMIC.
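
As a rough sketch, the conversion follows the usual "the caller knows
its context" plumbing. All my_*/report_event() names below are
hypothetical, for illustration only; the real hunks follow:

  /* Leaf path: no context guessing, the flags come from the caller. */
  static void report_event(struct my_priv *priv, gfp_t gfp_flags)
  {
  	struct my_event *ev = kzalloc(sizeof(*ev), gfp_flags);

  	if (ev)
  		my_queue_event(priv, ev);	/* hypothetical helper */
  }

  /* Workqueue handler: process context, the allocation may sleep. */
  static void my_phyup_work(struct work_struct *work)
  {
  	report_event(container_of(work, struct my_priv, phyup_work),
  		     GFP_KERNEL);
  }

  /* Hard irq handler: atomic context, the allocation must not sleep. */
  static irqreturn_t my_irq_handler(int irq, void *data)
  {
  	report_event(data, GFP_ATOMIC);
  	return IRQ_HANDLED;
  }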

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Cc: John Garry <john.garry@huawei.com>
---
 drivers/scsi/hisi_sas/hisi_sas.h       |  3 ++-
 drivers/scsi/hisi_sas/hisi_sas_main.c  | 26 +++++++++++++++-----------
 drivers/scsi/hisi_sas/hisi_sas_v1_hw.c |  5 +++--
 drivers/scsi/hisi_sas/hisi_sas_v2_hw.c |  5 +++--
 drivers/scsi/hisi_sas/hisi_sas_v3_hw.c |  5 +++--
 5 files changed, 26 insertions(+), 18 deletions(-)

diff --git a/drivers/scsi/hisi_sas/hisi_sas.h b/drivers/scsi/hisi_sas/hisi_sas.h
index a25cfc11c96d..e08c71bf607d 100644
--- a/drivers/scsi/hisi_sas/hisi_sas.h
+++ b/drivers/scsi/hisi_sas/hisi_sas.h
@@ -658,7 +658,8 @@ extern void hisi_sas_scan_start(struct Scsi_Host *shost);
 extern int hisi_sas_host_reset(struct Scsi_Host *shost, int reset_type);
 extern void hisi_sas_phy_enable(struct hisi_hba *hisi_hba, int phy_no,
 				int enable);
-extern void hisi_sas_phy_down(struct hisi_hba *hisi_hba, int phy_no, int rdy);
+extern void hisi_sas_phy_down(struct hisi_hba *hisi_hba, int phy_no, int rdy,
+			      gfp_t gfp_flags);
 extern void hisi_sas_slot_task_free(struct hisi_hba *hisi_hba,
 				    struct sas_task *task,
 				    struct hisi_sas_slot *slot);
diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c
index 274ccf18ce2d..f9332f62739b 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_main.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_main.c
@@ -618,7 +618,8 @@ static int hisi_sas_task_exec(struct sas_task *task, gfp_t gfp_flags,
 	return rc;
 }
 
-static void hisi_sas_bytes_dmaed(struct hisi_hba *hisi_hba, int phy_no)
+static void hisi_sas_bytes_dmaed(struct hisi_hba *hisi_hba, int phy_no,
+				 gfp_t gfp_flags)
 {
 	struct hisi_sas_phy *phy = &hisi_hba->phy[phy_no];
 	struct asd_sas_phy *sas_phy = &phy->sas_phy;
@@ -634,7 +635,7 @@ static void hisi_sas_bytes_dmaed(struct hisi_hba *hisi_hba, int phy_no)
 	}
 
 	sas_ha = &hisi_hba->sha;
-	sas_ha->notify_phy_event(sas_phy, PHYE_OOB_DONE);
+	sas_ha->notify_phy_event_gfp(sas_phy, PHYE_OOB_DONE, gfp_flags);
 
 	if (sas_phy->phy) {
 		struct sas_phy *sphy = sas_phy->phy;
@@ -662,7 +663,7 @@ static void hisi_sas_bytes_dmaed(struct hisi_hba *hisi_hba, int phy_no)
 	}
 
 	sas_phy->frame_rcvd_size = phy->frame_rcvd_size;
-	sas_ha->notify_port_event(sas_phy, PORTE_BYTES_DMAED);
+	sas_ha->notify_port_event_gfp(sas_phy, PORTE_BYTES_DMAED, gfp_flags);
 }
 
 static struct hisi_sas_device *hisi_sas_alloc_dev(struct domain_device *device)
@@ -868,7 +869,7 @@ static void hisi_sas_phyup_work(struct work_struct *work)
 
 	if (phy->identify.target_port_protocols == SAS_PROTOCOL_SSP)
 		hisi_hba->hw->sl_notify_ssp(hisi_hba, phy_no);
-	hisi_sas_bytes_dmaed(hisi_hba, phy_no);
+	hisi_sas_bytes_dmaed(hisi_hba, phy_no, GFP_KERNEL);
 }
 
 static void hisi_sas_linkreset_work(struct work_struct *work)
@@ -1438,11 +1439,12 @@ static void hisi_sas_rescan_topology(struct hisi_hba *hisi_hba, u32 state)
 				_sas_port = sas_port;
 
 				if (dev_is_expander(dev->dev_type))
-					sas_ha->notify_port_event(sas_phy,
-							PORTE_BROADCAST_RCVD);
+					sas_ha->notify_port_event_gfp(sas_phy,
+								      PORTE_BROADCAST_RCVD,
+								      GFP_KERNEL);
 			}
 		} else {
-			hisi_sas_phy_down(hisi_hba, phy_no, 0);
+			hisi_sas_phy_down(hisi_hba, phy_no, 0, GFP_KERNEL);
 		}
 	}
 }
@@ -1796,7 +1798,7 @@ static int hisi_sas_debug_I_T_nexus_reset(struct domain_device *device)
 
 		/* report PHY down if timed out */
 		if (!ret)
-			hisi_sas_phy_down(hisi_hba, sas_phy->id, 0);
+			hisi_sas_phy_down(hisi_hba, sas_phy->id, 0, GFP_KERNEL);
 	} else if (sas_dev->dev_status != HISI_SAS_DEV_INIT) {
 		/*
 		 * If in init state, we rely on caller to wait for link to be
@@ -2196,7 +2198,8 @@ static void hisi_sas_phy_disconnected(struct hisi_sas_phy *phy)
 	spin_unlock_irqrestore(&phy->lock, flags);
 }
 
-void hisi_sas_phy_down(struct hisi_hba *hisi_hba, int phy_no, int rdy)
+void hisi_sas_phy_down(struct hisi_hba *hisi_hba, int phy_no, int rdy,
+		       gfp_t gfp_flags)
 {
 	struct hisi_sas_phy *phy = &hisi_hba->phy[phy_no];
 	struct asd_sas_phy *sas_phy = &phy->sas_phy;
@@ -2205,7 +2208,7 @@ void hisi_sas_phy_down(struct hisi_hba *hisi_hba, int phy_no, int rdy)
 
 	if (rdy) {
 		/* Phy down but ready */
-		hisi_sas_bytes_dmaed(hisi_hba, phy_no);
+		hisi_sas_bytes_dmaed(hisi_hba, phy_no, gfp_flags);
 		hisi_sas_port_notify_formed(sas_phy);
 	} else {
 		struct hisi_sas_port *port  = phy->port;
@@ -2216,7 +2219,8 @@ void hisi_sas_phy_down(struct hisi_hba *hisi_hba, int phy_no, int rdy)
 			return;
 		}
 		/* Phy down and not ready */
-		sas_ha->notify_phy_event(sas_phy, PHYE_LOSS_OF_SIGNAL);
+		sas_ha->notify_phy_event_gfp(sas_phy, PHYE_LOSS_OF_SIGNAL,
+					     gfp_flags);
 		sas_phy_disconnected(sas_phy);
 
 		if (port) {
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c
index 45e866cb9164..56b2ca5544e1 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c
@@ -1424,7 +1424,7 @@ static irqreturn_t int_bcast_v1_hw(int irq, void *p)
 	}
 
 	if (!test_bit(HISI_SAS_RESET_BIT, &hisi_hba->flags))
-		sha->notify_port_event(sas_phy, PORTE_BROADCAST_RCVD);
+		sha->notify_port_event_gfp(sas_phy, PORTE_BROADCAST_RCVD, GFP_ATOMIC);
 
 end:
 	hisi_sas_phy_write32(hisi_hba, phy_no, CHL_INT2,
@@ -1453,7 +1453,8 @@ static irqreturn_t int_abnormal_v1_hw(int irq, void *p)
 		u32 phy_state = hisi_sas_read32(hisi_hba, PHY_STATE);
 
 		hisi_sas_phy_down(hisi_hba, phy_no,
-				  (phy_state & 1 << phy_no) ? 1 : 0);
+				  (phy_state & 1 << phy_no) ? 1 : 0,
+				  GFP_ATOMIC);
 	}
 
 	if (irq_value & CHL_INT0_ID_TIMEOUT_MSK)
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c
index b57177b52fac..a8cc89abcc6b 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c
@@ -2734,7 +2734,8 @@ static int phy_down_v2_hw(int phy_no, struct hisi_hba *hisi_hba)
 
 	phy_state = hisi_sas_read32(hisi_hba, PHY_STATE);
 	dev_info(dev, "phydown: phy%d phy_state=0x%x\n", phy_no, phy_state);
-	hisi_sas_phy_down(hisi_hba, phy_no, (phy_state & 1 << phy_no) ? 1 : 0);
+	hisi_sas_phy_down(hisi_hba, phy_no, (phy_state & 1 << phy_no) ? 1 : 0,
+			  GFP_ATOMIC);
 
 	sl_ctrl = hisi_sas_phy_read32(hisi_hba, phy_no, SL_CONTROL);
 	hisi_sas_phy_write32(hisi_hba, phy_no, SL_CONTROL,
@@ -2825,7 +2826,7 @@ static void phy_bcast_v2_hw(int phy_no, struct hisi_hba *hisi_hba)
 	bcast_status = hisi_sas_phy_read32(hisi_hba, phy_no, RX_PRIMS_STATUS);
 	if ((bcast_status & RX_BCAST_CHG_MSK) &&
 	    !test_bit(HISI_SAS_RESET_BIT, &hisi_hba->flags))
-		sas_ha->notify_port_event(sas_phy, PORTE_BROADCAST_RCVD);
+		sas_ha->notify_port_event_gfp(sas_phy, PORTE_BROADCAST_RCVD, GFP_ATOMIC);
 	hisi_sas_phy_write32(hisi_hba, phy_no, CHL_INT0,
 			     CHL_INT0_SL_RX_BCST_ACK_MSK);
 	hisi_sas_phy_write32(hisi_hba, phy_no, SL_RX_BCAST_CHK_MSK, 0);
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
index 960de375ce69..efba79252ddc 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
@@ -1578,7 +1578,8 @@ static irqreturn_t phy_down_v3_hw(int phy_no, struct hisi_hba *hisi_hba)
 
 	phy_state = hisi_sas_read32(hisi_hba, PHY_STATE);
 	dev_info(dev, "phydown: phy%d phy_state=0x%x\n", phy_no, phy_state);
-	hisi_sas_phy_down(hisi_hba, phy_no, (phy_state & 1 << phy_no) ? 1 : 0);
+	hisi_sas_phy_down(hisi_hba, phy_no, (phy_state & 1 << phy_no) ? 1 : 0,
+			  GFP_ATOMIC);
 
 	sl_ctrl = hisi_sas_phy_read32(hisi_hba, phy_no, SL_CONTROL);
 	hisi_sas_phy_write32(hisi_hba, phy_no, SL_CONTROL,
@@ -1605,7 +1606,7 @@ static irqreturn_t phy_bcast_v3_hw(int phy_no, struct hisi_hba *hisi_hba)
 	bcast_status = hisi_sas_phy_read32(hisi_hba, phy_no, RX_PRIMS_STATUS);
 	if ((bcast_status & RX_BCAST_CHG_MSK) &&
 	    !test_bit(HISI_SAS_RESET_BIT, &hisi_hba->flags))
-		sas_ha->notify_port_event(sas_phy, PORTE_BROADCAST_RCVD);
+		sas_ha->notify_port_event_gfp(sas_phy, PORTE_BROADCAST_RCVD, GFP_ATOMIC);
 	hisi_sas_phy_write32(hisi_hba, phy_no, CHL_INT0,
 			     CHL_INT0_SL_RX_BCST_ACK_MSK);
 	hisi_sas_phy_write32(hisi_hba, phy_no, SL_RX_BCAST_CHK_MSK, 0);
-- 
2.29.2


^ permalink raw reply	[relevance 70%]

* [PATCH 11/11] scsi: libsas: event notifiers: Remove non _gfp() variants
  2020-12-18 20:43 88% [PATCH 00/11] scsi: libsas: Remove in_interrupt() check Ahmed S. Darwish
                   ` (9 preceding siblings ...)
  2020-12-18 20:43 70% ` [PATCH 10/11] scsi: hisi_sas: " Ahmed S. Darwish
@ 2020-12-18 20:43 75% ` Ahmed S. Darwish
    10 siblings, 1 reply; 200+ results
From: Ahmed S. Darwish @ 2020-12-18 20:43 UTC (permalink / raw)
  To: James E.J. Bottomley, Martin K. Petersen, Daniel Wagner,
	Jason Yan, John Garry, Artur Paszkiewicz, Jack Wang
  Cc: linux-scsi, LKML, Thomas Gleixner, Sebastian A. Siewior,
	Ahmed S. Darwish

All call sites of the libsas APIs below:

  - sas_alloc_event()
  - sas_ha_struct::notify_port_event()
  - sas_ha_struct::notify_phy_event()

have been converted to use the new _gfp()-suffixed version.

Remove the old APIs from libsas code, headers, and documentation.
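
(For context: the removed sas_alloc_event() inferred the allocation
mode through the in_interrupt() heuristic deleted below,

	return __sas_alloc_event(phy, in_interrupt() ? GFP_ATOMIC : GFP_KERNEL);

but in_interrupt() only reports hardirq/softirq context. It cannot see
atomic sections entered via spinlocks or disabled preemption, so only
the callers can reliably choose between GFP_KERNEL and GFP_ATOMIC.)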

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Cc: John Garry <john.garry@huawei.com>
Cc: Jason Yan <yanaijie@huawei.com>
---
 Documentation/scsi/libsas.rst      |  2 -
 drivers/scsi/libsas/sas_event.c    | 72 +++++++-----------------------
 drivers/scsi/libsas/sas_init.c     | 15 +------
 drivers/scsi/libsas/sas_internal.h |  2 -
 include/scsi/libsas.h              |  2 -
 5 files changed, 18 insertions(+), 75 deletions(-)

diff --git a/Documentation/scsi/libsas.rst b/Documentation/scsi/libsas.rst
index dc85d0e4c107..7e1bf710760b 100644
--- a/Documentation/scsi/libsas.rst
+++ b/Documentation/scsi/libsas.rst
@@ -189,8 +189,6 @@ num_phys
 The event interface::
 
 	/* LLDD calls these to notify the class of an event. */
-	void (*notify_port_event)(struct sas_phy *, enum port_event);
-	void (*notify_phy_event)(struct sas_phy *, enum phy_event);
 	void (*notify_port_event_gfp)(struct sas_phy *, enum port_event, gfp_t);
 	void (*notify_phy_event_gfp)(struct sas_phy *, enum phy_event, gfp_t);
 
diff --git a/drivers/scsi/libsas/sas_event.c b/drivers/scsi/libsas/sas_event.c
index 31b733eeabf6..23aeb67f6381 100644
--- a/drivers/scsi/libsas/sas_event.c
+++ b/drivers/scsi/libsas/sas_event.c
@@ -131,57 +131,21 @@ static void sas_phy_event_worker(struct work_struct *work)
 	sas_free_event(ev);
 }
 
-static int __sas_notify_port_event(struct asd_sas_phy *phy, enum port_event event,
-				   struct asd_sas_event *ev)
-{
-	struct sas_ha_struct *ha = phy->ha;
-	int ret;
-
-	BUG_ON(event >= PORT_NUM_EVENTS);
-
-	INIT_SAS_EVENT(ev, sas_port_event_worker, phy, event);
-
-	ret = sas_queue_event(event, &ev->work, ha);
-	if (ret != 1)
-		sas_free_event(ev);
-
-	return ret;
-}
-
 static int sas_notify_port_event_gfp(struct asd_sas_phy *phy,
 				     enum port_event event,
 				     gfp_t gfp_flags)
-{
-	struct asd_sas_event *ev;
-
-	ev = sas_alloc_event_gfp(phy, gfp_flags);
-	if (!ev)
-		return -ENOMEM;
-
-	return __sas_notify_port_event(phy, event, ev);
-}
-
-static int sas_notify_port_event(struct asd_sas_phy *phy, enum port_event event)
-{
-	struct asd_sas_event *ev;
-
-	ev = sas_alloc_event(phy);
-	if (!ev)
-		return -ENOMEM;
-
-	return __sas_notify_port_event(phy, event, ev);
-}
-
-static inline int __sas_notify_phy_event(struct asd_sas_phy *phy,
-					 enum phy_event event,
-					 struct asd_sas_event *ev)
 {
 	struct sas_ha_struct *ha = phy->ha;
+	struct asd_sas_event *ev;
 	int ret;
 
-	BUG_ON(event >= PHY_NUM_EVENTS);
+	BUG_ON(event >= PORT_NUM_EVENTS);
 
-	INIT_SAS_EVENT(ev, sas_phy_event_worker, phy, event);
+	ev = sas_alloc_event_gfp(phy, gfp_flags);
+	if (!ev)
+		return -ENOMEM;
+
+	INIT_SAS_EVENT(ev, sas_port_event_worker, phy, event);
 
 	ret = sas_queue_event(event, &ev->work, ha);
 	if (ret != 1)
@@ -193,31 +157,27 @@ static inline int __sas_notify_phy_event(struct asd_sas_phy *phy,
 int sas_notify_phy_event_gfp(struct asd_sas_phy *phy, enum phy_event event,
 			     gfp_t gfp_flags)
 {
+	struct sas_ha_struct *ha = phy->ha;
 	struct asd_sas_event *ev;
+	int ret;
+
+	BUG_ON(event >= PHY_NUM_EVENTS);
 
 	ev = sas_alloc_event_gfp(phy, gfp_flags);
 	if (!ev)
 		return -ENOMEM;
 
-	return __sas_notify_phy_event(phy, event, ev);
-}
+	INIT_SAS_EVENT(ev, sas_phy_event_worker, phy, event);
 
-int sas_notify_phy_event(struct asd_sas_phy *phy, enum phy_event event)
-{
-	struct asd_sas_event *ev;
+	ret = sas_queue_event(event, &ev->work, ha);
+	if (ret != 1)
+		sas_free_event(ev);
 
-	ev = sas_alloc_event(phy);
-	if (!ev)
-		return -ENOMEM;
-
-	return __sas_notify_phy_event(phy, event, ev);
+	return ret;
 }
 
 int sas_init_events(struct sas_ha_struct *sas_ha)
 {
-	sas_ha->notify_port_event = sas_notify_port_event;
-	sas_ha->notify_phy_event = sas_notify_phy_event;
-
 	sas_ha->notify_port_event_gfp = sas_notify_port_event_gfp;
 	sas_ha->notify_phy_event_gfp = sas_notify_phy_event_gfp;
 
diff --git a/drivers/scsi/libsas/sas_init.c b/drivers/scsi/libsas/sas_init.c
index 2d2116f827c6..e08351f909fb 100644
--- a/drivers/scsi/libsas/sas_init.c
+++ b/drivers/scsi/libsas/sas_init.c
@@ -590,8 +590,8 @@ sas_domain_attach_transport(struct sas_domain_function_template *dft)
 }
 EXPORT_SYMBOL_GPL(sas_domain_attach_transport);
 
-static struct asd_sas_event * __sas_alloc_event(struct asd_sas_phy *phy,
-						gfp_t gfp_flags)
+struct asd_sas_event *sas_alloc_event_gfp(struct asd_sas_phy *phy,
+					  gfp_t gfp_flags)
 {
 	struct asd_sas_event *event;
 	struct sas_ha_struct *sas_ha = phy->ha;
@@ -623,17 +623,6 @@ static struct asd_sas_event * __sas_alloc_event(struct asd_sas_phy *phy,
 	return event;
 }
 
-struct asd_sas_event *sas_alloc_event(struct asd_sas_phy *phy)
-{
-	return __sas_alloc_event(phy, in_interrupt() ? GFP_ATOMIC : GFP_KERNEL);
-}
-
-struct asd_sas_event *sas_alloc_event_gfp(struct asd_sas_phy *phy,
-					  gfp_t gfp_flags)
-{
-	return __sas_alloc_event(phy, gfp_flags);
-}
-
 void sas_free_event(struct asd_sas_event *event)
 {
 	struct asd_sas_phy *phy = event->phy;
diff --git a/drivers/scsi/libsas/sas_internal.h b/drivers/scsi/libsas/sas_internal.h
index 437a697b6f73..b0422d47675b 100644
--- a/drivers/scsi/libsas/sas_internal.h
+++ b/drivers/scsi/libsas/sas_internal.h
@@ -48,7 +48,6 @@ int sas_show_oob_mode(enum sas_oob_mode oob_mode, char *buf);
 int  sas_register_phys(struct sas_ha_struct *sas_ha);
 void sas_unregister_phys(struct sas_ha_struct *sas_ha);
 
-struct asd_sas_event *sas_alloc_event(struct asd_sas_phy *phy);
 struct asd_sas_event *sas_alloc_event_gfp(struct asd_sas_phy *phy, gfp_t gfp_flags);
 void sas_free_event(struct asd_sas_event *event);
 
@@ -78,7 +77,6 @@ int sas_smp_phy_control(struct domain_device *dev, int phy_id,
 			enum phy_func phy_func, struct sas_phy_linkrates *);
 int sas_smp_get_phy_events(struct sas_phy *phy);
 
-int sas_notify_phy_event(struct asd_sas_phy *phy, enum phy_event event);
 int sas_notify_phy_event_gfp(struct asd_sas_phy *phy, enum phy_event event, gfp_t flags);
 void sas_device_set_phy(struct domain_device *dev, struct sas_port *port);
 struct domain_device *sas_find_dev_by_rphy(struct sas_rphy *rphy);
diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
index f7c2530bbd9d..fdd338fa65c9 100644
--- a/include/scsi/libsas.h
+++ b/include/scsi/libsas.h
@@ -392,8 +392,6 @@ struct sas_ha_struct {
 				* their siblings when forming wide ports */
 
 	/* LLDD calls these to notify the class of an event. */
-	int (*notify_port_event)(struct asd_sas_phy *, enum port_event);
-	int (*notify_phy_event)(struct asd_sas_phy *, enum phy_event);
 	int (*notify_port_event_gfp)(struct asd_sas_phy *, enum port_event, gfp_t);
 	int (*notify_phy_event_gfp)(struct asd_sas_phy *, enum phy_event, gfp_t);
 
-- 
2.29.2


^ permalink raw reply	[relevance 75%]

* [PATCH 09/11] scsi: aic94xx: Pass gfp_t flags to libsas event notifiers
  2020-12-18 20:43 88% [PATCH 00/11] scsi: libsas: Remove in_interrupt() check Ahmed S. Darwish
                   ` (7 preceding siblings ...)
  2020-12-18 20:43 58% ` [PATCH 08/11] scsi: pm80xx: Pass gfp_t flags to libsas " Ahmed S. Darwish
@ 2020-12-18 20:43 85% ` Ahmed S. Darwish
  2020-12-18 20:43 70% ` [PATCH 10/11] scsi: hisi_sas: " Ahmed S. Darwish
  2020-12-18 20:43 75% ` [PATCH 11/11] scsi: libsas: event notifiers: Remove non _gfp() variants Ahmed S. Darwish
  10 siblings, 0 replies; 200+ results
From: Ahmed S. Darwish @ 2020-12-18 20:43 UTC (permalink / raw)
  To: James E.J. Bottomley, Martin K. Petersen, Daniel Wagner,
	Jason Yan, John Garry, Artur Paszkiewicz, Jack Wang
  Cc: linux-scsi, LKML, Thomas Gleixner, Sebastian A. Siewior,
	Ahmed S. Darwish

Use the new libsas event notifiers API, which requires callers to
explicitly pass the gfp_t memory allocation flags.

Context analysis:

  aic94xx_hwi.c: asd_dl_tasklet_handler()
    -> asd_ascb::tasklet_complete()
    == escb_tasklet_complete()
      -> aic94xx_scb.c: asd_phy_event_tasklet()
      -> aic94xx_scb.c: asd_bytes_dmaed_tasklet()
      -> aic94xx_scb.c: asd_link_reset_err_tasklet()
      -> aic94xx_scb.c: asd_primitive_rcvd_tasklet()

All functions are invoked by escb_tasklet_complete(), which is invoked
by the tasklet handler. Pass GFP_ATOMIC.
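
(Tasklets execute in softirq context, where sleeping is forbidden, so
GFP_ATOMIC is the only safe choice on all of these paths.)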

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
---
 drivers/scsi/aic94xx/aic94xx_scb.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/scsi/aic94xx/aic94xx_scb.c b/drivers/scsi/aic94xx/aic94xx_scb.c
index e2d880a5f391..9ca60da59865 100644
--- a/drivers/scsi/aic94xx/aic94xx_scb.c
+++ b/drivers/scsi/aic94xx/aic94xx_scb.c
@@ -81,7 +81,7 @@ static void asd_phy_event_tasklet(struct asd_ascb *ascb,
 		ASD_DPRINTK("phy%d: device unplugged\n", phy_id);
 		asd_turn_led(asd_ha, phy_id, 0);
 		sas_phy_disconnected(&phy->sas_phy);
-		sas_ha->notify_phy_event(&phy->sas_phy, PHYE_LOSS_OF_SIGNAL);
+		sas_ha->notify_phy_event_gfp(&phy->sas_phy, PHYE_LOSS_OF_SIGNAL, GFP_ATOMIC);
 		break;
 	case CURRENT_OOB_DONE:
 		/* hot plugged device */
@@ -89,12 +89,12 @@ static void asd_phy_event_tasklet(struct asd_ascb *ascb,
 		get_lrate_mode(phy, oob_mode);
 		ASD_DPRINTK("phy%d device plugged: lrate:0x%x, proto:0x%x\n",
 			    phy_id, phy->sas_phy.linkrate, phy->sas_phy.iproto);
-		sas_ha->notify_phy_event(&phy->sas_phy, PHYE_OOB_DONE);
+		sas_ha->notify_phy_event_gfp(&phy->sas_phy, PHYE_OOB_DONE, GFP_ATOMIC);
 		break;
 	case CURRENT_SPINUP_HOLD:
 		/* hot plug SATA, no COMWAKE sent */
 		asd_turn_led(asd_ha, phy_id, 1);
-		sas_ha->notify_phy_event(&phy->sas_phy, PHYE_SPINUP_HOLD);
+		sas_ha->notify_phy_event_gfp(&phy->sas_phy, PHYE_SPINUP_HOLD, GFP_ATOMIC);
 		break;
 	case CURRENT_GTO_TIMEOUT:
 	case CURRENT_OOB_ERROR:
@@ -102,7 +102,7 @@ static void asd_phy_event_tasklet(struct asd_ascb *ascb,
 			    dl->status_block[1]);
 		asd_turn_led(asd_ha, phy_id, 0);
 		sas_phy_disconnected(&phy->sas_phy);
-		sas_ha->notify_phy_event(&phy->sas_phy, PHYE_OOB_ERROR);
+		sas_ha->notify_phy_event_gfp(&phy->sas_phy, PHYE_OOB_ERROR, GFP_ATOMIC);
 		break;
 	}
 }
@@ -234,7 +234,7 @@ static void asd_bytes_dmaed_tasklet(struct asd_ascb *ascb,
 	spin_unlock_irqrestore(&phy->sas_phy.frame_rcvd_lock, flags);
 	asd_dump_frame_rcvd(phy, dl);
 	asd_form_port(ascb->ha, phy);
-	sas_ha->notify_port_event(&phy->sas_phy, PORTE_BYTES_DMAED);
+	sas_ha->notify_port_event_gfp(&phy->sas_phy, PORTE_BYTES_DMAED, GFP_ATOMIC);
 }
 
 static void asd_link_reset_err_tasklet(struct asd_ascb *ascb,
@@ -270,7 +270,7 @@ static void asd_link_reset_err_tasklet(struct asd_ascb *ascb,
 	asd_turn_led(asd_ha, phy_id, 0);
 	sas_phy_disconnected(sas_phy);
 	asd_deform_port(asd_ha, phy);
-	sas_ha->notify_port_event(sas_phy, PORTE_LINK_RESET_ERR);
+	sas_ha->notify_port_event_gfp(sas_phy, PORTE_LINK_RESET_ERR, GFP_ATOMIC);
 
 	if (retries_left == 0) {
 		int num = 1;
@@ -315,7 +315,7 @@ static void asd_primitive_rcvd_tasklet(struct asd_ascb *ascb,
 			spin_lock_irqsave(&sas_phy->sas_prim_lock, flags);
 			sas_phy->sas_prim = ffs(cont);
 			spin_unlock_irqrestore(&sas_phy->sas_prim_lock, flags);
-			sas_ha->notify_port_event(sas_phy,PORTE_BROADCAST_RCVD);
+			sas_ha->notify_port_event_gfp(sas_phy, PORTE_BROADCAST_RCVD, GFP_ATOMIC);
 			break;
 
 		case LmUNKNOWNP:
@@ -336,7 +336,7 @@ static void asd_primitive_rcvd_tasklet(struct asd_ascb *ascb,
 			/* The sequencer disables all phys on that port.
 			 * We have to re-enable the phys ourselves. */
 			asd_deform_port(asd_ha, phy);
-			sas_ha->notify_port_event(sas_phy, PORTE_HARD_RESET);
+			sas_ha->notify_port_event_gfp(sas_phy, PORTE_HARD_RESET, GFP_ATOMIC);
 			break;
 
 		default:
@@ -567,7 +567,7 @@ static void escb_tasklet_complete(struct asd_ascb *ascb,
 		/* the device is gone */
 		sas_phy_disconnected(sas_phy);
 		asd_deform_port(asd_ha, phy);
-		sas_ha->notify_port_event(sas_phy, PORTE_TIMER_EVENT);
+		sas_ha->notify_port_event_gfp(sas_phy, PORTE_TIMER_EVENT, GFP_ATOMIC);
 		break;
 	default:
 		ASD_DPRINTK("%s: phy%d: unknown event:0x%x\n", __func__,
-- 
2.29.2


^ permalink raw reply	[relevance 85%]

* Re: [PATCH 11/11] scsi: libsas: event notifiers: Remove non _gfp() variants
  @ 2020-12-21 17:38 99%     ` Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: Ahmed S. Darwish @ 2020-12-21 17:38 UTC (permalink / raw)
  To: John Garry
  Cc: James E.J. Bottomley, Martin K. Petersen, Daniel Wagner,
	Jason Yan, Artur Paszkiewicz, Jack Wang, linux-scsi, LKML,
	Thomas Gleixner, Sebastian A. Siewior

On Mon, Dec 21, 2020 at 05:17:13PM +0000, John Garry wrote:
> On 18/12/2020 20:43, Ahmed S. Darwish wrote:
> > All call sites of the libsas APIs below:
> >
> >    - sas_alloc_event()
> >    - sas_ha_struct::notify_port_event()
> >    - sas_ha_struct::notify_phy_event()
> >
> > have been converted to use the new _gfp()-suffixed version.
> >
>
> nit: Is it possible to have non- _gfp()-suffixed symbols at the end, i.e.
> have same as original?
>

Yes, of course. I just did not want to double the patch series size in
the first submission ;-)

If the overall outlook of this series is OK, in v2 I'll append patches
#12 => #20 restoring call sites to the original names without _gfp(),
then keep only the original libsas names.

Thanks,

--
Ahmed S. Darwish
Linutronix GmbH

^ permalink raw reply	[relevance 99%]

* [RFC PATCH 1/1] net: arcnet: Fix RESET flag handling
  2020-12-22  9:03 92% [RFC PATCH 0/1] net: arcnet: Fix RESET sequence Ahmed S. Darwish
@ 2020-12-22  9:03 68% ` Ahmed S. Darwish
  2021-01-11 13:54 99% ` [RFC PATCH 0/1] net: arcnet: Fix RESET sequence Ahmed S. Darwish
  1 sibling, 0 replies; 200+ results
From: Ahmed S. Darwish @ 2020-12-22  9:03 UTC (permalink / raw)
  To: Michael Grzeschik, David S. Miller, Jakub Kicinski, netdev
  Cc: LKML, Thomas Gleixner, Sebastian A. Siewior, Ahmed S. Darwish

The main arcnet interrupt handler calls arcnet_close() then
arcnet_open(), if the RESET status flag is encountered.

This is invalid:

  1) In general, interrupt handlers should never call ->ndo_stop() and
     ->ndo_open() functions. They are usually full of blocking calls and
     other methods that are expected to be called only from driver
     init and exit code paths.

  2) arcnet_close() contains a del_timer_sync(). If the irq handler
     interrupts the to-be-deleted timer, del_timer_sync() will just loop
     forever.

  3) arcnet_close() also calls tasklet_kill(), which has a warning if
     called from irq context.

  4) For device reset, the sequence "arcnet_close(); arcnet_open();" is
     not complete.  Some child arcnet drivers have special init/exit
     code sequences, which then embed a call to arcnet_open() and
     arcnet_close() accordingly. Check drivers/net/arcnet/com20020.c.

Run the device RESET sequence from a scheduled workqueue instead.
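
Reduced to its core, the deferral looks as follows (my_* names are
hypothetical; the real worker below additionally guards against
re-entry and racing ifdowns via the new reset_in_progress flag):

  static void my_reset_work(struct work_struct *work)
  {
  	struct arcnet_local *lp = container_of(work, struct arcnet_local,
  					       reset_work);

  	/* Process context: blocking calls are fine here. */
  	rtnl_lock();
  	dev_close(lp->dev);
  	dev_open(lp->dev, NULL);
  	rtnl_unlock();
  }

  /* In the hard irq handler, only mark the condition and defer: */
  	if (status & RESETflag) {
  		netif_stop_queue(dev);
  		schedule_work(&lp->reset_work);
  	}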

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
---
 drivers/net/arcnet/arc-rimi.c     |  4 +-
 drivers/net/arcnet/arcdevice.h    |  6 +++
 drivers/net/arcnet/arcnet.c       | 69 +++++++++++++++++++++++++++++--
 drivers/net/arcnet/com20020-isa.c |  4 +-
 drivers/net/arcnet/com20020-pci.c |  2 +-
 drivers/net/arcnet/com20020_cs.c  |  2 +-
 drivers/net/arcnet/com90io.c      |  4 +-
 drivers/net/arcnet/com90xx.c      |  4 +-
 8 files changed, 81 insertions(+), 14 deletions(-)

diff --git a/drivers/net/arcnet/arc-rimi.c b/drivers/net/arcnet/arc-rimi.c
index 98df38fe553c..12d085405bd0 100644
--- a/drivers/net/arcnet/arc-rimi.c
+++ b/drivers/net/arcnet/arc-rimi.c
@@ -332,7 +332,7 @@ static int __init arc_rimi_init(void)
 		dev->irq = 9;
 
 	if (arcrimi_probe(dev)) {
-		free_netdev(dev);
+		free_arcdev(dev);
 		return -EIO;
 	}
 
@@ -349,7 +349,7 @@ static void __exit arc_rimi_exit(void)
 	iounmap(lp->mem_start);
 	release_mem_region(dev->mem_start, dev->mem_end - dev->mem_start + 1);
 	free_irq(dev->irq, dev);
-	free_netdev(dev);
+	free_arcdev(dev);
 }
 
 #ifndef MODULE
diff --git a/drivers/net/arcnet/arcdevice.h b/drivers/net/arcnet/arcdevice.h
index 22a49c6d7ae6..5d4a4c7efbbf 100644
--- a/drivers/net/arcnet/arcdevice.h
+++ b/drivers/net/arcnet/arcdevice.h
@@ -298,6 +298,10 @@ struct arcnet_local {
 
 	int excnak_pending;    /* We just got an excesive nak interrupt */
 
+	/* RESET flag handling */
+	int reset_in_progress;
+	struct work_struct reset_work;
+
 	struct {
 		uint16_t sequence;	/* sequence number (incs with each packet) */
 		__be16 aborted_seq;
@@ -350,7 +354,9 @@ void arcnet_dump_skb(struct net_device *dev, struct sk_buff *skb, char *desc)
 
 void arcnet_unregister_proto(struct ArcProto *proto);
 irqreturn_t arcnet_interrupt(int irq, void *dev_id);
+
 struct net_device *alloc_arcdev(const char *name);
+void free_arcdev(struct net_device *dev);
 
 int arcnet_open(struct net_device *dev);
 int arcnet_close(struct net_device *dev);
diff --git a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c
index e04efc0a5c97..563c43ae5cce 100644
--- a/drivers/net/arcnet/arcnet.c
+++ b/drivers/net/arcnet/arcnet.c
@@ -387,10 +387,46 @@ static void arcnet_timer(struct timer_list *t)
 	struct arcnet_local *lp = from_timer(lp, t, timer);
 	struct net_device *dev = lp->dev;
 
-	if (!netif_carrier_ok(dev)) {
+	spin_lock_irq(&lp->lock);
+
+	if (!lp->reset_in_progress && !netif_carrier_ok(dev)) {
 		netif_carrier_on(dev);
 		netdev_info(dev, "link up\n");
 	}
+
+	spin_unlock_irq(&lp->lock);
+}
+
+static void reset_device_work(struct work_struct *work)
+{
+	struct arcnet_local *lp;
+	struct net_device *dev;
+
+	lp = container_of(work, struct arcnet_local, reset_work);
+	dev = lp->dev;
+
+	/*
+	 * Do not bring the network interface back up if an ifdown
+	 * was already done.
+	 */
+	if (!netif_running(dev) || !lp->reset_in_progress)
+		return;
+
+	rtnl_lock();
+
+	/*
+	 * Do another check, in case of an ifdown that was triggered in
+	 * the small race window between the exit condition above and
+	 * acquiring RTNL.
+	 */
+	if (!netif_running(dev) || !lp->reset_in_progress)
+		goto out;
+
+	dev_close(dev);
+	dev_open(dev, NULL);
+
+out:
+	rtnl_unlock();
 }
 
 static void arcnet_reply_tasklet(unsigned long data)
@@ -452,12 +488,26 @@ struct net_device *alloc_arcdev(const char *name)
 		lp->dev = dev;
 		spin_lock_init(&lp->lock);
 		timer_setup(&lp->timer, arcnet_timer, 0);
+		INIT_WORK(&lp->reset_work, reset_device_work);
 	}
 
 	return dev;
 }
 EXPORT_SYMBOL(alloc_arcdev);
 
+void free_arcdev(struct net_device *dev)
+{
+	struct arcnet_local *lp = netdev_priv(dev);
+
+	/*
+	 * Do not cancel this at ->ndo_close(), as the workqueue itself
+	 * indirectly calls the ifdown path through dev_close().
+	 */
+	cancel_work_sync(&lp->reset_work);
+	free_netdev(dev);
+}
+EXPORT_SYMBOL(free_arcdev);
+
 /* Open/initialize the board.  This is called sometime after booting when
  * the 'ifconfig' program is run.
  *
@@ -587,6 +637,10 @@ int arcnet_close(struct net_device *dev)
 
 	/* shut down the card */
 	lp->hw.close(dev);
+
+	/* reset counters */
+	lp->reset_in_progress = 0;
+
 	module_put(lp->hw.owner);
 	return 0;
 }
@@ -820,6 +874,9 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id)
 
 	spin_lock_irqsave(&lp->lock, flags);
 
+	if (lp->reset_in_progress)
+		goto out;
+
 	/* RESET flag was enabled - if device is not running, we must
 	 * clear it right away (but nothing else).
 	 */
@@ -852,11 +909,14 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id)
 		if (status & RESETflag) {
 			arc_printk(D_NORMAL, dev, "spurious reset (status=%Xh)\n",
 				   status);
-			arcnet_close(dev);
-			arcnet_open(dev);
+
+			lp->reset_in_progress = 1;
+			netif_stop_queue(dev);
+			netif_carrier_off(dev);
+			schedule_work(&lp->reset_work);
 
 			/* get out of the interrupt handler! */
-			break;
+			goto out;
 		}
 		/* RX is inhibited - we must have received something.
 		 * Prepare to receive into the next buffer.
@@ -1052,6 +1112,7 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id)
 	udelay(1);
 	lp->hw.intmask(dev, lp->intmask);
 
+out:
 	spin_unlock_irqrestore(&lp->lock, flags);
 	return retval;
 }
diff --git a/drivers/net/arcnet/com20020-isa.c b/drivers/net/arcnet/com20020-isa.c
index f983c4ce6b07..b50d4c33d2a4 100644
--- a/drivers/net/arcnet/com20020-isa.c
+++ b/drivers/net/arcnet/com20020-isa.c
@@ -169,7 +169,7 @@ static int __init com20020_init(void)
 		dev->irq = 9;
 
 	if (com20020isa_probe(dev)) {
-		free_netdev(dev);
+		free_arcdev(dev);
 		return -EIO;
 	}
 
@@ -182,7 +182,7 @@ static void __exit com20020_exit(void)
 	unregister_netdev(my_dev);
 	free_irq(my_dev->irq, my_dev);
 	release_region(my_dev->base_addr, ARCNET_TOTAL_SIZE);
-	free_netdev(my_dev);
+	free_arcdev(my_dev);
 }
 
 #ifndef MODULE
diff --git a/drivers/net/arcnet/com20020-pci.c b/drivers/net/arcnet/com20020-pci.c
index eb7f76753c9c..8bdc44b7e09a 100644
--- a/drivers/net/arcnet/com20020-pci.c
+++ b/drivers/net/arcnet/com20020-pci.c
@@ -291,7 +291,7 @@ static void com20020pci_remove(struct pci_dev *pdev)
 
 		unregister_netdev(dev);
 		free_irq(dev->irq, dev);
-		free_netdev(dev);
+		free_arcdev(dev);
 	}
 }
 
diff --git a/drivers/net/arcnet/com20020_cs.c b/drivers/net/arcnet/com20020_cs.c
index cf607ffcf358..9cc5eb6a8e90 100644
--- a/drivers/net/arcnet/com20020_cs.c
+++ b/drivers/net/arcnet/com20020_cs.c
@@ -177,7 +177,7 @@ static void com20020_detach(struct pcmcia_device *link)
 		dev = info->dev;
 		if (dev) {
 			dev_dbg(&link->dev, "kfree...\n");
-			free_netdev(dev);
+			free_arcdev(dev);
 		}
 		dev_dbg(&link->dev, "kfree2...\n");
 		kfree(info);
diff --git a/drivers/net/arcnet/com90io.c b/drivers/net/arcnet/com90io.c
index cf214b730671..3856b447d38e 100644
--- a/drivers/net/arcnet/com90io.c
+++ b/drivers/net/arcnet/com90io.c
@@ -396,7 +396,7 @@ static int __init com90io_init(void)
 	err = com90io_probe(dev);
 
 	if (err) {
-		free_netdev(dev);
+		free_arcdev(dev);
 		return err;
 	}
 
@@ -419,7 +419,7 @@ static void __exit com90io_exit(void)
 
 	free_irq(dev->irq, dev);
 	release_region(dev->base_addr, ARCNET_TOTAL_SIZE);
-	free_netdev(dev);
+	free_arcdev(dev);
 }
 
 module_init(com90io_init)
diff --git a/drivers/net/arcnet/com90xx.c b/drivers/net/arcnet/com90xx.c
index 3dc3d533cb19..d8dfb9ea0de8 100644
--- a/drivers/net/arcnet/com90xx.c
+++ b/drivers/net/arcnet/com90xx.c
@@ -554,7 +554,7 @@ static int __init com90xx_found(int ioaddr, int airq, u_long shmem,
 err_release_mem:
 	release_mem_region(dev->mem_start, dev->mem_end - dev->mem_start + 1);
 err_free_dev:
-	free_netdev(dev);
+	free_arcdev(dev);
 	return -EIO;
 }
 
@@ -672,7 +672,7 @@ static void __exit com90xx_exit(void)
 		release_region(dev->base_addr, ARCNET_TOTAL_SIZE);
 		release_mem_region(dev->mem_start,
 				   dev->mem_end - dev->mem_start + 1);
-		free_netdev(dev);
+		free_arcdev(dev);
 	}
 }
 
-- 
2.29.2


^ permalink raw reply	[relevance 68%]

* [RFC PATCH 0/1] net: arcnet: Fix RESET sequence
@ 2020-12-22  9:03 92% Ahmed S. Darwish
  2020-12-22  9:03 68% ` [RFC PATCH 1/1] net: arcnet: Fix RESET flag handling Ahmed S. Darwish
  2021-01-11 13:54 99% ` [RFC PATCH 0/1] net: arcnet: Fix RESET sequence Ahmed S. Darwish
  0 siblings, 2 replies; 200+ results
From: Ahmed S. Darwish @ 2020-12-22  9:03 UTC (permalink / raw)
  To: Michael Grzeschik, David S. Miller, Jakub Kicinski, netdev
  Cc: LKML, Thomas Gleixner, Sebastian A. Siewior, Ahmed S. Darwish

Folks,

At drivers/net/arcnet/arcnet.c, there is:

  irqreturn_t arcnet_interrupt(int irq, void *dev_id)
  {
        ...
        if (status & RESETflag) {
                arcnet_close(dev);
                arcnet_open(dev);
        }
	...
  }

  struct net_device_ops arcnet_netdev_ops = {
        .ndo_open = arcnet_open,
        .ndo_stop = arcnet_close,
        ...
  };

which is wrong, in many ways:

  1) In general, interrupt handlers should never call ->ndo_stop() and
     ->ndo_open() functions. They are usually full of blocking calls and
     other methods that are expected to be called only from driver
     init/exit code paths.

  2) arcnet_close() contains a del_timer_sync(). If the irq handler
     interrupts the to-be-deleted timer and then calls del_timer_sync(), it
     will just loop forever.

  3) arcnet_close() also calls tasklet_kill(), which has a warning if
     called from irq context.

  4) For device reset, the sequence "arcnet_close(); arcnet_open();" is
     not complete.  Some children arcnet drivers have special init/exit
     code sequences, which then embed a call to arcnet_open() and
     arcnet_close() accordingly. Check drivers/net/arcnet/com20020.c.

Included is an RFC patch to fix the points above: if the RESET flag is
encountered, a workqueue is scheduled to run the generic reset sequence.

Note: Only compile-tested, as I do not have the hardware in question.

Thanks,

8<--------------

Ahmed S. Darwish (1):
  net: arcnet: Fix RESET flag handling

 drivers/net/arcnet/arc-rimi.c     |  4 +-
 drivers/net/arcnet/arcdevice.h    |  6 +++
 drivers/net/arcnet/arcnet.c       | 69 +++++++++++++++++++++++++++++--
 drivers/net/arcnet/com20020-isa.c |  4 +-
 drivers/net/arcnet/com20020-pci.c |  2 +-
 drivers/net/arcnet/com20020_cs.c  |  2 +-
 drivers/net/arcnet/com90io.c      |  4 +-
 drivers/net/arcnet/com90xx.c      |  4 +-
 8 files changed, 81 insertions(+), 14 deletions(-)

base-commit: 2c85ebc57b3e1817b6ce1a6b703928e113a90442
--
2.29.2

^ permalink raw reply	[relevance 92%]

* [RFC PATCH 0/3] chelsio: cxgb: Use threaded irqs
@ 2020-12-24 13:11 94% Ahmed S. Darwish
  2020-12-24 13:11 89% ` [RFC PATCH 1/3] chelsio: cxgb: Remove ndo_poll_controller() Ahmed S. Darwish
                   ` (2 more replies)
  0 siblings, 3 replies; 200+ results
From: Ahmed S. Darwish @ 2020-12-24 13:11 UTC (permalink / raw)
  To: Rahul Lakkireddy, Rohit Maheshwari, Vinay Kumar Yadav,
	Vishal Kulkarni, netdev
  Cc: David S. Miller, Jakub Kicinski, LKML, Thomas Gleixner,
	Sebastian A. Siewior, Ahmed S. Darwish

Folks,

The t1_interrupt() irq handler calls del_timer_sync() down the chain:

   sge.c: t1_interrupt()
     -> subr.c: t1_slow_intr_handler()
       -> asic_slow_intr() || fpga_slow_intr()
         -> t1_pci_intr_handler()
 	  -> cxgb2.c: t1_fatal_err()		# Cont. at [*]
       -> fpga_slow_intr()
         -> sge.c: t1_sge_intr_error_handler()
 	  -> cxgb2.c: t1_fatal_err()		# Cont. at [*]

[*] cxgb2.c: t1_fatal_err()
      -> sge.c: t1_sge_stop()
        -> timer.c: del_timer_sync()

This is invalid: if an irq handler calls del_timer_sync() on a timer
it has already interrupted, it will just loop forever.  That's why
del_timer_sync() also has a WARN_ON(in_irq()).

Included is an RFC patch series that runs the interrupt handler slow
path, t1_slow_intr_handler(), in a threaded-irq context.

This also leads to nice code savings across the driver, as some
workqueues and spinlocks are no longer needed.
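
The shape of the change, from patch 2/3:

  err = request_threaded_irq(adapter->pdev->irq, t1_irq, t1_irq_thread,
			     adapter->params.has_msi ? 0 : IRQF_SHARED,
			     adapter->name, adapter);

t1_irq() handles the likely SGE-data fast path in hard irq context and
returns IRQ_WAKE_THREAD otherwise; t1_irq_thread() then runs the slow
path in a sleepable task context.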

Note: Only compile-tested. I do not have the hardware in question.

Thanks,

8<--------------

Ahmed S. Darwish (3):
  chelsio: cxgb: Remove ndo_poll_controller()
  chelsio: cxgb: Move slow interrupt handling to threaded irqs
  chelsio: cxgb: Do not schedule a workqueue for external interrupts

 drivers/net/ethernet/chelsio/cxgb/common.h |  2 -
 drivers/net/ethernet/chelsio/cxgb/cxgb2.c  | 58 ++--------------------
 drivers/net/ethernet/chelsio/cxgb/sge.c    | 25 +++++++---
 drivers/net/ethernet/chelsio/cxgb/sge.h    |  3 +-
 drivers/net/ethernet/chelsio/cxgb/subr.c   |  2 +-
 5 files changed, 25 insertions(+), 65 deletions(-)

base-commit: 2c85ebc57b3e1817b6ce1a6b703928e113a90442
-- 
2.29.2


^ permalink raw reply	[relevance 94%]

* [RFC PATCH 1/3] chelsio: cxgb: Remove ndo_poll_controller()
  2020-12-24 13:11 94% [RFC PATCH 0/3] chelsio: cxgb: Use threaded irqs Ahmed S. Darwish
@ 2020-12-24 13:11 89% ` Ahmed S. Darwish
  2020-12-24 13:31 99%   ` Ahmed S. Darwish
  2020-12-24 13:11 87% ` [RFC PATCH 2/3] chelsio: cxgb: Move slow interrupt handling to threaded irqs Ahmed S. Darwish
  2020-12-24 13:11 84% ` [RFC PATCH 3/3] chelsio: cxgb: Do not schedule a workqueue for external interrupts Ahmed S. Darwish
  2 siblings, 1 reply; 200+ results
From: Ahmed S. Darwish @ 2020-12-24 13:11 UTC (permalink / raw)
  To: Rahul Lakkireddy, Rohit Maheshwari, Vinay Kumar Yadav,
	Vishal Kulkarni, netdev
  Cc: David S. Miller, Jakub Kicinski, LKML, Thomas Gleixner,
	Sebastian A. Siewior, Ahmed S. Darwish

Since commit ac3d9dd034e5 ("netpoll: make ndo_poll_controller()
optional"), networking drivers which use NAPI for clearing their TX
completions should not provide an ndo_poll_controller(). Netpoll simply
triggers the necessary TX queues cleanup by synchronously calling the
driver's NAPI poll handler -- with irqs off and a zero budget.

Modify cxgb's poll method to clear the TX queues upon zero budget.
Per API requirements, make sure never to consume any RX packet in that
case (budget=0), and thus to keep the returned work_done count at zero.

Afterwards, remove ndo_poll_controller().
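
(For reference, the netpoll side then boils down to invoking the
driver's NAPI poll handler directly with a zero budget, roughly:

	local_irq_save(flags);
	work = napi->poll(napi, 0);	/* budget == 0: TX cleanup only */
	local_irq_restore(flags);

so the poll handler must be able to make TX-completion progress without
touching RX when the budget is zero.)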

Link: https://lkml.kernel.org/r/20180921222752.101307-1-edumazet@google.com
Link: https://lkml.kernel.org/r/A782704A-DF97-4E85-B10A-D2268A67DFD7@fb.com
References: 822d54b9c2c1 ("netpoll: Drop budget parameter from NAPI polling call hierarchy")
Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Cc: Eric Dumazet <edumazet@google.com>
---
 drivers/net/ethernet/chelsio/cxgb/cxgb2.c | 14 --------------
 drivers/net/ethernet/chelsio/cxgb/sge.c   |  9 ++++++++-
 2 files changed, 8 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
index 0e4a0f413960..7b5a98330ef7 100644
--- a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
+++ b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
@@ -878,17 +878,6 @@ static int t1_set_features(struct net_device *dev, netdev_features_t features)
 
 	return 0;
 }
-#ifdef CONFIG_NET_POLL_CONTROLLER
-static void t1_netpoll(struct net_device *dev)
-{
-	unsigned long flags;
-	struct adapter *adapter = dev->ml_priv;
-
-	local_irq_save(flags);
-	t1_interrupt(adapter->pdev->irq, adapter);
-	local_irq_restore(flags);
-}
-#endif
 
 /*
  * Periodic accumulation of MAC statistics.  This is used only if the MAC
@@ -973,9 +962,6 @@ static const struct net_device_ops cxgb_netdev_ops = {
 	.ndo_set_mac_address	= t1_set_mac_addr,
 	.ndo_fix_features	= t1_fix_features,
 	.ndo_set_features	= t1_set_features,
-#ifdef CONFIG_NET_POLL_CONTROLLER
-	.ndo_poll_controller	= t1_netpoll,
-#endif
 };
 
 static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
diff --git a/drivers/net/ethernet/chelsio/cxgb/sge.c b/drivers/net/ethernet/chelsio/cxgb/sge.c
index 2d9c2b5a690a..d6df1a87db0b 100644
--- a/drivers/net/ethernet/chelsio/cxgb/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb/sge.c
@@ -1609,7 +1609,14 @@ static int process_pure_responses(struct adapter *adapter)
 int t1_poll(struct napi_struct *napi, int budget)
 {
 	struct adapter *adapter = container_of(napi, struct adapter, napi);
-	int work_done = process_responses(adapter, budget);
+	int work_done = 0;
+
+	if (budget)
+		work_done = process_responses(adapter, budget);
+	else {
+		/* budget=0 means: don't poll rx data */
+		process_pure_responses(adapter);
+	}
 
 	if (likely(work_done < budget)) {
 		napi_complete_done(napi, work_done);
-- 
2.29.2


^ permalink raw reply	[relevance 89%]

* [RFC PATCH 3/3] chelsio: cxgb: Do not schedule a workqueue for external interrupts
  2020-12-24 13:11 94% [RFC PATCH 0/3] chelsio: cxgb: Use threaded irqs Ahmed S. Darwish
  2020-12-24 13:11 89% ` [RFC PATCH 1/3] chelsio: cxgb: Remove ndo_poll_controller() Ahmed S. Darwish
  2020-12-24 13:11 87% ` [RFC PATCH 2/3] chelsio: cxgb: Move slow interrupt handling to threaded irqs Ahmed S. Darwish
@ 2020-12-24 13:11 84% ` Ahmed S. Darwish
  2 siblings, 0 replies; 200+ results
From: Ahmed S. Darwish @ 2020-12-24 13:11 UTC (permalink / raw)
  To: Rahul Lakkireddy, Rohit Maheshwari, Vinay Kumar Yadav,
	Vishal Kulkarni, netdev
  Cc: David S. Miller, Jakub Kicinski, LKML, Thomas Gleixner,
	Sebastian A. Siewior, Ahmed S. Darwish

cxgb's "elmer0" external interrupt handling code requires task context,
so originally a workqueue was scheduled for it from the hardirq handler.

Now that all of the external interrupt handling, elmer0 included, is run
from a threaded-irq context, just directly call the real handler.

Remove all the workqueue code that is no longer needed, including the
spinlock used for synchronizing the workqueue's NIC register accesses
against the irq handler.
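
(The irq core never runs a threaded handler concurrently with itself,
so t1_irq_thread() is already serialized against the external-interrupt
processing it contains; the async_lock no longer protects anything.)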

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
---
 drivers/net/ethernet/chelsio/cxgb/common.h |  2 --
 drivers/net/ethernet/chelsio/cxgb/cxgb2.c  | 38 ----------------------
 drivers/net/ethernet/chelsio/cxgb/sge.c    |  3 --
 drivers/net/ethernet/chelsio/cxgb/subr.c   |  2 +-
 4 files changed, 1 insertion(+), 44 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb/common.h b/drivers/net/ethernet/chelsio/cxgb/common.h
index 6475060649e9..504882e66831 100644
--- a/drivers/net/ethernet/chelsio/cxgb/common.h
+++ b/drivers/net/ethernet/chelsio/cxgb/common.h
@@ -238,7 +238,6 @@ struct adapter {
 	int msg_enable;
 	u32 mmio_len;
 
-	struct work_struct ext_intr_handler_task;
 	struct adapter_params params;
 
 	/* Terminator modules. */
@@ -256,7 +255,6 @@ struct adapter {
 	spinlock_t mac_lock;
 
 	/* guards async operations */
-	spinlock_t async_lock ____cacheline_aligned;
 	u32 slow_intr_mask;
 	int t1powersave;
 };
diff --git a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
index c96bdca4f270..b93e86d4d079 100644
--- a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
+++ b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
@@ -905,41 +905,6 @@ static void mac_stats_task(struct work_struct *work)
 	spin_unlock(&adapter->work_lock);
 }
 
-/*
- * Processes elmer0 external interrupts in process context.
- */
-static void ext_intr_task(struct work_struct *work)
-{
-	struct adapter *adapter =
-		container_of(work, struct adapter, ext_intr_handler_task);
-
-	t1_elmer0_ext_intr_handler(adapter);
-
-	/* Now reenable external interrupts */
-	spin_lock_irq(&adapter->async_lock);
-	adapter->slow_intr_mask |= F_PL_INTR_EXT;
-	writel(F_PL_INTR_EXT, adapter->regs + A_PL_CAUSE);
-	writel(adapter->slow_intr_mask | F_PL_INTR_SGE_DATA,
-		   adapter->regs + A_PL_ENABLE);
-	spin_unlock_irq(&adapter->async_lock);
-}
-
-/*
- * Interrupt-context handler for elmer0 external interrupts.
- */
-void t1_elmer0_ext_intr(struct adapter *adapter)
-{
-	/*
-	 * Schedule a task to handle external interrupts as we require
-	 * a process context.  We disable EXT interrupts in the interim
-	 * and let the task reenable them when it's done.
-	 */
-	adapter->slow_intr_mask &= ~F_PL_INTR_EXT;
-	writel(adapter->slow_intr_mask | F_PL_INTR_SGE_DATA,
-		   adapter->regs + A_PL_ENABLE);
-	schedule_work(&adapter->ext_intr_handler_task);
-}
-
 void t1_fatal_err(struct adapter *adapter)
 {
 	if (adapter->flags & FULL_INIT_DONE) {
@@ -1045,11 +1010,8 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 			spin_lock_init(&adapter->tpi_lock);
 			spin_lock_init(&adapter->work_lock);
-			spin_lock_init(&adapter->async_lock);
 			spin_lock_init(&adapter->mac_lock);
 
-			INIT_WORK(&adapter->ext_intr_handler_task,
-				  ext_intr_task);
 			INIT_DELAYED_WORK(&adapter->stats_update_task,
 					  mac_stats_task);
 
diff --git a/drivers/net/ethernet/chelsio/cxgb/sge.c b/drivers/net/ethernet/chelsio/cxgb/sge.c
index f1c402f6b889..9b4ffddbbc05 100644
--- a/drivers/net/ethernet/chelsio/cxgb/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb/sge.c
@@ -1657,10 +1657,7 @@ irqreturn_t t1_irq_thread(int irq, void *data)
 	struct sge *sge = adapter->sge;
 	int handled;
 
-	spin_lock(&adapter->async_lock);
 	handled = t1_slow_intr_handler(adapter);
-	spin_unlock(&adapter->async_lock);
-
 	if (!handled)
 		sge->stats.unhandled_irqs++;
 
diff --git a/drivers/net/ethernet/chelsio/cxgb/subr.c b/drivers/net/ethernet/chelsio/cxgb/subr.c
index ea0f8741d7cf..197d3bb924ca 100644
--- a/drivers/net/ethernet/chelsio/cxgb/subr.c
+++ b/drivers/net/ethernet/chelsio/cxgb/subr.c
@@ -858,7 +858,7 @@ static int asic_slow_intr(adapter_t *adapter)
 	if (cause & F_PL_INTR_PCIX)
 		t1_pci_intr_handler(adapter);
 	if (cause & F_PL_INTR_EXT)
-		t1_elmer0_ext_intr(adapter);
+		t1_elmer0_ext_intr_handler(adapter);
 
 	/* Clear the interrupts just processed. */
 	writel(cause, adapter->regs + A_PL_CAUSE);
-- 
2.29.2


^ permalink raw reply	[relevance 84%]

* [RFC PATCH 2/3] chelsio: cxgb: Move slow interrupt handling to threaded irqs
  2020-12-24 13:11 94% [RFC PATCH 0/3] chelsio: cxgb: Use threaded irqs Ahmed S. Darwish
  2020-12-24 13:11 89% ` [RFC PATCH 1/3] chelsio: cxgb: Remove ndo_poll_controller() Ahmed S. Darwish
@ 2020-12-24 13:11 87% ` Ahmed S. Darwish
  2020-12-24 13:11 84% ` [RFC PATCH 3/3] chelsio: cxgb: Do not schedule a workqueue for external interrupts Ahmed S. Darwish
  2 siblings, 0 replies; 200+ results
From: Ahmed S. Darwish @ 2020-12-24 13:11 UTC (permalink / raw)
  To: Rahul Lakkireddy, Rohit Maheshwari, Vinay Kumar Yadav,
	Vishal Kulkarni, netdev
  Cc: David S. Miller, Jakub Kicinski, LKML, Thomas Gleixner,
	Sebastian A. Siewior, Ahmed S. Darwish

The t1_interrupt() irq handler calls del_timer_sync() down the chain:

   sge.c: t1_interrupt()
     -> subr.c: t1_slow_intr_handler()
       -> asic_slow_intr() || fpga_slow_intr()
         -> t1_pci_intr_handler()
           -> cxgb2.c: t1_fatal_err()           # Cont. at [*]
       -> fpga_slow_intr()
         -> sge.c: t1_sge_intr_error_handler()
           -> cxgb2.c: t1_fatal_err()           # Cont. at [*]

[*] cxgb2.c: t1_fatal_err()
      -> sge.c: t1_sge_stop()
        -> timer.c: del_timer_sync()

This is invalid: if an irq handler calls del_timer_sync() on a timer it
has already interrupted, it will loop forever.

Move the slow t1 interrupt handling path, t1_slow_intr_handler(), to a
threaded-irq task context.
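
(With request_threaded_irq(), the hard handler still runs in irq
context; returning IRQ_WAKE_THREAD hands control over to the thread
function, which the irq core runs in a sleepable task context where
del_timer_sync() is legal.)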

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
---
 drivers/net/ethernet/chelsio/cxgb/cxgb2.c |  6 +++---
 drivers/net/ethernet/chelsio/cxgb/sge.c   | 13 +++++++++++--
 drivers/net/ethernet/chelsio/cxgb/sge.h   |  3 ++-
 3 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
index 7b5a98330ef7..c96bdca4f270 100644
--- a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
+++ b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
@@ -211,9 +211,9 @@ static int cxgb_up(struct adapter *adapter)
 	t1_interrupts_clear(adapter);
 
 	adapter->params.has_msi = !disable_msi && !pci_enable_msi(adapter->pdev);
-	err = request_irq(adapter->pdev->irq, t1_interrupt,
-			  adapter->params.has_msi ? 0 : IRQF_SHARED,
-			  adapter->name, adapter);
+	err = request_threaded_irq(adapter->pdev->irq, t1_irq, t1_irq_thread,
+				   adapter->params.has_msi ? 0 : IRQF_SHARED,
+				   adapter->name, adapter);
 	if (err) {
 		if (adapter->params.has_msi)
 			pci_disable_msi(adapter->pdev);
diff --git a/drivers/net/ethernet/chelsio/cxgb/sge.c b/drivers/net/ethernet/chelsio/cxgb/sge.c
index d6df1a87db0b..f1c402f6b889 100644
--- a/drivers/net/ethernet/chelsio/cxgb/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb/sge.c
@@ -1626,11 +1626,10 @@ int t1_poll(struct napi_struct *napi, int budget)
 	return work_done;
 }
 
-irqreturn_t t1_interrupt(int irq, void *data)
+irqreturn_t t1_irq(int irq, void *data)
 {
 	struct adapter *adapter = data;
 	struct sge *sge = adapter->sge;
-	int handled;
 
 	if (likely(responses_pending(adapter))) {
 		writel(F_PL_INTR_SGE_DATA, adapter->regs + A_PL_CAUSE);
@@ -1645,9 +1644,19 @@ irqreturn_t t1_interrupt(int irq, void *data)
 				napi_enable(&adapter->napi);
 			}
 		}
+
 		return IRQ_HANDLED;
 	}
 
+	return IRQ_WAKE_THREAD;
+}
+
+irqreturn_t t1_irq_thread(int irq, void *data)
+{
+	struct adapter *adapter = data;
+	struct sge *sge = adapter->sge;
+	int handled;
+
 	spin_lock(&adapter->async_lock);
 	handled = t1_slow_intr_handler(adapter);
 	spin_unlock(&adapter->async_lock);
diff --git a/drivers/net/ethernet/chelsio/cxgb/sge.h b/drivers/net/ethernet/chelsio/cxgb/sge.h
index a1ba591b3431..4072b3fb312b 100644
--- a/drivers/net/ethernet/chelsio/cxgb/sge.h
+++ b/drivers/net/ethernet/chelsio/cxgb/sge.h
@@ -74,7 +74,8 @@ struct sge *t1_sge_create(struct adapter *, struct sge_params *);
 int t1_sge_configure(struct sge *, struct sge_params *);
 int t1_sge_set_coalesce_params(struct sge *, struct sge_params *);
 void t1_sge_destroy(struct sge *);
-irqreturn_t t1_interrupt(int irq, void *cookie);
+irqreturn_t t1_irq(int irq, void *cookie);
+irqreturn_t t1_irq_thread(int irq, void *cookie);
 int t1_poll(struct napi_struct *, int);
 
 netdev_tx_t t1_start_xmit(struct sk_buff *skb, struct net_device *dev);
-- 
2.29.2


^ permalink raw reply	[relevance 87%]

* Re: [RFC PATCH 1/3] chelsio: cxgb: Remove ndo_poll_controller()
  2020-12-24 13:11 89% ` [RFC PATCH 1/3] chelsio: cxgb: Remove ndo_poll_controller() Ahmed S. Darwish
@ 2020-12-24 13:31 99%   ` Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: Ahmed S. Darwish @ 2020-12-24 13:31 UTC (permalink / raw)
  To: Rahul Lakkireddy, Rohit Maheshwari, Vinay Kumar Yadav,
	Vishal Kulkarni, netdev
  Cc: David S. Miller, Jakub Kicinski, Eric Dumazet, LKML,
	Thomas Gleixner, Sebastian A. Siewior

[[ Actually adding Eric to Cc ]]

On Thu, Dec 24, 2020 at 02:11:46PM +0100, Ahmed S. Darwish wrote:
> Since commit ac3d9dd034e5 ("netpoll: make ndo_poll_controller()
> optional"), networking drivers which use NAPI for clearing their TX
> completions should not provide an ndo_poll_controller(). Netpoll simply
> triggers the necessary TX queues cleanup by synchronously calling the
> driver's NAPI poll handler -- with irqs off and a zero budget.
>
> Modify cxgb's poll method to clear the TX queues upon zero budget.
> Per API requirements, make sure never to consume any RX packet in that
> case (budget=0), and thus to keep the returned work_done count at zero.
>
> Afterwards, remove ndo_poll_controller().
>
> Link: https://lkml.kernel.org/r/20180921222752.101307-1-edumazet@google.com
> Link: https://lkml.kernel.org/r/A782704A-DF97-4E85-B10A-D2268A67DFD7@fb.com
> References: 822d54b9c2c1 ("netpoll: Drop budget parameter from NAPI polling call hierarchy")
> Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
> Cc: Eric Dumazet <edumazet@google.com>
> ---
>  drivers/net/ethernet/chelsio/cxgb/cxgb2.c | 14 --------------
>  drivers/net/ethernet/chelsio/cxgb/sge.c   |  9 ++++++++-
>  2 files changed, 8 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
> index 0e4a0f413960..7b5a98330ef7 100644
> --- a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
> +++ b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
> @@ -878,17 +878,6 @@ static int t1_set_features(struct net_device *dev, netdev_features_t features)
>
>  	return 0;
>  }
> -#ifdef CONFIG_NET_POLL_CONTROLLER
> -static void t1_netpoll(struct net_device *dev)
> -{
> -	unsigned long flags;
> -	struct adapter *adapter = dev->ml_priv;
> -
> -	local_irq_save(flags);
> -	t1_interrupt(adapter->pdev->irq, adapter);
> -	local_irq_restore(flags);
> -}
> -#endif
>
>  /*
>   * Periodic accumulation of MAC statistics.  This is used only if the MAC
> @@ -973,9 +962,6 @@ static const struct net_device_ops cxgb_netdev_ops = {
>  	.ndo_set_mac_address	= t1_set_mac_addr,
>  	.ndo_fix_features	= t1_fix_features,
>  	.ndo_set_features	= t1_set_features,
> -#ifdef CONFIG_NET_POLL_CONTROLLER
> -	.ndo_poll_controller	= t1_netpoll,
> -#endif
>  };
>
>  static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
> diff --git a/drivers/net/ethernet/chelsio/cxgb/sge.c b/drivers/net/ethernet/chelsio/cxgb/sge.c
> index 2d9c2b5a690a..d6df1a87db0b 100644
> --- a/drivers/net/ethernet/chelsio/cxgb/sge.c
> +++ b/drivers/net/ethernet/chelsio/cxgb/sge.c
> @@ -1609,7 +1609,14 @@ static int process_pure_responses(struct adapter *adapter)
>  int t1_poll(struct napi_struct *napi, int budget)
>  {
>  	struct adapter *adapter = container_of(napi, struct adapter, napi);
> -	int work_done = process_responses(adapter, budget);
> +	int work_done = 0;
> +
> +	if (budget)
> +		work_done = process_responses(adapter, budget);
> +	else {
> +		/* budget=0 means: don't poll rx data */
> +		process_pure_responses(adapter);
> +	}
>
>  	if (likely(work_done < budget)) {
>  		napi_complete_done(napi, work_done);
> --
> 2.29.2
>
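
The t1_poll() change above implements the budget=0 contract that netpoll
relies on: TX completions may always be reaped, no RX packet may be
consumed, and the returned work count must never exceed the budget (with
budget=0 this also skips napi_complete_done(), since 0 < 0 is false). A
minimal sketch of that contract, assuming hypothetical clean_tx() and
clean_rx() helpers rather than the cxgb internals:

  static int my_poll(struct napi_struct *napi, int budget)
  {
          struct my_adapter *ap = container_of(napi, struct my_adapter,
                                               napi);
          int work_done = 0;

          /* TX completion cleanup is always allowed */
          clean_tx(ap);

          /* budget=0 (netpoll): do not touch RX */
          if (budget)
                  work_done = clean_rx(ap, budget);

          if (work_done < budget)
                  napi_complete_done(napi, work_done);

          return work_done;       /* never exceeds budget */
  }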

^ permalink raw reply	[relevance 99%]

* Re: [PATCH 00/11] scsi: libsas: Remove in_interrupt() check
  @ 2021-01-11 13:43 99% ` Ahmed S. Darwish
    0 siblings, 1 reply; 200+ results
From: Ahmed S. Darwish @ 2021-01-11 13:43 UTC (permalink / raw)
  To: John Garry, Jason Yan
  Cc: Hannes Reinecke, James E.J. Bottomley, Martin K. Petersen,
	Daniel Wagner, Artur Paszkiewicz, Jack Wang, linux-scsi, LKML,
	Thomas Gleixner, Sebastian A. Siewior

Hi John, Jason,

On Tue, Dec 22, 2020 at 12:54:58PM +0000, John Garry wrote:
> On 22/12/2020 12:30, Jason Yan wrote:
> > >      return event;
> > >
> > >
> > > So default for phy->ha->event_thres is 32, and I can't imagine that
> >
> > The default value is 1024.
>
> Ah, 32 is the minimum value that can be set via sysfs.
>
> >
> > > anyone has ever reconfigured this via sysfs or even required a value
> > > that large. Maybe Jason (cc'ed) knows better. It's an arbitrary
> > > value to say that the PHY is malfunctioning. I do note that there is
> > > the circular path sas_alloc_event() -> sas_notify_phy_event() ->
> > > sas_alloc_event() there also.
> > >
> > > Anyway, if the 32x event memories were pre-allocated, maybe there is
> > > a clean method to manage this memory, which even works in atomic
> > > context, so we could avoid this rework (ignoring the context bugs
> > > you reported for a moment). I do also note that the sas_event_cache
> > > size is not huge.
> > >
> >
> > Pre-allocated memory is an option. (Which we tried at the very
> > beginning, by Wang Yijing.)
>
> Right, I remember this, but I think the concern was having a proper method
> to manage this pre-allocated memory then. The same problem exists now.
>
> >
> > Or directly using GFP_ATOMIC may be better than passing flags from
> > the LLDDs.
> >
>
> I think that if we don't really need this, then we should not use it.
>

Kind reminder. Do we have any consensus here?

Thanks,

--
Ahmed S. Darwish
Linutronix GmbH

^ permalink raw reply	[relevance 99%]

* Re: [RFC PATCH 0/1] net: arcnet: Fix RESET sequence
  2020-12-22  9:03 92% [RFC PATCH 0/1] net: arcnet: Fix RESET sequence Ahmed S. Darwish
  2020-12-22  9:03 68% ` [RFC PATCH 1/1] net: arcnet: Fix RESET flag handling Ahmed S. Darwish
@ 2021-01-11 13:54 99% ` Ahmed S. Darwish
  2021-01-18 10:45 99%   ` Ahmed S. Darwish
  1 sibling, 1 reply; 200+ results
From: Ahmed S. Darwish @ 2021-01-11 13:54 UTC (permalink / raw)
  To: Michael Grzeschik, David S. Miller, Jakub Kicinski, netdev
  Cc: LKML, Thomas Gleixner, Sebastian A. Siewior

Hi,

On Tue, Dec 22, 2020 at 10:03:37AM +0100, Ahmed S. Darwish wrote:
...
>
> Included is an RFC patch to fix the points above: if the RESET flag is
> encountered, a workqueue is scheduled to run the generic reset sequence.
>
...

Kind reminder.

^ permalink raw reply	[relevance 99%]

* Re: [PATCH 00/11] scsi: libsas: Remove in_interrupt() check
  @ 2021-01-11 14:28 99%     ` Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: Ahmed S. Darwish @ 2021-01-11 14:28 UTC (permalink / raw)
  To: John Garry
  Cc: Jason Yan, Hannes Reinecke, James E.J. Bottomley,
	Martin K. Petersen, Daniel Wagner, Artur Paszkiewicz, Jack Wang,
	linux-scsi, LKML, Thomas Gleixner, Sebastian A. Siewior

On Mon, Jan 11, 2021 at 01:59:25PM +0000, John Garry wrote:
...
> To me, what you're doing seems fine.
>
...
>
> Just one other thing to mention:
> I have a patch to remove the indirection in libsas notifiers:
> https://github.com/hisilicon/kernel-dev/commit/87fcd7e113dc05b7933260e7fa4588dc3730cc2a
>
> I was going to send it today. Hopefully, if community has no problem with
> it, you can make your changes with that in mind.
>

Perfect. I'll rebase on top of it if everything is OK there.

I'll also append some patches to the series, removing the _gfp() suffix,
per your request earlier:

  https://lkml.kernel.org/r/68957d37-c789-0f0e-f5d1-85fef7f39f4f@huawei.com

Thanks!

--
Ahmed S. Darwish
Linutronix GmbH

^ permalink raw reply	[relevance 99%]

* Re: [PATCH] scsi: libsas and users: Remove notifier indirection
  @ 2021-01-11 17:41 99% ` Ahmed S. Darwish
    2021-01-12 11:25 99% ` Ahmed S. Darwish
  1 sibling, 1 reply; 200+ results
From: Ahmed S. Darwish @ 2021-01-11 17:41 UTC (permalink / raw)
  To: John Garry
  Cc: jejb, martin.petersen, artur.paszkiewicz, jinpu.wang, corbet,
	yanaijie, linux-scsi, linux-kernel, intel-linux-scu, linux-doc

On Tue, Jan 12, 2021 at 01:28:32AM +0800, John Garry wrote:
...
> index a920eced92ec..6a51abdc59ae 100644
> --- a/drivers/scsi/mvsas/mv_sas.c
> +++ b/drivers/scsi/mvsas/mv_sas.c
> @@ -230,7 +230,7 @@ static void mvs_bytes_dmaed(struct mvs_info *mvi, int i)
>  	}
>
>  	sas_ha = mvi->sas;
> -	sas_ha->notify_phy_event(sas_phy, PHYE_OOB_DONE);
> +	sas_notify_phy_event(sas_phy, PHYE_OOB_DONE);
>

Minor point: "sas_ha" is now not used anywhere; it should be removed.

^ permalink raw reply	[relevance 99%]

* Re: [PATCH] scsi: libsas and users: Remove notifier indirection
  @ 2021-01-11 17:52 99%     ` Ahmed S. Darwish
  0 siblings, 0 replies; 200+ results
From: Ahmed S. Darwish @ 2021-01-11 17:52 UTC (permalink / raw)
  To: John Garry
  Cc: jejb, martin.petersen, artur.paszkiewicz, jinpu.wang, corbet,
	yanaijie, linux-scsi, linux-kernel, intel-linux-scu, linux-doc

On Mon, Jan 11, 2021 at 05:44:17PM +0000, John Garry wrote:
> On 11/01/2021 17:41, Ahmed S. Darwish wrote:
> > On Tue, Jan 12, 2021 at 01:28:32AM +0800, John Garry wrote:
> > ...
> > > index a920eced92ec..6a51abdc59ae 100644
> > > --- a/drivers/scsi/mvsas/mv_sas.c
> > > +++ b/drivers/scsi/mvsas/mv_sas.c
> > > @@ -230,7 +230,7 @@ static void mvs_bytes_dmaed(struct mvs_info *mvi, int i)
> > >   	}
> > >
> > >   	sas_ha = mvi->sas;
> > > -	sas_ha->notify_phy_event(sas_phy, PHYE_OOB_DONE);
> > > +	sas_notify_phy_event(sas_phy, PHYE_OOB_DONE);
> > >
> >
> > Minor point: "sas_ha" is now not used anywhere; it should be removed.
> > .
> >
>
> ah, yes, it can be removed.
>

Similarly for drivers/scsi/pm8001/pm8001_sas.c.

(Just discovering these while integrating your patch at the top of my
 series).

> BTW, on separate topic, did intel-linux-scu@intel.com bounce for you?
>

Yup :)

--
Ahmed S. Darwish
Linutronix GmbH

^ permalink raw reply	[relevance 99%]

* [PATCH v2 00/19] scsi: libsas: Remove in_interrupt() check
@ 2021-01-12 11:06 81% Ahmed S. Darwish
  2021-01-12 11:06 99% ` [PATCH v2 01/19] Documentation: scsi: libsas: Remove notify_ha_event() Ahmed S. Darwish
                   ` (19 more replies)
  0 siblings, 20 replies; 200+ results
From: Ahmed S. Darwish @ 2021-01-12 11:06 UTC (permalink / raw)
  To: James E.J. Bottomley, Martin K. Petersen, John Garry, Jason Yan,
	Daniel Wagner, Artur Paszkiewicz, Jack Wang
  Cc: linux-scsi, intel-linux-scu, LKML, Thomas Gleixner,
	Sebastian A. Siewior, Ahmed S. Darwish

Hi,

Changelog v2
------------

- Rebase on top of v5.11-rc3

- Rebase on top of John's patch "scsi: libsas and users: Remove notifier
  indirection", as it affects every other patch. Include it in this
  series (patch #2).

- Introduce patches #13 => #19, which modify call sites back to use the
  original libsas notifier function names without the _gfp() suffix.

- Collect r-b tags

v1 Submission
-------------

  https://lkml.kernel.org/r/20201218204354.586951-1-a.darwish@linutronix.de

Cover letter
------------

In the discussion about preempt count consistency across kernel
configurations:

  https://lkml.kernel.org/r/20200914204209.256266093@linutronix.de

it was concluded that the usage of in_interrupt() and related context
checks should be removed from non-core code.

This includes memory allocation mode decisions (GFP_*). In the long run,
usage of in_interrupt() and its siblings should be banned from driver
code completely.

This series addresses SCSI libsas. Basically, the function:

  => drivers/scsi/libsas/sas_init.c:
  struct asd_sas_event *sas_alloc_event(struct asd_sas_phy *phy)
  {
        ...
        gfp_t flags = in_interrupt() ? GFP_ATOMIC : GFP_KERNEL;
        event = kmem_cache_zalloc(sas_event_cache, flags);
        ...
  }

is transformed so that callers explicitly pass the gfp_t memory
allocation flags. Affected libsas clients are modified accordingly.
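
With the full series applied, the in_interrupt() heuristic disappears
and the allocation mode is decided where the calling context is actually
known. Roughly, the allocator ends up as sketched below (the intended
end state; the parameter name is illustrative):

  => drivers/scsi/libsas/sas_init.c:
  struct asd_sas_event *sas_alloc_event(struct asd_sas_phy *phy,
                                        gfp_t gfp_flags)
  {
        ...
        event = kmem_cache_zalloc(sas_event_cache, gfp_flags);
        ...
  }

with callers passing, e.g., GFP_ATOMIC from atomic notifier paths and
GFP_KERNEL from sleepable ones.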

Patches #1, #2 => #7 have "Fixes: " tags and address bugs that were
noticed during the context analysis.

Thanks!

8<--------------

Ahmed S. Darwish (18):
  Documentation: scsi: libsas: Remove notify_ha_event()
  scsi: libsas: Introduce a _gfp() variant of event notifiers
  scsi: mvsas: Pass gfp_t flags to libsas event notifiers
  scsi: isci: port: link down: Pass gfp_t flags
  scsi: isci: port: link up: Pass gfp_t flags
  scsi: isci: port: broadcast change: Pass gfp_t flags
  scsi: libsas: Pass gfp_t flags to event notifiers
  scsi: pm80xx: Pass gfp_t flags to libsas event notifiers
  scsi: aic94xx: Pass gfp_t flags to libsas event notifiers
  scsi: hisi_sas: Pass gfp_t flags to libsas event notifiers
  scsi: libsas: event notifiers API: Add gfp_t flags parameter
  scsi: hisi_sas: Switch back to original libsas event notifiers
  scsi: aic94xx: Switch back to original libsas event notifiers
  scsi: pm80xx: Switch back to original libsas event notifiers
  scsi: libsas: Switch back to original event notifiers API
  scsi: isci: Switch back to original libsas event notifiers
  scsi: mvsas: Switch back to original libsas event notifiers
  scsi: libsas: Remove temporarily-added _gfp() API variants

John Garry (1):
  scsi: libsas and users: Remove notifier indirection

 Documentation/scsi/libsas.rst          |  5 ++--
 drivers/scsi/aic94xx/aic94xx_scb.c     | 20 ++++++-------
 drivers/scsi/hisi_sas/hisi_sas.h       |  3 +-
 drivers/scsi/hisi_sas/hisi_sas_main.c  | 29 +++++++++----------
 drivers/scsi/hisi_sas/hisi_sas_v1_hw.c |  6 ++--
 drivers/scsi/hisi_sas/hisi_sas_v2_hw.c |  6 ++--
 drivers/scsi/hisi_sas/hisi_sas_v3_hw.c |  6 ++--
 drivers/scsi/isci/port.c               | 11 +++----
 drivers/scsi/libsas/sas_event.c        | 27 ++++++++---------
 drivers/scsi/libsas/sas_init.c         | 17 ++++-------
 drivers/scsi/libsas/sas_internal.h     |  5 ++--
 drivers/scsi/mvsas/mv_sas.c            | 24 +++++++---------
 drivers/scsi/pm8001/pm8001_hwi.c       | 40 ++++++++++++--------------
 drivers/scsi/pm8001/pm8001_sas.c       | 12 +++-----
 drivers/scsi/pm8001/pm80xx_hwi.c       | 37 +++++++++++-------------
 include/scsi/libsas.h                  |  9 +++---
 16 files changed, 115 insertions(+), 142 deletions(-)

base-commit: 7c53f6b671f4aba70ff15e1b05148b10d58c2837
--
2.30.0

^ permalink raw reply	[relevance 81%]

* [PATCH v2 02/19] scsi: libsas and users: Remove notifier indirection
  2021-01-12 11:06 81% [PATCH v2 00/19] scsi: libsas: Remove in_interrupt() check Ahmed S. Darwish
  2021-01-12 11:06 99% ` [PATCH v2 01/19] Documentation: scsi: libsas: Remove notify_ha_event() Ahmed S. Darwish
@ 2021-01-12 11:06 37% ` Ahmed S. Darwish
    2021-01-12 11:06 68% ` [PATCH v2 03/19] scsi: libsas: Introduce a _gfp() variant of event notifiers Ahmed S. Darwish
                   ` (17 subsequent siblings)
  19 siblings, 1 reply; 200+ results
From: Ahmed S. Darwish @ 2021-01-12 11:06 UTC (permalink / raw)
  To: James E.J. Bottomley, Martin K. Petersen, John Garry, Jason Yan,
	Daniel Wagner, Artur Paszkiewicz, Jack Wang
  Cc: linux-scsi, intel-linux-scu, LKML, Thomas Gleixner,
	Sebastian A. Siewior, Ahmed S. Darwish

From: John Garry <john.garry@huawei.com>

The LLDDs report events to libsas with .notify_port_event and
.notify_phy_event callbacks.

These callbacks are fixed, so there is no reason why we cannot call
the functions directly; do exactly that.

This neatens the code slightly.

[a.darwish@linutronix.de: Remove the now unused "sas_ha" local variables]
Signed-off-by: John Garry <john.garry@huawei.com>
---
 Documentation/scsi/libsas.rst          |  4 +--
 drivers/scsi/aic94xx/aic94xx_scb.c     | 20 ++++++-------
 drivers/scsi/hisi_sas/hisi_sas_main.c  | 12 +++-----
 drivers/scsi/hisi_sas/hisi_sas_v1_hw.c |  3 +-
 drivers/scsi/hisi_sas/hisi_sas_v2_hw.c |  3 +-
 drivers/scsi/hisi_sas/hisi_sas_v3_hw.c |  3 +-
 drivers/scsi/isci/port.c               |  7 ++---
 drivers/scsi/libsas/sas_event.c        | 13 +++------
 drivers/scsi/libsas/sas_init.c         |  6 ----
 drivers/scsi/libsas/sas_internal.h     |  1 -
 drivers/scsi/mvsas/mv_sas.c            | 14 ++++-----
 drivers/scsi/pm8001/pm8001_hwi.c       | 40 ++++++++++++--------------
 drivers/scsi/pm8001/pm8001_sas.c       |  7 ++---
 drivers/scsi/pm8001/pm80xx_hwi.c       | 35 ++++++++++------------
 include/scsi/libsas.h                  |  7 ++---
 15 files changed, 69 insertions(+), 106 deletions(-)

diff --git a/Documentation/scsi/libsas.rst b/Documentation/scsi/libsas.rst
index f9b77c7879db..a183b1d84713 100644
--- a/Documentation/scsi/libsas.rst
+++ b/Documentation/scsi/libsas.rst
@@ -189,8 +189,8 @@ num_phys
 The event interface::
 
 	/* LLDD calls these to notify the class of an event. */
-	void (*notify_port_event)(struct sas_phy *, enum port_event);
-	void (*notify_phy_event)(struct sas_phy *, enum phy_event);
+	void sas_notify_port_event(struct sas_phy *, enum port_event);
+	void sas_notify_phy_event(struct sas_phy *, enum phy_event);
 
 When sas_register_ha() returns, those are set and can be
 called by the LLDD to notify the SAS layer of such events
diff --git a/drivers/scsi/aic94xx/aic94xx_scb.c b/drivers/scsi/aic94xx/aic94xx_scb.c
index 13677973da5c..4a4e8aa227c5 100644
--- a/drivers/scsi/aic94xx/aic94xx_scb.c
+++ b/drivers/scsi/aic94xx/aic94xx_scb.c
@@ -68,7 +68,6 @@ static void asd_phy_event_tasklet(struct asd_ascb *ascb,
 					 struct done_list_struct *dl)
 {
 	struct asd_ha_struct *asd_ha = ascb->ha;
-	struct sas_ha_struct *sas_ha = &asd_ha->sas_ha;
 	int phy_id = dl->status_block[0] & DL_PHY_MASK;
 	struct asd_phy *phy = &asd_ha->phys[phy_id];
 
@@ -81,7 +80,7 @@ static void asd_phy_event_tasklet(struct asd_ascb *ascb,
 		ASD_DPRINTK("phy%d: device unplugged\n", phy_id);
 		asd_turn_led(asd_ha, phy_id, 0);
 		sas_phy_disconnected(&phy->sas_phy);
-		sas_ha->notify_phy_event(&phy->sas_phy, PHYE_LOSS_OF_SIGNAL);
+		sas_notify_phy_event(&phy->sas_phy, PHYE_LOSS_OF_SIGNAL);
 		break;
 	case CURRENT_OOB_DONE:
 		/* hot plugged device */
@@ -89,12 +88,12 @@ static void asd_phy_event_tasklet(struct asd_ascb *ascb,
 		get_lrate_mode(phy, oob_mode);
 		ASD_DPRINTK("phy%d device plugged: lrate:0x%x, proto:0x%x\n",
 			    phy_id, phy->sas_phy.linkrate, phy->sas_phy.iproto);
-		sas_ha->notify_phy_event(&phy->sas_phy, PHYE_OOB_DONE);
+		sas_notify_phy_event(&phy->sas_phy, PHYE_OOB_DONE);
 		break;
 	case CURRENT_SPINUP_HOLD:
 		/* hot plug SATA, no COMWAKE sent */
 		asd_turn_led(asd_ha, phy_id, 1);
-		sas_ha->notify_phy_event(&phy->sas_phy, PHYE_SPINUP_HOLD);
+		sas_notify_phy_event(&phy->sas_phy, PHYE_SPINUP_HOLD);
 		break;
 	case CURRENT_GTO_TIMEOUT:
 	case CURRENT_OOB_ERROR:
@@ -102,7 +101,7 @@ static void asd_phy_event_tasklet(struct asd_ascb *ascb,
 			    dl->status_block[1]);
 		asd_turn_led(asd_ha, phy_id, 0);
 		sas_phy_disconnected(&phy->sas_phy);
-		sas_ha->notify_phy_event(&phy->sas_phy, PHYE_OOB_ERROR);
+		sas_notify_phy_event(&phy->sas_phy, PHYE_OOB_ERROR);
 		break;
 	}
 }
@@ -222,7 +221,6 @@ static void asd_bytes_dmaed_tasklet(struct asd_ascb *ascb,
 	int edb_el = edb_id + ascb->edb_index;
 	struct asd_dma_tok *edb = ascb->ha->seq.edb_arr[edb_el];
 	struct asd_phy *phy = &ascb->ha->phys[phy_id];
-	struct sas_ha_struct *sas_ha = phy->sas_phy.ha;
 	u16 size = ((dl->status_block[3] & 7) << 8) | dl->status_block[2];
 
 	size = min(size, (u16) sizeof(phy->frame_rcvd));
@@ -234,7 +232,7 @@ static void asd_bytes_dmaed_tasklet(struct asd_ascb *ascb,
 	spin_unlock_irqrestore(&phy->sas_phy.frame_rcvd_lock, flags);
 	asd_dump_frame_rcvd(phy, dl);
 	asd_form_port(ascb->ha, phy);
-	sas_ha->notify_port_event(&phy->sas_phy, PORTE_BYTES_DMAED);
+	sas_notify_port_event(&phy->sas_phy, PORTE_BYTES_DMAED);
 }
 
 static void asd_link_reset_err_tasklet(struct asd_ascb *ascb,
@@ -270,7 +268,7 @@ static void asd_link_reset_err_tasklet(struct asd_ascb *ascb,
 	asd_turn_led(asd_ha, phy_id, 0);
 	sas_phy_disconnected(sas_phy);
 	asd_deform_port(asd_ha, phy);
-	sas_ha->notify_port_event(sas_phy, PORTE_LINK_RESET_ERR);
+	sas_notify_port_event(sas_phy, PORTE_LINK_RESET_ERR);
 
 	if (retries_left == 0) {
 		int num = 1;
@@ -315,7 +313,7 @@ static void asd_primitive_rcvd_tasklet(struct asd_ascb *ascb,
 			spin_lock_irqsave(&sas_phy->sas_prim_lock, flags);
 			sas_phy->sas_prim = ffs(cont);
 			spin_unlock_irqrestore(&sas_phy->sas_prim_lock, flags);
-			sas_ha->notify_port_event(sas_phy,PORTE_BROADCAST_RCVD);
+			sas_notify_port_event(sas_phy,PORTE_BROADCAST_RCVD);
 			break;
 
 		case LmUNKNOWNP:
@@ -336,7 +334,7 @@ static void asd_primitive_rcvd_tasklet(struct asd_ascb *ascb,
 			/* The sequencer disables all phys on that port.
 			 * We have to re-enable the phys ourselves. */
 			asd_deform_port(asd_ha, phy);
-			sas_ha->notify_port_event(sas_phy, PORTE_HARD_RESET);
+			sas_notify_port_event(sas_phy, PORTE_HARD_RESET);
 			break;
 
 		default:
@@ -567,7 +565,7 @@ static void escb_tasklet_complete(struct asd_ascb *ascb,
 		/* the device is gone */
 		sas_phy_disconnected(sas_phy);
 		asd_deform_port(asd_ha, phy);
-		sas_ha->notify_port_event(sas_phy, PORTE_TIMER_EVENT);
+		sas_notify_port_event(sas_phy, PORTE_TIMER_EVENT);
 		break;
 	default:
 		ASD_DPRINTK("%s: phy%d: unknown event:0x%x\n", __func__,
diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c
index cf0bfac920a8..76f8fc3fad59 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_main.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_main.c
@@ -616,7 +616,6 @@ static void hisi_sas_bytes_dmaed(struct hisi_hba *hisi_hba, int phy_no)
 {
 	struct hisi_sas_phy *phy = &hisi_hba->phy[phy_no];
 	struct asd_sas_phy *sas_phy = &phy->sas_phy;
-	struct sas_ha_struct *sas_ha;
 
 	if (!phy->phy_attached)
 		return;
@@ -627,8 +626,7 @@ static void hisi_sas_bytes_dmaed(struct hisi_hba *hisi_hba, int phy_no)
 		return;
 	}
 
-	sas_ha = &hisi_hba->sha;
-	sas_ha->notify_phy_event(sas_phy, PHYE_OOB_DONE);
+	sas_notify_phy_event(sas_phy, PHYE_OOB_DONE);
 
 	if (sas_phy->phy) {
 		struct sas_phy *sphy = sas_phy->phy;
@@ -656,7 +654,7 @@ static void hisi_sas_bytes_dmaed(struct hisi_hba *hisi_hba, int phy_no)
 	}
 
 	sas_phy->frame_rcvd_size = phy->frame_rcvd_size;
-	sas_ha->notify_port_event(sas_phy, PORTE_BYTES_DMAED);
+	sas_notify_port_event(sas_phy, PORTE_BYTES_DMAED);
 }
 
 static struct hisi_sas_device *hisi_sas_alloc_dev(struct domain_device *device)
@@ -1411,7 +1409,6 @@ static void hisi_sas_refresh_port_id(struct hisi_hba *hisi_hba)
 
 static void hisi_sas_rescan_topology(struct hisi_hba *hisi_hba, u32 state)
 {
-	struct sas_ha_struct *sas_ha = &hisi_hba->sha;
 	struct asd_sas_port *_sas_port = NULL;
 	int phy_no;
 
@@ -1432,7 +1429,7 @@ static void hisi_sas_rescan_topology(struct hisi_hba *hisi_hba, u32 state)
 				_sas_port = sas_port;
 
 				if (dev_is_expander(dev->dev_type))
-					sas_ha->notify_port_event(sas_phy,
+					sas_notify_port_event(sas_phy,
 							PORTE_BROADCAST_RCVD);
 			}
 		} else {
@@ -2194,7 +2191,6 @@ void hisi_sas_phy_down(struct hisi_hba *hisi_hba, int phy_no, int rdy)
 {
 	struct hisi_sas_phy *phy = &hisi_hba->phy[phy_no];
 	struct asd_sas_phy *sas_phy = &phy->sas_phy;
-	struct sas_ha_struct *sas_ha = &hisi_hba->sha;
 	struct device *dev = hisi_hba->dev;
 
 	if (rdy) {
@@ -2210,7 +2206,7 @@ void hisi_sas_phy_down(struct hisi_hba *hisi_hba, int phy_no, int rdy)
 			return;
 		}
 		/* Phy down and not ready */
-		sas_ha->notify_phy_event(sas_phy, PHYE_LOSS_OF_SIGNAL);
+		sas_notify_phy_event(sas_phy, PHYE_LOSS_OF_SIGNAL);
 		sas_phy_disconnected(sas_phy);
 
 		if (port) {
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c
index 45e866cb9164..22eecc89d41b 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c
@@ -1408,7 +1408,6 @@ static irqreturn_t int_bcast_v1_hw(int irq, void *p)
 	struct hisi_sas_phy *phy = p;
 	struct hisi_hba *hisi_hba = phy->hisi_hba;
 	struct asd_sas_phy *sas_phy = &phy->sas_phy;
-	struct sas_ha_struct *sha = &hisi_hba->sha;
 	struct device *dev = hisi_hba->dev;
 	int phy_no = sas_phy->id;
 	u32 irq_value;
@@ -1424,7 +1423,7 @@ static irqreturn_t int_bcast_v1_hw(int irq, void *p)
 	}
 
 	if (!test_bit(HISI_SAS_RESET_BIT, &hisi_hba->flags))
-		sha->notify_port_event(sas_phy, PORTE_BROADCAST_RCVD);
+		sas_notify_port_event(sas_phy, PORTE_BROADCAST_RCVD);
 
 end:
 	hisi_sas_phy_write32(hisi_hba, phy_no, CHL_INT2,
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c
index 9adfdefef9ca..10ba0680da04 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c
@@ -2818,14 +2818,13 @@ static void phy_bcast_v2_hw(int phy_no, struct hisi_hba *hisi_hba)
 {
 	struct hisi_sas_phy *phy = &hisi_hba->phy[phy_no];
 	struct asd_sas_phy *sas_phy = &phy->sas_phy;
-	struct sas_ha_struct *sas_ha = &hisi_hba->sha;
 	u32 bcast_status;
 
 	hisi_sas_phy_write32(hisi_hba, phy_no, SL_RX_BCAST_CHK_MSK, 1);
 	bcast_status = hisi_sas_phy_read32(hisi_hba, phy_no, RX_PRIMS_STATUS);
 	if ((bcast_status & RX_BCAST_CHG_MSK) &&
 	    !test_bit(HISI_SAS_RESET_BIT, &hisi_hba->flags))
-		sas_ha->notify_port_event(sas_phy, PORTE_BROADCAST_RCVD);
+		sas_notify_port_event(sas_phy, PORTE_BROADCAST_RCVD);
 	hisi_sas_phy_write32(hisi_hba, phy_no, CHL_INT0,
 			     CHL_INT0_SL_RX_BCST_ACK_MSK);
 	hisi_sas_phy_write32(hisi_hba, phy_no, SL_RX_BCAST_CHK_MSK, 0);
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
index 7c12804b4e1d..9d9dcc11a866 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
@@ -1600,14 +1600,13 @@ static irqreturn_t phy_bcast_v3_hw(int phy_no, struct hisi_hba *hisi_hba)
 {
 	struct hisi_sas_phy *phy = &hisi_hba->phy[phy_no];
 	struct asd_sas_phy *sas_phy = &phy->sas_phy;
-	struct sas_ha_struct *sas_ha = &hisi_hba->sha;
 	u32 bcast_status;
 
 	hisi_sas_phy_write32(hisi_hba, phy_no, SL_RX_BCAST_CHK_MSK, 1);
 	bcast_status = hisi_sas_phy_read32(hisi_hba, phy_no, RX_PRIMS_STATUS);
 	if ((bcast_status & RX_BCAST_CHG_MSK) &&
 	    !test_bit(HISI_SAS_RESET_BIT, &hisi_hba->flags))
-		sas_ha->notify_port_event(sas_phy, PORTE_BROADCAST_RCVD);
+		sas_notify_port_event(sas_phy, PORTE_BROADCAST_RCVD);
 	hisi_sas_phy_write32(hisi_hba, phy_no, CHL_INT0,
 			     CHL_INT0_SL_RX_BCST_ACK_MSK);
 	hisi_sas_phy_write32(hisi_hba, phy_no, SL_RX_BCAST_CHK_MSK, 0);
diff --git a/drivers/scsi/isci/port.c b/drivers/scsi/isci/port.c
index 1df45f028ea7..8d9349738067 100644
--- a/drivers/scsi/isci/port.c
+++ b/drivers/scsi/isci/port.c
@@ -164,7 +164,7 @@ static void isci_port_bc_change_received(struct isci_host *ihost,
 		"%s: isci_phy = %p, sas_phy = %p\n",
 		__func__, iphy, &iphy->sas_phy);
 
-	ihost->sas_ha.notify_port_event(&iphy->sas_phy, PORTE_BROADCAST_RCVD);
+	sas_notify_port_event(&iphy->sas_phy, PORTE_BROADCAST_RCVD);
 	sci_port_bcn_enable(iport);
 }
 
@@ -223,8 +223,7 @@ static void isci_port_link_up(struct isci_host *isci_host,
 	/* Notify libsas that we have an address frame, if indeed
 	 * we've found an SSP, SMP, or STP target */
 	if (success)
-		isci_host->sas_ha.notify_port_event(&iphy->sas_phy,
-						    PORTE_BYTES_DMAED);
+		sas_notify_port_event(&iphy->sas_phy, PORTE_BYTES_DMAED);
 }
 
 
@@ -270,7 +269,7 @@ static void isci_port_link_down(struct isci_host *isci_host,
 	 * isci_port_deformed and isci_dev_gone functions.
 	 */
 	sas_phy_disconnected(&isci_phy->sas_phy);
-	isci_host->sas_ha.notify_phy_event(&isci_phy->sas_phy,
+	sas_notify_phy_event(&isci_phy->sas_phy,
 					   PHYE_LOSS_OF_SIGNAL);
 
 	dev_dbg(&isci_host->pdev->dev,
diff --git a/drivers/scsi/libsas/sas_event.c b/drivers/scsi/libsas/sas_event.c
index a1852f6c042b..112a1b76f63b 100644
--- a/drivers/scsi/libsas/sas_event.c
+++ b/drivers/scsi/libsas/sas_event.c
@@ -109,7 +109,7 @@ void sas_enable_revalidation(struct sas_ha_struct *ha)
 
 		sas_phy = container_of(port->phy_list.next, struct asd_sas_phy,
 				port_phy_el);
-		ha->notify_port_event(sas_phy, PORTE_BROADCAST_RCVD);
+		sas_notify_port_event(sas_phy, PORTE_BROADCAST_RCVD);
 	}
 	mutex_unlock(&ha->disco_mutex);
 }