LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [PATCH] eventfd: protect eventfd_wake_count with a local_lock
@ 2021-07-19  7:54 Daniel Bristot de Oliveira
  2021-07-23 14:53 ` Nicolas Saenz Julienne
  0 siblings, 1 reply; 2+ messages in thread
From: Daniel Bristot de Oliveira @ 2021-07-19  7:54 UTC (permalink / raw)
  To: Alexander Viro, linux-kernel
  Cc: Daniel Bristot de Oliveira, He Zhe, Jens Axboe, Thomas Gleixner,
	Sebastian Andrzej Siewior, stable, linux-fsdevel, Paolo Bonzini

eventfd_signal() assumes that the region between spin_lock_irqsave() and
spin_unlock_irqrestore() is non-preemptible, and therefore increments
and decrements the percpu variable eventfd_wake_count inside that
critical section.

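This does not hold on PREEMPT_RT, where spinlock_t is a sleeping lock
and the critical section remains preemptible. If eventfd_signal() is
preempted while the count is elevated and an unrelated thread on the
same CPU calls eventfd_signal(), the result is a spurious WARN. To avoid
this, protect the percpu variable with a local_lock.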

Reported-by: Daniel Bristot de Oliveira <bristot@kernel.org>
Fixes: b5e683d5cab8 ("eventfd: track eventfd_signal() recursion depth")
Cc: He Zhe <zhe.he@windriver.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: stable@vger.kernel.org
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Co-developed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Daniel Bristot de Oliveira <bristot@kernel.org>
---
 fs/eventfd.c            | 27 ++++++++++++++++++++++-----
 include/linux/eventfd.h |  7 +------
 2 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/fs/eventfd.c b/fs/eventfd.c
index e265b6dd4f34..9754fcd38690 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -12,6 +12,7 @@
 #include <linux/fs.h>
 #include <linux/sched/signal.h>
 #include <linux/kernel.h>
+#include <linux/local_lock.h>
 #include <linux/slab.h>
 #include <linux/list.h>
 #include <linux/spinlock.h>
@@ -25,8 +26,6 @@
 #include <linux/idr.h>
 #include <linux/uio.h>
 
-DEFINE_PER_CPU(int, eventfd_wake_count);
-
 static DEFINE_IDA(eventfd_ida);
 
 struct eventfd_ctx {
@@ -45,6 +44,20 @@ struct eventfd_ctx {
 	int id;
 };
 
+struct event_fd_recursion {
+	local_lock_t lock;
+	int count;
+};
+
+static DEFINE_PER_CPU(struct event_fd_recursion, event_fd_recursion) = {
+	.lock = INIT_LOCAL_LOCK(lock),
+};
+
+bool eventfd_signal_count(void)
+{
+	return this_cpu_read(event_fd_recursion.count);
+}
+
 /**
  * eventfd_signal - Adds @n to the eventfd counter.
  * @ctx: [in] Pointer to the eventfd context.
@@ -71,18 +84,22 @@ __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n)
 	 * it returns true, the eventfd_signal() call should be deferred to a
 	 * safe context.
 	 */
-	if (WARN_ON_ONCE(this_cpu_read(eventfd_wake_count)))
+	local_lock(&event_fd_recursion.lock);
+	if (WARN_ON_ONCE(this_cpu_read(event_fd_recursion.count))) {
+		local_unlock(&event_fd_recursion.lock);
 		return 0;
+	}
 
 	spin_lock_irqsave(&ctx->wqh.lock, flags);
-	this_cpu_inc(eventfd_wake_count);
+	this_cpu_inc(event_fd_recursion.count);
 	if (ULLONG_MAX - ctx->count < n)
 		n = ULLONG_MAX - ctx->count;
 	ctx->count += n;
 	if (waitqueue_active(&ctx->wqh))
 		wake_up_locked_poll(&ctx->wqh, EPOLLIN);
-	this_cpu_dec(eventfd_wake_count);
+	this_cpu_dec(event_fd_recursion.count);
 	spin_unlock_irqrestore(&ctx->wqh.lock, flags);
+	local_unlock(&event_fd_recursion.lock);
 
 	return n;
 }
diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h
index fa0a524baed0..ca89d6c409c1 100644
--- a/include/linux/eventfd.h
+++ b/include/linux/eventfd.h
@@ -43,12 +43,7 @@ int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *w
 				  __u64 *cnt);
 void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt);
 
-DECLARE_PER_CPU(int, eventfd_wake_count);
-
-static inline bool eventfd_signal_count(void)
-{
-	return this_cpu_read(eventfd_wake_count);
-}
+bool eventfd_signal_count(void);
 
 #else /* CONFIG_EVENTFD */
 
-- 
2.31.1


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] eventfd: protect eventfd_wake_count with a local_lock
  2021-07-19  7:54 [PATCH] eventfd: protect eventfd_wake_count with a local_lock Daniel Bristot de Oliveira
@ 2021-07-23 14:53 ` Nicolas Saenz Julienne
  0 siblings, 0 replies; 2+ messages in thread
From: Nicolas Saenz Julienne @ 2021-07-23 14:53 UTC (permalink / raw)
  To: bristot
  Cc: axboe, bigeasy, linux-fsdevel, linux-kernel, pbonzini, stable,
	tglx, viro, zhe.he, Nicolas Saenz Julienne

On Mon, 19 Jul 2021 09:54:52 +0200, Daniel Bristot de Oliveira wrote:
> eventfd_signal assumes that spin_lock_irqsave/spin_unlock_irqrestore is
> non-preemptable and therefore increments and decrements the percpu
> variable inside the critical section.
> 
> This obviously does not fly with PREEMPT_RT. If eventfd_signal is
> preempted and an unrelated thread calls eventfd_signal, the result is
> a spurious WARN. To avoid this, protect the percpu variable with a
> local_lock.
> 
> Reported-by: Daniel Bristot de Oliveira <bristot@kernel.org>
> Fixes: b5e683d5cab8 ("eventfd: track eventfd_signal() recursion depth")
> Cc: He Zhe <zhe.he@windriver.com>
> Cc: Jens Axboe <axboe@kernel.dk>
> Cc: Alexander Viro <viro@zeniv.linux.org.uk>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> Cc: stable@vger.kernel.org
> Cc: linux-fsdevel@vger.kernel.org
> Cc: linux-kernel@vger.kernel.org
> Co-developed-by: Paolo Bonzini <pbonzini@redhat.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> Signed-off-by: Daniel Bristot de Oliveira <bristot@kernel.org>
> ---

Tested-by: Nicolas Saenz Julienne <nsaenzju@redhat.com>

Thanks!

--
Nicolás Sáenz


^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2021-07-23 14:54 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-19  7:54 [PATCH] eventfd: protect eventfd_wake_count with a local_lock Daniel Bristot de Oliveira
2021-07-23 14:53 ` Nicolas Saenz Julienne

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).