Linux-Fsdevel Archive on lore.kernel.org
help / color / mirror / Atom feed
From: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
To: Andrea Arcangeli <aarcange@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>,
	Al Viro <viro@zeniv.linux.org.uk>,
	syzbot <syzbot+96cc7aba7e969b1d305c@syzkaller.appspotmail.com>,
	syzkaller-bugs <syzkaller-bugs@googlegroups.com>,
	linux-fsdevel <linux-fsdevel@vger.kernel.org>,
	Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
	Dmitry Vyukov <dvyukov@google.com>,
	Andrew Morton <akpm@linux-foundation.org>
Subject: Re: INFO: task hung in pipe_read (2)
Date: Thu, 13 Aug 2020 16:00:38 +0900	[thread overview]
Message-ID: <d1e83b55-eb09-45e0-95f1-ece41251b036@i-love.sakura.ne.jp> (raw)
In-Reply-To: <20200810192941.GA16925@redhat.com>

On 2020/08/11 4:29, Andrea Arcangeli wrote:
> However once the mutex is killable there's no concern anymore and the
> hangcheck timer is correct also not reporting any misbehavior anymore.

Do you mean something like below untested patch? I think that the difficult
part is that mutex for close() operation can't become killable. And I worry
that syzbot soon reports a hung task at pipe_release() instead of pipe_read()
or pipe_write(). If pagefault with locks held can be avoided, there will be no
such worry.

 fs/pipe.c                 | 106 ++++++++++++++++++++++++++++++++++++++--------
 fs/splice.c               |  41 ++++++++++++------
 include/linux/pipe_fs_i.h |   5 ++-
 3 files changed, 120 insertions(+), 32 deletions(-)

diff --git a/fs/pipe.c b/fs/pipe.c
index 60dbee4..537d1ef 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -66,6 +66,13 @@ static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass)
 		mutex_lock_nested(&pipe->mutex, subclass);
 }
 
+static int __must_check pipe_lock_killable_nested(struct pipe_inode_info *pipe, int subclass)
+{
+	if (pipe->files)
+		return mutex_lock_killable_nested(&pipe->mutex, subclass);
+	return 0;
+}
+
 void pipe_lock(struct pipe_inode_info *pipe)
 {
 	/*
@@ -75,6 +82,14 @@ void pipe_lock(struct pipe_inode_info *pipe)
 }
 EXPORT_SYMBOL(pipe_lock);
 
+int pipe_lock_killable(struct pipe_inode_info *pipe)
+{
+	/*
+	 * pipe_lock() nests non-pipe inode locks (for writing to a file)
+	 */
+	return pipe_lock_killable_nested(pipe, I_MUTEX_PARENT);
+}
+
 void pipe_unlock(struct pipe_inode_info *pipe)
 {
 	if (pipe->files)
@@ -87,23 +102,37 @@ static inline void __pipe_lock(struct pipe_inode_info *pipe)
 	mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
 }
 
+static inline int __must_check __pipe_lock_killable(struct pipe_inode_info *pipe)
+{
+	return mutex_lock_killable_nested(&pipe->mutex, I_MUTEX_PARENT);
+}
+
 static inline void __pipe_unlock(struct pipe_inode_info *pipe)
 {
 	mutex_unlock(&pipe->mutex);
 }
 
-void pipe_double_lock(struct pipe_inode_info *pipe1,
-		      struct pipe_inode_info *pipe2)
+int pipe_double_lock_killable(struct pipe_inode_info *pipe1,
+			      struct pipe_inode_info *pipe2)
 {
 	BUG_ON(pipe1 == pipe2);
 
 	if (pipe1 < pipe2) {
-		pipe_lock_nested(pipe1, I_MUTEX_PARENT);
-		pipe_lock_nested(pipe2, I_MUTEX_CHILD);
+		if (pipe_lock_killable_nested(pipe1, I_MUTEX_PARENT))
+			return -ERESTARTSYS;
+		if (pipe_lock_killable_nested(pipe2, I_MUTEX_CHILD)) {
+			pipe_unlock(pipe1);
+			return -ERESTARTSYS;
+		}
 	} else {
-		pipe_lock_nested(pipe2, I_MUTEX_PARENT);
-		pipe_lock_nested(pipe1, I_MUTEX_CHILD);
+		if (pipe_lock_killable_nested(pipe2, I_MUTEX_PARENT))
+			return -ERESTARTSYS;
+		if (pipe_lock_killable_nested(pipe1, I_MUTEX_CHILD)) {
+			pipe_unlock(pipe2);
+			return -ERESTARTSYS;
+		}
 	}
+	return 0;
 }
 
 /* Drop the inode semaphore and wait for a pipe event, atomically */
@@ -125,6 +154,24 @@ void pipe_wait(struct pipe_inode_info *pipe)
 	pipe_lock(pipe);
 }
 
+int pipe_wait_killable(struct pipe_inode_info *pipe)
+{
+	DEFINE_WAIT(rdwait);
+	DEFINE_WAIT(wrwait);
+
+	/*
+	 * Pipes are system-local resources, so sleeping on them
+	 * is considered a noninteractive wait:
+	 */
+	prepare_to_wait(&pipe->rd_wait, &rdwait, TASK_INTERRUPTIBLE);
+	prepare_to_wait(&pipe->wr_wait, &wrwait, TASK_INTERRUPTIBLE);
+	pipe_unlock(pipe);
+	schedule();
+	finish_wait(&pipe->rd_wait, &rdwait);
+	finish_wait(&pipe->wr_wait, &wrwait);
+	return pipe_lock_killable(pipe);
+}
+
 static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
 				  struct pipe_buffer *buf)
 {
@@ -244,7 +291,8 @@ static inline bool pipe_readable(const struct pipe_inode_info *pipe)
 		return 0;
 
 	ret = 0;
-	__pipe_lock(pipe);
+	if (__pipe_lock_killable(pipe))
+		return -ERESTARTSYS;
 
 	/*
 	 * We only wake up writers if the pipe was full when we started
@@ -381,7 +429,8 @@ static inline bool pipe_readable(const struct pipe_inode_info *pipe)
 		if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0)
 			return -ERESTARTSYS;
 
-		__pipe_lock(pipe);
+		if (__pipe_lock_killable(pipe))
+			return -ERESTARTSYS;
 		was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
 		wake_next_reader = true;
 	}
@@ -432,7 +481,8 @@ static inline bool pipe_writable(const struct pipe_inode_info *pipe)
 	if (unlikely(total_len == 0))
 		return 0;
 
-	__pipe_lock(pipe);
+	if (__pipe_lock_killable(pipe))
+		return -ERESTARTSYS;
 
 	if (!pipe->readers) {
 		send_sig(SIGPIPE, current, 0);
@@ -577,7 +627,14 @@ static inline bool pipe_writable(const struct pipe_inode_info *pipe)
 			kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
 		}
 		wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe));
-		__pipe_lock(pipe);
+		if (__pipe_lock_killable(pipe)) {
+			if (!ret)
+				ret = -ERESTARTSYS;
+			/* Extra notification is better than missing notification? */
+			was_empty = true;
+			wake_next_writer = true;
+			goto out_unlocked;
+		}
 		was_empty = pipe_empty(pipe->head, pipe->tail);
 		wake_next_writer = true;
 	}
@@ -586,6 +643,7 @@ static inline bool pipe_writable(const struct pipe_inode_info *pipe)
 		wake_next_writer = false;
 	__pipe_unlock(pipe);
 
+out_unlocked:
 	/*
 	 * If we do do a wakeup event, we do a 'sync' wakeup, because we
 	 * want the reader to start processing things asap, rather than
@@ -617,7 +675,8 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 
 	switch (cmd) {
 	case FIONREAD:
-		__pipe_lock(pipe);
+		if (__pipe_lock_killable(pipe))
+			return -ERESTARTSYS;
 		count = 0;
 		head = pipe->head;
 		tail = pipe->tail;
@@ -634,7 +693,8 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 #ifdef CONFIG_WATCH_QUEUE
 	case IOC_WATCH_QUEUE_SET_SIZE: {
 		int ret;
-		__pipe_lock(pipe);
+		if (__pipe_lock_killable(pipe))
+			return -ERESTARTSYS;
 		ret = watch_queue_set_size(pipe, arg);
 		__pipe_unlock(pipe);
 		return ret;
@@ -719,6 +779,10 @@ static void put_pipe_info(struct inode *inode, struct pipe_inode_info *pipe)
 {
 	struct pipe_inode_info *pipe = file->private_data;
 
+	/*
+	 * Think lock can't be killable. How to avoid deadlock if page fault
+	 * with pipe mutex held does not finish quickly?
+	 */
 	__pipe_lock(pipe);
 	if (file->f_mode & FMODE_READ)
 		pipe->readers--;
@@ -744,7 +808,8 @@ static void put_pipe_info(struct inode *inode, struct pipe_inode_info *pipe)
 	struct pipe_inode_info *pipe = filp->private_data;
 	int retval = 0;
 
-	__pipe_lock(pipe);
+	if (__pipe_lock_killable(pipe)) /* Can this be safely killable? */
+		return -ERESTARTSYS;
 	if (filp->f_mode & FMODE_READ)
 		retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
 	if ((filp->f_mode & FMODE_WRITE) && retval >= 0) {
@@ -1040,7 +1105,8 @@ static int wait_for_partner(struct pipe_inode_info *pipe, unsigned int *cnt)
 	int cur = *cnt;
 
 	while (cur == *cnt) {
-		pipe_wait(pipe);
+		if (pipe_wait_killable(pipe))
+			break;
 		if (signal_pending(current))
 			break;
 	}
@@ -1083,10 +1149,13 @@ static int fifo_open(struct inode *inode, struct file *filp)
 			spin_unlock(&inode->i_lock);
 		}
 	}
-	filp->private_data = pipe;
-	/* OK, we have a pipe and it's pinned down */
 
-	__pipe_lock(pipe);
+	/* OK, we have a pipe and it's pinned down */
+	if (__pipe_lock_killable(pipe)) {
+		put_pipe_info(inode, pipe);
+		return -ERESTARTSYS;
+	}
+	filp->private_data = pipe;
 
 	/* We can only do regular read/write on fifos */
 	stream_open(inode, filp);
@@ -1349,7 +1418,8 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
 	if (!pipe)
 		return -EBADF;
 
-	__pipe_lock(pipe);
+	if (__pipe_lock_killable(pipe))
+		return -ERESTARTSYS;
 
 	switch (cmd) {
 	case F_SETPIPE_SZ:
diff --git a/fs/splice.c b/fs/splice.c
index d7c8a7c..65d24df 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -563,7 +563,8 @@ static int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_des
 			sd->need_wakeup = false;
 		}
 
-		pipe_wait(pipe);
+		if (pipe_wait_killable(pipe))
+			return -ERESTARTSYS;
 	}
 
 	return 1;
@@ -657,7 +658,8 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
 		.u.file = out,
 	};
 
-	pipe_lock(pipe);
+	if (pipe_lock_killable(pipe))
+		return -ERESTARTSYS;
 	ret = __splice_from_pipe(pipe, &sd, actor);
 	pipe_unlock(pipe);
 
@@ -696,7 +698,10 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
 	if (unlikely(!array))
 		return -ENOMEM;
 
-	pipe_lock(pipe);
+	if (pipe_lock_killable(pipe)) {
+		kfree(array);
+		return -ERESTARTSYS;
+	}
 
 	splice_from_pipe_begin(&sd);
 	while (sd.total_len) {
@@ -1077,7 +1082,8 @@ static int wait_for_space(struct pipe_inode_info *pipe, unsigned flags)
 			return -EAGAIN;
 		if (signal_pending(current))
 			return -ERESTARTSYS;
-		pipe_wait(pipe);
+		if (pipe_wait_killable(pipe))
+			return -ERESTARTSYS;
 	}
 }
 
@@ -1167,7 +1173,8 @@ long do_splice(struct file *in, loff_t __user *off_in,
 		if (out->f_flags & O_NONBLOCK)
 			flags |= SPLICE_F_NONBLOCK;
 
-		pipe_lock(opipe);
+		if (pipe_lock_killable(opipe))
+			return -ERESTARTSYS;
 		ret = wait_for_space(opipe, flags);
 		if (!ret) {
 			unsigned int p_space;
@@ -1264,7 +1271,8 @@ static long vmsplice_to_user(struct file *file, struct iov_iter *iter,
 		return -EBADF;
 
 	if (sd.total_len) {
-		pipe_lock(pipe);
+		if (pipe_lock_killable(pipe))
+			return -ERESTARTSYS;
 		ret = __splice_from_pipe(pipe, &sd, pipe_to_user);
 		pipe_unlock(pipe);
 	}
@@ -1291,7 +1299,8 @@ static long vmsplice_to_pipe(struct file *file, struct iov_iter *iter,
 	if (!pipe)
 		return -EBADF;
 
-	pipe_lock(pipe);
+	if (pipe_lock_killable(pipe))
+		return -ERESTARTSYS;
 	ret = wait_for_space(pipe, flags);
 	if (!ret)
 		ret = iter_to_pipe(iter, pipe, buf_flag);
@@ -1441,7 +1450,8 @@ static int ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
 		return 0;
 
 	ret = 0;
-	pipe_lock(pipe);
+	if (pipe_lock_killable(pipe))
+		return -ERESTARTSYS;
 
 	while (pipe_empty(pipe->head, pipe->tail)) {
 		if (signal_pending(current)) {
@@ -1454,7 +1464,8 @@ static int ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
 			ret = -EAGAIN;
 			break;
 		}
-		pipe_wait(pipe);
+		if (pipe_wait_killable(pipe))
+			return -ERESTARTSYS;
 	}
 
 	pipe_unlock(pipe);
@@ -1477,7 +1488,8 @@ static int opipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
 		return 0;
 
 	ret = 0;
-	pipe_lock(pipe);
+	if (pipe_lock_killable(pipe))
+		return -ERESTARTSYS;
 
 	while (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) {
 		if (!pipe->readers) {
@@ -1493,7 +1505,8 @@ static int opipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
 			ret = -ERESTARTSYS;
 			break;
 		}
-		pipe_wait(pipe);
+		if (pipe_wait_killable(pipe))
+			return -ERESTARTSYS;
 	}
 
 	pipe_unlock(pipe);
@@ -1529,7 +1542,8 @@ static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
 	 * grabbing by pipe info address. Otherwise two different processes
 	 * could deadlock (one doing tee from A -> B, the other from B -> A).
 	 */
-	pipe_double_lock(ipipe, opipe);
+	if (pipe_double_lock_killable(ipipe, opipe))
+		return -ERESTARTSYS;
 
 	i_tail = ipipe->tail;
 	i_mask = ipipe->ring_size - 1;
@@ -1655,7 +1669,8 @@ static int link_pipe(struct pipe_inode_info *ipipe,
 	 * grabbing by pipe info address. Otherwise two different processes
 	 * could deadlock (one doing tee from A -> B, the other from B -> A).
 	 */
-	pipe_double_lock(ipipe, opipe);
+	if (pipe_double_lock_killable(ipipe, opipe))
+		return -ERESTARTSYS;
 
 	i_tail = ipipe->tail;
 	i_mask = ipipe->ring_size - 1;
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 50afd0d..eb99c18 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -233,8 +233,10 @@ static inline bool pipe_buf_try_steal(struct pipe_inode_info *pipe,
 
 /* Pipe lock and unlock operations */
 void pipe_lock(struct pipe_inode_info *);
+int __must_check pipe_lock_killable(struct pipe_inode_info *pipe);
 void pipe_unlock(struct pipe_inode_info *);
-void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *);
+int __must_check pipe_double_lock_killable(struct pipe_inode_info *pipe1,
+					   struct pipe_inode_info *pipe2);
 
 extern unsigned int pipe_max_size;
 extern unsigned long pipe_user_pages_hard;
@@ -242,6 +244,7 @@ static inline bool pipe_buf_try_steal(struct pipe_inode_info *pipe,
 
 /* Drop the inode semaphore and wait for a pipe event, atomically */
 void pipe_wait(struct pipe_inode_info *pipe);
+int __must_check pipe_wait_killable(struct pipe_inode_info *pipe);
 
 struct pipe_inode_info *alloc_pipe_info(void);
 void free_pipe_info(struct pipe_inode_info *);
-- 
1.8.3.1


  reply	other threads:[~2020-08-13  7:01 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-08-01  8:45 syzbot
2020-08-01 15:29 ` Tetsuo Handa
2020-08-01 17:39   ` Linus Torvalds
2020-08-07  5:31     ` Andrea Arcangeli
2020-08-08  1:01       ` Tetsuo Handa
2020-08-10 19:29         ` Andrea Arcangeli
2020-08-13  7:00           ` Tetsuo Handa [this message]
2020-08-13 11:20             ` Tetsuo Handa
2020-08-22  4:34               ` [RFC PATCH] pipe: make pipe_release() deferrable Tetsuo Handa
2020-08-22 16:30                 ` Linus Torvalds
2020-08-23  0:21                   ` Tetsuo Handa

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=d1e83b55-eb09-45e0-95f1-ece41251b036@i-love.sakura.ne.jp \
    --to=penguin-kernel@i-love.sakura.ne.jp \
    --cc=aarcange@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=dvyukov@google.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=syzbot+96cc7aba7e969b1d305c@syzkaller.appspotmail.com \
    --cc=syzkaller-bugs@googlegroups.com \
    --cc=torvalds@linux-foundation.org \
    --cc=viro@zeniv.linux.org.uk \
    --subject='Re: INFO: task hung in pipe_read (2)' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).