Linux-Fsdevel Archive on lore.kernel.org
help / color / mirror / Atom feed
* [PATCH V2] fuse: Remove __GFP_FS flag to avoid allocator recursing
@ 2020-09-16  6:39 Pradeep P V K
  2020-09-16 14:56 ` Matthew Wilcox
  0 siblings, 1 reply; 3+ messages in thread
From: Pradeep P V K @ 2020-09-16  6:39 UTC (permalink / raw)
  To: miklos, willy; +Cc: linux-fsdevel, stummala, sayalil, Pradeep P V K

Found a deadlock between kswapd, the writeback thread and the fuse process.
Here is the sequence of events, with call stacks, leading to the deadlock.

process#1		process#2		process#3
__switch_to+0x150	__switch_to+0x150	try_to_free_pages
__schedule+0x984	__schedule+0x984
					memalloc_noreclaim_restore
schedule+0x70		schedule+0x70		__perform_reclaim
bit_wait+0x14		__fuse_request_send+0x154
					__alloc_pages_direct_reclaim
__wait_on_bit+0x70	fuse_simple_request+0x174
inode_wait_for_writeback+0xa0
						__alloc_pages_slowpath
			fuse_flush_times+0x10c
evict+0xa4		fuse_write_inode+0x5c	__alloc_pages_nodemask
iput+0x248		__writeback_single_inode+0x3d4
dentry_unlink_inode+0xd8			__alloc_pages_node
			writeback_sb_inodes+0x4a0
__dentry_kill+0x160	__writeback_inodes_wb+0xac
shrink_dentry_list+0x170			alloc_pages_node
			wb_writeback+0x26c	fuse_copy_fill
prune_dcache_sb+0x54	wb_workfn+0x2c0		fuse_copy_one
super_cache_scan+0x114	process_one_work+0x278	fuse_read_single_forget
do_shrink_slab+0x24c	worker_thread+0x26c	fuse_read_forget
shrink_slab+0xa8	kthread+0x118		fuse_dev_do_read
shrink_node+0x118				fuse_dev_splice_read
kswapd+0x92c					do_splice_to
						do_splice

Process#1 (kswapd) held an inode lock and initiated a writeback to free
the pages. As the inode's superblock is fuse, process#2 forms a fuse
request. Process#3 (a fuse daemon thread), while serving process#2's
request, requires memory (pages), and as the system is already running low
on memory it ends up calling try_to_free_pages(), which might now call
kswapd again, which is already stuck with an inode lock held. Thus a
deadlock forms.

So, drop the __GFP_FS flag, to avoid the allocator recursing into a
filesystem that might already hold locks, by using memalloc_nofs_save()
and memalloc_nofs_restore() respectively.

Changes since V1:
- Used memalloc_nofs_save() in all allocation paths of fuse daemons
  to avoid using the __GFP_FS flag, as per Matthew's comments.

 __GFP_FS flags very
Signed-off-by: Pradeep P V K <ppvk@codeaurora.org>
---
 fs/fuse/dev.c | 30 +++++++++++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 02b3c36..9f790fd 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -21,6 +21,7 @@
 #include <linux/swap.h>
 #include <linux/splice.h>
 #include <linux/sched.h>
+#include <linux/sched/mm.h>
 
 MODULE_ALIAS_MISCDEV(FUSE_MINOR);
 MODULE_ALIAS("devname:fuse");
@@ -1314,6 +1315,8 @@ static int fuse_dev_open(struct inode *inode, struct file *file)
 
 static ssize_t fuse_dev_read(struct kiocb *iocb, struct iov_iter *to)
 {
+	ssize_t size;
+	unsigned nofs_flag;
 	struct fuse_copy_state cs;
 	struct file *file = iocb->ki_filp;
 	struct fuse_dev *fud = fuse_get_dev(file);
@@ -1326,7 +1329,11 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, struct iov_iter *to)
 
 	fuse_copy_init(&cs, 1, to);
 
-	return fuse_dev_do_read(fud, file, &cs, iov_iter_count(to));
+	nofs_flag = memalloc_nofs_save();
+	size = fuse_dev_do_read(fud, file, &cs, iov_iter_count(to));
+	memalloc_nofs_restore(nofs_flag);
+
+	return size;
 }
 
 static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
@@ -1335,6 +1342,7 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
 {
 	int total, ret;
 	int page_nr = 0;
+	unsigned nofs_flag;
 	struct pipe_buffer *bufs;
 	struct fuse_copy_state cs;
 	struct fuse_dev *fud = fuse_get_dev(in);
@@ -1342,15 +1350,21 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
 	if (!fud)
 		return -EPERM;
 
+	nofs_flag = memalloc_nofs_save();
 	bufs = kvmalloc_array(pipe->max_usage, sizeof(struct pipe_buffer),
 			      GFP_KERNEL);
+	memalloc_nofs_restore(nofs_flag);
 	if (!bufs)
 		return -ENOMEM;
 
 	fuse_copy_init(&cs, 1, NULL);
 	cs.pipebufs = bufs;
 	cs.pipe = pipe;
+
+	nofs_flag = memalloc_nofs_save();
 	ret = fuse_dev_do_read(fud, in, &cs, len);
+	memalloc_nofs_restore(nofs_flag);
+
 	if (ret < 0)
 		goto out;
 
@@ -1918,6 +1932,8 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
 
 static ssize_t fuse_dev_write(struct kiocb *iocb, struct iov_iter *from)
 {
+	ssize_t size;
+	unsigned nofs_flag;
 	struct fuse_copy_state cs;
 	struct fuse_dev *fud = fuse_get_dev(iocb->ki_filp);
 
@@ -1929,7 +1945,11 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, struct iov_iter *from)
 
 	fuse_copy_init(&cs, 0, from);
 
-	return fuse_dev_do_write(fud, &cs, iov_iter_count(from));
+	nofs_flag = memalloc_nofs_save();
+	size = fuse_dev_do_write(fud, &cs, iov_iter_count(from));
+	memalloc_nofs_restore(nofs_flag);
+
+	return size;
 }
 
 static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
@@ -1938,7 +1958,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
 {
 	unsigned int head, tail, mask, count;
 	unsigned nbuf;
-	unsigned idx;
+	unsigned idx, nofs_flag;
 	struct pipe_buffer *bufs;
 	struct fuse_copy_state cs;
 	struct fuse_dev *fud;
@@ -1956,7 +1976,9 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
 	mask = pipe->ring_size - 1;
 	count = head - tail;
 
+	nofs_flag = memalloc_nofs_save();
 	bufs = kvmalloc_array(count, sizeof(struct pipe_buffer), GFP_KERNEL);
+	memalloc_nofs_restore(nofs_flag);
 	if (!bufs) {
 		pipe_unlock(pipe);
 		return -ENOMEM;
@@ -2010,7 +2032,9 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
 	if (flags & SPLICE_F_MOVE)
 		cs.move_pages = 1;
 
+	nofs_flag = memalloc_nofs_save();
 	ret = fuse_dev_do_write(fud, &cs, len);
+	memalloc_nofs_restore(nofs_flag);
 
 	pipe_lock(pipe);
 out_free:
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH V2] fuse: Remove __GFP_FS flag to avoid allocator recursing
  2020-09-16  6:39 [PATCH V2] fuse: Remove __GFP_FS flag to avoid allocator recursing Pradeep P V K
@ 2020-09-16 14:56 ` Matthew Wilcox
  2020-09-21 11:39   ` ppvk
  0 siblings, 1 reply; 3+ messages in thread
From: Matthew Wilcox @ 2020-09-16 14:56 UTC (permalink / raw)
  To: Pradeep P V K; +Cc: miklos, linux-fsdevel, stummala, sayalil

On Wed, Sep 16, 2020 at 12:09:40PM +0530, Pradeep P V K wrote:
> Changes since V1:
> - Used memalloc_nofs_save() in all allocation paths of fuse daemons
>   to avoid use __GFP_FS flag as per Matthew comments.

That's not how to use memalloc_nofs_save().  You call it when entering a
context in which any memory allocation would cause a deadlock.  You don't
look for every place which allocates memory and wrap the memory allocation
calls in memalloc_nofs_save() because you're likely to miss one.

>  static ssize_t fuse_dev_read(struct kiocb *iocb, struct iov_iter *to)
>  {
> +	ssize_t size;
> +	unsigned nofs_flag;

This is almost certainly too low in the call stack.


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH V2] fuse: Remove __GFP_FS flag to avoid allocator recursing
  2020-09-16 14:56 ` Matthew Wilcox
@ 2020-09-21 11:39   ` ppvk
  0 siblings, 0 replies; 3+ messages in thread
From: ppvk @ 2020-09-21 11:39 UTC (permalink / raw)
  To: Matthew Wilcox; +Cc: miklos, linux-fsdevel, stummala, sayalil

On 2020-09-16 20:26, Matthew Wilcox wrote:
> On Wed, Sep 16, 2020 at 12:09:40PM +0530, Pradeep P V K wrote:
>> Changes since V1:
>> - Used memalloc_nofs_save() in all allocation paths of fuse daemons
>>   to avoid use __GFP_FS flag as per Matthew comments.
> 
> That's not how to use memalloc_nofs_save().  You call it when entering 
> a
> context in which any memory allocation would cause a deadlock.  You 
> don't
> look for every place which allocates memory and wrap the memory 
> allocation
> calls in memalloc_nofs_save() because you're likely to miss one.

ok, i will fix this in my next patch set.
>>  static ssize_t fuse_dev_read(struct kiocb *iocb, struct iov_iter *to)
>>  {
>> +	ssize_t size;
>> +	unsigned nofs_flag;
> 
> This is almost certainly too low in the call stack.
ok, i will update this in my next patch set.

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2020-09-21 11:44 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-09-16  6:39 [PATCH V2] fuse: Remove __GFP_FS flag to avoid allocator recursing Pradeep P V K
2020-09-16 14:56 ` Matthew Wilcox
2020-09-21 11:39   ` ppvk

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).