Linux-Fsdevel Archive on lore.kernel.org
help / color / mirror / Atom feed
* [PATCH RFC] fuse: optimize writepages search
@ 2019-09-19 14:11 Vasily Averin
  2020-06-11  8:53 ` Miklos Szeredi
  0 siblings, 1 reply; 2+ messages in thread
From: Vasily Averin @ 2019-09-19 14:11 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: linux-fsdevel

Dear Miklos,

This patch was originally developed for RHEL7-based 
Virtuozzo kernels and widely used few years
but seems it was not submitted upstream.
I've rebased it to v5.3, compiled but was not tested,
just to get feedback and then update it.

Author: Maxim Patlasov <mpatlasov@virtuozzo.com>

The patch re-works fi->writepages replacing list with rb-tree.
This improves performance because kernel fuse iterates through
fi->writepages for each writeback page and typical number of
entries is about 800 (for 100MB of fuse writeback).

Before patch:

10240+0 records in
10240+0 records out
10737418240 bytes (11 GB) copied, 41.3473 s, 260 MB/s

 2  1      0 57445400  40416 6323676    0    0    33 374743 8633 19210  1  8 88  3  0

  29.86%  [kernel]               [k] _raw_spin_lock
  26.62%  [fuse]                 [k] fuse_page_is_writeback

After patch:

10240+0 records in
10240+0 records out
10737418240 bytes (11 GB) copied, 21.4954 s, 500 MB/s

 2  9      0 53676040  31744 10265984    0    0    64 854790 10956 48387  1  6 88  6  0

  23.55%  [kernel]             [k] copy_user_enhanced_fast_string
   9.87%  [kernel]             [k] __memcpy
   3.10%  [kernel]             [k] _raw_spin_lock

Signed-off-by: Maxim Patlasov <mpatlasov@virtuozzo.com>
Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
---
 fs/fuse/file.c   | 60 ++++++++++++++++++++++++++++++++++++++----------
 fs/fuse/fuse_i.h |  4 ++--
 2 files changed, 50 insertions(+), 14 deletions(-)

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 91c99724dee0..ef7feb897ada 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -338,17 +338,23 @@ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id)
 static struct fuse_req *fuse_find_writeback(struct fuse_inode *fi,
 					    pgoff_t idx_from, pgoff_t idx_to)
 {
-	struct fuse_req *req;
+	struct rb_node *n;
+
+	n = fi->writepages.rb_node;
 
-	list_for_each_entry(req, &fi->writepages, writepages_entry) {
+	while (n) {
+		struct fuse_req *req;
 		pgoff_t curr_index;
 
+		req = rb_entry(n, struct fuse_req, writepages_entry);
 		WARN_ON(get_fuse_inode(req->inode) != fi);
 		curr_index = req->misc.write.in.offset >> PAGE_SHIFT;
-		if (idx_from < curr_index + req->num_pages &&
-		    curr_index <= idx_to) {
+		if (idx_from >= curr_index + req->num_pages)
+			n = n->rb_right;
+		else if (idx_to < curr_index)
+			n = n->rb_left;
+		else
 			return req;
-		}
 	}
 	return NULL;
 }
@@ -1527,7 +1533,7 @@ static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
 	struct backing_dev_info *bdi = inode_to_bdi(inode);
 	int i;
 
-	list_del(&req->writepages_entry);
+	rb_erase(&req->writepages_entry, &fi->writepages);
 	for (i = 0; i < req->num_pages; i++) {
 		dec_wb_stat(&bdi->wb, WB_WRITEBACK);
 		dec_node_page_state(req->pages[i], NR_WRITEBACK_TEMP);
@@ -1605,6 +1611,36 @@ __acquires(fi->lock)
 	}
 }
 
+static int tree_insert(struct rb_root *root, struct fuse_req *ins_req)
+{
+	pgoff_t idx_from = ins_req->misc.write.in.offset >> PAGE_SHIFT;
+	pgoff_t idx_to   = idx_from + (ins_req->num_pages ?
+				ins_req->num_pages - 1 : 0);
+	struct rb_node **p = &root->rb_node;
+	struct rb_node  *parent = NULL;
+
+	while (*p) {
+		struct fuse_req *req;
+		pgoff_t curr_index;
+
+		parent = *p;
+		req = rb_entry(parent, struct fuse_req, writepages_entry);
+		BUG_ON(req->inode != ins_req->inode);
+		curr_index = req->misc.write.in.offset >> PAGE_SHIFT;
+
+		if (idx_from >= curr_index + req->num_pages)
+			p = &(*p)->rb_right;
+		else if (idx_to < curr_index)
+			p = &(*p)->rb_left;
+		else
+			BUG();
+	}
+
+	rb_link_node(&ins_req->writepages_entry, parent, p);
+	rb_insert_color(&ins_req->writepages_entry, root);
+	return 0;
+}
+
 static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req)
 {
 	struct inode *inode = req->inode;
@@ -1619,7 +1655,7 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req)
 		req->misc.write.next = next->misc.write.next;
 		next->misc.write.next = NULL;
 		next->ff = fuse_file_get(req->ff);
-		list_add(&next->writepages_entry, &fi->writepages);
+		tree_insert(&fi->writepages, next);
 
 		/*
 		 * Skip fuse_flush_writepages() to make it easy to crop requests
@@ -1735,7 +1771,7 @@ static int fuse_writepage_locked(struct page *page)
 	inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP);
 
 	spin_lock(&fi->lock);
-	list_add(&req->writepages_entry, &fi->writepages);
+	tree_insert(&fi->writepages, req);
 	list_add_tail(&req->list, &fi->queued_writes);
 	fuse_flush_writepages(inode);
 	spin_unlock(&fi->lock);
@@ -1820,10 +1856,10 @@ static bool fuse_writepage_in_flight(struct fuse_req *new_req,
 	WARN_ON(new_req->num_pages != 0);
 
 	spin_lock(&fi->lock);
-	list_del(&new_req->writepages_entry);
+	rb_erase(&new_req->writepages_entry, &fi->writepages);
 	old_req = fuse_find_writeback(fi, page->index, page->index);
 	if (!old_req) {
-		list_add(&new_req->writepages_entry, &fi->writepages);
+		tree_insert(&fi->writepages, new_req);
 		spin_unlock(&fi->lock);
 		return false;
 	}
@@ -1940,7 +1976,7 @@ static int fuse_writepages_fill(struct page *page,
 		req->inode = inode;
 
 		spin_lock(&fi->lock);
-		list_add(&req->writepages_entry, &fi->writepages);
+		tree_insert(&fi->writepages, req);
 		spin_unlock(&fi->lock);
 
 		data->req = req;
@@ -3262,5 +3298,5 @@ void fuse_init_file_inode(struct inode *inode)
 	INIT_LIST_HEAD(&fi->queued_writes);
 	fi->writectr = 0;
 	init_waitqueue_head(&fi->page_waitq);
-	INIT_LIST_HEAD(&fi->writepages);
+	fi->writepages = RB_ROOT;
 }
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 24dbca777775..bbb3ca395892 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -114,7 +114,7 @@ struct fuse_inode {
 			wait_queue_head_t page_waitq;
 
 			/* List of writepage requestst (pending or sent) */
-			struct list_head writepages;
+			struct rb_root writepages;
 		};
 
 		/* readdir cache (directory only) */
@@ -437,7 +437,7 @@ struct fuse_req {
 	struct fuse_io_priv *io;
 
 	/** Link on fi->writepages */
-	struct list_head writepages_entry;
+	struct rb_node writepages_entry;
 
 	/** Request completion callback */
 	void (*end)(struct fuse_conn *, struct fuse_req *);
-- 
2.17.1


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH RFC] fuse: optimize writepages search
  2019-09-19 14:11 [PATCH RFC] fuse: optimize writepages search Vasily Averin
@ 2020-06-11  8:53 ` Miklos Szeredi
  0 siblings, 0 replies; 2+ messages in thread
From: Miklos Szeredi @ 2020-06-11  8:53 UTC (permalink / raw)
  To: Vasily Averin; +Cc: linux-fsdevel

On Thu, Sep 19, 2019 at 4:11 PM Vasily Averin <vvs@virtuozzo.com> wrote:
>
> Dear Miklos,
>
> This patch was originally developed for RHEL7-based
> Virtuozzo kernels and widely used few years
> but seems it was not submitted upstream.
> I've rebased it to v5.3, compiled but was not tested,
> just to get feedback and then update it.

Applied and  pulled by Linus.

Thanks,
Miklos

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2020-06-11  8:53 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-09-19 14:11 [PATCH RFC] fuse: optimize writepages search Vasily Averin
2020-06-11  8:53 ` Miklos Szeredi

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).