LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
From: Miklos Szeredi <miklos@szeredi.hu>
To: akpm@linux-foundation.org
Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org
Subject: [patch 22/22] fuse: allow big write requests
Date: Wed, 28 Feb 2007 00:15:04 +0100	[thread overview]
Message-ID: <20070227231744.371044552@szeredi.hu> (raw)
In-Reply-To: <20070227231442.627972152@szeredi.hu>

[-- Attachment #1: fuse_bigwrite.patch --]
[-- Type: text/plain, Size: 8132 bytes --]

From: Miklos Szeredi <mszeredi@suse.cz>

Up to now, file writes were split into page-sized WRITE requests.
This is inefficient, since there are two context switches per request.

So allow bigger writes, but still do it synchronously.  Asynchronous
writeback would be even better, but is very difficult, because of the
way file attributes are handled in fuse.

Pages to be written back are collected in a per-file request.  When no
more pages fit in the request or the size of the write would exceed
the maximum write count, send the request and start a new one.  The
write, and hence the per-file request, is protected by i_mutex.

Pages are not dirtied, but changed to write-back state immediately.
If a page is already being written back, then wait for writeback to
finish.

Change s_blocksize to the number of bytes that fit in a single write
request (and s_blocksize_bits to match), so that userspace tools
utilize the big write requests.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---

Index: linux/fs/fuse/file.c
===================================================================
--- linux.orig/fs/fuse/file.c	2007-02-27 20:45:07.000000000 +0100
+++ linux/fs/fuse/file.c	2007-02-27 21:58:06.000000000 +0100
@@ -56,6 +56,7 @@ struct fuse_file *fuse_file_alloc(void)
 		}
 		INIT_LIST_HEAD(&ff->write_entry);
 		atomic_set(&ff->count, 0);
+		ff->write_req = NULL;
 	}
 	return ff;
 }
@@ -91,6 +92,12 @@ void fuse_finish_open(struct inode *inod
 		invalidate_inode_pages2(inode->i_mapping);
 	ff->fh = outarg->fh;
 	file->private_data = fuse_file_get(ff);
+	/*
+	 * Writes are synchronous anyway, and the page syncing in
+	 * generic_file_aio_write_nolock() interferes with our special
+	 * batched write thingy
+	 */
+	file->f_flags &= ~O_SYNC;
 }
 
 int fuse_open_common(struct inode *inode, struct file *file, int isdir)
@@ -486,44 +493,135 @@ static int fuse_prepare_write(struct fil
 	return 0;
 }
 
+static int fuse_send_write_chunk(struct fuse_file *ff, struct inode *inode)
+{
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_req *req = ff->write_req;
+	struct fuse_write_in *inarg = &req->misc.write.in;
+	struct fuse_write_out *outarg = &req->misc.write.out;
+	loff_t pos = inarg->offset + inarg->size;
+	int err;
+	int i;
+
+	req->in.args[1].size = inarg->size;
+	request_send(fc, req);
+	err = req->out.h.error;
+	if (!err && outarg->size != inarg->size)
+		err = -EIO;
+	for (i = 0; i < req->num_pages; i++) {
+		struct page *page = req->pages[i];
+		if (err)
+			ClearPageUptodate(page);
+		end_page_writeback(page);
+	}
+	fuse_put_request(fc, req);
+	ff->write_req = NULL;
+	if (!err) {
+		spin_lock(&fc->lock);
+		if (pos > inode->i_size)
+			i_size_write(inode, pos);
+		spin_unlock(&fc->lock);
+	}
+	fuse_invalidate_attr(inode);
+	return err;
+}
+
+static int fuse_alloc_write_chunk(struct fuse_file *ff, struct inode *inode,
+				  struct page *page, unsigned offset)
+{
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_req *req = fuse_get_req(fc);
+	if (IS_ERR(req)) {
+		ClearPageUptodate(page);
+		return PTR_ERR(req);
+	}
+	fuse_write_fill(req, ff, inode, page_offset(page) + offset, 0, 0);
+	req->page_offset = offset;
+	ff->write_req = req;
+	return 0;
+}
+
+/*
+ * Fuse wants synchronous, interruptible writes, so this is slightly
+ * unconventional.
+ *
+ * Instead of just dirtying pages, they are accumulated in
+ * ff->write_req, in WriteBack state.  When the request is full or the
+ * number of bytes exceeds max_write, then the request is sent and a
+ * new one is started.
+ */
 static int fuse_commit_write(struct file *file, struct page *page,
 			     unsigned offset, unsigned to)
 {
 	int err;
-	size_t nres;
 	unsigned count = to - offset;
 	struct inode *inode = page->mapping->host;
 	struct fuse_conn *fc = get_fuse_conn(inode);
-	loff_t pos = page_offset(page) + offset;
-	struct fuse_req *req;
+	struct fuse_file *ff = file->private_data;
+	struct fuse_req *req = ff->write_req;
 
 	if (is_bad_inode(inode))
 		return -EIO;
 
-	req = fuse_get_req(fc);
-	if (IS_ERR(req))
-		return PTR_ERR(req);
+	BUG_ON(!count);
+	if (req) {
+		struct fuse_write_in *inarg = &req->misc.write.in;
+
+		/* Writes are always contiguous */
+		BUG_ON(inarg->offset+inarg->size != page_offset(page)+offset);
+
+		if ((!offset && req->num_pages == FUSE_MAX_PAGES_PER_REQ) ||
+		    inarg->size + count > fc->max_write) {
+			err = fuse_send_write_chunk(ff, inode);
+			if (err)
+				return err;
+		}
+	}
+	if (!ff->write_req) {
+		err = fuse_alloc_write_chunk(ff, inode, page, offset);
+		if (err)
+			return err;
+	}
+	req = ff->write_req;
 
-	req->num_pages = 1;
-	req->pages[0] = page;
-	req->page_offset = offset;
-	nres = fuse_send_write(req, file, inode, pos, count);
-	err = req->out.h.error;
-	fuse_put_request(fc, req);
-	if (!err && nres != count)
-		err = -EIO;
-	if (!err) {
-		pos += count;
-		spin_lock(&fc->lock);
-		if (pos > inode->i_size)
-			i_size_write(inode, pos);
-		spin_unlock(&fc->lock);
+	/* Whole page has been overwritten, it's now clean and up-to-date */
+	if (to == PAGE_CACHE_SIZE &&
+	    (!req->page_offset || req->num_pages > 1)) {
+		clear_page_dirty_for_io(page);
+		SetPageUptodate(page);
+	}
 
-		if (offset == 0 && to == PAGE_CACHE_SIZE)
-			SetPageUptodate(page);
+	if (!req->num_pages || req->pages[req->num_pages - 1] != page) {
+		req->pages[req->num_pages] = page;
+		req->num_pages++;
+		wait_on_page_writeback(page);
+		set_page_writeback(page);
 	}
-	fuse_invalidate_attr(inode);
-	return err;
+	req->misc.write.in.size += count;
+	return 0;
+}
+
+static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
+				   unsigned long nr_segs, loff_t pos)
+{
+	ssize_t ret;
+	struct file *file = iocb->ki_filp;
+	struct inode *inode = file->f_mapping->host;
+	struct fuse_file *ff = file->private_data;
+	BUG_ON(iocb->ki_pos != pos);
+
+	mutex_lock(&inode->i_mutex);
+	BUG_ON(ff->write_req);
+	ret = generic_file_aio_write_nolock(iocb, iov, nr_segs, iocb->ki_pos);
+	if (ff->write_req) {
+		ssize_t written = ff->write_req->misc.write.in.offset - pos;
+		int err = fuse_send_write_chunk(ff, inode);
+		if (err)
+			ret = written ? written : err;
+	}
+	mutex_unlock(&inode->i_mutex);
+
+	return ret;
 }
 
 static void fuse_release_user_pages(struct fuse_req *req, int write)
@@ -1067,7 +1165,7 @@ static const struct file_operations fuse
 	.read		= do_sync_read,
 	.aio_read	= generic_file_aio_read,
 	.write		= do_sync_write,
-	.aio_write	= generic_file_aio_write,
+	.aio_write	= fuse_file_aio_write,
 	.mmap		= fuse_file_mmap,
 	.open		= fuse_open,
 	.flush		= fuse_flush,
Index: linux/fs/fuse/fuse_i.h
===================================================================
--- linux.orig/fs/fuse/fuse_i.h	2007-02-27 20:45:07.000000000 +0100
+++ linux/fs/fuse/fuse_i.h	2007-02-27 20:46:51.000000000 +0100
@@ -85,6 +85,9 @@ struct fuse_file {
 
 	/** Entry on inode's write_files list */
 	struct list_head write_entry;
+
+	/** Request used in synchronous writes */
+	struct fuse_req *write_req;
 };
 
 /** One input argument of a request */
Index: linux/fs/fuse/inode.c
===================================================================
--- linux.orig/fs/fuse/inode.c	2007-02-27 20:45:07.000000000 +0100
+++ linux/fs/fuse/inode.c	2007-02-27 21:57:19.000000000 +0100
@@ -108,6 +108,9 @@ static int fuse_remount_fs(struct super_
 	if (*flags & MS_MANDLOCK)
 		return -EINVAL;
 
+	/* Writes are synchronous anyway, also see O_SYNC */
+	*flags &= ~MS_SYNCHRONOUS;
+
 	return 0;
 }
 
@@ -542,6 +545,9 @@ static int fuse_fill_super(struct super_
 	if (sb->s_flags & MS_MANDLOCK)
 		return -EINVAL;
 
+	/* Writes are synchronous anyway, also see O_SYNC */
+	sb->s_flags &= ~MS_SYNCHRONOUS;
+
 	if (!parse_fuse_opt((char *) data, &d, is_bdev))
 		return -EINVAL;
 
@@ -551,8 +557,8 @@ static int fuse_fill_super(struct super_
 			return -EINVAL;
 #endif
 	} else {
-		sb->s_blocksize = PAGE_CACHE_SIZE;
-		sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+		sb->s_blocksize = PAGE_CACHE_SIZE * FUSE_MAX_PAGES_PER_REQ;
+		sb->s_blocksize_bits = ilog2(sb->s_blocksize);
 	}
 	sb->s_magic = FUSE_SUPER_MAGIC;
 	sb->s_op = &fuse_super_operations;

--

      parent reply	other threads:[~2007-02-27 23:18 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-02-27 23:14 [patch 00/22] misc VFS/VM patches and fuse writable shared mapping support Miklos Szeredi
2007-02-27 23:14 ` [patch 01/22] update ctime and mtime for mmaped write Miklos Szeredi
2007-02-28 14:16   ` Peter Staubach
2007-02-28 17:06     ` Miklos Szeredi
2007-02-28 17:21       ` Peter Staubach
2007-02-28 17:51         ` Miklos Szeredi
2007-02-28 20:01           ` Peter Staubach
2007-02-28 20:35             ` Miklos Szeredi
2007-02-28 20:58               ` Miklos Szeredi
2007-02-28 21:09                 ` Peter Staubach
2007-03-01  7:25                   ` Miklos Szeredi
2007-02-27 23:14 ` [patch 02/22] fix quadratic behavior of shrink_dcache_parent() Miklos Szeredi
2007-02-27 23:14 ` [patch 03/22] fix deadlock in balance_dirty_pages Miklos Szeredi
2007-02-27 23:14 ` [patch 04/22] fix deadlock in throttle_vm_writeout Miklos Szeredi
2007-02-27 23:14 ` [patch 05/22] balance dirty pages from loop device Miklos Szeredi
2007-02-27 23:14 ` [patch 06/22] consolidate generic_writepages and mpage_writepages Miklos Szeredi
2007-02-27 23:14 ` [patch 07/22] add filesystem subtype support Miklos Szeredi
2007-02-27 23:14 ` [patch 08/22] fuse: update backing_dev_info congestion state Miklos Szeredi
2007-02-27 23:14 ` [patch 09/22] fuse: fix reserved request wake up Miklos Szeredi
2007-02-27 23:14 ` [patch 10/22] fuse: add reference counting to fuse_file Miklos Szeredi
2007-02-27 23:14 ` [patch 11/22] fuse: add truncation semaphore Miklos Szeredi
2007-02-27 23:14 ` [patch 12/22] fuse: fix page invalidation Miklos Szeredi
2007-02-27 23:14 ` [patch 13/22] fuse: add list of writable files to fuse_inode Miklos Szeredi
2007-02-27 23:14 ` [patch 14/22] fuse: add helper for asynchronous writes Miklos Szeredi
2007-02-27 23:14 ` [patch 15/22] add non-owner variant of down_read_trylock() Miklos Szeredi
2007-02-27 23:14 ` [patch 16/22] fuse: add fuse_writepage() function Miklos Szeredi
2007-02-27 23:14 ` [patch 17/22] fuse: writable shared mmap support Miklos Szeredi
2007-02-27 23:15 ` [patch 18/22] fuse: add fuse_writepages() function Miklos Szeredi
2007-02-27 23:15 ` [patch 19/22] export sync_sb() to modules Miklos Szeredi
2007-02-27 23:15 ` [patch 20/22] fuse: make dirty stats available Miklos Szeredi
2007-02-27 23:15 ` [patch 21/22] fuse: limit dirty pages Miklos Szeredi
2007-02-27 23:15 ` Miklos Szeredi [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20070227231744.371044552@szeredi.hu \
    --to=miklos@szeredi.hu \
    --cc=akpm@linux-foundation.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --subject='Re: [patch 22/22] fuse: allow big write requests' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).