Linux-Fsdevel Archive on lore.kernel.org
help / color / mirror / Atom feed
From: John Hubbard <jhubbard@nvidia.com>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>,
	Christoph Hellwig <hch@infradead.org>,
	Ilya Dryomov <idryomov@gmail.com>, Jens Axboe <axboe@kernel.dk>,
	Jeff Layton <jlayton@kernel.org>, <linux-xfs@vger.kernel.org>,
	<linux-fsdevel@vger.kernel.org>, <linux-block@vger.kernel.org>,
	<ceph-devel@vger.kernel.org>, <linux-mm@kvack.org>,
	LKML <linux-kernel@vger.kernel.org>,
	John Hubbard <jhubbard@nvidia.com>
Subject: [PATCH 4/5] bio: introduce BIO_FOLL_PIN flag
Date: Fri, 21 Aug 2020 21:20:58 -0700	[thread overview]
Message-ID: <20200822042059.1805541-5-jhubbard@nvidia.com> (raw)
In-Reply-To: <20200822042059.1805541-1-jhubbard@nvidia.com>

Add a new BIO_FOLL_PIN flag to struct bio, whose "short int" flags field
was full, thuse triggering an expansion of the field from 16, to 32
bits. This allows for a nice assertion in bio_release_pages(), that the
bio page release mechanism matches the page acquisition mechanism.

Set BIO_FOLL_PIN whenever pin_user_pages_fast() is used, and check for
BIO_FOLL_PIN before using unpin_user_page().

Signed-off-by: John Hubbard <jhubbard@nvidia.com>
---
 block/bio.c               | 9 +++++++--
 block/blk-map.c           | 3 ++-
 fs/direct-io.c            | 4 ++--
 include/linux/blk_types.h | 5 +++--
 include/linux/uio.h       | 5 +++--
 lib/iov_iter.c            | 9 +++++++--
 6 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/block/bio.c b/block/bio.c
index 00d548e3c2b8..dd8e85618d5e 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -952,6 +952,9 @@ void bio_release_pages(struct bio *bio, bool mark_dirty)
 	if (bio_flagged(bio, BIO_NO_PAGE_REF))
 		return;
 
+	if (WARN_ON_ONCE(!bio_flagged(bio, BIO_FOLL_PIN)))
+		return;
+
 	bio_for_each_segment_all(bvec, bio, iter_all) {
 		if (mark_dirty && !PageCompound(bvec->bv_page))
 			set_page_dirty_lock(bvec->bv_page);
@@ -1009,7 +1012,8 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 	BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2);
 	pages += entries_left * (PAGE_PTRS_PER_BVEC - 1);
 
-	size = iov_iter_pin_user_pages(iter, pages, LONG_MAX, nr_pages, &offset);
+	size = iov_iter_pin_user_pages(bio, iter, pages, LONG_MAX, nr_pages,
+				       &offset);
 	if (unlikely(size <= 0))
 		return size ? size : -EFAULT;
 
@@ -1056,7 +1060,8 @@ static int __bio_iov_append_get_pages(struct bio *bio, struct iov_iter *iter)
 	BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2);
 	pages += entries_left * (PAGE_PTRS_PER_BVEC - 1);
 
-	size = iov_iter_pin_user_pages(iter, pages, LONG_MAX, nr_pages, &offset);
+	size = iov_iter_pin_user_pages(bio, iter, pages, LONG_MAX, nr_pages,
+				       &offset);
 	if (unlikely(size <= 0))
 		return size ? size : -EFAULT;
 
diff --git a/block/blk-map.c b/block/blk-map.c
index 7a095b4947ea..ddfff2f0b1cb 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -275,7 +275,8 @@ static struct bio *bio_map_user_iov(struct request_queue *q,
 		size_t offs, added = 0;
 		int npages;
 
-		bytes = iov_iter_pin_user_pages_alloc(iter, &pages, LONG_MAX, &offs);
+		bytes = iov_iter_pin_user_pages_alloc(bio, iter, &pages,
+						      LONG_MAX, &offs);
 		if (unlikely(bytes <= 0)) {
 			ret = bytes ? bytes : -EFAULT;
 			goto out_unmap;
diff --git a/fs/direct-io.c b/fs/direct-io.c
index b01c8d003bd3..4d0787ba85eb 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -170,8 +170,8 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
 {
 	ssize_t ret;
 
-	ret = iov_iter_pin_user_pages(sdio->iter, dio->pages, LONG_MAX, DIO_PAGES,
-				&sdio->from);
+	ret = iov_iter_pin_user_pages(sdio->bio, sdio->iter, dio->pages,
+				      LONG_MAX, DIO_PAGES, &sdio->from);
 
 	if (ret < 0 && sdio->blocks_available && (dio->op == REQ_OP_WRITE)) {
 		struct page *page = ZERO_PAGE(0);
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 4ecf4fed171f..d0e0da762af3 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -188,7 +188,7 @@ struct bio {
 						 * top bits REQ_OP. Use
 						 * accessors.
 						 */
-	unsigned short		bi_flags;	/* status, etc and bvec pool number */
+	unsigned int		bi_flags;	/* status, etc and bvec pool number */
 	unsigned short		bi_ioprio;
 	unsigned short		bi_write_hint;
 	blk_status_t		bi_status;
@@ -267,6 +267,7 @@ enum {
 				 * of this bio. */
 	BIO_CGROUP_ACCT,	/* has been accounted to a cgroup */
 	BIO_TRACKED,		/* set if bio goes through the rq_qos path */
+	BIO_FOLL_PIN,		/* must release pages via unpin_user_pages() */
 	BIO_FLAG_LAST
 };
 
@@ -285,7 +286,7 @@ enum {
  * freed.
  */
 #define BVEC_POOL_BITS		(3)
-#define BVEC_POOL_OFFSET	(16 - BVEC_POOL_BITS)
+#define BVEC_POOL_OFFSET	(32 - BVEC_POOL_BITS)
 #define BVEC_POOL_IDX(bio)	((bio)->bi_flags >> BVEC_POOL_OFFSET)
 #if (1<< BVEC_POOL_BITS) < (BVEC_POOL_NR+1)
 # error "BVEC_POOL_BITS is too small"
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 29b0504a27cc..62bcf5e45f2b 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -209,6 +209,7 @@ size_t copy_to_iter_mcsafe(void *addr, size_t bytes, struct iov_iter *i)
 		return _copy_to_iter_mcsafe(addr, bytes, i);
 }
 
+struct bio;
 size_t iov_iter_zero(size_t bytes, struct iov_iter *);
 unsigned long iov_iter_alignment(const struct iov_iter *i);
 unsigned long iov_iter_gap_alignment(const struct iov_iter *i);
@@ -229,9 +230,9 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages);
 
 const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags);
 
-ssize_t iov_iter_pin_user_pages(struct iov_iter *i, struct page **pages,
+ssize_t iov_iter_pin_user_pages(struct bio *bio, struct iov_iter *i, struct page **pages,
 			size_t maxsize, unsigned int maxpages, size_t *start);
-ssize_t iov_iter_pin_user_pages_alloc(struct iov_iter *i, struct page ***pages,
+ssize_t iov_iter_pin_user_pages_alloc(struct bio *bio, struct iov_iter *i, struct page ***pages,
 			size_t maxsize, size_t *start);
 
 static inline size_t iov_iter_count(const struct iov_iter *i)
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index d818b16d136b..a4bc1b3a3fda 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -3,6 +3,7 @@
 #include <linux/export.h>
 #include <linux/bvec.h>
 #include <linux/uio.h>
+#include <linux/bio.h>
 #include <linux/pagemap.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
@@ -1309,7 +1310,7 @@ static ssize_t pipe_get_pages(struct iov_iter *i,
 	return __pipe_get_pages(i, min(maxsize, capacity), pages, iter_head, start);
 }
 
-ssize_t iov_iter_pin_user_pages(struct iov_iter *i,
+ssize_t iov_iter_pin_user_pages(struct bio *bio, struct iov_iter *i,
 		   struct page **pages, size_t maxsize, unsigned int maxpages,
 		   size_t *start)
 {
@@ -1335,6 +1336,8 @@ ssize_t iov_iter_pin_user_pages(struct iov_iter *i,
 		addr &= ~(PAGE_SIZE - 1);
 		n = DIV_ROUND_UP(len, PAGE_SIZE);
 
+		bio_set_flag(bio, BIO_FOLL_PIN);
+
 		res = pin_user_pages_fast(addr, n,
 				iov_iter_rw(i) != WRITE ?  FOLL_WRITE : 0,
 				pages);
@@ -1426,7 +1429,7 @@ static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
 	return n;
 }
 
-ssize_t iov_iter_pin_user_pages_alloc(struct iov_iter *i,
+ssize_t iov_iter_pin_user_pages_alloc(struct bio *bio, struct iov_iter *i,
 		   struct page ***pages, size_t maxsize,
 		   size_t *start)
 {
@@ -1454,6 +1457,8 @@ ssize_t iov_iter_pin_user_pages_alloc(struct iov_iter *i,
 		if (!p)
 			return -ENOMEM;
 
+		bio_set_flag(bio, BIO_FOLL_PIN);
+
 		res = pin_user_pages_fast(addr, n,
 				iov_iter_rw(i) != WRITE ?  FOLL_WRITE : 0, p);
 		if (unlikely(res < 0)) {
-- 
2.28.0


  parent reply	other threads:[~2020-08-22  4:21 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-08-22  4:20 [PATCH 0/5] bio: Direct IO: convert to pin_user_pages_fast() John Hubbard
2020-08-22  4:20 ` [PATCH 1/5] iov_iter: introduce iov_iter_pin_user_pages*() routines John Hubbard
2020-08-22  4:20 ` [PATCH 2/5] mm/gup: introduce pin_user_page() John Hubbard
2020-08-22  4:20 ` [PATCH 3/5] bio: convert get_user_pages_fast() --> pin_user_pages_fast() John Hubbard
2020-08-22  4:20 ` John Hubbard [this message]
2020-08-23  6:25   ` [PATCH 4/5] bio: introduce BIO_FOLL_PIN flag Christoph Hellwig
2020-08-23  6:57     ` John Hubbard
2020-08-24  7:36       ` John Hubbard
2020-08-24  9:20     ` Jens Axboe
2020-08-24 14:42       ` Christoph Hellwig
2020-08-22  4:20 ` [PATCH 5/5] fs/ceph: use pipe_get_pages_alloc() for pipe John Hubbard
2020-08-24 10:53   ` Jeff Layton
2020-08-24 17:54     ` John Hubbard
2020-08-24 18:47       ` Jeff Layton
2020-08-24 18:54         ` Matthew Wilcox
2020-08-24 19:02           ` John Hubbard
2020-08-25  1:20           ` [PATCH v2] " John Hubbard
2020-08-25 16:22             ` Jeff Layton
2020-08-25  1:54 ` [PATCH 0/5] bio: Direct IO: convert to pin_user_pages_fast() Al Viro
2020-08-25  2:07   ` Al Viro
2020-08-25  2:13     ` John Hubbard
2020-08-25  2:07   ` John Hubbard
2020-08-25  2:22     ` Al Viro
2020-08-25  2:27       ` John Hubbard

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200822042059.1805541-5-jhubbard@nvidia.com \
    --to=jhubbard@nvidia.com \
    --cc=akpm@linux-foundation.org \
    --cc=axboe@kernel.dk \
    --cc=ceph-devel@vger.kernel.org \
    --cc=hch@infradead.org \
    --cc=idryomov@gmail.com \
    --cc=jlayton@kernel.org \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=viro@zeniv.linux.org.uk \
    --subject='Re: [PATCH 4/5] bio: introduce BIO_FOLL_PIN flag' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).