LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
From: Roman Gushchin <guro@fb.com>
To: <linux-fsdevel@vger.kernel.org>, <linux-ext4@vger.kernel.org>,
	<linux-kernel@vger.kernel.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>,
	Andreas Dilger <adilger.kernel@dilger.ca>,
	Roman Gushchin <guro@fb.com>,
	Andrew Perepechko <andrew.perepechko@seagate.com>,
	Theodore Ts'o <tytso@mit.edu>, Gioh Kim <gioh.kim@lge.com>,
	Jan Kara <jack@suse.cz>
Subject: [PATCH v2] ext4: use non-movable memory for superblock readahead
Date: Fri, 28 Feb 2020 16:14:11 -0800	[thread overview]
Message-ID: <20200229001411.128010-1-guro@fb.com> (raw)

Since commit a8ac900b8163 ("ext4: use non-movable memory for the
superblock") buffers for ext4 superblock were allocated using
the sb_bread_unmovable() helper which allocated buffer heads
out of non-movable memory blocks. This was necessary to avoid blocking
page migrations and causing CMA allocation failures.

However, commit 85c8f176a611 ("ext4: preload block group descriptors")
broke this by introducing pre-reading of the ext4 superblock.
The problem is that __breadahead() uses __getblk() underneath,
which allocates buffer heads out of movable memory.

This resulted in page migration failures, which I've seen on a machine
with an ext4 partition and a preallocated CMA area.

Fix this by introducing sb_breadahead_unmovable() and
__breadahead_gfp() helpers which use non-movable memory for buffer
head allocations and use them for the ext4 superblock readahead.

v2: found a similar issue in __ext4_get_inode_loc()

Fixes: 85c8f176a611 ("ext4: preload block group descriptors")
Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Andrew Perepechko <andrew.perepechko@seagate.com>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Gioh Kim <gioh.kim@lge.com>
Cc: Jan Kara <jack@suse.cz>
---
 fs/buffer.c                 | 11 +++++++++++
 fs/ext4/inode.c             |  2 +-
 fs/ext4/super.c             |  2 +-
 include/linux/buffer_head.h |  8 ++++++++
 4 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index 4299e100a05b..25462edd920e 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1414,6 +1414,17 @@ void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
 }
 EXPORT_SYMBOL(__breadahead);
 
+void __breadahead_gfp(struct block_device *bdev, sector_t block, unsigned size,
+		      gfp_t gfp)
+{
+	struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp);
+	if (likely(bh)) {
+		ll_rw_block(REQ_OP_READ, REQ_RAHEAD, 1, &bh);
+		brelse(bh);
+	}
+}
+EXPORT_SYMBOL(__breadahead_gfp);
+
 /**
  *  __bread_gfp() - reads a specified block and returns the bh
  *  @bdev: the block_device to read from
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index fa0ff78dc033..b131fedc6b77 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4348,7 +4348,7 @@ static int __ext4_get_inode_loc(struct inode *inode,
 			if (end > table)
 				end = table;
 			while (b <= end)
-				sb_breadahead(sb, b++);
+				sb_breadahead_unmovable(sb, b++);
 		}
 
 		/*
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index ff1b764b0c0e..fb2338a5220e 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -4331,7 +4331,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	/* Pre-read the descriptors into the buffer cache */
 	for (i = 0; i < db_count; i++) {
 		block = descriptor_loc(sb, logical_sb_block, i);
-		sb_breadahead(sb, block);
+		sb_breadahead_unmovable(sb, block);
 	}
 
 	for (i = 0; i < db_count; i++) {
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 7b73ef7f902d..b56cc825f64d 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -189,6 +189,8 @@ struct buffer_head *__getblk_gfp(struct block_device *bdev, sector_t block,
 void __brelse(struct buffer_head *);
 void __bforget(struct buffer_head *);
 void __breadahead(struct block_device *, sector_t block, unsigned int size);
+void __breadahead_gfp(struct block_device *, sector_t block, unsigned int size,
+		  gfp_t gfp);
 struct buffer_head *__bread_gfp(struct block_device *,
 				sector_t block, unsigned size, gfp_t gfp);
 void invalidate_bh_lrus(void);
@@ -319,6 +321,12 @@ sb_breadahead(struct super_block *sb, sector_t block)
 	__breadahead(sb->s_bdev, block, sb->s_blocksize);
 }
 
+static inline void
+sb_breadahead_unmovable(struct super_block *sb, sector_t block)
+{
+	__breadahead_gfp(sb->s_bdev, block, sb->s_blocksize, 0);
+}
+
 static inline struct buffer_head *
 sb_getblk(struct super_block *sb, sector_t block)
 {
-- 
2.24.1


             reply	other threads:[~2020-02-29  0:14 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-02-29  0:14 Roman Gushchin [this message]
2020-02-29  7:49 ` Andreas Dilger
2020-03-02 16:37   ` Roman Gushchin
2020-03-03 22:17   ` Roman Gushchin
2020-04-06 17:20   ` Roman Gushchin
2020-04-10  3:23 ` Theodore Y. Ts'o
2020-04-10 16:12   ` Roman Gushchin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200229001411.128010-1-guro@fb.com \
    --to=guro@fb.com \
    --cc=adilger.kernel@dilger.ca \
    --cc=andrew.perepechko@seagate.com \
    --cc=gioh.kim@lge.com \
    --cc=jack@suse.cz \
    --cc=linux-ext4@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=tytso@mit.edu \
    --cc=viro@zeniv.linux.org.uk \
    --subject='Re: [PATCH v2] ext4: use non-movable memory for superblock readahead' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).