Linux-Fsdevel Archive on lore.kernel.org
help / color / mirror / Atom feed
* [PATCH v3 0/3] Unaligned DIO read error path fix and clean ups
@ 2020-09-05  5:20 Gabriel Krisman Bertazi
  2020-09-05  5:20 ` [PATCH v3 1/3] direct-io: clean up error paths of do_blockdev_direct_IO Gabriel Krisman Bertazi
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Gabriel Krisman Bertazi @ 2020-09-05  5:20 UTC (permalink / raw)
  To: viro; +Cc: linux-fsdevel, jack, khazhy, Gabriel Krisman Bertazi, kernel

This is v3 of the "Unaligned DIO read error path fix and clean ups"
series.  This version applies some small fixes to patch 1 suggested by
Jan Kara (thank you!) and was tested with the xfstests aio group on f2fs
and with fio workloads.

Gabriel Krisman Bertazi (3):
  direct-io: clean up error paths of do_blockdev_direct_IO
  direct-io: don't force writeback for reads beyond EOF
  direct-io: defer alignment check until after the EOF check

 fs/direct-io.c | 69 ++++++++++++++++++++++----------------------------
 1 file changed, 30 insertions(+), 39 deletions(-)

-- 
2.28.0


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH v3 1/3] direct-io: clean up error paths of do_blockdev_direct_IO
  2020-09-05  5:20 [PATCH v3 0/3] Unaligned DIO read error path fix and clean ups Gabriel Krisman Bertazi
@ 2020-09-05  5:20 ` Gabriel Krisman Bertazi
  2020-09-07  9:32   ` Jan Kara
  2020-09-05  5:20 ` [PATCH v3 2/3] direct-io: don't force writeback for reads beyond EOF Gabriel Krisman Bertazi
  2020-09-05  5:20 ` [PATCH v3 3/3] direct-io: defer alignment check until after the EOF check Gabriel Krisman Bertazi
  2 siblings, 1 reply; 6+ messages in thread
From: Gabriel Krisman Bertazi @ 2020-09-05  5:20 UTC (permalink / raw)
  To: viro; +Cc: linux-fsdevel, jack, khazhy, Gabriel Krisman Bertazi, kernel

In preparation for reordering the DIO checks, reduce the duplication of
error handling code in do_blockdev_direct_IO.

Changes since V1:
  - Remove fail_dio_unlocked (Me)
  - Ensure fail_dio won't call inode_unlock() for writes (Jan Kara)

Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
---
 fs/direct-io.c | 35 ++++++++++++++---------------------
 1 file changed, 14 insertions(+), 21 deletions(-)

diff --git a/fs/direct-io.c b/fs/direct-io.c
index 183299892465..6c11db1cec27 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1170,7 +1170,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
 			blkbits = blksize_bits(bdev_logical_block_size(bdev));
 		blocksize_mask = (1 << blkbits) - 1;
 		if (align & blocksize_mask)
-			goto out;
+			return -EINVAL;
 	}
 
 	/* watch out for a 0 len io from a tricksy fs */
@@ -1178,9 +1178,8 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
 		return 0;
 
 	dio = kmem_cache_alloc(dio_cache, GFP_KERNEL);
-	retval = -ENOMEM;
 	if (!dio)
-		goto out;
+		return -ENOMEM;
 	/*
 	 * Believe it or not, zeroing out the page array caused a .5%
 	 * performance regression in a database benchmark.  So, we take
@@ -1199,22 +1198,16 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
 
 			retval = filemap_write_and_wait_range(mapping, offset,
 							      end - 1);
-			if (retval) {
-				inode_unlock(inode);
-				kmem_cache_free(dio_cache, dio);
-				goto out;
-			}
+			if (retval)
+				goto fail_dio;
 		}
 	}
 
 	/* Once we sampled i_size check for reads beyond EOF */
 	dio->i_size = i_size_read(inode);
 	if (iov_iter_rw(iter) == READ && offset >= dio->i_size) {
-		if (dio->flags & DIO_LOCKING)
-			inode_unlock(inode);
-		kmem_cache_free(dio_cache, dio);
 		retval = 0;
-		goto out;
+		goto fail_dio;
 	}
 
 	/*
@@ -1258,14 +1251,8 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
 			 */
 			retval = sb_init_dio_done_wq(dio->inode->i_sb);
 		}
-		if (retval) {
-			/*
-			 * We grab i_mutex only for reads so we don't have
-			 * to release it here
-			 */
-			kmem_cache_free(dio_cache, dio);
-			goto out;
-		}
+		if (retval)
+			goto fail_dio;
 	}
 
 	/*
@@ -1368,7 +1355,13 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
 	} else
 		BUG_ON(retval != -EIOCBQUEUED);
 
-out:
+	return retval;
+
+fail_dio:
+	if (dio->flags & DIO_LOCKING && iov_iter_rw(iter) == READ)
+		inode_unlock(inode);
+
+	kmem_cache_free(dio_cache, dio);
 	return retval;
 }
 
-- 
2.28.0


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH v3 2/3] direct-io: don't force writeback for reads beyond EOF
  2020-09-05  5:20 [PATCH v3 0/3] Unaligned DIO read error path fix and clean ups Gabriel Krisman Bertazi
  2020-09-05  5:20 ` [PATCH v3 1/3] direct-io: clean up error paths of do_blockdev_direct_IO Gabriel Krisman Bertazi
@ 2020-09-05  5:20 ` Gabriel Krisman Bertazi
  2020-09-05  5:20 ` [PATCH v3 3/3] direct-io: defer alignment check until after the EOF check Gabriel Krisman Bertazi
  2 siblings, 0 replies; 6+ messages in thread
From: Gabriel Krisman Bertazi @ 2020-09-05  5:20 UTC (permalink / raw)
  To: viro; +Cc: linux-fsdevel, jack, khazhy, Gabriel Krisman Bertazi, kernel

If a DIO read starts past EOF, the kernel won't attempt it, so we don't
need to flush dirty pages before failing the syscall.

Suggested-by: Jan Kara <jack@suse.cz>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
---
 fs/direct-io.c | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/fs/direct-io.c b/fs/direct-io.c
index 6c11db1cec27..c17efe58f1c9 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1188,19 +1188,9 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
 	memset(dio, 0, offsetof(struct dio, pages));
 
 	dio->flags = flags;
-	if (dio->flags & DIO_LOCKING) {
-		if (iov_iter_rw(iter) == READ) {
-			struct address_space *mapping =
-					iocb->ki_filp->f_mapping;
-
-			/* will be released by direct_io_worker */
-			inode_lock(inode);
-
-			retval = filemap_write_and_wait_range(mapping, offset,
-							      end - 1);
-			if (retval)
-				goto fail_dio;
-		}
+	if (dio->flags & DIO_LOCKING && iov_iter_rw(iter) == READ) {
+		/* will be released by direct_io_worker */
+		inode_lock(inode);
 	}
 
 	/* Once we sampled i_size check for reads beyond EOF */
@@ -1210,6 +1200,14 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
 		goto fail_dio;
 	}
 
+	if (dio->flags & DIO_LOCKING && iov_iter_rw(iter) == READ) {
+		struct address_space *mapping = iocb->ki_filp->f_mapping;
+
+		retval = filemap_write_and_wait_range(mapping, offset, end - 1);
+		if (retval)
+			goto fail_dio;
+	}
+
 	/*
 	 * For file extending writes updating i_size before data writeouts
 	 * complete can expose uninitialized blocks in dumb filesystems.
-- 
2.28.0


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH v3 3/3] direct-io: defer alignment check until after the EOF check
  2020-09-05  5:20 [PATCH v3 0/3] Unaligned DIO read error path fix and clean ups Gabriel Krisman Bertazi
  2020-09-05  5:20 ` [PATCH v3 1/3] direct-io: clean up error paths of do_blockdev_direct_IO Gabriel Krisman Bertazi
  2020-09-05  5:20 ` [PATCH v3 2/3] direct-io: don't force writeback for reads beyond EOF Gabriel Krisman Bertazi
@ 2020-09-05  5:20 ` Gabriel Krisman Bertazi
  2020-09-07  9:36   ` Jan Kara
  2 siblings, 1 reply; 6+ messages in thread
From: Gabriel Krisman Bertazi @ 2020-09-05  5:20 UTC (permalink / raw)
  To: viro
  Cc: linux-fsdevel, jack, khazhy, Gabriel Krisman Bertazi, kernel, Jamie Liu

Prior to commit 9fe55eea7e4b ("Fix race when checking i_size on direct
i/o read"), an unaligned direct read past end of file would trigger EOF,
since generic_file_aio_read detected this read-at-EOF condition and
skipped the direct IO read entirely, returning 0. After that change, the
read now reaches dio_generic, which detects the misalignment and returns
EINVAL.

This makes the generic direct-io code follow the same behavior as the
filesystems.  Apparently, this fix will only affect ocfs2, since other
filesystems do this verification before calling do_blockdev_direct_IO,
with the exception of f2fs, which has the same bug, but is fixed in the
next patch.

It can be verified by a read loop on a file that does a partial read
before EOF (on a file that doesn't end at an aligned address).  Without
this patch, the following code fails on such an unaligned file on
filesystems that lack prior validation, but not on btrfs, ext4, and xfs.

  while (done < total) {
    ssize_t delta = pread(fd, buf + done, total - done, off + done);
    if (!delta)
      break;
    ...
  }

Fix this regression by moving the misalignment check to after the EOF
check added by commit 74cedf9b6c60 ("direct-io: Fix negative return from
dio read beyond eof").

Based on a patch by Jamie Liu.

Reported-by: Jamie Liu <jamieliu@google.com>
Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
---
 fs/direct-io.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/fs/direct-io.c b/fs/direct-io.c
index c17efe58f1c9..82838cca934b 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1165,14 +1165,6 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
 	 * the early prefetch in the caller enough time.
 	 */
 
-	if (align & blocksize_mask) {
-		if (bdev)
-			blkbits = blksize_bits(bdev_logical_block_size(bdev));
-		blocksize_mask = (1 << blkbits) - 1;
-		if (align & blocksize_mask)
-			return -EINVAL;
-	}
-
 	/* watch out for a 0 len io from a tricksy fs */
 	if (iov_iter_rw(iter) == READ && !count)
 		return 0;
@@ -1200,6 +1192,14 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
 		goto fail_dio;
 	}
 
+	if (align & blocksize_mask) {
+		if (bdev)
+			blkbits = blksize_bits(bdev_logical_block_size(bdev));
+		blocksize_mask = (1 << blkbits) - 1;
+		if (align & blocksize_mask)
+			goto fail_dio;
+	}
+
 	if (dio->flags & DIO_LOCKING && iov_iter_rw(iter) == READ) {
 		struct address_space *mapping = iocb->ki_filp->f_mapping;
 
-- 
2.28.0


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v3 1/3] direct-io: clean up error paths of do_blockdev_direct_IO
  2020-09-05  5:20 ` [PATCH v3 1/3] direct-io: clean up error paths of do_blockdev_direct_IO Gabriel Krisman Bertazi
@ 2020-09-07  9:32   ` Jan Kara
  0 siblings, 0 replies; 6+ messages in thread
From: Jan Kara @ 2020-09-07  9:32 UTC (permalink / raw)
  To: Gabriel Krisman Bertazi; +Cc: viro, linux-fsdevel, jack, khazhy, kernel

On Sat 05-09-20 01:20:21, Gabriel Krisman Bertazi wrote:
> In preparation to resort DIO checks, reduce code duplication of error
> handling in do_blockdev_direct_IO.
> 
> Changes since V1:
>   - Remove fail_dio_unlocked (Me)
>   - Ensure fail_dio won't call inode_unlock() for writes (Jan Kara)

Please add the patch changelogs below the diffstat. That way they won't be
in the final changelog (which is the right thing to do because they are
mostly irrelevant for the final patch).

Otherwise the patch looks good to me so feel free to add:

Reviewed-by: Jan Kara <jack@suse.cz>

								Honza 

> 
> Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
> ---
>  fs/direct-io.c | 35 ++++++++++++++---------------------
>  1 file changed, 14 insertions(+), 21 deletions(-)
> 
> diff --git a/fs/direct-io.c b/fs/direct-io.c
> index 183299892465..6c11db1cec27 100644
> --- a/fs/direct-io.c
> +++ b/fs/direct-io.c
> @@ -1170,7 +1170,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
>  			blkbits = blksize_bits(bdev_logical_block_size(bdev));
>  		blocksize_mask = (1 << blkbits) - 1;
>  		if (align & blocksize_mask)
> -			goto out;
> +			return -EINVAL;
>  	}
>  
>  	/* watch out for a 0 len io from a tricksy fs */
> @@ -1178,9 +1178,8 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
>  		return 0;
>  
>  	dio = kmem_cache_alloc(dio_cache, GFP_KERNEL);
> -	retval = -ENOMEM;
>  	if (!dio)
> -		goto out;
> +		return -ENOMEM;
>  	/*
>  	 * Believe it or not, zeroing out the page array caused a .5%
>  	 * performance regression in a database benchmark.  So, we take
> @@ -1199,22 +1198,16 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
>  
>  			retval = filemap_write_and_wait_range(mapping, offset,
>  							      end - 1);
> -			if (retval) {
> -				inode_unlock(inode);
> -				kmem_cache_free(dio_cache, dio);
> -				goto out;
> -			}
> +			if (retval)
> +				goto fail_dio;
>  		}
>  	}
>  
>  	/* Once we sampled i_size check for reads beyond EOF */
>  	dio->i_size = i_size_read(inode);
>  	if (iov_iter_rw(iter) == READ && offset >= dio->i_size) {
> -		if (dio->flags & DIO_LOCKING)
> -			inode_unlock(inode);
> -		kmem_cache_free(dio_cache, dio);
>  		retval = 0;
> -		goto out;
> +		goto fail_dio;
>  	}
>  
>  	/*
> @@ -1258,14 +1251,8 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
>  			 */
>  			retval = sb_init_dio_done_wq(dio->inode->i_sb);
>  		}
> -		if (retval) {
> -			/*
> -			 * We grab i_mutex only for reads so we don't have
> -			 * to release it here
> -			 */
> -			kmem_cache_free(dio_cache, dio);
> -			goto out;
> -		}
> +		if (retval)
> +			goto fail_dio;
>  	}
>  
>  	/*
> @@ -1368,7 +1355,13 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
>  	} else
>  		BUG_ON(retval != -EIOCBQUEUED);
>  
> -out:
> +	return retval;
> +
> +fail_dio:
> +	if (dio->flags & DIO_LOCKING && iov_iter_rw(iter) == READ)
> +		inode_unlock(inode);
> +
> +	kmem_cache_free(dio_cache, dio);
>  	return retval;
>  }
>  
> -- 
> 2.28.0
> 
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v3 3/3] direct-io: defer alignment check until after the EOF check
  2020-09-05  5:20 ` [PATCH v3 3/3] direct-io: defer alignment check until after the EOF check Gabriel Krisman Bertazi
@ 2020-09-07  9:36   ` Jan Kara
  0 siblings, 0 replies; 6+ messages in thread
From: Jan Kara @ 2020-09-07  9:36 UTC (permalink / raw)
  To: Gabriel Krisman Bertazi
  Cc: viro, linux-fsdevel, jack, khazhy, kernel, Jamie Liu

On Sat 05-09-20 01:20:23, Gabriel Krisman Bertazi wrote:
> Prior to commit 9fe55eea7e4b ("Fix race when checking i_size on direct
> i/o read"), an unaligned direct read past end of file would trigger EOF,
> since generic_file_aio_read detected this read-at-EOF condition and
> skipped the direct IO read entirely, returning 0. After that change, the
> read now reaches dio_generic, which detects the misalignment and returns
> EINVAL.
> 
> This consolidates the generic direct-io to follow the same behavior of
> filesystems.  Apparently, this fix will only affect ocfs2 since other
> filesystems do this verification before calling do_blockdev_direct_IO,
> with the exception of f2fs, which has the same bug, but is fixed in the
> next patch.
> 
> it can be verified by a read loop on a file that does a partial read
> before EOF (On file that doesn't end at an aligned address).  The
> following code fails on an unaligned file on filesystems without
> prior validation without this patch, but not on btrfs, ext4, and xfs.
> 
>   while (done < total) {
>     ssize_t delta = pread(fd, buf + done, total - done, off + done);
>     if (!delta)
>       break;
>     ...
>   }
> 
> Fix this regression by moving the misalignment check to after the EOF
> check added by commit 74cedf9b6c60 ("direct-io: Fix negative return from
> dio read beyond eof").
> 
> Based on a patch by Jamie Liu.
> 
> Reported-by: Jamie Liu <jamieliu@google.com>
> Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>

The patch looks good to me. You can add:

Reviewed-by: Jan Kara <jack@suse.cz>

								Honza

> ---
>  fs/direct-io.c | 16 ++++++++--------
>  1 file changed, 8 insertions(+), 8 deletions(-)
> 
> diff --git a/fs/direct-io.c b/fs/direct-io.c
> index c17efe58f1c9..82838cca934b 100644
> --- a/fs/direct-io.c
> +++ b/fs/direct-io.c
> @@ -1165,14 +1165,6 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
>  	 * the early prefetch in the caller enough time.
>  	 */
>  
> -	if (align & blocksize_mask) {
> -		if (bdev)
> -			blkbits = blksize_bits(bdev_logical_block_size(bdev));
> -		blocksize_mask = (1 << blkbits) - 1;
> -		if (align & blocksize_mask)
> -			return -EINVAL;
> -	}
> -
>  	/* watch out for a 0 len io from a tricksy fs */
>  	if (iov_iter_rw(iter) == READ && !count)
>  		return 0;
> @@ -1200,6 +1192,14 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
>  		goto fail_dio;
>  	}
>  
> +	if (align & blocksize_mask) {
> +		if (bdev)
> +			blkbits = blksize_bits(bdev_logical_block_size(bdev));
> +		blocksize_mask = (1 << blkbits) - 1;
> +		if (align & blocksize_mask)
> +			goto fail_dio;
> +	}
> +
>  	if (dio->flags & DIO_LOCKING && iov_iter_rw(iter) == READ) {
>  		struct address_space *mapping = iocb->ki_filp->f_mapping;
>  
> -- 
> 2.28.0
> 
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2020-09-07  9:36 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-09-05  5:20 [PATCH v3 0/3] Unaligned DIO read error path fix and clean ups Gabriel Krisman Bertazi
2020-09-05  5:20 ` [PATCH v3 1/3] direct-io: clean up error paths of do_blockdev_direct_IO Gabriel Krisman Bertazi
2020-09-07  9:32   ` Jan Kara
2020-09-05  5:20 ` [PATCH v3 2/3] direct-io: don't force writeback for reads beyond EOF Gabriel Krisman Bertazi
2020-09-05  5:20 ` [PATCH v3 3/3] direct-io: defer alignment check until after the EOF check Gabriel Krisman Bertazi
2020-09-07  9:36   ` Jan Kara

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).