LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
From: Pavel Begunkov <asml.silence@gmail.com>
To: Jens Axboe <axboe@kernel.dk>,
	io-uring@vger.kernel.org, Josh Triplett <josh@joshtriplett.org>
Cc: "David S . Miller" <davem@davemloft.net>,
	Jakub Kicinski <kuba@kernel.org>,
	linux-kernel@vger.kernel.org, netdev@vger.kernel.org,
	Stefan Metzmacher <metze@samba.org>
Subject: [PATCH v2 2/4] io_uring: openat directly into fixed fd table
Date: Fri, 13 Aug 2021 17:43:11 +0100	[thread overview]
Message-ID: <686a860d973d0b751cf1fa3b1408179b2833720f.1628871893.git.asml.silence@gmail.com> (raw)
In-Reply-To: <cover.1628871893.git.asml.silence@gmail.com>

Instead of opening a file into a process's file table as usual and then
registering the fd within io_uring, some users may want to skip the
first step and place it directly into io_uring's fixed file table.
This patch adds such a capability for IORING_OP_OPENAT and
IORING_OP_OPENAT2.

The behaviour is controlled by setting sqe->file_index, where 0 implies
the old behaviour. If non-zero value is specified, then it will behave
as described and place the file into a fixed file slot
sqe->file_index - 1. A file table should be already created, the slot
should be valid and empty, otherwise the operation will fail.

Note 1: we can't use IOSQE_FIXED_FILE to switch between modes, because
accept takes a file, and it already uses the flag with a different
meaning.

Note 2: it's u16, where in theory the limit for fixed file tables might
get increased in the future. If would ever happen so, we'll better
workaround later, e.g. by making ioprio to represent upper bits 16 bits.
The layout for open is tight already enough.

Suggested-by: Josh Triplett <josh@joshtriplett.org>
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c                 | 76 ++++++++++++++++++++++++++++++-----
 include/uapi/linux/io_uring.h |  2 +
 2 files changed, 69 insertions(+), 9 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 51c4166f68b5..b4f7de5147dc 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1060,6 +1060,9 @@ static void io_req_task_queue(struct io_kiocb *req);
 static void io_submit_flush_completions(struct io_ring_ctx *ctx);
 static int io_req_prep_async(struct io_kiocb *req);
 
+static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
+				 unsigned int issue_flags);
+
 static struct kmem_cache *req_cachep;
 
 static const struct file_operations io_uring_fops;
@@ -3803,11 +3806,10 @@ static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
 
 	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
 		return -EINVAL;
-	if (unlikely(sqe->ioprio || sqe->buf_index))
+	if (unlikely(sqe->ioprio))
 		return -EINVAL;
 	if (unlikely(req->flags & REQ_F_FIXED_FILE))
 		return -EBADF;
-
 	/* open.how should be already initialised */
 	if (!(req->open.how.flags & O_PATH) && force_o_largefile())
 		req->open.how.flags |= O_LARGEFILE;
@@ -3820,6 +3822,10 @@ static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
 		req->open.filename = NULL;
 		return ret;
 	}
+	req->buf_index = READ_ONCE(sqe->file_index);
+	if (req->buf_index && (req->open.how.flags & O_CLOEXEC))
+		return -EINVAL;
+
 	req->open.nofile = rlimit(RLIMIT_NOFILE);
 	req->flags |= REQ_F_NEED_CLEANUP;
 	return 0;
@@ -3857,8 +3863,8 @@ static int io_openat2(struct io_kiocb *req, unsigned int issue_flags)
 {
 	struct open_flags op;
 	struct file *file;
-	bool nonblock_set;
-	bool resolve_nonblock;
+	bool resolve_nonblock, nonblock_set;
+	bool fixed = !!req->buf_index;
 	int ret;
 
 	ret = build_open_flags(&req->open.how, &op);
@@ -3877,9 +3883,11 @@ static int io_openat2(struct io_kiocb *req, unsigned int issue_flags)
 		op.open_flag |= O_NONBLOCK;
 	}
 
-	ret = __get_unused_fd_flags(req->open.how.flags, req->open.nofile);
-	if (ret < 0)
-		goto err;
+	if (!fixed) {
+		ret = __get_unused_fd_flags(req->open.how.flags, req->open.nofile);
+		if (ret < 0)
+			goto err;
+	}
 
 	file = do_filp_open(req->open.dfd, req->open.filename, &op);
 	if (IS_ERR(file)) {
@@ -3888,7 +3896,8 @@ static int io_openat2(struct io_kiocb *req, unsigned int issue_flags)
 		 * marginal gain for something that is now known to be a slower
 		 * path. So just put it, and we'll get a new one when we retry.
 		 */
-		put_unused_fd(ret);
+		if (!fixed)
+			put_unused_fd(ret);
 
 		ret = PTR_ERR(file);
 		/* only retry if RESOLVE_CACHED wasn't already set by application */
@@ -3901,7 +3910,11 @@ static int io_openat2(struct io_kiocb *req, unsigned int issue_flags)
 	if ((issue_flags & IO_URING_F_NONBLOCK) && !nonblock_set)
 		file->f_flags &= ~O_NONBLOCK;
 	fsnotify_open(file);
-	fd_install(ret, file);
+
+	if (!fixed)
+		fd_install(ret, file);
+	else
+		ret = io_install_fixed_file(req, file, issue_flags);
 err:
 	putname(req->open.filename);
 	req->flags &= ~REQ_F_NEED_CLEANUP;
@@ -7835,6 +7848,50 @@ static int io_sqe_file_register(struct io_ring_ctx *ctx, struct file *file,
 #endif
 }
 
+static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
+				 unsigned int issue_flags)
+{
+	struct io_ring_ctx *ctx = req->ctx;
+	int i = req->buf_index - 1;
+	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
+	struct io_fixed_file *file_slot;
+	int ret = -EBADF;
+
+	if (WARN_ON_ONCE(req->buf_index == 0))
+		goto err;
+
+	io_ring_submit_lock(ctx, !force_nonblock);
+	if (file->f_op == &io_uring_fops)
+		goto err;
+	ret = -ENXIO;
+	if (!ctx->file_data)
+		goto err;
+	ret = -EINVAL;
+	if (i > ctx->nr_user_files)
+		goto err;
+
+	i = array_index_nospec(i, ctx->nr_user_files);
+	file_slot = io_fixed_file_slot(&ctx->file_table, i);
+	ret = -EEXIST;
+	if (file_slot->file_ptr)
+		goto err;
+
+	*io_get_tag_slot(ctx->file_data, i) = 0;
+	io_fixed_file_set(file_slot, file);
+	ret = io_sqe_file_register(ctx, file, i);
+	if (ret) {
+		file_slot->file_ptr = 0;
+		goto err;
+	}
+
+	ret = 0;
+err:
+	io_ring_submit_unlock(ctx, !force_nonblock);
+	if (ret)
+		fput(file);
+	return ret;
+}
+
 static int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx,
 				 struct io_rsrc_node *node, void *rsrc)
 {
@@ -10298,6 +10355,7 @@ static int __init io_uring_init(void)
 	BUILD_BUG_SQE_ELEM(32, __u64,  user_data);
 	BUILD_BUG_SQE_ELEM(40, __u16,  buf_index);
 	BUILD_BUG_SQE_ELEM(40, __u16,  buf_group);
+	BUILD_BUG_SQE_ELEM(40, __u16,  file_index);
 	BUILD_BUG_SQE_ELEM(42, __u16,  personality);
 	BUILD_BUG_SQE_ELEM(44, __s32,  splice_fd_in);
 
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 79126d5cd289..f105deb4da1d 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -52,6 +52,8 @@ struct io_uring_sqe {
 		__u16	buf_index;
 		/* for grouped buffer selection */
 		__u16	buf_group;
+		/* index into fixed files */
+		__u16	file_index;
 	} __attribute__((packed));
 	/* personality to use, if used */
 	__u16	personality;
-- 
2.32.0


  parent reply	other threads:[~2021-08-13 16:44 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-08-13 16:43 [PATCH v2 0/4] open/accept directly into io_uring fixed file table Pavel Begunkov
2021-08-13 16:43 ` [PATCH v2 1/4] net: add accept helper not installing fd Pavel Begunkov
2021-08-13 16:43 ` Pavel Begunkov [this message]
2021-08-13 16:43 ` [PATCH v2 3/4] io_uring: hand code io_accept() fd installing Pavel Begunkov
2021-08-13 16:43 ` [PATCH v2 4/4] io_uring: accept directly into fixed file table Pavel Begunkov
2021-08-13 19:00 ` [PATCH v2 0/4] open/accept directly into io_uring " Josh Triplett
2021-08-14 12:50   ` Pavel Begunkov
2021-08-14 23:03     ` Jens Axboe
2021-08-15  3:42       ` Josh Triplett
2021-08-15 15:05         ` Jens Axboe
2021-08-15 15:12           ` Jens Axboe
2021-08-15 13:00       ` Pavel Begunkov
2021-08-15  3:31     ` Josh Triplett
2021-08-15 10:48       ` Pavel Begunkov
2021-08-15 14:23         ` Pavel Begunkov
2021-08-16 15:45 ` Stefan Metzmacher
2021-08-17  9:33   ` Pavel Begunkov
2021-08-17 14:57     ` Jens Axboe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=686a860d973d0b751cf1fa3b1408179b2833720f.1628871893.git.asml.silence@gmail.com \
    --to=asml.silence@gmail.com \
    --cc=axboe@kernel.dk \
    --cc=davem@davemloft.net \
    --cc=io-uring@vger.kernel.org \
    --cc=josh@joshtriplett.org \
    --cc=kuba@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=metze@samba.org \
    --cc=netdev@vger.kernel.org \
    --subject='Re: [PATCH v2 2/4] io_uring: openat directly into fixed fd table' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).