Linux-Fsdevel Archive on lore.kernel.org
help / color / mirror / Atom feed
From: Jeff Moyer <jmoyer@redhat.com>
To: Christoph Hellwig <hch@lst.de>
Cc: viro@zeniv.linux.org.uk, Avi Kivity <avi@scylladb.com>,
	linux-aio@kvack.org, linux-fsdevel@vger.kernel.org,
	netdev@vger.kernel.org, linux-api@vger.kernel.org,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH 32/32] aio: implement io_pgetevents
Date: Fri, 12 Jan 2018 15:44:52 -0500	[thread overview]
Message-ID: <x49k1wmdcxn.fsf@segfault.boston.devel.redhat.com> (raw)
In-Reply-To: <20180110155853.32348-33-hch@lst.de> (Christoph Hellwig's message of "Wed, 10 Jan 2018 16:58:53 +0100")

Christoph Hellwig <hch@lst.de> writes:

> This is the io_getevents equivalent of ppoll/pselect.  It allows signals
> and aio completions to be mixed properly (especially with IOCB_CMD_POLL)
> by atomically executing the following sequence:
>
> 	sigset_t origmask;
>
> 	pthread_sigmask(SIG_SETMASK, &sigmask, &origmask);
> 	ret = io_getevents(ctx, min_nr, nr, events, timeout);
> 	pthread_sigmask(SIG_SETMASK, &origmask, NULL);
>
> Note that unlike many other signal related calls we do not pass a sigmask
> size, as that would get us to 7 arguments, which aren't easily supported
> by the syscall infrastructure.  It seems a lot less painful to just add a
> new syscall variant in the unlikely case we're going to increase the
> sigset size.

pselect, as an example, crams the sigmask pointer and its size together
into a single argument.  Why not just do that?  libaio can take care of
setting that up.

Cheers,
Jeff


>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  arch/x86/entry/syscalls/syscall_32.tbl |  1 +
>  arch/x86/entry/syscalls/syscall_64.tbl |  1 +
>  fs/aio.c                               | 96 ++++++++++++++++++++++++++++++----
>  include/linux/compat.h                 |  6 +++
>  include/linux/syscalls.h               |  6 +++
>  include/uapi/asm-generic/unistd.h      |  4 +-
>  kernel/sys_ni.c                        |  2 +
>  7 files changed, 105 insertions(+), 11 deletions(-)
>
> diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
> index 448ac2161112..5997c3e9ac3e 100644
> --- a/arch/x86/entry/syscalls/syscall_32.tbl
> +++ b/arch/x86/entry/syscalls/syscall_32.tbl
> @@ -391,3 +391,4 @@
>  382	i386	pkey_free		sys_pkey_free
>  383	i386	statx			sys_statx
>  384	i386	arch_prctl		sys_arch_prctl			compat_sys_arch_prctl
> +385	i386	io_pgetevents		sys_io_pgetevents		compat_sys_io_pgetevents
> diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
> index 5aef183e2f85..e995cd2b4e65 100644
> --- a/arch/x86/entry/syscalls/syscall_64.tbl
> +++ b/arch/x86/entry/syscalls/syscall_64.tbl
> @@ -339,6 +339,7 @@
>  330	common	pkey_alloc		sys_pkey_alloc
>  331	common	pkey_free		sys_pkey_free
>  332	common	statx			sys_statx
> +333	common	io_pgetevents		sys_io_pgetevents
>  
>  #
>  # x32-specific system call numbers start at 512 to avoid cache impact
> diff --git a/fs/aio.c b/fs/aio.c
> index cae90ac6e4a3..57a4e8d89f78 100644
> --- a/fs/aio.c
> +++ b/fs/aio.c
> @@ -1299,10 +1299,6 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr,
>  		wait_event_interruptible_hrtimeout(ctx->wait,
>  				aio_read_events(ctx, min_nr, nr, event, &ret),
>  				until);
> -
> -	if (!ret && signal_pending(current))
> -		ret = -EINTR;
> -
>  	return ret;
>  }
>  
> @@ -1978,13 +1974,54 @@ SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id,
>  		struct timespec __user *, timeout)
>  {
>  	struct timespec64	ts;
> +	int			ret;
> +
> +	if (timeout && unlikely(get_timespec64(&ts, timeout)))
> +		return -EFAULT;
>  
> -	if (timeout) {
> -		if (unlikely(get_timespec64(&ts, timeout)))
> +	ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
> +	if (!ret && signal_pending(current))
> +		ret = -EINTR;
> +	return ret;
> +}
> +
> +SYSCALL_DEFINE6(io_pgetevents,
> +		aio_context_t, ctx_id,
> +		long, min_nr,
> +		long, nr,
> +		struct io_event __user *, events,
> +		struct timespec __user *, timeout,
> +		const sigset_t __user *, sigmask)
> +{
> +	sigset_t		ksigmask, sigsaved;
> +	struct timespec64	ts;
> +	int ret;
> +
> +	if (timeout && unlikely(get_timespec64(&ts, timeout)))
> +		return -EFAULT;
> +
> +	if (sigmask) {
> +		if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
>  			return -EFAULT;
> +		sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP));
> +		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
>  	}
>  
> -	return do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
> +	ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
> +	if (signal_pending(current)) {
> +		if (sigmask) {
> +			current->saved_sigmask = sigsaved;
> +			set_restore_sigmask();
> +		}
> +
> +		if (!ret)
> +			ret = -ERESTARTNOHAND;
> +	} else {
> +		if (sigmask)
> +			sigprocmask(SIG_SETMASK, &sigsaved, NULL);
> +	}
> +
> +	return ret;
>  }
>  
>  #ifdef CONFIG_COMPAT
> @@ -1995,13 +2032,52 @@ COMPAT_SYSCALL_DEFINE5(io_getevents, compat_aio_context_t, ctx_id,
>  		       struct compat_timespec __user *, timeout)
>  {
>  	struct timespec64 t;
> +	int ret;
> +
> +	if (timeout && compat_get_timespec64(&t, timeout))
> +		return -EFAULT;
> +
> +	ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
> +	if (!ret && signal_pending(current))
> +		ret = -EINTR;
> +	return ret;
> +}
> +
> +COMPAT_SYSCALL_DEFINE6(io_pgetevents,
> +		compat_aio_context_t, ctx_id,
> +		compat_long_t, min_nr,
> +		compat_long_t, nr,
> +		struct io_event __user *, events,
> +		struct compat_timespec __user *, timeout,
> +		const compat_sigset_t __user *, sigmask)
> +{
> +	sigset_t ksigmask, sigsaved;
> +	struct timespec64 t;
> +	int ret;
>  
> -	if (timeout) {
> -		if (compat_get_timespec64(&t, timeout))
> +	if (timeout && compat_get_timespec64(&t, timeout))
> +		return -EFAULT;
> +
> +	if (sigmask) {
> +		if (get_compat_sigset(&ksigmask, sigmask))
>  			return -EFAULT;
> +		sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP));
> +		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
> +	}
>  
> +	ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
> +	if (signal_pending(current)) {
> +		if (sigmask) {
> +			current->saved_sigmask = sigsaved;
> +			set_restore_sigmask();
> +		}
> +		if (!ret)
> +			ret = -ERESTARTNOHAND;
> +	} else {
> +		if (sigmask)
> +			sigprocmask(SIG_SETMASK, &sigsaved, NULL);
>  	}
>  
> -	return do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
> +	return ret;
>  }
>  #endif
> diff --git a/include/linux/compat.h b/include/linux/compat.h
> index 0fc36406f32c..a4cda98073f1 100644
> --- a/include/linux/compat.h
> +++ b/include/linux/compat.h
> @@ -536,6 +536,12 @@ asmlinkage long compat_sys_io_getevents(compat_aio_context_t ctx_id,
>  					compat_long_t nr,
>  					struct io_event __user *events,
>  					struct compat_timespec __user *timeout);
> +asmlinkage long compat_sys_io_pgetevents(compat_aio_context_t ctx_id,
> +					compat_long_t min_nr,
> +					compat_long_t nr,
> +					struct io_event __user *events,
> +					struct compat_timespec __user *timeout,
> +					const compat_sigset_t __user *sigmask);
>  asmlinkage long compat_sys_io_submit(compat_aio_context_t ctx_id, int nr,
>  				     u32 __user *iocb);
>  asmlinkage long compat_sys_mount(const char __user *dev_name,
> diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
> index a78186d826d7..3bc9a130f4a9 100644
> --- a/include/linux/syscalls.h
> +++ b/include/linux/syscalls.h
> @@ -539,6 +539,12 @@ asmlinkage long sys_io_getevents(aio_context_t ctx_id,
>  				long nr,
>  				struct io_event __user *events,
>  				struct timespec __user *timeout);
> +asmlinkage long sys_io_pgetevents(aio_context_t ctx_id,
> +				long min_nr,
> +				long nr,
> +				struct io_event __user *events,
> +				struct timespec __user *timeout,
> +				const sigset_t __user *sigmask);
>  asmlinkage long sys_io_submit(aio_context_t, long,
>  				struct iocb __user * __user *);
>  asmlinkage long sys_io_cancel(aio_context_t ctx_id, struct iocb __user *iocb,
> diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
> index 8b87de067bc7..ce2ebbeece10 100644
> --- a/include/uapi/asm-generic/unistd.h
> +++ b/include/uapi/asm-generic/unistd.h
> @@ -732,9 +732,11 @@ __SYSCALL(__NR_pkey_alloc,    sys_pkey_alloc)
>  __SYSCALL(__NR_pkey_free,     sys_pkey_free)
>  #define __NR_statx 291
>  __SYSCALL(__NR_statx,     sys_statx)
> +#define __NR_io_pgetevents 292
> +__SC_COMP(__NR_io_pgetevents, sys_io_pgetevents, compat_sys_io_pgetevents)
>  
>  #undef __NR_syscalls
> -#define __NR_syscalls 292
> +#define __NR_syscalls 293
>  
>  /*
>   * All syscalls below here should go away really,
> diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
> index b5189762d275..8f7705559b38 100644
> --- a/kernel/sys_ni.c
> +++ b/kernel/sys_ni.c
> @@ -151,9 +151,11 @@ cond_syscall(sys_io_destroy);
>  cond_syscall(sys_io_submit);
>  cond_syscall(sys_io_cancel);
>  cond_syscall(sys_io_getevents);
> +cond_syscall(sys_io_pgetevents);
>  cond_syscall(compat_sys_io_setup);
>  cond_syscall(compat_sys_io_submit);
>  cond_syscall(compat_sys_io_getevents);
> +cond_syscall(compat_sys_io_pgetevents);
>  cond_syscall(sys_sysfs);
>  cond_syscall(sys_syslog);
>  cond_syscall(sys_process_vm_readv);

  reply	other threads:[~2018-01-12 20:44 UTC|newest]

Thread overview: 63+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-01-10 15:58 aio poll, io_pgetevents and a new in-kernel poll API V2 Christoph Hellwig
2018-01-10 15:58 ` [PATCH 01/32] fs: update documentation for __poll_t Christoph Hellwig
2018-01-10 15:58 ` [PATCH 02/32] fs: add new vfs_poll and file_can_poll helpers Christoph Hellwig
2018-01-10 15:58 ` [PATCH 03/32] fs: introduce new ->get_poll_head and ->poll_mask methods Christoph Hellwig
2018-01-10 21:04   ` Al Viro
2018-01-11  5:22     ` Al Viro
2018-01-11  8:28       ` Christoph Hellwig
2018-01-11 11:32       ` Christoph Hellwig
2018-01-11 11:36     ` Christoph Hellwig
2018-01-11 17:47       ` Al Viro
2018-01-12  9:06         ` Christoph Hellwig
2018-01-17 16:05         ` Christoph Hellwig
2018-01-10 15:58 ` [PATCH 04/32] net: refactor socket_poll Christoph Hellwig
2018-01-10 15:58 ` [PATCH 05/32] net: add support for ->poll_mask in proto_ops Christoph Hellwig
2018-01-10 15:58 ` [PATCH 06/32] net: remove sock_no_poll Christoph Hellwig
2018-01-10 15:58 ` [PATCH 07/32] net/tcp: convert to ->poll_mask Christoph Hellwig
2018-01-10 15:58 ` [PATCH 08/32] net/unix: " Christoph Hellwig
2018-01-10 15:58 ` [PATCH 09/32] net: convert datagram_poll users to ->poll_mask Christoph Hellwig
2018-01-10 15:58 ` [PATCH 10/32] net/dccp: convert to ->poll_mask Christoph Hellwig
2018-01-10 15:58 ` [PATCH 11/32] net/atm: " Christoph Hellwig
2018-01-10 15:58 ` [PATCH 12/32] net/vmw_vsock: " Christoph Hellwig
2018-01-10 15:58 ` [PATCH 13/32] net/tipc: " Christoph Hellwig
2018-01-10 19:32   ` Jon Maloy
2018-01-10 15:58 ` [PATCH 14/32] net/sctp: " Christoph Hellwig
2018-01-10 15:58 ` [PATCH 15/32] net/bluetooth: " Christoph Hellwig
2018-01-10 15:58 ` [PATCH 16/32] net/caif: " Christoph Hellwig
2018-01-10 15:58 ` [PATCH 17/32] net/nfc: " Christoph Hellwig
2018-01-10 15:58 ` [PATCH 18/32] net/phonet: " Christoph Hellwig
2018-01-10 15:58 ` [PATCH 19/32] net/iucv: " Christoph Hellwig
2018-01-10 15:58 ` [PATCH 20/32] net/rxrpc: " Christoph Hellwig
2018-01-10 15:58 ` [PATCH 21/32] pipe: " Christoph Hellwig
2018-01-10 15:58 ` [PATCH 22/32] eventfd: switch " Christoph Hellwig
2018-01-10 15:58 ` [PATCH 23/32] timerfd: convert " Christoph Hellwig
2018-01-10 15:58 ` [PATCH 24/32] aio: don't print the page size at boot time Christoph Hellwig
2018-01-10 15:58 ` [PATCH 25/32] aio: remove an outdated comment in aio_complete Christoph Hellwig
2018-01-10 15:58 ` [PATCH 26/32] aio: refactor read/write iocb setup Christoph Hellwig
2018-01-10 21:19   ` Jeff Moyer
2018-01-11 13:38     ` Christoph Hellwig
2018-01-10 15:58 ` [PATCH 27/32] aio: sanitize ki_list handling Christoph Hellwig
2018-01-10 21:29   ` Jeff Moyer
2018-01-10 15:58 ` [PATCH 28/32] aio: simplify cancellation Christoph Hellwig
2018-01-10 22:50   ` Jeff Moyer
2018-01-10 15:58 ` [PATCH 29/32] aio: delete iocbs from the active_reqs list in kiocb_cancel Christoph Hellwig
2018-01-10 22:52   ` Jeff Moyer
2018-01-10 15:58 ` [PATCH 30/32] aio: add delayed cancel support Christoph Hellwig
2018-01-10 22:59   ` Jeff Moyer
2018-01-10 23:26     ` Jeff Moyer
2018-01-11 13:43       ` Christoph Hellwig
2018-01-11 15:27         ` Jeff Moyer
2018-01-15  8:54           ` Christoph Hellwig
2018-01-10 15:58 ` [PATCH 31/32] aio: implement IOCB_CMD_POLL Christoph Hellwig
2018-01-10 15:58 ` [PATCH 32/32] aio: implement io_pgetevents Christoph Hellwig
2018-01-12 20:44   ` Jeff Moyer [this message]
2018-01-15  8:53     ` Christoph Hellwig
2018-01-16 12:04       ` Christoph Hellwig
2018-01-17  0:41         ` Jeff Moyer
2018-01-17  4:27           ` Al Viro
2018-01-17  7:08             ` Christoph Hellwig
2018-01-17 13:49               ` Jeff Moyer
2018-01-17  7:36           ` Christoph Hellwig
2018-01-17 13:51             ` Jeff Moyer
2018-01-10 22:36 ` aio poll, io_pgetevents and a new in-kernel poll API V2 Michael Kerrisk (man-pages)
2018-01-10 23:34   ` Jeff Moyer

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=x49k1wmdcxn.fsf@segfault.boston.devel.redhat.com \
    --to=jmoyer@redhat.com \
    --cc=avi@scylladb.com \
    --cc=hch@lst.de \
    --cc=linux-aio@kvack.org \
    --cc=linux-api@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=viro@zeniv.linux.org.uk \
    --subject='Re: [PATCH 32/32] aio: implement io_pgetevents' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).