LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [PATCH -mm 0/5][AIO] - AIO completion signal notification v3
@ 2006-11-29 10:24 Sébastien Dugué
  2006-11-29 10:32 ` [PATCH -mm 1/5][AIO] - Rework compat_sys_io_submit Sébastien Dugué
                   ` (4 more replies)
  0 siblings, 5 replies; 25+ messages in thread
From: Sébastien Dugué @ 2006-11-29 10:24 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-aio, Andrew Morton, Suparna Bhattacharya,
	Christoph Hellwig, Zach Brown, Badari Pulavarty, Ulrich Drepper,
	Jean Pierre Dion

  Hi

  Here is the latest rework of the AIO completion signal notification patches.

  This set consists in 5 patches:

	1. rework-compat-sys-io-submit: rework the sys_io_submit() compat layer,
	   laying out the base for the following patches

	2. aio-header-fix-includes: fixes the double inclusion of uio.h in aio.h

	3. export-good_sigevent: move good_sigevent into signal.c and export it

	4. aio-notify-sig: the AIO completion signal notification

	5. listio: adds listio support

  Description are in the individual patches.


  Changes from v2:
	- rebased to 2.6.19-rc6-mm2
	- reworked the sys_io_submit() compat layer as suggested by Zach Brown
	- fixed saving of a pointer to a task struct in aio-notify-sig as
	  pointed out by Andrew Morton

  Changes from v1:
	- cleanups suggested by Christoph Hellwig, Badari Pulavarty and Zach Brown
	- added lisio patch


  Comments welcomed, as usual.

  Thanks,

  Sébastien.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH -mm 1/5][AIO] - Rework compat_sys_io_submit
  2006-11-29 10:24 [PATCH -mm 0/5][AIO] - AIO completion signal notification v3 Sébastien Dugué
@ 2006-11-29 10:32 ` Sébastien Dugué
  2006-11-30  0:47   ` Zach Brown
  2006-11-29 10:32 ` [PATCH -mm 2/5][AIO] - fix aio.h includes Sébastien Dugué
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 25+ messages in thread
From: Sébastien Dugué @ 2006-11-29 10:32 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-aio, Andrew Morton, Suparna Bhattacharya,
	Christoph Hellwig, Zach Brown, Badari Pulavarty, Ulrich Drepper,
	Jean Pierre Dion


                 compat_sys_io_submit() cleanup


  Cleanup compat_sys_io_submit by duplicating some of the native syscall
logic in the compat layer and directly calling io_submit_one() instead
of fooling the syscall into thinking it is called from a native 64-bit
caller.

  This is needed for the completion notification patch to avoid having
to rewrite each iocb on the caller stack for sys_io_submit() to find the
sigevents.



 compat.c |   61 +++++++++++++++++++++++++++++++++++--------------------------
 1 file changed, 35 insertions(+), 26 deletions(-)

Signed-off-by: Sébastien Dugué <sebastien.dugue@bull.net>

Index: linux-2.6.19-rc6-mm2/fs/compat.c
===================================================================
--- linux-2.6.19-rc6-mm2.orig/fs/compat.c	2006-11-28 12:49:40.000000000
+0100 +++ linux-2.6.19-rc6-mm2/fs/compat.c	2006-11-28 12:51:35.000000000
+0100 @@ -642,40 +642,49 @@ out:
 	return ret;
 }
 
-static inline long
-copy_iocb(long nr, u32 __user *ptr32, struct iocb __user * __user *ptr64)
+asmlinkage long
+compat_sys_io_submit(aio_context_t ctx_id, int nr, u32 __user *iocb)
 {
-	compat_uptr_t uptr;
+	struct kioctx *ctx;
+	long ret = 0;
 	int i;
 
-	for (i = 0; i < nr; ++i) {
-		if (get_user(uptr, ptr32 + i))
-			return -EFAULT;
-		if (put_user(compat_ptr(uptr), ptr64 + i))
-			return -EFAULT;
-	}
-	return 0;
-}
+	if (unlikely(nr < 0))
+		return -EINVAL;
 
-#define MAX_AIO_SUBMITS 	(PAGE_SIZE/sizeof(struct iocb *))
+	if (unlikely(!access_ok(VERIFY_READ, iocb, (nr * sizeof(u32)))))
+		return -EFAULT;
 
-asmlinkage long
-compat_sys_io_submit(aio_context_t ctx_id, int nr, u32 __user *iocb)
-{
-	struct iocb __user * __user *iocb64; 
-	long ret;
+	ctx = lookup_ioctx(ctx_id);
 
-	if (unlikely(nr < 0))
+	if (unlikely(!ctx))
 		return -EINVAL;
+	for (i=0; i<nr; i++) {
+		compat_uptr_t uptr;
+		struct iocb __user *user_iocb;
+		struct iocb tmp;
 
-	if (nr > MAX_AIO_SUBMITS)
-		nr = MAX_AIO_SUBMITS;
-	
-	iocb64 = compat_alloc_user_space(nr * sizeof(*iocb64));
-	ret = copy_iocb(nr, iocb, iocb64);
-	if (!ret)
-		ret = sys_io_submit(ctx_id, nr, iocb64);
-	return ret;
+		if (get_user(uptr, iocb + i)) {
+			ret = -EFAULT;
+			break;
+		}
+
+		user_iocb = compat_ptr(uptr);
+
+		if (unlikely(copy_from_user(&tmp, user_iocb, sizeof(tmp)))) {
+			ret = -EFAULT;
+			break;
+		}
+
+		ret = io_submit_one(ctx, user_iocb, &tmp);
+
+		if (ret)
+			break;
+	}
+
+	put_ioctx(ctx);
+
+	return i? i: ret;
 }
 
 struct compat_ncp_mount_data {

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH -mm 2/5][AIO] - fix aio.h includes
  2006-11-29 10:24 [PATCH -mm 0/5][AIO] - AIO completion signal notification v3 Sébastien Dugué
  2006-11-29 10:32 ` [PATCH -mm 1/5][AIO] - Rework compat_sys_io_submit Sébastien Dugué
@ 2006-11-29 10:32 ` Sébastien Dugué
  2006-11-29 10:32 ` [PATCH -mm 3/5][AIO] - export good_sigevent() Sébastien Dugué
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 25+ messages in thread
From: Sébastien Dugué @ 2006-11-29 10:32 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-aio, Andrew Morton, Suparna Bhattacharya,
	Christoph Hellwig, Zach Brown, Badari Pulavarty, Ulrich Drepper,
	Jean Pierre Dion


  Fix the double inclusion of linux/uio.h in linux/aio.h



 aio.h |    1 -
 1 file changed, 1 deletion(-)

Signed-off-by: Sébastien Dugué <sebastien.dugue@bull.net>

Index: linux-2.6.19-rc6-mm2/include/linux/aio.h
===================================================================
--- linux-2.6.19-rc6-mm2.orig/include/linux/aio.h	2006-11-16
05:03:40.000000000 +0100 +++ linux-2.6.19-rc6-mm2/include/linux/aio.h
2006-11-28 12:51:41.000000000 +0100 @@ -7,7 +7,6 @@
 #include <linux/uio.h>
 
 #include <asm/atomic.h>
-#include <linux/uio.h>
 
 #define AIO_MAXSEGS		4
 #define AIO_KIOGRP_NR_ATOMIC	8


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH -mm 3/5][AIO] - export good_sigevent()
  2006-11-29 10:24 [PATCH -mm 0/5][AIO] - AIO completion signal notification v3 Sébastien Dugué
  2006-11-29 10:32 ` [PATCH -mm 1/5][AIO] - Rework compat_sys_io_submit Sébastien Dugué
  2006-11-29 10:32 ` [PATCH -mm 2/5][AIO] - fix aio.h includes Sébastien Dugué
@ 2006-11-29 10:32 ` Sébastien Dugué
  2006-11-29 10:38   ` Christoph Hellwig
                     ` (2 more replies)
  2006-11-29 10:33 ` [PATCH -mm 4/5][AIO] - AIO completion signal notification Sébastien Dugué
  2006-11-29 10:33 ` [PATCH -mm 5/5][AIO] - Listio support Sébastien Dugué
  4 siblings, 3 replies; 25+ messages in thread
From: Sébastien Dugué @ 2006-11-29 10:32 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-aio, Andrew Morton, Suparna Bhattacharya,
	Christoph Hellwig, Zach Brown, Badari Pulavarty, Ulrich Drepper,
	Jean Pierre Dion


                      Export good_sigevent()


  Move good_sigevent() from posix-timers.c to signal.c where it belongs,
and export it so that it can be used by other subsystems.


 include/linux/signal.h |    1 +
 kernel/posix-timers.c  |   17 -----------------
 kernel/signal.c        |   23 +++++++++++++++++++++++
 3 files changed, 24 insertions(+), 17 deletions(-)

Signed-off-by: Sébastien Dugué <sebastien.dugue@bull.net>


Index: linux-2.6.19-rc6-mm2/include/linux/signal.h
===================================================================
--- linux-2.6.19-rc6-mm2.orig/include/linux/signal.h	2006-11-28
12:49:40.000000000 +0100 +++ linux-2.6.19-rc6-mm2/include/linux/signal.h
2006-11-28 12:51:43.000000000 +0100 @@ -240,6 +240,7 @@ extern int
sigprocmask(int, sigset_t *, 
 struct pt_regs;
 extern int get_signal_to_deliver(siginfo_t *info, struct k_sigaction
*return_ka, struct pt_regs *regs, void *cookie); +extern struct task_struct *
good_sigevent(sigevent_t *); 
 extern struct kmem_cache *sighand_cachep;
 
Index: linux-2.6.19-rc6-mm2/kernel/posix-timers.c
===================================================================
--- linux-2.6.19-rc6-mm2.orig/kernel/posix-timers.c	2006-11-28
12:49:40.000000000 +0100 +++ linux-2.6.19-rc6-mm2/kernel/posix-timers.c
2006-11-28 12:51:43.000000000 +0100 @@ -367,23 +367,6 @@ static enum
hrtimer_restart posix_timer_ return ret;
 }
 
-static struct task_struct * good_sigevent(sigevent_t * event)
-{
-	struct task_struct *rtn = current->group_leader;
-
-	if ((event->sigev_notify & SIGEV_THREAD_ID ) &&
-		(!(rtn = find_task_by_pid(event->sigev_notify_thread_id)) ||
-		 rtn->tgid != current->tgid ||
-		 (event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL))
-		return NULL;
-
-	if (((event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) &&
-	    ((event->sigev_signo <= 0) || (event->sigev_signo > SIGRTMAX)))
-		return NULL;
-
-	return rtn;
-}
-
 void register_posix_clock(const clockid_t clock_id, struct k_clock *new_clock)
 {
 	if ((unsigned) clock_id >= MAX_CLOCKS) {
Index: linux-2.6.19-rc6-mm2/kernel/signal.c
===================================================================
--- linux-2.6.19-rc6-mm2.orig/kernel/signal.c	2006-11-28
12:49:40.000000000 +0100 +++ linux-2.6.19-rc6-mm2/kernel/signal.c
2006-11-28 12:51:43.000000000 +0100 @@ -1189,6 +1189,29 @@ int
group_send_sig_info(int sig, struct return ret;
 }
 
+/***
+ * good_sigevent - check and get target task from a sigevent.
+ * @event: the sigevent to be checked
+ *
+ * This function must be called with tasklist_lock held for reading.
+ */
+struct task_struct * good_sigevent(sigevent_t * event)
+{
+	struct task_struct *rtn = current->group_leader;
+
+	if ((event->sigev_notify & SIGEV_THREAD_ID ) &&
+		(!(rtn = find_task_by_pid(event->sigev_notify_thread_id)) ||
+		 rtn->tgid != current->tgid ||
+		 (event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL))
+		return NULL;
+
+	if (((event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) &&
+	    ((event->sigev_signo <= 0) || (event->sigev_signo > SIGRTMAX)))
+		return NULL;
+
+	return rtn;
+}
+
 /*
  * kill_pgrp_info() sends a signal to a process group: this is what the tty
  * control characters do (^C, ^Z etc)

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH -mm 4/5][AIO] - AIO completion signal notification
  2006-11-29 10:24 [PATCH -mm 0/5][AIO] - AIO completion signal notification v3 Sébastien Dugué
                   ` (2 preceding siblings ...)
  2006-11-29 10:32 ` [PATCH -mm 3/5][AIO] - export good_sigevent() Sébastien Dugué
@ 2006-11-29 10:33 ` Sébastien Dugué
  2006-11-29 10:51   ` Christoph Hellwig
  2006-11-29 11:33   ` Jakub Jelinek
  2006-11-29 10:33 ` [PATCH -mm 5/5][AIO] - Listio support Sébastien Dugué
  4 siblings, 2 replies; 25+ messages in thread
From: Sébastien Dugué @ 2006-11-29 10:33 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-aio, Andrew Morton, Suparna Bhattacharya,
	Christoph Hellwig, Zach Brown, Badari Pulavarty, Ulrich Drepper,
	Jean Pierre Dion


                      AIO completion signal notification

  The current 2.6 kernel does not support notification of user space via
an RT signal upon an asynchronous IO completion. The POSIX specification
states that when an AIO request completes, a signal can be delivered to
the application as notification.

  This patch adds a struct sigevent *aio_sigeventp to the iocb.
The relevant fields (pid, signal number and value) are stored in the kiocb
for use when the request completes.

  That sigevent structure is filled by the application as part of the AIO
request preparation. Upon request completion, the kernel notifies the
application using those sigevent parameters. If SIGEV_NONE has been specified,
then the old behaviour is retained and the application must rely on polling
the completion queue using io_getevents().


  A struct sigevent *aio_sigeventp field is added to struct iocb in
include/linux/aio_abi.h

  A struct aio_notify containing the sigevent parameters is defined in aio.h:

  struct aio_notify {
	struct task_struct	*target;
	__u16			signo;
	__u16			notify;
	sigval_t		value;
  };

  A struct aio_notify ki_notify is added to struct kiocb in include/linux/aio.h

  In io_submit_one(), if the application provided a sigevent then
setup_sigevent() is called which does the following:

	- check access to the user sigevent and make a local copy

	- if the requested notification is SIGEV_NONE, then nothing to do

	- fill in the kiocb->ki_notify fields (notify, signo, value)

	- check sigevent consistency, get the signal target task and
	  save it in kiocb->ki_notify.target

	- preallocate a sigqueue for this event using sigqueue_alloc()

  Upon request completion, in aio_complete(), if notification is needed for
this request (iocb->ki_notify.notify != SIGEV_NONE), then aio_send_signal()
is called to signal the target task as follows:

	- fill in the siginfo struct to be sent to the task

	- if notify is SIGEV_THREAD_ID then send signal to specific task
	  using send_sigqueue()

	- else send signal to task group using send_5group_sigqueue()



Notes concerning sigqueue preallocation:

 To ensure reliable delivery of completion notification, the sigqueue is
preallocated in the submission path so that there is no chance it can fail
in the completion path.

 Unlike the posix-timers case (currently the single other user of sigqueue
preallocation), where the sigqueue is allocated for the lifetime of the timer
and freed at timer destruction time, the aio case is a bit more tricky due to
the async nature of the whole thing.

  In the aio case, the sigqueue exists for the lifetime of the request,
therefore it must be freed only once the signal for the request completion has
been delivered. This involves changing __sigqueue_free() to free the sigqueue
when the signal is collected if si_code is SI_ASYNCIO even if it was
preallocated as well as explicitly calling sigqueue_free() in submission and
completion error paths.


 fs/aio.c                |  115 ++++++++++++++++++++++++++++++++++++++++++++++--
 fs/compat.c             |   18 +++++++
 include/linux/aio.h     |   12 +++++
 include/linux/aio_abi.h |    3 -
 kernel/signal.c         |    2
 5 files changed, 144 insertions(+), 6 deletions(-)

Signed-off-by: Sébastien Dugué <sebastien.dugue@bull.net>
Signed-off-by: Laurent Vivier <laurent.vivier@bull.net>

Index: linux-2.6.19-rc6-mm2/fs/aio.c
===================================================================
--- linux-2.6.19-rc6-mm2.orig/fs/aio.c	2006-11-28 12:49:40.000000000
+0100 +++ linux-2.6.19-rc6-mm2/fs/aio.c	2006-11-29 10:14:47.000000000
+0100 @@ -416,6 +416,7 @@ static struct kiocb fastcall *__aio_get_
 	req->ki_dtor = NULL;
 	req->private = NULL;
 	req->ki_iovec = NULL;
+	req->ki_notify.sigq = NULL;
 	INIT_LIST_HEAD(&req->ki_run_list);
 
 	/* Check if the completion queue has enough free space to
@@ -463,6 +464,11 @@ static inline void really_put_req(struct
 		req->ki_dtor(req);
 	if (req->ki_iovec != &req->ki_inline_vec)
 		kfree(req->ki_iovec);
+
+	/* Release task ref */
+	if (req->ki_notify.notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
+		put_task_struct(req->ki_notify.target);
+
 	kmem_cache_free(kiocb_cachep, req);
 	ctx->reqs_active--;
 
@@ -929,6 +935,80 @@ void fastcall kick_iocb(struct kiocb *io
 }
 EXPORT_SYMBOL(kick_iocb);
 
+static int aio_send_signal(struct aio_notify *notify)
+{
+	struct sigqueue *sigq = notify->sigq;
+	struct siginfo *info = &sigq->info;
+	int ret;
+
+	memset(info, 0, sizeof(struct siginfo));
+
+	info->si_signo = notify->signo;
+	info->si_errno = 0;
+	info->si_code = SI_ASYNCIO;
+	info->si_pid = 0;
+	info->si_uid = 0;
+	info->si_value = notify->value;
+
+	if (notify->notify & SIGEV_THREAD_ID)
+		ret = send_sigqueue(notify->signo, sigq, notify->target);
+	else
+		ret = send_group_sigqueue(notify->signo, sigq, notify->target);
+
+	return ret;
+}
+
+static long aio_setup_sigevent(struct aio_notify *notify,
+			       struct sigevent __user *user_event)
+{
+	sigevent_t event;
+	struct task_struct *target;
+
+	if (copy_from_user(&event, user_event, sizeof (event)))
+		return -EFAULT;
+
+	if (event.sigev_notify == SIGEV_NONE)
+		return 0;
+
+	notify->notify = event.sigev_notify;
+	notify->signo = event.sigev_signo;
+	notify->value = event.sigev_value;
+
+	read_lock(&tasklist_lock);
+	target = good_sigevent(&event);
+
+	if (unlikely(!target || (target->flags & PF_EXITING)))
+		goto out_unlock;
+
+	if (notify->notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID)) {
+		/*
+		 * This reference will be dropped in really_put_req() when
+		 * we're done with the request.
+		 */
+		get_task_struct(target);
+	}
+
+	notify->target = target;
+	read_unlock(&tasklist_lock);
+
+	/*
+	 * NOTE: we cannot free the sigqueue in the completion path as
+	 * the signal may not have been delivered to the target task.
+	 * Therefore it has to be freed in __sigqueue_free() when the
+	 * signal is collected if si_code is SI_ASYNCIO.
+	 */
+	notify->sigq = sigqueue_alloc();
+
+	if (unlikely(!notify->sigq))
+		return -EAGAIN;
+
+	return 0;
+
+out_unlock:
+	read_unlock(&tasklist_lock);
+	return -EINVAL;
+}
+
 /* aio_complete
  *	Called when the io request on the given iocb is complete.
  *	Returns true if this is the last user of the request.  The 
@@ -976,8 +1056,11 @@ int fastcall aio_complete(struct kiocb *
 	 * cancelled requests don't get events, userland was given one
 	 * when the event got cancelled.
 	 */
-	if (kiocbIsCancelled(iocb))
+	if (kiocbIsCancelled(iocb)) {
+		if (iocb->ki_notify.sigq)
+			sigqueue_free(iocb->ki_notify.sigq);
 		goto put_rq;
+	}
 
 	ring = kmap_atomic(info->ring_pages[0], KM_IRQ1);
 
@@ -1008,6 +1091,14 @@ int fastcall aio_complete(struct kiocb *
 
 	pr_debug("added to ring %p at [%lu]\n", iocb, tail);
 
+	if (iocb->ki_notify.notify != SIGEV_NONE) {
+		ret = aio_send_signal(&iocb->ki_notify);
+
+		/* If signal generation failed, release the sigqueue */
+		if (ret)
+			sigqueue_free(iocb->ki_notify.sigq);
+	}
+
 	pr_debug("%ld retries: %zd of %zd\n", iocb->ki_retried,
 		iocb->ki_nbytes - iocb->ki_left, iocb->ki_nbytes);
 put_rq:
@@ -1549,8 +1640,7 @@ int fastcall io_submit_one(struct kioctx
 	ssize_t ret;
 
 	/* enforce forwards compatibility on users */
-	if (unlikely(iocb->aio_reserved1 || iocb->aio_reserved2 ||
-		     iocb->aio_reserved3)) {
+	if (unlikely(iocb->aio_reserved1 || iocb->aio_reserved3)) {
 		pr_debug("EINVAL: io_submit: reserve field set\n");
 		return -EINVAL;
 	}
@@ -1559,6 +1649,7 @@ int fastcall io_submit_one(struct kioctx
 	if (unlikely(
 	    (iocb->aio_buf != (unsigned long)iocb->aio_buf) ||
 	    (iocb->aio_nbytes != (size_t)iocb->aio_nbytes) ||
+	    (iocb->aio_sigeventp != (unsigned long)iocb->aio_sigeventp) ||
 	    ((ssize_t)iocb->aio_nbytes < 0)
 	   )) {
 		pr_debug("EINVAL: io_submit: overflow check\n");
@@ -1593,10 +1684,21 @@ int fastcall io_submit_one(struct kioctx
 	INIT_LIST_HEAD(&req->ki_wait.task_list);
 	req->ki_retried = 0;
 
+	/* handle setting up the sigevent for POSIX AIO signals */
+	req->ki_notify.notify = SIGEV_NONE;
+
+	if (iocb->aio_sigeventp) {
+		ret = aio_setup_sigevent(&req->ki_notify,
+					 (struct sigevent __user *)(unsigned
long)
+					 iocb->aio_sigeventp);
+		if (ret)
+			goto out_put_req;
+	}
+
 	ret = aio_setup_iocb(req);
 
 	if (ret)
-		goto out_put_req;
+		goto out_sigqfree;
 
 	spin_lock_irq(&ctx->ctx_lock);
 	aio_run_iocb(req);
@@ -1609,6 +1711,11 @@ int fastcall io_submit_one(struct kioctx
 	aio_put_req(req);	/* drop extra ref to req */
 	return 0;
 
+out_sigqfree:
+	/* Undo the sigqueue alloc if someting went bad */
+	if (req->ki_notify.sigq)
+		sigqueue_free(req->ki_notify.sigq);
+
 out_put_req:
 	aio_put_req(req);	/* drop extra ref to req */
 	aio_put_req(req);	/* drop i/o ref to req */
Index: linux-2.6.19-rc6-mm2/include/linux/aio_abi.h
===================================================================
--- linux-2.6.19-rc6-mm2.orig/include/linux/aio_abi.h	2006-11-16
05:03:40.000000000 +0100 +++
linux-2.6.19-rc6-mm2/include/linux/aio_abi.h	2006-11-29
10:14:29.000000000 +0100 @@ -82,8 +82,9 @@ struct iocb { __u64
aio_nbytes; __s64	aio_offset;
 
+	__u64	aio_sigeventp;	/* pointer to struct sigevent */
+
 	/* extra parameters */
-	__u64	aio_reserved2;	/* TODO: use this for a (struct
sigevent *) */ __u64	aio_reserved3;
 }; /* 64 bytes */
 
Index: linux-2.6.19-rc6-mm2/include/linux/aio.h
===================================================================
--- linux-2.6.19-rc6-mm2.orig/include/linux/aio.h	2006-11-28
12:51:41.000000000 +0100 +++ linux-2.6.19-rc6-mm2/include/linux/aio.h
2006-11-29 10:14:29.000000000 +0100 @@ -7,6 +7,7 @@
 #include <linux/uio.h>
 
 #include <asm/atomic.h>
+#include <asm/siginfo.h>
 
 #define AIO_MAXSEGS		4
 #define AIO_KIOGRP_NR_ATOMIC	8
@@ -49,6 +50,14 @@ struct kioctx;
 #define kiocbIsKicked(iocb)	test_bit(KIF_KICKED, &(iocb)->ki_flags)
 #define kiocbIsCancelled(iocb)	test_bit(KIF_CANCELLED,
&(iocb)->ki_flags) 
+struct aio_notify {
+	struct task_struct	*target;
+	__u16			signo;
+	__u16			notify;
+	sigval_t		value;
+	struct sigqueue		*sigq;
+};
+
 /* is there a better place to document function pointer methods? */
 /**
  * ki_retry	-	iocb forward progress callback
@@ -118,6 +127,9 @@ struct kiocb {
 
 	struct list_head	ki_list;	/* the aio core uses this
 						 * for cancellation */
+
+	/* to notify a process on I/O event */
+	struct aio_notify	ki_notify;
 };
 
 #define is_sync_kiocb(iocb)	((iocb)->ki_key == KIOCB_SYNC_KEY)
Index: linux-2.6.19-rc6-mm2/kernel/signal.c
===================================================================
--- linux-2.6.19-rc6-mm2.orig/kernel/signal.c	2006-11-28
12:51:43.000000000 +0100 +++ linux-2.6.19-rc6-mm2/kernel/signal.c
2006-11-28 12:51:45.000000000 +0100 @@ -297,7 +297,7 @@ static struct sigqueue
*__sigqueue_alloc 
 static void __sigqueue_free(struct sigqueue *q)
 {
-	if (q->flags & SIGQUEUE_PREALLOC)
+	if (q->flags & SIGQUEUE_PREALLOC && q->info.si_code != SI_ASYNCIO)
 		return;
 	atomic_dec(&q->user->sigpending);
 	free_uid(q->user);
Index: linux-2.6.19-rc6-mm2/fs/compat.c
===================================================================
--- linux-2.6.19-rc6-mm2.orig/fs/compat.c	2006-11-28 12:51:35.000000000
+0100 +++ linux-2.6.19-rc6-mm2/fs/compat.c	2006-11-29 10:14:29.000000000
+0100 @@ -663,6 +663,7 @@ compat_sys_io_submit(aio_context_t ctx_i
 		compat_uptr_t uptr;
 		struct iocb __user *user_iocb;
 		struct iocb tmp;
+		struct compat_sigevent __user *uevent;
 
 		if (get_user(uptr, iocb + i)) {
 			ret = -EFAULT;
@@ -676,6 +677,23 @@ compat_sys_io_submit(aio_context_t ctx_i
 			break;
 		}
 
+		uevent = (struct compat_sigevent __user *)tmp.aio_sigeventp;
+
+		if (uevent) {
+			struct sigevent __user *event = NULL;
+			struct sigevent kevent;
+
+			event = compat_alloc_user_space(sizeof(*event));
+
+			if (get_compat_sigevent(&kevent, uevent) ||
+			    copy_to_user(event, &kevent, sizeof(*event))) {
+				ret = -EFAULT;
+				break;
+			}
+
+			tmp.aio_sigeventp = (__u64)event;
+		}
+
 		ret = io_submit_one(ctx, user_iocb, &tmp);
 
 		if (ret)

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH -mm 5/5][AIO] - Listio support
  2006-11-29 10:24 [PATCH -mm 0/5][AIO] - AIO completion signal notification v3 Sébastien Dugué
                   ` (3 preceding siblings ...)
  2006-11-29 10:33 ` [PATCH -mm 4/5][AIO] - AIO completion signal notification Sébastien Dugué
@ 2006-11-29 10:33 ` Sébastien Dugué
  2006-11-30  8:25   ` Suparna Bhattacharya
  4 siblings, 1 reply; 25+ messages in thread
From: Sébastien Dugué @ 2006-11-29 10:33 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-aio, Andrew Morton, Suparna Bhattacharya,
	Christoph Hellwig, Zach Brown, Badari Pulavarty, Ulrich Drepper,
	Jean Pierre Dion

                        POSIX listio support

  This patch adds POSIX listio completion notification support. It builds
on support provided by the aio signal notification patch and adds an
IOCB_CMD_GROUP command to io_submit().

  The purpose of IOCB_CMD_GROUP is to group together the following requests in
the list up to the end of the list sumbitted to io_submit.

  As io_submit already accepts an array of iocbs, as part of listio submission,
the user process prepends to a list of requests an empty special aiocb with
an aio_lio_opcode of IOCB_CMD_GROUP, filling only the aio_sigevent fields.


  An IOCB_CMD_GROUP is added to the IOCB_CMD enum in include/linux/aio_abi.h

  A struct lio_event is added in include/linux/aio.h

  A struct lio_event *ki_lio is added to struct iocb in include/linux/aio.h

  In io_submit(), upon detecting such an IOCB_CMD_GROUP marker iocb, an
lio_event is created in lio_create() which contains the necessary information
for signaling a thread (signal number, pid, notify type and value) along with
a count of requests attached to this event.

        The following depicts the lio_event structure:

        struct lio_event {
                atomic_t        	lio_users;
                struct aio_notify	lio_notify;
        };

  lio_users holds an atomic counter of the number of requests attached to this
lio. It is incremented with each request submitted and decremented at each
request completion. When the counter reaches 0, we send the notification.

  Each subsequent submitted request is attached to this lio_event by setting
the request kiocb->ki_lio to that lio_event (in io_submit_one()) and
incrementing the lio_users count.

  In aio_complete(), if the request is attached to an lio (ki_lio <> 0),
then lio_check() is called to decrement the lio_users count and eventually
signal the user process when all the requests in the group have completed.


  The IOCB_CMD_GROUP semantic is as follows:

       - if the associated sigevent is NULL then we want to group
         requests for the purpose of blocking on the group completion
         (LIO_WAIT sync behavior).

       - if the associated sigevent is valid (not NULL) then we want to
         group requests for the purpose of being notified upon that
         group of requests completion (LIO_NOWAIT async behaviour).



 fs/aio.c                |  123 ++++++++++++++++++++++++++++++++++++++++++++++--
 fs/compat.c             |   62 +++++++++++++++++++++++-
 include/linux/aio.h     |   15 +++++
 include/linux/aio_abi.h |    1
 4 files changed, 192 insertions(+), 9 deletions(-)

Signed-off-by: Sébastien Dugué <sebastien.dugue@bull.net>
Signed-off-by: Laurent Vivier <laurent.vivier@bull.net>

Index: linux-2.6.19-rc6-mm2/fs/aio.c
===================================================================
--- linux-2.6.19-rc6-mm2.orig/fs/aio.c	2006-11-28 12:51:45.000000000
+0100 +++ linux-2.6.19-rc6-mm2/fs/aio.c	2006-11-28 12:51:48.000000000
+0100 @@ -414,6 +414,7 @@ static struct kiocb fastcall *__aio_get_
 	req->ki_cancel = NULL;
 	req->ki_retry = NULL;
 	req->ki_dtor = NULL;
+	req->ki_lio = NULL;
 	req->private = NULL;
 	req->ki_iovec = NULL;
 	req->ki_notify.sigq = NULL;
@@ -1009,6 +1010,53 @@ out_unlock:
 	return -EINVAL;
 }
 
+void lio_check(struct lio_event *lio)
+{
+	int ret;
+
+	ret = atomic_dec_and_test(&lio->lio_users);
+
+	if (unlikely(ret) && lio->lio_notify.notify != SIGEV_NONE) {
+		/* last one -> notify process */
+		aio_send_signal(&lio->lio_notify);
+		kfree(lio);
+	}
+}
+
+struct lio_event *lio_create(struct sigevent __user *user_event)
+{
+	int ret = 0;
+	struct lio_event *lio = NULL;
+
+	lio = kzalloc(sizeof(*lio), GFP_KERNEL);
+
+	if (!lio)
+		return ERR_PTR(-EAGAIN);
+
+	/*
+	 * Grab an initial ref on the lio to avoid races between
+	 * submission and completion.
+	 */
+	atomic_set(&lio->lio_users, 1);
+
+	lio->lio_notify.notify = SIGEV_NONE;
+
+	if (user_event) {
+		/*
+		 * User specified an event for this lio,
+		 * he wants to be notified upon lio completion.
+		 */
+		ret = aio_setup_sigevent(&lio->lio_notify, user_event);
+
+		if (ret) {
+			kfree(lio);
+			return ERR_PTR(ret);
+		}
+	}
+
+	return lio;
+}
+
 /* aio_complete
  *	Called when the io request on the given iocb is complete.
  *	Returns true if this is the last user of the request.  The 
@@ -1057,8 +1105,12 @@ int fastcall aio_complete(struct kiocb *
 	 * when the event got cancelled.
 	 */
 	if (kiocbIsCancelled(iocb)) {
+		if (iocb->ki_lio)
+			lio_check(iocb->ki_lio);
+
 		if (iocb->ki_notify.sigq)
 			sigqueue_free(iocb->ki_notify.sigq);
+
 		goto put_rq;
 	}
 
@@ -1099,6 +1151,9 @@ int fastcall aio_complete(struct kiocb *
 			sigqueue_free(iocb->ki_notify.sigq);
 	}
 
+	if (iocb->ki_lio)
+		lio_check(iocb->ki_lio);
+
 	pr_debug("%ld retries: %zd of %zd\n", iocb->ki_retried,
 		iocb->ki_nbytes - iocb->ki_left, iocb->ki_nbytes);
 put_rq:
@@ -1633,7 +1688,7 @@ static int aio_wake_function(wait_queue_
 }
 
 int fastcall io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
-			 struct iocb *iocb)
+			   struct iocb *iocb, struct lio_event *lio)
 {
 	struct kiocb *req;
 	struct file *file;
@@ -1695,6 +1750,9 @@ int fastcall io_submit_one(struct kioctx
 			goto out_put_req;
 	}
 
+	/* Attach this iocb to its lio */
+	req->ki_lio = lio;
+
 	ret = aio_setup_iocb(req);
 
 	if (ret)
@@ -1738,6 +1796,8 @@ asmlinkage long sys_io_submit(aio_contex
 			      struct iocb __user * __user *iocbpp)
 {
 	struct kioctx *ctx;
+	struct lio_event *lio = NULL;
+	int lio_wait = 0;
 	long ret = 0;
 	int i;
 
@@ -1771,11 +1831,66 @@ asmlinkage long sys_io_submit(aio_contex
 			break;
 		}
 
-		ret = io_submit_one(ctx, user_iocb, &tmp);
-		if (ret)
-			break;
+		if (tmp.aio_lio_opcode == IOCB_CMD_GROUP) {
+
+			/* this command means that all following IO commands
+			 * are in the same group.
+			 *
+			 * Userspace either wants to be notified upon or block
until
+			 * completion of all the requests in the group.
+			 */
+			/*
+			 * Ignore an IOCB_CMD_GROUP request if we are already
+			 * processing one. This means only one listio per
+			 * io_submit call.
+			 */
+			if (lio)
+				continue;
+
+			lio = lio_create((struct sigevent __user *)(unsigned
long)
+					 tmp.aio_sigeventp);
+
+			ret = PTR_ERR(lio);
+
+			if (IS_ERR(lio))
+				goto out_put_ctx;
+
+			if (!tmp.aio_sigeventp)
+				lio_wait = 1;
+		} else {
+			if (lio)
+				atomic_inc(&lio->lio_users);
+
+			ret = io_submit_one(ctx, user_iocb, &tmp, lio);
+
+			if (ret) {
+				if (lio) {
+					/*
+					 * If a request failed, just decrement
+					 * the users count, but go on
submitting
+					 * subsequent requests.
+					 */
+					atomic_dec(&lio->lio_users);
+				} else
+					break;
+			}
+		}
+	}
+
+	if (lio) {
+		/*
+		 * Drop extra ref on the lio now that we're done submitting
+		 * requests
+		 */
+		lio_check(lio);
+
+		if (lio_wait) {
+			wait_event(ctx->wait, atomic_read(&lio->lio_users)==0);
+			kfree(lio);
+		}
 	}
 
+out_put_ctx:
 	put_ioctx(ctx);
 	return i ? i : ret;
 }
Index: linux-2.6.19-rc6-mm2/include/linux/aio_abi.h
===================================================================
--- linux-2.6.19-rc6-mm2.orig/include/linux/aio_abi.h	2006-11-28
12:51:45.000000000 +0100 +++
linux-2.6.19-rc6-mm2/include/linux/aio_abi.h	2006-11-28
12:51:48.000000000 +0100 @@ -43,6 +43,7 @@ enum { IOCB_CMD_NOOP = 6,
 	IOCB_CMD_PREADV = 7,
 	IOCB_CMD_PWRITEV = 8,
+	IOCB_CMD_GROUP = 9,
 };
 
 /* read() from /dev/aio returns these structures. */
Index: linux-2.6.19-rc6-mm2/include/linux/aio.h
===================================================================
--- linux-2.6.19-rc6-mm2.orig/include/linux/aio.h	2006-11-28
12:51:45.000000000 +0100 +++ linux-2.6.19-rc6-mm2/include/linux/aio.h
2006-11-28 12:51:48.000000000 +0100 @@ -58,6 +58,11 @@ struct aio_notify {
 	struct sigqueue		*sigq;
 };
 
+struct lio_event {
+	atomic_t		lio_users;
+	struct aio_notify	lio_notify;
+};
+
 /* is there a better place to document function pointer methods? */
 /**
  * ki_retry	-	iocb forward progress callback
@@ -113,6 +118,9 @@ struct kiocb {
 	wait_queue_t		ki_wait;
 	loff_t			ki_pos;
 
+	/* lio this iocb might be attached to */
+	struct lio_event	*ki_lio;
+
 	void			*private;
 	/* State that we remember to be able to restart/retry  */
 	unsigned short		ki_opcode;
@@ -220,12 +228,15 @@ struct mm_struct;
 extern void FASTCALL(exit_aio(struct mm_struct *mm));
 extern struct kioctx *lookup_ioctx(unsigned long ctx_id);
 extern int FASTCALL(io_submit_one(struct kioctx *ctx,
-			struct iocb __user *user_iocb, struct iocb *iocb));
+				  struct iocb __user *user_iocb, struct iocb
*iocb,
+				  struct lio_event *lio));
 
 /* semi private, but used by the 32bit emulations: */
 struct kioctx *lookup_ioctx(unsigned long ctx_id);
 int FASTCALL(io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
-				  struct iocb *iocb));
+			   struct iocb *iocb, struct lio_event *lio));
+struct lio_event *lio_create(struct sigevent __user *user_event);
+void lio_check(struct lio_event *lio);
 
 #define get_ioctx(kioctx) do {						\
 	BUG_ON(atomic_read(&(kioctx)->users) <= 0);			\
Index: linux-2.6.19-rc6-mm2/fs/compat.c
===================================================================
--- linux-2.6.19-rc6-mm2.orig/fs/compat.c	2006-11-28 12:51:45.000000000
+0100 +++ linux-2.6.19-rc6-mm2/fs/compat.c	2006-11-28 12:51:48.000000000
+0100 @@ -646,6 +646,8 @@ asmlinkage long
 compat_sys_io_submit(aio_context_t ctx_id, int nr, u32 __user *iocb)
 {
 	struct kioctx *ctx;
+	struct lio_event *lio = NULL;
+	int lio_wait = 0;
 	long ret = 0;
 	int i;
 
@@ -694,12 +696,66 @@ compat_sys_io_submit(aio_context_t ctx_i
 			tmp.aio_sigeventp = (__u64)event;
 		}
 
-		ret = io_submit_one(ctx, user_iocb, &tmp);
+		if (tmp.aio_lio_opcode == IOCB_CMD_GROUP) {
 
-		if (ret)
-			break;
+			/* this command means that all following IO commands
+			 * are in the same group.
+			 *
+			 * Userspace either wants to be notified upon or block
until
+			 * completion of all the requests in the group.
+			 */
+			/*
+			 * Ignore an IOCB_CMD_GROUP request if we are already
+			 * processing one. This means only one listio per
+			 * io_submit call.
+			 */
+			if (lio)
+				continue;
+
+			lio = lio_create((struct sigevent __user *)(unsigned
long)
+					 tmp.aio_sigeventp);
+
+			ret = PTR_ERR(lio);
+
+			if (IS_ERR(lio))
+				goto out_put_ctx;
+
+			if (!tmp.aio_sigeventp)
+				lio_wait = 1;
+		} else {
+			if (lio)
+				atomic_inc(&lio->lio_users);
+
+			ret = io_submit_one(ctx, user_iocb, &tmp, lio);
+
+			if (ret) {
+				if (lio) {
+					/*
+					 * If a request failed, just decrement
+					 * the users count, but go on
submitting
+					 * subsequent requests.
+					 */
+					atomic_dec(&lio->lio_users);
+				} else
+					break;
+			}
+		}
+	}
+
+	if (lio) {
+		/*
+		 * Drop extra ref on the lio now that we're done submitting
+		 * requests
+		 */
+		lio_check(lio);
+
+		if (lio_wait) {
+			wait_event(ctx->wait, atomic_read(&lio->lio_users)==0);
+			kfree(lio);
+		}
 	}
 
+out_put_ctx:
 	put_ioctx(ctx);
 
 	return i? i: ret;

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH -mm 3/5][AIO] - export good_sigevent()
  2006-11-29 10:32 ` [PATCH -mm 3/5][AIO] - export good_sigevent() Sébastien Dugué
@ 2006-11-29 10:38   ` Christoph Hellwig
  2006-11-29 10:46     ` Sébastien Dugué
  2006-11-29 14:54   ` Christoph Hellwig
  2006-12-04 17:13   ` Bharata B Rao
  2 siblings, 1 reply; 25+ messages in thread
From: Christoph Hellwig @ 2006-11-29 10:38 UTC (permalink / raw)
  To: S?bastien Dugu?
  Cc: linux-kernel, linux-aio, Andrew Morton, Suparna Bhattacharya,
	Christoph Hellwig, Zach Brown, Badari Pulavarty, Ulrich Drepper,
	Jean Pierre Dion

On Wed, Nov 29, 2006 at 11:32:34AM +0100, S?bastien Dugu? wrote:
> 
>                       Export good_sigevent()
> 
> 
>   Move good_sigevent() from posix-timers.c to signal.c where it belongs,
> and export it so that it can be used by other subsystems.

A little nitpick about the subject: we usually use the term 'export' when
adding an EXPORT_SYMBOL.  What would better describe your patch is

'make good_sigevent non-static'


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH -mm 3/5][AIO] - export good_sigevent()
  2006-11-29 10:38   ` Christoph Hellwig
@ 2006-11-29 10:46     ` Sébastien Dugué
  0 siblings, 0 replies; 25+ messages in thread
From: Sébastien Dugué @ 2006-11-29 10:46 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: linux-kernel, linux-aio, Andrew Morton, Suparna Bhattacharya,
	Christoph Hellwig, Zach Brown, Badari Pulavarty, Ulrich Drepper,
	Jean Pierre Dion

On Wed, 29 Nov 2006 10:38:25 +0000
Christoph Hellwig <hch@infradead.org> wrote:

> On Wed, Nov 29, 2006 at 11:32:34AM +0100, S?bastien Dugu? wrote:
> > 
> >                       Export good_sigevent()
> > 
> > 
> >   Move good_sigevent() from posix-timers.c to signal.c where it belongs,
> > and export it so that it can be used by other subsystems.
> 
> A little nitpick about the subject: we usually use the term 'export' when
> adding an EXPORT_SYMBOL.  What would better describe your patch is
> 
> 'make good_sigevent non-static'
> 

  No problem.

  Thanks,

  Sébastien.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH -mm 4/5][AIO] - AIO completion signal notification
  2006-11-29 10:33 ` [PATCH -mm 4/5][AIO] - AIO completion signal notification Sébastien Dugué
@ 2006-11-29 10:51   ` Christoph Hellwig
  2006-11-29 13:08     ` Sébastien Dugué
  2006-11-29 11:33   ` Jakub Jelinek
  1 sibling, 1 reply; 25+ messages in thread
From: Christoph Hellwig @ 2006-11-29 10:51 UTC (permalink / raw)
  To: S?bastien Dugu?
  Cc: linux-kernel, linux-aio, Andrew Morton, Suparna Bhattacharya,
	Christoph Hellwig, Zach Brown, Badari Pulavarty, Ulrich Drepper,
	Jean Pierre Dion

I'm a little bit unhappy about the usage of the notify flag.  The usage
seems correct but very confusing:

In io_submit_one we set ki_notify.notify to SIGEV_NONE and possibly
call aio_setup_sigevent:

> +	/* handle setting up the sigevent for POSIX AIO signals */
> +	req->ki_notify.notify = SIGEV_NONE;
> +
> +	if (iocb->aio_sigeventp) {
> +		ret = aio_setup_sigevent(&req->ki_notify,
> +					 (struct sigevent __user *)(unsigned
> long)
> +					 iocb->aio_sigeventp);
> +		if (ret)
> +			goto out_put_req;
> +	}
> +

aio_setup_sigevent then checks the user passed even for which notify type
we have, and returns if it's none or otherwise sets notify->notify to it.

> +	if (event.sigev_notify == SIGEV_NONE)
> +		return 0;
> +
> +	notify->notify = event.sigev_notify;

Later aio_setup_sigevent gets a reference to the target task_structure
if notify->notify is (SIGEV_SIGNAL|SIGEV_THREAD_ID) but _always_ stores
the target pointer.

> +	if (notify->notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID)) {
> +		/*
> +		 * This reference will be dropped in really_put_req() when
> +		 * we're done with the request.
> +		 */
> +		get_task_struct(target);
> +	}
> +
> +	notify->target = target;


Once we're done with the iocb aio_complete aclls aio_send_signal if
notify.notify is not SIGEV_NONE.

> +	if (iocb->ki_notify.notify != SIGEV_NONE) {
> +		ret = aio_send_signal(&iocb->ki_notify);
> +
> +		/* If signal generation failed, release the sigqueue */
> +		if (ret)
> +			sigqueue_free(iocb->ki_notify.sigq);
> +	}
> +

Which then uses notify->target to send the signal:
> +	if (notify->notify & SIGEV_THREAD_ID)
> +		ret = send_sigqueue(notify->signo, sigq, notify->target);
> +	else
> +		ret = send_group_sigqueue(notify->signo, sigq, notify->target);

And finally really_put_req puts the target if notify.notify contains
either SIGEV_SIGNAL or SIGEV_THREAD_ID.

> +	/* Release task ref */
> +	if (req->ki_notify.notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
> +		put_task_struct(req->ki_notify.target);

Do you see the confusing?  I think all the notify.notify != SIGEV_NONE
in the above code should be replaces by the much more descriptive
notify.notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID). In addition we should
only store the target pointer inside the (SIGEV_SIGNAL|SIGEV_THREAD_ID)
if block that gets a reference to it.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH -mm 4/5][AIO] - AIO completion signal notification
  2006-11-29 10:33 ` [PATCH -mm 4/5][AIO] - AIO completion signal notification Sébastien Dugué
  2006-11-29 10:51   ` Christoph Hellwig
@ 2006-11-29 11:33   ` Jakub Jelinek
  2006-11-29 13:25     ` Sébastien Dugué
  1 sibling, 1 reply; 25+ messages in thread
From: Jakub Jelinek @ 2006-11-29 11:33 UTC (permalink / raw)
  To: Sebastian Dugue
  Cc: linux-kernel, linux-aio, Andrew Morton, Suparna Bhattacharya,
	Christoph Hellwig, Zach Brown, Badari Pulavarty, Ulrich Drepper,
	Jean Pierre Dion

On Wed, Nov 29, 2006 at 11:33:01AM +0100, S?bastien Dugu? wrote:
>                       AIO completion signal notification
> 
>   The current 2.6 kernel does not support notification of user space via
> an RT signal upon an asynchronous IO completion. The POSIX specification
> states that when an AIO request completes, a signal can be delivered to
> the application as notification.
> 
>   This patch adds a struct sigevent *aio_sigeventp to the iocb.
> The relevant fields (pid, signal number and value) are stored in the kiocb
> for use when the request completes.
> 
>   That sigevent structure is filled by the application as part of the AIO
> request preparation. Upon request completion, the kernel notifies the
> application using those sigevent parameters. If SIGEV_NONE has been specified,
> then the old behaviour is retained and the application must rely on polling
> the completion queue using io_getevents().

Well, from what I see applications must rely on polling the completion
queue using io_getevents() in any case, isn't that the only way how to free
the kernel resources associated with the AIO request, even if it uses
SIGEV_SIGNAL or thread notification?  aio_error/aio_return/aio_suspend
will still need to io_getevents, the only important difference with this
patch is that a) the polling doesn't need to be asynchronous (i.e. have
a special thread which just loops doing io_getevents)
b) it doesn't need to care about notification itself.

	Jakub

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH -mm 4/5][AIO] - AIO completion signal notification
  2006-11-29 10:51   ` Christoph Hellwig
@ 2006-11-29 13:08     ` Sébastien Dugué
  2006-11-29 13:50       ` Christoph Hellwig
  0 siblings, 1 reply; 25+ messages in thread
From: Sébastien Dugué @ 2006-11-29 13:08 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: linux-kernel, linux-aio, Andrew Morton, Suparna Bhattacharya,
	Christoph Hellwig, Zach Brown, Badari Pulavarty, Ulrich Drepper,
	Jean Pierre Dion

On Wed, 29 Nov 2006 10:51:50 +0000, Christoph Hellwig <hch@infradead.org> wrote:

> I'm a little bit unhappy about the usage of the notify flag.  The usage
> seems correct but very confusing:

  Well, I followed the logic from posix-timers.c, but it may be a poor
choice ;-)

  For a start, the SIGEV_* flags are quite confusing (for me at least).
SIGEV_SIGNAL is defined as 0, SIGEV_NONE as 1 and SIGEV_THREAD_ID as 4. I
would rather have seen SIGEV_NONE defined as 0 to avoid all this.

  I also wish I knew why those SIGEV_* constants were defined that way.

> 
> In io_submit_one we set ki_notify.notify to SIGEV_NONE and possibly
> call aio_setup_sigevent:
> 
> > +	/* handle setting up the sigevent for POSIX AIO signals */
> > +	req->ki_notify.notify = SIGEV_NONE;
> > +
> > +	if (iocb->aio_sigeventp) {
> > +		ret = aio_setup_sigevent(&req->ki_notify,
> > +					 (struct sigevent __user *)(unsigned
> > long)
> > +					 iocb->aio_sigeventp);
> > +		if (ret)
> > +			goto out_put_req;
> > +	}
> > +
> 
> aio_setup_sigevent then checks the user passed even for which notify type
> we have, and returns if it's none or otherwise sets notify->notify to it.
> 
> > +	if (event.sigev_notify == SIGEV_NONE)
> > +		return 0;
> > +
> > +	notify->notify = event.sigev_notify;
> 
> Later aio_setup_sigevent gets a reference to the target task_structure
> if notify->notify is (SIGEV_SIGNAL|SIGEV_THREAD_ID) but _always_ stores
> the target pointer.

  Yep, as SIGEV_SIGNAL is 0, this in fact checks that notify is SIGEV_THREAD_ID.
It could have been written as:

	if (notify->notify == SIGEV_THREAD_ID)

  And the target pointer is always store because at this point notify is either
SIGEV_SIGNAL or SIGEV_THREAD_ID.

> 
> > +	if (notify->notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID)) {
> > +		/*
> > +		 * This reference will be dropped in really_put_req() when
> > +		 * we're done with the request.
> > +		 */
> > +		get_task_struct(target);
> > +	}
> > +
> > +	notify->target = target;
> 
> 
> Once we're done with the iocb aio_complete aclls aio_send_signal if
> notify.notify is not SIGEV_NONE.

  Again, if it's not SIGEV_NONE, then it's either SIGEV_SIGNAL or
SIGEV_THREAD_ID.

> 
> > +	if (iocb->ki_notify.notify != SIGEV_NONE) {
> > +		ret = aio_send_signal(&iocb->ki_notify);
> > +
> > +		/* If signal generation failed, release the sigqueue */
> > +		if (ret)
> > +			sigqueue_free(iocb->ki_notify.sigq);
> > +	}
> > +
> 
> Which then uses notify->target to send the signal:
> > +	if (notify->notify & SIGEV_THREAD_ID)
> > +		ret = send_sigqueue(notify->signo, sigq, notify->target);
> > +	else
> > +		ret = send_group_sigqueue(notify->signo, sigq, notify->target);
> 
> And finally really_put_req puts the target if notify.notify contains
> either SIGEV_SIGNAL or SIGEV_THREAD_ID.
> 
> > +	/* Release task ref */
> > +	if (req->ki_notify.notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
> > +		put_task_struct(req->ki_notify.target);

  Could have been if (req->ki_notify.notify == SIGEV_THREAD_ID)

> 
> Do you see the confusing?  I think all the notify.notify != SIGEV_NONE
> in the above code should be replaces by the much more descriptive
> notify.notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID). In addition we should
> only store the target pointer inside the (SIGEV_SIGNAL|SIGEV_THREAD_ID)
> if block that gets a reference to it.

  No, I don't think so, notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID) really means
notify == SIGEV_THREAD_ID which leaves out notify == SIGEV_SIGNAL. Or
am I grossly mistaken?

  Thanks,

  Sébastien.



> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH -mm 4/5][AIO] - AIO completion signal notification
  2006-11-29 11:33   ` Jakub Jelinek
@ 2006-11-29 13:25     ` Sébastien Dugué
  0 siblings, 0 replies; 25+ messages in thread
From: Sébastien Dugué @ 2006-11-29 13:25 UTC (permalink / raw)
  To: Jakub Jelinek
  Cc: linux-kernel, linux-aio, Andrew Morton, Suparna Bhattacharya,
	Christoph Hellwig, Zach Brown, Badari Pulavarty, Ulrich Drepper,
	Jean Pierre Dion

On Wed, 29 Nov 2006 06:33:35 -0500, Jakub Jelinek <jakub@redhat.com> wrote:

> On Wed, Nov 29, 2006 at 11:33:01AM +0100, S?bastien Dugu? wrote:
> >                       AIO completion signal notification
> > 
> >   The current 2.6 kernel does not support notification of user space via
> > an RT signal upon an asynchronous IO completion. The POSIX specification
> > states that when an AIO request completes, a signal can be delivered to
> > the application as notification.
> > 
> >   This patch adds a struct sigevent *aio_sigeventp to the iocb.
> > The relevant fields (pid, signal number and value) are stored in the kiocb
> > for use when the request completes.
> > 
> >   That sigevent structure is filled by the application as part of the AIO
> > request preparation. Upon request completion, the kernel notifies the
> > application using those sigevent parameters. If SIGEV_NONE has been specified,
> > then the old behaviour is retained and the application must rely on polling
> > the completion queue using io_getevents().
> 
> Well, from what I see applications must rely on polling the completion
> queue using io_getevents() in any case, isn't that the only way how to free
> the kernel resources associated with the AIO request, even if it uses
> SIGEV_SIGNAL or thread notification?

  Well, applications do not need to do any polling on the queue anymore.
io_getevents() needs to be called only once when the signal has been received,
either from a signal handler or from a thread blocking on the signal.

> aio_error/aio_return/aio_suspend
> will still need to io_getevents,

  Right, but only once.

> the only important difference with this
> patch is that a) the polling doesn't need to be asynchronous (i.e. have
> a special thread which just loops doing io_getevents)

  Yes, no more polling loop and I think this makes a big difference.

> b) it doesn't need to care about notification itself.
> 

  Uh! what do you mean here?

  Sébastien.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH -mm 4/5][AIO] - AIO completion signal notification
  2006-11-29 13:08     ` Sébastien Dugué
@ 2006-11-29 13:50       ` Christoph Hellwig
  2006-11-29 14:18         ` Sébastien Dugué
  0 siblings, 1 reply; 25+ messages in thread
From: Christoph Hellwig @ 2006-11-29 13:50 UTC (permalink / raw)
  To: S?bastien Dugu?
  Cc: Christoph Hellwig, linux-kernel, linux-aio, Andrew Morton,
	Suparna Bhattacharya, Zach Brown, Badari Pulavarty,
	Ulrich Drepper, Jean Pierre Dion

On Wed, Nov 29, 2006 at 02:08:01PM +0100, S?bastien Dugu? wrote:
> On Wed, 29 Nov 2006 10:51:50 +0000, Christoph Hellwig <hch@infradead.org> wrote:
> 
> > I'm a little bit unhappy about the usage of the notify flag.  The usage
> > seems correct but very confusing:
> 
>   Well, I followed the logic from posix-timers.c, but it may be a poor
> choice ;-)
> 
>   For a start, the SIGEV_* flags are quite confusing (for me at least).
> SIGEV_SIGNAL is defined as 0, SIGEV_NONE as 1 and SIGEV_THREAD_ID as 4. I
> would rather have seen SIGEV_NONE defined as 0 to avoid all this.
> 
>   I also wish I knew why those SIGEV_* constants were defined that way.

Ah, I missed that.  It explains some of the more wierd bits.  I suspect
we should then use != SIGEV_NONE for the any kind of signal notification
bit and == SIGEV_THREAD_ID for the case where we want to deliver to
a particular thread.

But this means we only get a thread reference for SIGEV_THREAD_ID
here:

> > > +	if (notify->notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID)) {
> > > +		/*
> > > +		 * This reference will be dropped in really_put_req() when
> > > +		 * we're done with the request.
> > > +		 */
> > > +		get_task_struct(target);
> > > +	}

But even use it for SIGEV_SIGNAL without SIGEV_THREAD_ID here:

> > > +	if (notify->notify & SIGEV_THREAD_ID)
> > > +		ret = send_sigqueue(notify->signo, sigq, notify->target);
> > > +	else
> > > +		ret = send_group_sigqueue(notify->signo, sigq, notify->target);

Or do I miss something?

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH -mm 4/5][AIO] - AIO completion signal notification
  2006-11-29 13:50       ` Christoph Hellwig
@ 2006-11-29 14:18         ` Sébastien Dugué
  0 siblings, 0 replies; 25+ messages in thread
From: Sébastien Dugué @ 2006-11-29 14:18 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: linux-kernel, linux-aio, Andrew Morton, Suparna Bhattacharya,
	Zach Brown, Badari Pulavarty, Ulrich Drepper, Jean Pierre Dion

On Wed, 29 Nov 2006 13:50:12 +0000, Christoph Hellwig <hch@infradead.org> wrote:

> On Wed, Nov 29, 2006 at 02:08:01PM +0100, S?bastien Dugu? wrote:
> > On Wed, 29 Nov 2006 10:51:50 +0000, Christoph Hellwig <hch@infradead.org> wrote:
> > 
> > > I'm a little bit unhappy about the usage of the notify flag.  The usage
> > > seems correct but very confusing:
> > 
> >   Well, I followed the logic from posix-timers.c, but it may be a poor
> > choice ;-)
> > 
> >   For a start, the SIGEV_* flags are quite confusing (for me at least).
> > SIGEV_SIGNAL is defined as 0, SIGEV_NONE as 1 and SIGEV_THREAD_ID as 4. I
> > would rather have seen SIGEV_NONE defined as 0 to avoid all this.
> > 
> >   I also wish I knew why those SIGEV_* constants were defined that way.
> 
> Ah, I missed that.  It explains some of the more wierd bits.  I suspect
> we should then use != SIGEV_NONE for the any kind of signal notification
> bit and == SIGEV_THREAD_ID for the case where we want to deliver to
> a particular thread.

  Right, that would make things much cleaner. Will try for it.

> 
> But this means we only get a thread reference for SIGEV_THREAD_ID
> here:
> 
> > > > +	if (notify->notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID)) {
> > > > +		/*
> > > > +		 * This reference will be dropped in really_put_req() when
> > > > +		 * we're done with the request.
> > > > +		 */
> > > > +		get_task_struct(target);
> > > > +	}

  It's the way it is in posix-timers and I'm not sure I understand why. We take
a ref on the specific task if notify is SIGEV_THREAD_ID but not for
SIGEV_SIGNAL.

  I'm wondering what I'm missing here, shouldn't we also take a ref on the task
group leader in the SIGEV_SIGNAL case in posix-timers? 

> 
> But even use it for SIGEV_SIGNAL without SIGEV_THREAD_ID here:
> 
> > > > +	if (notify->notify & SIGEV_THREAD_ID)
> > > > +		ret = send_sigqueue(notify->signo, sigq, notify->target);
> > > > +	else
> > > > +		ret = send_group_sigqueue(notify->signo, sigq, notify->target);
> 
> Or do I miss something?

  I missing something too here ;-)

  If someone cared to explain why there is no ref taken on the task for the
SIGEV_SIGNAL case, it would be much appreciated. Is this a bug in posix-timers?


  Thanks,

  Sébastien.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH -mm 3/5][AIO] - export good_sigevent()
  2006-11-29 10:32 ` [PATCH -mm 3/5][AIO] - export good_sigevent() Sébastien Dugué
  2006-11-29 10:38   ` Christoph Hellwig
@ 2006-11-29 14:54   ` Christoph Hellwig
  2006-11-29 16:10     ` Sébastien Dugué
  2006-12-04 17:13   ` Bharata B Rao
  2 siblings, 1 reply; 25+ messages in thread
From: Christoph Hellwig @ 2006-11-29 14:54 UTC (permalink / raw)
  To: S?bastien Dugu?
  Cc: linux-kernel, linux-aio, Andrew Morton, Suparna Bhattacharya,
	Christoph Hellwig, Zach Brown, Badari Pulavarty, Ulrich Drepper,
	Jean Pierre Dion

> +/***
> + * good_sigevent - check and get target task from a sigevent.
> + * @event: the sigevent to be checked
> + *
> + * This function must be called with tasklist_lock held for reading.
> + */
> +struct task_struct * good_sigevent(sigevent_t * event)
> +{
> +	struct task_struct *rtn = current->group_leader;
> +
> +	if ((event->sigev_notify & SIGEV_THREAD_ID ) &&
> +		(!(rtn = find_task_by_pid(event->sigev_notify_thread_id)) ||
> +		 rtn->tgid != current->tgid ||
> +		 (event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL))
> +		return NULL;
> +
> +	if (((event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) &&
> +	    ((event->sigev_signo <= 0) || (event->sigev_signo > SIGRTMAX)))
> +		return NULL;
> +
> +	return rtn;
> +}

And while we're at it we should badly beat up the person that wrote this
mess in the first time.  To be somewhat readable it should look like:

static struct task_struct *good_sigevent(sigevent_t *event)
{
	struct task_struct *task = current->group_leader;

	if ((event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) {
		if (event->sigev_signo <= 0 || event->sigev_signo > SIGRTMAX)
			return NULL;
	}

	if (event->sigev_notify & SIGEV_THREAD_ID) {
		if ((event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL)
			return NULL;
		task = find_task_by_pid(event->sigev_notify_thread_id);
		if (!task || task->tgid != current->tgid)
			return NULL;
	}

	return task;
}

And btw, looking at its currentl caller I see why we need the PF_EXITING
flag I recommended to remove easiler on, it even has a big comment that
we should copy & paste to aio.c aswell.  Still no idea why it's doing
the selectiv reference grabbing, though.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH -mm 3/5][AIO] - export good_sigevent()
  2006-11-29 14:54   ` Christoph Hellwig
@ 2006-11-29 16:10     ` Sébastien Dugué
  0 siblings, 0 replies; 25+ messages in thread
From: Sébastien Dugué @ 2006-11-29 16:10 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: linux-kernel, linux-aio, Andrew Morton, Suparna Bhattacharya,
	Christoph Hellwig, Zach Brown, Badari Pulavarty, Ulrich Drepper,
	Jean Pierre Dion

On Wed, 29 Nov 2006 14:54:25 +0000, Christoph Hellwig <hch@infradead.org> wrote:

> > +/***
> > + * good_sigevent - check and get target task from a sigevent.
> > + * @event: the sigevent to be checked
> > + *
> > + * This function must be called with tasklist_lock held for reading.
> > + */
> > +struct task_struct * good_sigevent(sigevent_t * event)
> > +{
> > +	struct task_struct *rtn = current->group_leader;
> > +
> > +	if ((event->sigev_notify & SIGEV_THREAD_ID ) &&
> > +		(!(rtn = find_task_by_pid(event->sigev_notify_thread_id)) ||
> > +		 rtn->tgid != current->tgid ||
> > +		 (event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL))
> > +		return NULL;
> > +
> > +	if (((event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) &&
> > +	    ((event->sigev_signo <= 0) || (event->sigev_signo > SIGRTMAX)))
> > +		return NULL;
> > +
> > +	return rtn;
> > +}
> 
> And while we're at it we should badly beat up the person that wrote this
> mess in the first time.

  Agreed.

>  To be somewhat readable it should look like:
> 
> static struct task_struct *good_sigevent(sigevent_t *event)
> {
> 	struct task_struct *task = current->group_leader;
> 
> 	if ((event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) {
> 		if (event->sigev_signo <= 0 || event->sigev_signo > SIGRTMAX)
> 			return NULL;
> 	}
> 
> 	if (event->sigev_notify & SIGEV_THREAD_ID) {
> 		if ((event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL)
> 			return NULL;
> 		task = find_task_by_pid(event->sigev_notify_thread_id);
> 		if (!task || task->tgid != current->tgid)
> 			return NULL;
> 	}
> 
> 	return task;
> }

  Yes, will incorporate this.

> 
> And btw, looking at its currentl caller I see why we need the PF_EXITING
> flag I recommended to remove easiler on, it even has a big comment that
> we should copy & paste to aio.c aswell.

  Well, I do not take the siglock anymore, so I don't think the comment
really applies here.

>  Still no idea why it's doing
> the selectiv reference grabbing, though.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH -mm 1/5][AIO] - Rework compat_sys_io_submit
  2006-11-29 10:32 ` [PATCH -mm 1/5][AIO] - Rework compat_sys_io_submit Sébastien Dugué
@ 2006-11-30  0:47   ` Zach Brown
  2006-11-30  9:57     ` Sébastien Dugué
  0 siblings, 1 reply; 25+ messages in thread
From: Zach Brown @ 2006-11-30  0:47 UTC (permalink / raw)
  To: Sébastien Dugué
  Cc: linux-kernel, linux-aio, Andrew Morton, Suparna Bhattacharya,
	Christoph Hellwig, Badari Pulavarty, Ulrich Drepper,
	Jean Pierre Dion

On Nov 29, 2006, at 2:32 AM, Sébastien Dugué wrote:

>                  compat_sys_io_submit() cleanup
>
>
>   Cleanup compat_sys_io_submit by duplicating some of the native  
> syscall
> logic in the compat layer and directly calling io_submit_one() instead
> of fooling the syscall into thinking it is called from a native 64-bit
> caller.
>
>   This is needed for the completion notification patch to avoid having
> to rewrite each iocb on the caller stack for sys_io_submit() to  
> find the
> sigevents.

You could explicitly mention that this eliminates:

  - the overhead of copying nr pointers on the userspace caller's stack

  - the arbitrary PAGE_SIZE/(sizeof(void *)) limit on the number of  
iocbs that can be submitted

Those alone make this worth merging.

> +	if (unlikely(!access_ok(VERIFY_READ, iocb, (nr * sizeof(u32)))))
> +		return -EFAULT;

I'm glad you got that right :)  I no doubt would have initially  
hoisted these little checks into a shared helper function and missed  
that detail of getting the size of the access_ok() right in the  
compat case.

> +	put_ioctx(ctx);
> +
> +	return i? i: ret;

sys_io_getevents() reads:

         put_ioctx(ctx);
         return i ? i : ret;

So while this compat_sys_io_submit() logic seems fine and I would be  
comfortable with it landing as-is, I'd also appreciate it if we  
didn't introduce differences between the two functions when it seems  
just as easy to make them the same.  (That chunk is just one  
example.  There's whitespace, missing unlikely()s, etc).

- z

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH -mm 5/5][AIO] - Listio support
  2006-11-29 10:33 ` [PATCH -mm 5/5][AIO] - Listio support Sébastien Dugué
@ 2006-11-30  8:25   ` Suparna Bhattacharya
  2006-11-30 10:04     ` Sébastien Dugué
  0 siblings, 1 reply; 25+ messages in thread
From: Suparna Bhattacharya @ 2006-11-30  8:25 UTC (permalink / raw)
  To: =?iso-8859-1?Q?S=E9bastien_Dugu=E9_=3Csebastien=2Edugue=40bull=2Enet?=.=?iso-8859-1?Q?=3E?=
  Cc: linux-kernel, linux-aio, Andrew Morton, Christoph Hellwig,
	Zach Brown, Badari Pulavarty, Ulrich Drepper, Jean Pierre Dion


Could you mention changes in this patch since your last post ? 

BTW, if I try to apply your patches, I get the following error (diffstat
works ok, but something has mangled the patch, maybe mailer problems ?)

patch: **** Only garbage was found in the patch input.

Regards
Suparna

On Wed, Nov 29, 2006 at 11:33:26AM +0100, Sébastien Dugué wrote:
>   This patch adds POSIX listio completion notification support. It builds
> on support provided by the aio signal notification patch and adds an
> IOCB_CMD_GROUP command to io_submit().
> 
>   The purpose of IOCB_CMD_GROUP is to group together the following requests in
> the list up to the end of the list sumbitted to io_submit.
> 
>   As io_submit already accepts an array of iocbs, as part of listio submission,
> the user process prepends to a list of requests an empty special aiocb with
> an aio_lio_opcode of IOCB_CMD_GROUP, filling only the aio_sigevent fields.
> 
> 
>   An IOCB_CMD_GROUP is added to the IOCB_CMD enum in include/linux/aio_abi.h
> 
>   A struct lio_event is added in include/linux/aio.h
> 
>   A struct lio_event *ki_lio is added to struct iocb in include/linux/aio.h
> 
>   In io_submit(), upon detecting such an IOCB_CMD_GROUP marker iocb, an
> lio_event is created in lio_create() which contains the necessary information
> for signaling a thread (signal number, pid, notify type and value) along with
> a count of requests attached to this event.
> 
>         The following depicts the lio_event structure:
> 
>         struct lio_event {
>                 atomic_t        	lio_users;
>                 struct aio_notify	lio_notify;
>         };
> 
>   lio_users holds an atomic counter of the number of requests attached to this
> lio. It is incremented with each request submitted and decremented at each
> request completion. When the counter reaches 0, we send the notification.
> 
>   Each subsequent submitted request is attached to this lio_event by setting
> the request kiocb->ki_lio to that lio_event (in io_submit_one()) and
> incrementing the lio_users count.
> 
>   In aio_complete(), if the request is attached to an lio (ki_lio <> 0),
> then lio_check() is called to decrement the lio_users count and eventually
> signal the user process when all the requests in the group have completed.
> 
> 
>   The IOCB_CMD_GROUP semantic is as follows:
> 
>        - if the associated sigevent is NULL then we want to group
>          requests for the purpose of blocking on the group completion
>          (LIO_WAIT sync behavior).
> 
>        - if the associated sigevent is valid (not NULL) then we want to
>          group requests for the purpose of being notified upon that
>          group of requests completion (LIO_NOWAIT async behaviour).
> 
> 
> 
>  fs/aio.c                |  123 ++++++++++++++++++++++++++++++++++++++++++++++--
>  fs/compat.c             |   62 +++++++++++++++++++++++-
>  include/linux/aio.h     |   15 +++++
>  include/linux/aio_abi.h |    1
>  4 files changed, 192 insertions(+), 9 deletions(-)
> 
> Signed-off-by: Sébastien Dugué <sebastien.dugue@bull.net>
> Signed-off-by: Laurent Vivier <laurent.vivier@bull.net>
> 
> Index: linux-2.6.19-rc6-mm2/fs/aio.c
> ===================================================================
> --- linux-2.6.19-rc6-mm2.orig/fs/aio.c	2006-11-28 12:51:45.000000000
> +0100 +++ linux-2.6.19-rc6-mm2/fs/aio.c	2006-11-28 12:51:48.000000000
> +0100 @@ -414,6 +414,7 @@ static struct kiocb fastcall *__aio_get_
>  	req->ki_cancel = NULL;
>  	req->ki_retry = NULL;
>  	req->ki_dtor = NULL;
> +	req->ki_lio = NULL;
>  	req->private = NULL;
>  	req->ki_iovec = NULL;
>  	req->ki_notify.sigq = NULL;
> @@ -1009,6 +1010,53 @@ out_unlock:
>  	return -EINVAL;
>  }
> 
> +void lio_check(struct lio_event *lio)
> +{
> +	int ret;
> +
> +	ret = atomic_dec_and_test(&lio->lio_users);
> +
> +	if (unlikely(ret) && lio->lio_notify.notify != SIGEV_NONE) {
> +		/* last one -> notify process */
> +		aio_send_signal(&lio->lio_notify);
> +		kfree(lio);
> +	}
> +}
> +
> +struct lio_event *lio_create(struct sigevent __user *user_event)
> +{
> +	int ret = 0;
> +	struct lio_event *lio = NULL;
> +
> +	lio = kzalloc(sizeof(*lio), GFP_KERNEL);
> +
> +	if (!lio)
> +		return ERR_PTR(-EAGAIN);
> +
> +	/*
> +	 * Grab an initial ref on the lio to avoid races between
> +	 * submission and completion.
> +	 */
> +	atomic_set(&lio->lio_users, 1);
> +
> +	lio->lio_notify.notify = SIGEV_NONE;
> +
> +	if (user_event) {
> +		/*
> +		 * User specified an event for this lio,
> +		 * he wants to be notified upon lio completion.
> +		 */
> +		ret = aio_setup_sigevent(&lio->lio_notify, user_event);
> +
> +		if (ret) {
> +			kfree(lio);
> +			return ERR_PTR(ret);
> +		}
> +	}
> +
> +	return lio;
> +}
> +
>  /* aio_complete
>   *	Called when the io request on the given iocb is complete.
>   *	Returns true if this is the last user of the request.  The
> @@ -1057,8 +1105,12 @@ int fastcall aio_complete(struct kiocb *
>  	 * when the event got cancelled.
>  	 */
>  	if (kiocbIsCancelled(iocb)) {
> +		if (iocb->ki_lio)
> +			lio_check(iocb->ki_lio);
> +
>  		if (iocb->ki_notify.sigq)
>  			sigqueue_free(iocb->ki_notify.sigq);
> +
>  		goto put_rq;
>  	}
> 
> @@ -1099,6 +1151,9 @@ int fastcall aio_complete(struct kiocb *
>  			sigqueue_free(iocb->ki_notify.sigq);
>  	}
> 
> +	if (iocb->ki_lio)
> +		lio_check(iocb->ki_lio);
> +
>  	pr_debug("%ld retries: %zd of %zd\n", iocb->ki_retried,
>  		iocb->ki_nbytes - iocb->ki_left, iocb->ki_nbytes);
>  put_rq:
> @@ -1633,7 +1688,7 @@ static int aio_wake_function(wait_queue_
>  }
> 
>  int fastcall io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
> -			 struct iocb *iocb)
> +			   struct iocb *iocb, struct lio_event *lio)
>  {
>  	struct kiocb *req;
>  	struct file *file;
> @@ -1695,6 +1750,9 @@ int fastcall io_submit_one(struct kioctx
>  			goto out_put_req;
>  	}
> 
> +	/* Attach this iocb to its lio */
> +	req->ki_lio = lio;
> +
>  	ret = aio_setup_iocb(req);
> 
>  	if (ret)
> @@ -1738,6 +1796,8 @@ asmlinkage long sys_io_submit(aio_contex
>  			      struct iocb __user * __user *iocbpp)
>  {
>  	struct kioctx *ctx;
> +	struct lio_event *lio = NULL;
> +	int lio_wait = 0;
>  	long ret = 0;
>  	int i;
> 
> @@ -1771,11 +1831,66 @@ asmlinkage long sys_io_submit(aio_contex
>  			break;
>  		}
> 
> -		ret = io_submit_one(ctx, user_iocb, &tmp);
> -		if (ret)
> -			break;
> +		if (tmp.aio_lio_opcode == IOCB_CMD_GROUP) {
> +
> +			/* this command means that all following IO commands
> +			 * are in the same group.
> +			 *
> +			 * Userspace either wants to be notified upon or block
> until
> +			 * completion of all the requests in the group.
> +			 */
> +			/*
> +			 * Ignore an IOCB_CMD_GROUP request if we are already
> +			 * processing one. This means only one listio per
> +			 * io_submit call.
> +			 */
> +			if (lio)
> +				continue;
> +
> +			lio = lio_create((struct sigevent __user *)(unsigned
> long)
> +					 tmp.aio_sigeventp);
> +
> +			ret = PTR_ERR(lio);
> +
> +			if (IS_ERR(lio))
> +				goto out_put_ctx;
> +
> +			if (!tmp.aio_sigeventp)
> +				lio_wait = 1;
> +		} else {
> +			if (lio)
> +				atomic_inc(&lio->lio_users);
> +
> +			ret = io_submit_one(ctx, user_iocb, &tmp, lio);
> +
> +			if (ret) {
> +				if (lio) {
> +					/*
> +					 * If a request failed, just decrement
> +					 * the users count, but go on
> submitting
> +					 * subsequent requests.
> +					 */
> +					atomic_dec(&lio->lio_users);
> +				} else
> +					break;
> +			}
> +		}
> +	}
> +
> +	if (lio) {
> +		/*
> +		 * Drop extra ref on the lio now that we're done submitting
> +		 * requests
> +		 */
> +		lio_check(lio);
> +
> +		if (lio_wait) {
> +			wait_event(ctx->wait, atomic_read(&lio->lio_users)==0);
> +			kfree(lio);
> +		}
>  	}
> 
> +out_put_ctx:
>  	put_ioctx(ctx);
>  	return i ? i : ret;
>  }
> Index: linux-2.6.19-rc6-mm2/include/linux/aio_abi.h
> ===================================================================
> --- linux-2.6.19-rc6-mm2.orig/include/linux/aio_abi.h	2006-11-28
> 12:51:45.000000000 +0100 +++
> linux-2.6.19-rc6-mm2/include/linux/aio_abi.h	2006-11-28
> 12:51:48.000000000 +0100 @@ -43,6 +43,7 @@ enum { IOCB_CMD_NOOP = 6,
>  	IOCB_CMD_PREADV = 7,
>  	IOCB_CMD_PWRITEV = 8,
> +	IOCB_CMD_GROUP = 9,
>  };
> 
>  /* read() from /dev/aio returns these structures. */
> Index: linux-2.6.19-rc6-mm2/include/linux/aio.h
> ===================================================================
> --- linux-2.6.19-rc6-mm2.orig/include/linux/aio.h	2006-11-28
> 12:51:45.000000000 +0100 +++ linux-2.6.19-rc6-mm2/include/linux/aio.h
> 2006-11-28 12:51:48.000000000 +0100 @@ -58,6 +58,11 @@ struct aio_notify {
>  	struct sigqueue		*sigq;
>  };
> 
> +struct lio_event {
> +	atomic_t		lio_users;
> +	struct aio_notify	lio_notify;
> +};
> +
>  /* is there a better place to document function pointer methods? */
>  /**
>   * ki_retry	-	iocb forward progress callback
> @@ -113,6 +118,9 @@ struct kiocb {
>  	wait_queue_t		ki_wait;
>  	loff_t			ki_pos;
> 
> +	/* lio this iocb might be attached to */
> +	struct lio_event	*ki_lio;
> +
>  	void			*private;
>  	/* State that we remember to be able to restart/retry  */
>  	unsigned short		ki_opcode;
> @@ -220,12 +228,15 @@ struct mm_struct;
>  extern void FASTCALL(exit_aio(struct mm_struct *mm));
>  extern struct kioctx *lookup_ioctx(unsigned long ctx_id);
>  extern int FASTCALL(io_submit_one(struct kioctx *ctx,
> -			struct iocb __user *user_iocb, struct iocb *iocb));
> +				  struct iocb __user *user_iocb, struct iocb
> *iocb,
> +				  struct lio_event *lio));
> 
>  /* semi private, but used by the 32bit emulations: */
>  struct kioctx *lookup_ioctx(unsigned long ctx_id);
>  int FASTCALL(io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
> -				  struct iocb *iocb));
> +			   struct iocb *iocb, struct lio_event *lio));
> +struct lio_event *lio_create(struct sigevent __user *user_event);
> +void lio_check(struct lio_event *lio);
> 
>  #define get_ioctx(kioctx) do {						\
>  	BUG_ON(atomic_read(&(kioctx)->users) <= 0);			\
> Index: linux-2.6.19-rc6-mm2/fs/compat.c
> ===================================================================
> --- linux-2.6.19-rc6-mm2.orig/fs/compat.c	2006-11-28 12:51:45.000000000
> +0100 +++ linux-2.6.19-rc6-mm2/fs/compat.c	2006-11-28 12:51:48.000000000
> +0100 @@ -646,6 +646,8 @@ asmlinkage long
>  compat_sys_io_submit(aio_context_t ctx_id, int nr, u32 __user *iocb)
>  {
>  	struct kioctx *ctx;
> +	struct lio_event *lio = NULL;
> +	int lio_wait = 0;
>  	long ret = 0;
>  	int i;
> 
> @@ -694,12 +696,66 @@ compat_sys_io_submit(aio_context_t ctx_i
>  			tmp.aio_sigeventp = (__u64)event;
>  		}
> 
> -		ret = io_submit_one(ctx, user_iocb, &tmp);
> +		if (tmp.aio_lio_opcode == IOCB_CMD_GROUP) {
> 
> -		if (ret)
> -			break;
> +			/* this command means that all following IO commands
> +			 * are in the same group.
> +			 *
> +			 * Userspace either wants to be notified upon or block
> until
> +			 * completion of all the requests in the group.
> +			 */
> +			/*
> +			 * Ignore an IOCB_CMD_GROUP request if we are already
> +			 * processing one. This means only one listio per
> +			 * io_submit call.
> +			 */
> +			if (lio)
> +				continue;
> +
> +			lio = lio_create((struct sigevent __user *)(unsigned
> long)
> +					 tmp.aio_sigeventp);
> +
> +			ret = PTR_ERR(lio);
> +
> +			if (IS_ERR(lio))
> +				goto out_put_ctx;
> +
> +			if (!tmp.aio_sigeventp)
> +				lio_wait = 1;
> +		} else {
> +			if (lio)
> +				atomic_inc(&lio->lio_users);
> +
> +			ret = io_submit_one(ctx, user_iocb, &tmp, lio);
> +
> +			if (ret) {
> +				if (lio) {
> +					/*
> +					 * If a request failed, just decrement
> +					 * the users count, but go on
> submitting
> +					 * subsequent requests.
> +					 */
> +					atomic_dec(&lio->lio_users);
> +				} else
> +					break;
> +			}
> +		}
> +	}
> +
> +	if (lio) {
> +		/*
> +		 * Drop extra ref on the lio now that we're done submitting
> +		 * requests
> +		 */
> +		lio_check(lio);
> +
> +		if (lio_wait) {
> +			wait_event(ctx->wait, atomic_read(&lio->lio_users)==0);
> +			kfree(lio);
> +		}
>  	}
> 
> +out_put_ctx:
>  	put_ioctx(ctx);
> 
>  	return i? i: ret;
> 
> --
> To unsubscribe, send a message with 'unsubscribe linux-aio' in
> the body to majordomo@kvack.org.  For more info on Linux AIO,
> see: http://www.kvack.org/aio/
> Don't email: <a href=mailto:"aart@kvack.org">aart@kvack.org</a>

-- 
Suparna Bhattacharya (suparna@in.ibm.com)
Linux Technology Center
IBM Software Lab, India


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH -mm 1/5][AIO] - Rework compat_sys_io_submit
  2006-11-30  0:47   ` Zach Brown
@ 2006-11-30  9:57     ` Sébastien Dugué
  2006-11-30 17:27       ` Zach Brown
  0 siblings, 1 reply; 25+ messages in thread
From: Sébastien Dugué @ 2006-11-30  9:57 UTC (permalink / raw)
  To: Zach Brown
  Cc: linux-kernel, linux-aio, Andrew Morton, Suparna Bhattacharya,
	Christoph Hellwig, Badari Pulavarty, Ulrich Drepper,
	Jean Pierre Dion

On Wed, 29 Nov 2006 16:47:47 -0800, Zach Brown <zach.brown@oracle.com> wrote:

> On Nov 29, 2006, at 2:32 AM, Sébastien Dugué wrote:
> 
> >                  compat_sys_io_submit() cleanup
> >
> >
> >   Cleanup compat_sys_io_submit by duplicating some of the native  
> > syscall
> > logic in the compat layer and directly calling io_submit_one() instead
> > of fooling the syscall into thinking it is called from a native 64-bit
> > caller.
> >
> >   This is needed for the completion notification patch to avoid having
> > to rewrite each iocb on the caller stack for sys_io_submit() to  
> > find the
> > sigevents.
> 
> You could explicitly mention that this eliminates:
> 
>   - the overhead of copying nr pointers on the userspace caller's stack
> 
>   - the arbitrary PAGE_SIZE/(sizeof(void *)) limit on the number of  
> iocbs that can be submitted
> 
> Those alone make this worth merging.

  Right, will add.

> 
> > +	if (unlikely(!access_ok(VERIFY_READ, iocb, (nr * sizeof(u32)))))
> > +		return -EFAULT;
> 
> I'm glad you got that right :)  I no doubt would have initially  
> hoisted these little checks into a shared helper function and missed  
> that detail of getting the size of the access_ok() right in the  
> compat case.

  Thanks.

> 
> > +	put_ioctx(ctx);
> > +
> > +	return i? i: ret;
> 
> sys_io_getevents() reads:

 uh!     ^^^^^^^^^    you must be meaning sys_io_submit()?

> 
>          put_ioctx(ctx);
>          return i ? i : ret;
> 
> So while this compat_sys_io_submit() logic seems fine and I would be  
> comfortable with it landing as-is, I'd also appreciate it if we  
> didn't introduce differences between the two functions when it seems  
> just as easy to make them the same.  (That chunk is just one  
> example.  There's whitespace, missing unlikely()s, etc).
> 

  OK, will fix.

  Thanks,

  Sébastien.


  

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH -mm 5/5][AIO] - Listio support
  2006-11-30  8:25   ` Suparna Bhattacharya
@ 2006-11-30 10:04     ` Sébastien Dugué
  0 siblings, 0 replies; 25+ messages in thread
From: Sébastien Dugué @ 2006-11-30 10:04 UTC (permalink / raw)
  To: suparna
  Cc: linux-kernel, linux-aio, Andrew Morton, Christoph Hellwig,
	Zach Brown, Badari Pulavarty, Ulrich Drepper, Jean Pierre Dion

On Thu, 30 Nov 2006 13:55:36 +0530, Suparna Bhattacharya <suparna@in.ibm.com> wrote:

> 
> Could you mention changes in this patch since your last post ? 

  Aside from adding compat support, nothing changed here.

> 
> BTW, if I try to apply your patches, I get the following error (diffstat
> works ok, but something has mangled the patch, maybe mailer problems ?)
> 
> patch: **** Only garbage was found in the patch input.
> 

  My fault, didn't notice that my mailer did wrap the inserted patches.
Arghhh, sorry. Next round soon.



^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH -mm 1/5][AIO] - Rework compat_sys_io_submit
  2006-11-30  9:57     ` Sébastien Dugué
@ 2006-11-30 17:27       ` Zach Brown
  0 siblings, 0 replies; 25+ messages in thread
From: Zach Brown @ 2006-11-30 17:27 UTC (permalink / raw)
  To: Sébastien Dugué
  Cc: linux-kernel, linux-aio, Andrew Morton, Suparna Bhattacharya,
	Christoph Hellwig, Badari Pulavarty, Ulrich Drepper,
	Jean Pierre Dion

>>
>> sys_io_getevents() reads:
>
>  uh!     ^^^^^^^^^    you must be meaning sys_io_submit()?

Heh, yes, of course.  Damn these fingers!

- z

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH -mm 3/5][AIO] - export good_sigevent()
  2006-11-29 10:32 ` [PATCH -mm 3/5][AIO] - export good_sigevent() Sébastien Dugué
  2006-11-29 10:38   ` Christoph Hellwig
  2006-11-29 14:54   ` Christoph Hellwig
@ 2006-12-04 17:13   ` Bharata B Rao
  2006-12-05  8:30     ` Sébastien Dugué
  2 siblings, 1 reply; 25+ messages in thread
From: Bharata B Rao @ 2006-12-04 17:13 UTC (permalink / raw)
  To: Sébastien Dugué
  Cc: linux-kernel, linux-aio, Andrew Morton, Suparna Bhattacharya,
	Christoph Hellwig, Zach Brown, Badari Pulavarty, Ulrich Drepper,
	Jean Pierre Dion

On Wed, Nov 29, 2006 at 11:32:34AM +0100, Sébastien Dugué wrote:
> 
> <snip> 
> +/***
> + * good_sigevent - check and get target task from a sigevent.
> + * @event: the sigevent to be checked
> + *
> + * This function must be called with tasklist_lock held for reading.
> + */
> +struct task_struct * good_sigevent(sigevent_t * event)
> +{
> +	struct task_struct *rtn = current->group_leader;
> +
> +	if ((event->sigev_notify & SIGEV_THREAD_ID ) &&
> +		(!(rtn = find_task_by_pid(event->sigev_notify_thread_id)) ||
> +		 rtn->tgid != current->tgid ||
> +		 (event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL))
> +		return NULL;
> +
> +	if (((event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) &&
> +	    ((event->sigev_signo <= 0) || (event->sigev_signo > SIGRTMAX)))
> +		return NULL;
> +
> +	return rtn;
> +}

Here good_sigevent() doesn't take care of SIGEV_THREAD. From this comment
from include/asm-generic/siginfo.h,

"It seems likely that SIGEV_THREAD will have to be handled from 
userspace, libpthread transmuting it to SIGEV_SIGNAL, which the
thread manager then catches and does the appropriate nonsense.
 However, everything is written out here so as to not get lost."

it looks like SIGEV_THREAD should never come into kernel. But atleast
libposix-aio does send SIGEV_THREAD all the way up to kernel.

Regards,
Bharata.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH -mm 3/5][AIO] - export good_sigevent()
  2006-12-04 17:13   ` Bharata B Rao
@ 2006-12-05  8:30     ` Sébastien Dugué
  0 siblings, 0 replies; 25+ messages in thread
From: Sébastien Dugué @ 2006-12-05  8:30 UTC (permalink / raw)
  To: bharata
  Cc: linux-kernel, linux-aio, Andrew Morton, Suparna Bhattacharya,
	Christoph Hellwig, Zach Brown, Badari Pulavarty, Ulrich Drepper,
	Jean Pierre Dion

On Mon, 4 Dec 2006 22:43:13 +0530 Bharata B Rao <bharata@in.ibm.com> wrote:

> On Wed, Nov 29, 2006 at 11:32:34AM +0100, Sébastien Dugué wrote:
> > 
> > <snip> 
> > +/***
> > + * good_sigevent - check and get target task from a sigevent.
> > + * @event: the sigevent to be checked
> > + *
> > + * This function must be called with tasklist_lock held for reading.
> > + */
> > +struct task_struct * good_sigevent(sigevent_t * event)
> > +{
> > +	struct task_struct *rtn = current->group_leader;
> > +
> > +	if ((event->sigev_notify & SIGEV_THREAD_ID ) &&
> > +		(!(rtn = find_task_by_pid(event->sigev_notify_thread_id)) ||
> > +		 rtn->tgid != current->tgid ||
> > +		 (event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL))
> > +		return NULL;
> > +
> > +	if (((event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) &&
> > +	    ((event->sigev_signo <= 0) || (event->sigev_signo > SIGRTMAX)))
> > +		return NULL;
> > +
> > +	return rtn;
> > +}
> 
> Here good_sigevent() doesn't take care of SIGEV_THREAD. From this comment
> from include/asm-generic/siginfo.h,
> 
> "It seems likely that SIGEV_THREAD will have to be handled from 
> userspace, libpthread transmuting it to SIGEV_SIGNAL, which the
> thread manager then catches and does the appropriate nonsense.
>  However, everything is written out here so as to not get lost."
> 
> it looks like SIGEV_THREAD should never come into kernel. But atleast
> libposix-aio does send SIGEV_THREAD all the way up to kernel.

  That's right, I had to reflect that change into libposix-aio, i.e. transmuting
SIGEV_THREAD into SIGEV_SIGNAL.

  Sébastien.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH -mm 1/5][AIO] - Rework compat_sys_io_submit
       [not found] <20070117104601.36b2ab18@frecb000686>
@ 2007-01-17  9:48 ` Sébastien Dugué
  0 siblings, 0 replies; 25+ messages in thread
From: Sébastien Dugué @ 2007-01-17  9:48 UTC (permalink / raw)
  To: linux-kernel
  Cc: Andrew Morton, linux-aio, Bharata B Rao, Christoph Hellwig,
	Suparna Bhattacharya, Ulrich Drepper, Zach Brown,
	Jean Pierre Dion, Badari Pulavarty


                 compat_sys_io_submit() cleanup


  Cleanup compat_sys_io_submit by duplicating some of the native syscall
logic in the compat layer and directly calling io_submit_one() instead
of fooling the syscall into thinking it is called from a native 64-bit
caller.

  This eliminates:

  - the overhead of copying the nr iocb pointers on the userspace stack

  - the PAGE_SIZE/(sizeof(void *)) limit on the number of iocbs that
    can be submitted.

  This is also needed for the completion notification patch to avoid having
to rewrite each iocb on the caller stack for io_submit_one() to find the
sigevents.


 compat.c |   61 ++++++++++++++++++++++++++++++++++---------------------------
 1 file changed, 34 insertions(+), 27 deletions(-)

Signed-off-by: Sébastien Dugué <sebastien.dugue@bull.net>

Index: linux-2.6.20-rc4-mm1/fs/compat.c
===================================================================
--- linux-2.6.20-rc4-mm1.orig/fs/compat.c	2007-01-12 11:40:28.000000000 +0100
+++ linux-2.6.20-rc4-mm1/fs/compat.c	2007-01-12 12:14:51.000000000 +0100
@@ -644,40 +644,47 @@ out:
 	return ret;
 }
 
-static inline long
-copy_iocb(long nr, u32 __user *ptr32, struct iocb __user * __user *ptr64)
-{
-	compat_uptr_t uptr;
-	int i;
-
-	for (i = 0; i < nr; ++i) {
-		if (get_user(uptr, ptr32 + i))
-			return -EFAULT;
-		if (put_user(compat_ptr(uptr), ptr64 + i))
-			return -EFAULT;
-	}
-	return 0;
-}
-
-#define MAX_AIO_SUBMITS 	(PAGE_SIZE/sizeof(struct iocb *))
-
 asmlinkage long
 compat_sys_io_submit(aio_context_t ctx_id, int nr, u32 __user *iocb)
 {
-	struct iocb __user * __user *iocb64; 
-	long ret;
+	struct kioctx *ctx;
+	long ret = 0;
+	int i;
 
 	if (unlikely(nr < 0))
 		return -EINVAL;
 
-	if (nr > MAX_AIO_SUBMITS)
-		nr = MAX_AIO_SUBMITS;
-	
-	iocb64 = compat_alloc_user_space(nr * sizeof(*iocb64));
-	ret = copy_iocb(nr, iocb, iocb64);
-	if (!ret)
-		ret = sys_io_submit(ctx_id, nr, iocb64);
-	return ret;
+	if (unlikely(!access_ok(VERIFY_READ, iocb, (nr * sizeof(u32)))))
+		return -EFAULT;
+
+	ctx = lookup_ioctx(ctx_id);
+	if (unlikely(!ctx))
+		return -EINVAL;
+
+	for (i=0; i<nr; i++) {
+		compat_uptr_t uptr;
+		struct iocb __user *user_iocb;
+		struct iocb tmp;
+
+		if (unlikely(get_user(uptr, iocb + i))) {
+			ret = -EFAULT;
+			break;
+		}
+
+		user_iocb = compat_ptr(uptr);
+
+		if (unlikely(copy_from_user(&tmp, user_iocb, sizeof(tmp)))) {
+			ret = -EFAULT;
+			break;
+		}
+
+		ret = io_submit_one(ctx, user_iocb, &tmp);
+		if (ret)
+			break;
+	}
+
+	put_ioctx(ctx);
+	return i ? i: ret;
 }
 
 struct compat_ncp_mount_data {

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH -mm 1/5][AIO] - Rework compat_sys_io_submit
  2006-11-30 15:38 [PATCH -mm 0/5][AIO] - AIO completion signal notification v4 Sébastien Dugué
@ 2006-11-30 15:49 ` Sébastien Dugué
  0 siblings, 0 replies; 25+ messages in thread
From: Sébastien Dugué @ 2006-11-30 15:49 UTC (permalink / raw)
  To: linux-kernel
  Cc: Christoph Hellwig, linux-aio, Andrew Morton,
	Suparna Bhattacharya, Zach Brown, Badari Pulavarty,
	Ulrich Drepper, Jean Pierre Dion, Bharata B Rao


                 compat_sys_io_submit() cleanup


  Cleanup compat_sys_io_submit by duplicating some of the native syscall
logic in the compat layer and directly calling io_submit_one() instead
of fooling the syscall into thinking it is called from a native 64-bit
caller.

  This eliminates:

  - the overhead of copying the nr iocb pointers on the userspace stack

  - the PAGE_SIZE/(sizeof(void *)) limit on the number of iocbs that
    can be submitted.

  This is also needed for the completion notification patch to avoid having
to rewrite each iocb on the caller stack for io_submit_one() to find the
sigevents.


 compat.c |   61 ++++++++++++++++++++++++++++++++++---------------------------
 1 file changed, 34 insertions(+), 27 deletions(-)

Signed-off-by: Sébastien Dugué <sebastien.dugue@bull.net>

Index: linux-2.6.19-rc6-mm2/fs/compat.c
===================================================================
--- linux-2.6.19-rc6-mm2.orig/fs/compat.c	2006-11-30 10:00:18.000000000 +0100
+++ linux-2.6.19-rc6-mm2/fs/compat.c	2006-11-30 13:12:32.000000000 +0100
@@ -642,40 +642,47 @@ out:
 	return ret;
 }
 
-static inline long
-copy_iocb(long nr, u32 __user *ptr32, struct iocb __user * __user *ptr64)
-{
-	compat_uptr_t uptr;
-	int i;
-
-	for (i = 0; i < nr; ++i) {
-		if (get_user(uptr, ptr32 + i))
-			return -EFAULT;
-		if (put_user(compat_ptr(uptr), ptr64 + i))
-			return -EFAULT;
-	}
-	return 0;
-}
-
-#define MAX_AIO_SUBMITS 	(PAGE_SIZE/sizeof(struct iocb *))
-
 asmlinkage long
 compat_sys_io_submit(aio_context_t ctx_id, int nr, u32 __user *iocb)
 {
-	struct iocb __user * __user *iocb64; 
-	long ret;
+	struct kioctx *ctx;
+	long ret = 0;
+	int i;
 
 	if (unlikely(nr < 0))
 		return -EINVAL;
 
-	if (nr > MAX_AIO_SUBMITS)
-		nr = MAX_AIO_SUBMITS;
-	
-	iocb64 = compat_alloc_user_space(nr * sizeof(*iocb64));
-	ret = copy_iocb(nr, iocb, iocb64);
-	if (!ret)
-		ret = sys_io_submit(ctx_id, nr, iocb64);
-	return ret;
+	if (unlikely(!access_ok(VERIFY_READ, iocb, (nr * sizeof(u32)))))
+		return -EFAULT;
+
+	ctx = lookup_ioctx(ctx_id);
+	if (unlikely(!ctx))
+		return -EINVAL;
+
+	for (i=0; i<nr; i++) {
+		compat_uptr_t uptr;
+		struct iocb __user *user_iocb;
+		struct iocb tmp;
+
+		if (unlikely(get_user(uptr, iocb + i))) {
+			ret = -EFAULT;
+			break;
+		}
+
+		user_iocb = compat_ptr(uptr);
+
+		if (unlikely(copy_from_user(&tmp, user_iocb, sizeof(tmp)))) {
+			ret = -EFAULT;
+			break;
+		}
+
+		ret = io_submit_one(ctx, user_iocb, &tmp);
+		if (ret)
+			break;
+	}
+
+	put_ioctx(ctx);
+	return i ? i: ret;
 }
 
 struct compat_ncp_mount_data {

^ permalink raw reply	[flat|nested] 25+ messages in thread

end of thread, other threads:[~2007-01-17  9:59 UTC | newest]

Thread overview: 25+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-11-29 10:24 [PATCH -mm 0/5][AIO] - AIO completion signal notification v3 Sébastien Dugué
2006-11-29 10:32 ` [PATCH -mm 1/5][AIO] - Rework compat_sys_io_submit Sébastien Dugué
2006-11-30  0:47   ` Zach Brown
2006-11-30  9:57     ` Sébastien Dugué
2006-11-30 17:27       ` Zach Brown
2006-11-29 10:32 ` [PATCH -mm 2/5][AIO] - fix aio.h includes Sébastien Dugué
2006-11-29 10:32 ` [PATCH -mm 3/5][AIO] - export good_sigevent() Sébastien Dugué
2006-11-29 10:38   ` Christoph Hellwig
2006-11-29 10:46     ` Sébastien Dugué
2006-11-29 14:54   ` Christoph Hellwig
2006-11-29 16:10     ` Sébastien Dugué
2006-12-04 17:13   ` Bharata B Rao
2006-12-05  8:30     ` Sébastien Dugué
2006-11-29 10:33 ` [PATCH -mm 4/5][AIO] - AIO completion signal notification Sébastien Dugué
2006-11-29 10:51   ` Christoph Hellwig
2006-11-29 13:08     ` Sébastien Dugué
2006-11-29 13:50       ` Christoph Hellwig
2006-11-29 14:18         ` Sébastien Dugué
2006-11-29 11:33   ` Jakub Jelinek
2006-11-29 13:25     ` Sébastien Dugué
2006-11-29 10:33 ` [PATCH -mm 5/5][AIO] - Listio support Sébastien Dugué
2006-11-30  8:25   ` Suparna Bhattacharya
2006-11-30 10:04     ` Sébastien Dugué
2006-11-30 15:38 [PATCH -mm 0/5][AIO] - AIO completion signal notification v4 Sébastien Dugué
2006-11-30 15:49 ` [PATCH -mm 1/5][AIO] - Rework compat_sys_io_submit Sébastien Dugué
     [not found] <20070117104601.36b2ab18@frecb000686>
2007-01-17  9:48 ` Sébastien Dugué

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).