LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [patch] add epoll compat code to kernel/compat.c ...
@ 2007-02-11 20:15 Davide Libenzi
  2007-02-11 21:47 ` Heiko Carstens
  0 siblings, 1 reply; 3+ messages in thread
From: Davide Libenzi @ 2007-02-11 20:15 UTC (permalink / raw)
  To: Andrew Morton; +Cc: David Woodhouse, Linux Kernel Mailing List


Add epoll compat_ code to kernel/compat.c. IA64 and ARM-OABI are currently 
using their own version of epoll compat_ code and they could probably wire 
to the new common code. Patch over 2.6.20.


Signed-off-by: Davide Libenzi <davidel@xmailserver.org>


- Davide



diff -Nru linux-2.6.20/fs/eventpoll.c linux-2.6.20.mod/fs/eventpoll.c
--- linux-2.6.20/fs/eventpoll.c	2007-02-04 10:44:54.000000000 -0800
+++ linux-2.6.20.mod/fs/eventpoll.c	2007-02-11 12:03:50.000000000 -0800
@@ -544,8 +544,7 @@
  * file descriptors inside the interest set.  It represents
  * the kernel part of the user space epoll_ctl(2).
  */
-asmlinkage long
-sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event)
+asmlinkage long sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event)
 {
 	int error;
 	struct file *file, *tfile;
@@ -707,8 +706,8 @@
  * part of the user space epoll_pwait(2).
  */
 asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events,
-		int maxevents, int timeout, const sigset_t __user *sigmask,
-		size_t sigsetsize)
+				int maxevents, int timeout, const sigset_t __user *sigmask,
+				size_t sigsetsize)
 {
 	int error;
 	sigset_t ksigmask, sigsaved;
diff -Nru linux-2.6.20/include/linux/compat.h linux-2.6.20.mod/include/linux/compat.h
--- linux-2.6.20/include/linux/compat.h	2007-02-09 16:14:20.000000000 -0800
+++ linux-2.6.20.mod/include/linux/compat.h	2007-02-11 12:03:50.000000000 -0800
@@ -234,5 +234,22 @@
 		compat_ulong_t maxnode, const compat_ulong_t __user *old_nodes,
 		const compat_ulong_t __user *new_nodes);
 
+/*
+ * epoll (fs/eventpoll.c) compat bits follow ...
+ */
+struct compat_epoll_event {
+	u32 events;	/* copied to/from epoll_event.events unchanged */
+	u32 data[2];	/* the two 32-bit words of the 64-bit epoll_event.data */
+};
+
+asmlinkage long compat_sys_epoll_ctl(int epfd, int op, int fd,
+				     struct compat_epoll_event __user *event);
+asmlinkage long compat_sys_epoll_wait(int epfd, struct compat_epoll_event __user *events,
+				      int maxevents, int timeout);
+asmlinkage long compat_sys_epoll_pwait(int epfd, struct compat_epoll_event __user *events,
+				       int maxevents, int timeout,
+				       const compat_sigset_t __user *sigmask,
+				       compat_size_t sigsetsize);
+
 #endif /* CONFIG_COMPAT */
 #endif /* _LINUX_COMPAT_H */
diff -Nru linux-2.6.20/kernel/compat.c linux-2.6.20.mod/kernel/compat.c
--- linux-2.6.20/kernel/compat.c	2007-02-04 10:44:54.000000000 -0800
+++ linux-2.6.20.mod/kernel/compat.c	2007-02-11 12:13:08.000000000 -0800
@@ -23,6 +23,7 @@
 #include <linux/timex.h>
 #include <linux/migrate.h>
 #include <linux/posix-timers.h>
+#include <linux/eventpoll.h>
 
 #include <asm/uaccess.h>
 
@@ -1016,3 +1017,157 @@
 	return sys_migrate_pages(pid, nr_bits + 1, old, new);
 }
 #endif
+
+
+#ifdef CONFIG_EPOLL
+
+/*
+ * epoll (fs/eventpoll.c) compat functions follow ...
+ *
+ * NOTE(review): assumes every 32-bit ABI puts "data" at offset 4 - confirm per arch.
+ * We need the compat layer over the epoll_event structure only if the offset
+ * of the __u64 data member is not 4 (the size of the events member that
+ * precedes the data one).
+ */
+#define EPOLL_NEED_EVENT_COMPAT() (offsetof(struct epoll_event, data) != 4)
+
+
+asmlinkage long compat_sys_epoll_ctl(int epfd, int op, int fd,
+				     struct compat_epoll_event __user *event)
+{
+	long ret;
+
+	/*
+	 * If compat is not needed, this simply maps to a jump to sys_epoll_ctl(),
+	 * with the "else" code being dropped by GCC.
+	 */
+	if (!EPOLL_NEED_EVENT_COMPAT() || op == EPOLL_CTL_DEL)
+		ret = sys_epoll_ctl(epfd, op, fd, (struct epoll_event __user *) event);
+	else {
+		struct compat_epoll_event user;
+		struct epoll_event __user *kernel;
+		union {
+			u64 q;
+			u32 d[2];
+		} mux;
+
+		/*
+		 * The "event" pointer may be NULL in the EPOLL_CTL_DEL case,
+		 * but that case is handled above, so here we know "event" should
+		 * not be NULL.
+		 */
+		if (copy_from_user(&user, event, sizeof(user)))
+			return -EFAULT;
+		kernel = compat_alloc_user_space(sizeof(struct epoll_event));
+		ret = __put_user(user.events, &kernel->events);
+		mux.d[0] = user.data[0];	/* rebuild the u64 from its two words */
+		mux.d[1] = user.data[1];
+		if ((ret |= __put_user(mux.q, &kernel->data)) == 0)
+			ret = sys_epoll_ctl(epfd, op, fd, kernel);
+	}
+	
+	return ret;
+}
+
+
+asmlinkage long compat_sys_epoll_wait(int epfd, struct compat_epoll_event __user *events,
+				      int maxevents, int timeout)
+{
+	long ret;
+
+	/*
+	 * The compat_sys_epoll_pwait() function is calling this one. We do need a
+	 * compat function for sys_epoll_pwait() due to the sigset_t size, but not
+	 * every architecture might need a compat layer over sys_epoll_wait().
+	 * With the compile-time test below, a call to compat_sys_epoll_wait() that
+	 * does not need a translation maps directly to sys_epoll_wait(), avoiding
+	 * the double buffer copy for events (that might indeed blow cache and kill
+	 * performance). GCC takes care of removing the unused code (the condition
+	 * being known at compile-time), and issues a simple jump to sys_epoll_wait().
+	 */
+	if (EPOLL_NEED_EVENT_COMPAT()) {
+		struct epoll_event __user *kbuf;
+		struct epoll_event ev;
+		long err, i;
+		union {
+			u64 q;
+			u32 d[2];
+		} mux;
+
+		if (maxevents <= 0 || maxevents > (INT_MAX / sizeof(struct epoll_event)))
+			return -EINVAL;
+		kbuf = compat_alloc_user_space(sizeof(struct epoll_event) * maxevents);
+		ret = sys_epoll_wait(epfd, kbuf, maxevents, timeout);
+		err = 0;
+		for (i = 0; i < ret; i++) {
+			err |= __get_user(ev.events, &kbuf[i].events);
+			err |= __get_user(ev.data, &kbuf[i].data);
+			err |= put_user(ev.events, &events->events);
+			mux.q = ev.data;	/* split the u64 into its two words */
+			err |= put_user(mux.d[0], &events->data[0]);
+			err |= put_user(mux.d[1], &events->data[1]);
+			events++;
+		}
+		ret = err ? -EFAULT: ret;	
+	} else
+		ret = sys_epoll_wait(epfd, (struct epoll_event __user *) events,
+				     maxevents, timeout);
+
+	return ret;
+}
+
+
+#ifdef TIF_RESTORE_SIGMASK
+
+asmlinkage long compat_sys_epoll_pwait(int epfd, struct compat_epoll_event __user *events,
+				       int maxevents, int timeout,
+				       const compat_sigset_t __user *sigmask,
+				       compat_size_t sigsetsize)
+{
+	int error;
+	compat_sigset_t ss32;
+	sigset_t ksigmask, sigsaved;
+
+	/*
+	 * If the caller wants a certain signal mask to be set during the wait,
+	 * we apply it here (with SIGKILL and SIGSTOP removed from it).
+	 */
+	if (sigmask) {
+		if (sigsetsize != sizeof(compat_sigset_t))
+			return -EINVAL;
+		if (copy_from_user(&ss32, sigmask, sizeof(ss32)))
+			return -EFAULT;
+		sigset_from_compat(&ksigmask, &ss32);
+		sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP));
+		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);		
+	}
+
+	/* Compile-time switch on EPOLL_NEED_EVENT_COMPAT() ... */
+	if (EPOLL_NEED_EVENT_COMPAT())
+		error = compat_sys_epoll_wait(epfd, events, maxevents, timeout);
+	else
+		error = sys_epoll_wait(epfd, (struct epoll_event __user *) events,
+				       maxevents, timeout);
+
+	/*
+	 * If we changed the signal mask, we need to restore the original one.
+	 * If the wait returned -EINTR, we do not restore the signal mask yet:
+	 * we save it and set TIF_RESTORE_SIGMASK, allowing do_signal() to deliver
+	 * the signal on the way back to userspace, before the mask is restored.
+	 */
+	if (sigmask) {
+		if (error == -EINTR) {
+			memcpy(&current->saved_sigmask, &sigsaved, 
+			       sizeof(sigsaved));
+			set_thread_flag(TIF_RESTORE_SIGMASK);
+		} else
+			sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+	}
+
+	return error;	
+}
+
+#endif /* #ifdef TIF_RESTORE_SIGMASK */
+
+#endif /* #ifdef CONFIG_EPOLL */
+

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [patch] add epoll compat code to kernel/compat.c ...
  2007-02-11 20:15 [patch] add epoll compat code to kernel/compat.c Davide Libenzi
@ 2007-02-11 21:47 ` Heiko Carstens
  2007-02-11 23:18   ` Davide Libenzi
  0 siblings, 1 reply; 3+ messages in thread
From: Heiko Carstens @ 2007-02-11 21:47 UTC (permalink / raw)
  To: Davide Libenzi
  Cc: Andrew Morton, David Woodhouse, Linux Kernel Mailing List,
	Martin Schwidefsky

On Sun, Feb 11, 2007 at 12:15:24PM -0800, Davide Libenzi wrote:
> 
> Add epoll compat_ code to kernel/compat.c. IA64 and ARM-OABI are currently 
> using their own version of epoll compat_ code and they could probably wire 
> to the new common code. Patch over 2.6.20.
> + * epoll (fs/eventpoll.c) compat bits follow ...
> + */
> +struct compat_epoll_event {
> +	u32 events;
> +	u32 data[2];
> +};
> +
>[...]
> +
> + * We need the compat layer over the epoll_event structure, only if the offset
> + * of the __u64 data member is not 4 (size of the events member that precedes the
> + * data one).
> + */
> +#define EPOLL_NEED_EVENT_COMPAT() (offsetof(struct epoll_event, data) != 4)

With

struct epoll_event {
        __u32 events;
        __u64 data;
};

this won't work on s390. offsetof(struct epoll_event, data) is 8 on both
31 bit and 64 bit. So it will do the conversion and corrupt all the data.
Actually we would only need the compat conversion for the sigset_t stuff.

But then again I thought most 32 bit architectures would add a 4 byte
pad between events and data, no?

Maybe we need some arch dependent struct compat_epoll_event and have
something like
#define EPOLL_NEED_EVENT_COMPAT() \
(offsetof(struct epoll_event, data) != offsetof(struct compat_epoll_event, data))

?

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [patch] add epoll compat code to kernel/compat.c ...
  2007-02-11 21:47 ` Heiko Carstens
@ 2007-02-11 23:18   ` Davide Libenzi
  0 siblings, 0 replies; 3+ messages in thread
From: Davide Libenzi @ 2007-02-11 23:18 UTC (permalink / raw)
  To: Heiko Carstens
  Cc: Andrew Morton, David Woodhouse, Linux Kernel Mailing List,
	Martin Schwidefsky

On Sun, 11 Feb 2007, Heiko Carstens wrote:

> On Sun, Feb 11, 2007 at 12:15:24PM -0800, Davide Libenzi wrote:
> > 
> > Add epoll compat_ code to kernel/compat.c. IA64 and ARM-OABI are currently 
> > using their own version of epoll compat_ code and they could probably wire 
> > to the new common code. Patch over 2.6.20.
> > + * epoll (fs/eventpoll.c) compat bits follow ...
> > + */
> > +struct compat_epoll_event {
> > +	u32 events;
> > +	u32 data[2];
> > +};
> > +
> >[...]
> > +
> > + * We need the compat layer over the epoll_event structure, only if the offset
> > + * of the __u64 data member is not 4 (size of the events member that precedes the
> > + * data one).
> > + */
> > +#define EPOLL_NEED_EVENT_COMPAT() (offsetof(struct epoll_event, data) != 4)
> 
> With
> 
> struct epoll_event {
>         __u32 events;
>         __u64 data;
> };
> 
> this won't work on s390. offsetof(struct epoll_event, data) is 8 on both
> 31 bit and 64 bit. So it will do the conversion and corrupt all the data.
> Actually we would only need the compat conversion for the sigset_t stuff.

Yup, that's broken not only on s390, but on every arch with alignof(u64) == 8
in 32-bit mode.
The assumption was that for cases like the above, you simply wouldn't wire 
the compat_ version. That is true for epoll_wait and epoll_ctl, where the 
only need for compat was the "struct epoll_event". But that's not true for 
epoll_pwait, since this one needs to be wired because of the sigset_t.
On top of sigset_t, epoll_pwait may need "struct epoll_event" translation.
Now, that *really* sux because two versions of compat_epoll_pwait are 
needed, one that does sigset_t translation only, and one that does 
sigset_t + "struct epoll_event".



> But then again I thought most 32 bit architectures would add a 4 byte
> pad between events and data, no?

i386 does not, for example ;)



> Maybe we need some arch dependent struct compat_epoll_event and have
> something like
> #define EPOLL_NEED_EVENT_COMPAT() \
> (offsetof(struct epoll_event, data) != offsetof(struct compat_epoll_event, data))
> 
> ?

No, it won't work. Unless there is (or we define) a per-arch macro that 
tells us how the 32-bit mode aligns a u64, I'm afraid we can't do any 
smart tricks and we need to have two versions of compat_epoll_pwait.



- Davide



^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2007-02-11 23:18 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-02-11 20:15 [patch] add epoll compat code to kernel/compat.c Davide Libenzi
2007-02-11 21:47 ` Heiko Carstens
2007-02-11 23:18   ` Davide Libenzi

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).