LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [patch 00/22] pollfs: filesystem abstraction for pollable objects
@ 2007-05-02 5:22 Davi Arnaut
2007-05-02 5:22 ` [patch 01/22] pollfs: kernel-side API header Davi Arnaut
` (22 more replies)
0 siblings, 23 replies; 71+ messages in thread
From: Davi Arnaut @ 2007-05-02 5:22 UTC (permalink / raw)
To: Andrew Morton; +Cc: Davide Libenzi, Linus Torvalds, Linux Kernel Mailing List
This patch set introduces a new file system for the delivery of pollable
events through file descriptors. To the detriment of debuggability, pollable
objects are a nice adjunct to nonblocking/epoll/event-based servers.
The pollfs filesystem abstraction provides better mechanisms needed for
creating and maintaining pollable objects. Also the pollable futex approach
is far superior (send and receive events from userspace or kernel) to eventfd
and fixes (supersedes) FUTEX_FD at the same time.
The (non) blocking and object size (user <-> kernel) semantics are handled
internally, decoupling the core filesystem from the "subsystems" (mere push and
pop operations).
Currently implemented waitable "objects" are: signals, futexes, AIO blocks and
timers.
More details at each patch.
http://haxent.com/~davi/pollfs/
Comments are welcome.
--
Davi Arnaut
^ permalink raw reply [flat|nested] 71+ messages in thread
* [patch 01/22] pollfs: kernel-side API header
2007-05-02 5:22 [patch 00/22] pollfs: filesystem abstraction for pollable objects Davi Arnaut
@ 2007-05-02 5:22 ` Davi Arnaut
2007-05-02 5:22 ` [patch 02/22] pollfs: file system operations Davi Arnaut
` (21 subsequent siblings)
22 siblings, 0 replies; 71+ messages in thread
From: Davi Arnaut @ 2007-05-02 5:22 UTC (permalink / raw)
To: Andrew Morton; +Cc: Davide Libenzi, Linus Torvalds, Linux Kernel Mailing List
[-- Attachment #1: pollfs-api.patch --]
[-- Type: text/plain, Size: 1940 bytes --]
Add pollfs_fs.h header which contains the kernel-side declarations
and auxiliary macros for type safety checks. Those macros can be
simplified later.
Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
---
include/linux/pollfs_fs.h | 57 ++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 57 insertions(+)
Index: linux-2.6/include/linux/pollfs_fs.h
===================================================================
--- /dev/null
+++ linux-2.6/include/linux/pollfs_fs.h
@@ -0,0 +1,57 @@
+/*
+ * pollfs, a naive filesystem for pollable (waitable) files (objects)
+ *
+ * Copyright (C) 2007 Davi E. M. Arnaut
+ *
+ */
+
+#ifndef _LINUX_POLL_FS_H
+#define _LINUX_POLL_FS_H
+
+#ifdef __KERNEL__
+
+#include <linux/types.h>
+#include <linux/dcache.h>
+#include <linux/fs.h>
+
+#define PFS_CHECK_CALLBACK_1(f, a) (void*) \
+ (sizeof((f)((typeof(a *))0)))
+/* PFS_CHECK_CALLBACK_{1,2}: sizeof() of a call to f with null pointers typed "a *" (and "b *"), used for the type safety check on f's arguments. */
+#define PFS_CHECK_CALLBACK_2(f, a, b) (void*) \
+ (sizeof((f)((typeof(a *))0, (typeof(b*))0)))
+/* The wrappers below cast func to the untyped callback signature; the "0 ?" arm is never selected and only carries the check. */
+#define PFS_WRITE(func, type, utype) \
+ (ssize_t (*)(void *, const void __user *)) \
+ (0 ? PFS_CHECK_CALLBACK_2(func, type, utype) : func)
+
+#define PFS_READ(func, type, utype) \
+ (ssize_t (*)(void *, void __user *)) \
+ (0 ? PFS_CHECK_CALLBACK_2(func, type, utype) : func)
+
+#define PFS_POLL(func, type) \
+ (int (*)(void *))(0 ? PFS_CHECK_CALLBACK_1(func, type) : func)
+
+#define PFS_RELEASE(func, type) \
+ (int (*)(void *))(0 ? PFS_CHECK_CALLBACK_1(func, type) : func)
+
+struct pfs_operations { /* per-handler callbacks; the first argument of each is pfs_file.data */
+ ssize_t (*read)(void *, void __user *); /* copy one object out to user space; 0 on success */
+ ssize_t (*write)(void *, const void __user *); /* take one object from user space; 0 on success */
+ int (*mmap)(void *, struct vm_area_struct *); /* optional; pfs_mmap() returns -ENODEV when unset */
+ int (*poll)(void *); /* current poll event mask; also the wake-up condition for blocking reads */
+ int (*release)(void *); /* called from pfs_release() */
+ size_t rsize; /* bytes per object handed to read; 0 means one call per read() */
+ size_t wsize; /* bytes per object handed to write; 0 means one call per write() */
+};
+
+struct pfs_file { /* glue between a handler and the file created by pfs_open() */
+ void *data; /* opaque handler state, passed as first argument to every callback */
+ wait_queue_head_t *wait; /* queue registered by pfs_poll() and slept on by blocking reads */
+ const struct pfs_operations *fops; /* handler callbacks and object sizes */
+};
+
+long pfs_open(struct pfs_file *pfs);
+
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_POLL_FS_H */
--
^ permalink raw reply [flat|nested] 71+ messages in thread
* [patch 02/22] pollfs: file system operations
2007-05-02 5:22 [patch 00/22] pollfs: filesystem abstraction for pollable objects Davi Arnaut
2007-05-02 5:22 ` [patch 01/22] pollfs: kernel-side API header Davi Arnaut
@ 2007-05-02 5:22 ` Davi Arnaut
2007-05-02 5:22 ` [patch 03/22] pollfs: asynchronously wait for a signal Davi Arnaut
` (20 subsequent siblings)
22 siblings, 0 replies; 71+ messages in thread
From: Davi Arnaut @ 2007-05-02 5:22 UTC (permalink / raw)
To: Andrew Morton; +Cc: Davide Libenzi, Linus Torvalds, Linux Kernel Mailing List
[-- Attachment #1: pollfs-core.patch --]
[-- Type: text/plain, Size: 6637 bytes --]
The key feature of the pollfs file operations is to internally handle
pollable (waitable) resources as files without exporting complex and
bug-prone underlying (VFS) implementation details.
All resource handlers are required to implement the read, write, poll,
release operations and must not block.
Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
---
fs/Makefile | 1
fs/pollfs/Makefile | 2
fs/pollfs/file.c | 238 +++++++++++++++++++++++++++++++++++++++++++++++++++++
init/Kconfig | 6 +
4 files changed, 247 insertions(+)
Index: linux-2.6/fs/pollfs/file.c
===================================================================
--- /dev/null
+++ linux-2.6/fs/pollfs/file.c
@@ -0,0 +1,238 @@
+/*
+ * Copyright (C) 2007 Davi E. M. Arnaut
+ *
+ * Licensed under the GNU GPL. See the file COPYING for details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/wait.h>
+#include <asm/uaccess.h>
+#include <linux/poll.h>
+#include <linux/file.h>
+#include <linux/mount.h>
+#include <linux/pollfs_fs.h>
+
+#define POLLFS_MAGIC 0x9a6afcd
+
+MODULE_LICENSE("GPL");
+
+/* pollfs vfsmount entry */
+static struct vfsmount *pfs_mnt;
+
+/* pollfs file operations */
+static const struct file_operations pfs_fops;
+
+static inline ssize_t
+pfs_read_nonblock(const struct pfs_operations *fops, void *data,
+ void __user *obj, size_t nr)
+{
+ ssize_t count = 0, res = 0;
+ /* Read up to nr objects without sleeping; stop at the first non-zero result from the handler. */
+ do {
+ res = fops->read(data, obj);
+ if (res)
+ break;
+ count++;
+ obj += fops->rsize; /* advance to the next object slot in the user buffer */
+ } while (--nr);
+
+ if (count)
+ return count * fops->rsize; /* bytes delivered; an error after a partial read is dropped */
+ else if (res)
+ return res; /* nothing delivered: pass the handler's result through */
+ else
+ return -EAGAIN; /* not reachable: count == 0 means the first read returned non-zero */
+}
+
+static inline ssize_t
+pfs_read_block(const struct pfs_operations *fops, void *data,
+ wait_queue_head_t *wait, void __user *obj, size_t nr)
+{
+ ssize_t count;
+ /* Blocking read: retry the non-blocking path, sleeping on *wait for as long as it reports -EAGAIN. */
+ do {
+ count = pfs_read_nonblock(fops, data, obj, nr);
+ if (count != -EAGAIN)
+ break; /* data was read or a real error occurred */
+ count = wait_event_interruptible((*wait), fops->poll(data)); /* sleep until the handler's poll() is non-zero */
+ } while (!count); /* a non-zero wait result ends the loop and is returned */
+
+ return count;
+}
+
+static ssize_t pfs_read(struct file *filp, char __user *buf,
+ size_t count, loff_t * pos)
+{
+ size_t nevents = count;
+ struct pfs_file *pfs = filp->private_data;
+ const struct pfs_operations *fops = pfs->fops;
+ /* file_operations.read: turn the byte count into whole objects and dispatch on O_NONBLOCK; pos is unused. */
+ if (fops->rsize)
+ nevents /= fops->rsize;
+ else
+ nevents = 1; /* handlers with rsize == 0 get exactly one call */
+
+ if (!nevents)
+ return -EINVAL; /* buffer is smaller than one object */
+
+ if (filp->f_flags & O_NONBLOCK)
+ return pfs_read_nonblock(fops, pfs->data, buf, nevents);
+ else
+ return pfs_read_block(fops, pfs->data, pfs->wait, buf, nevents);
+}
+
+static ssize_t pfs_write(struct file *filp, const char __user *buf,
+ size_t count, loff_t * ppos)
+{
+ ssize_t res = 0;
+ size_t nevents = count;
+ struct pfs_file *pfs = filp->private_data;
+ const struct pfs_operations *fops = pfs->fops;
+ /* file_operations.write: feed the user buffer to the handler one object at a time; never sleeps here and ppos is unused. */
+ if (fops->wsize)
+ nevents /= fops->wsize;
+ else
+ nevents = 1; /* handlers with wsize == 0 get exactly one call */
+
+ if (!nevents)
+ return -EINVAL; /* buffer is smaller than one object */
+
+ count = 0; /* reused from here on as the number of objects accepted */
+
+ do {
+ res = fops->write(pfs->data, buf);
+ if (res)
+ break; /* stop at the first non-zero result from the handler */
+ count++;
+ buf += fops->wsize;
+ } while (--nevents);
+
+ if (count)
+ return count * fops->wsize; /* bytes accepted; an error after a partial write is dropped */
+ else if (res)
+ return res;
+ else
+ return 0; /* not reachable: count == 0 means the first write returned non-zero */
+}
+
+static unsigned int pfs_poll(struct file *filp, struct poll_table_struct *wait)
+{
+ int ret = 0;
+ struct pfs_file *pfs = filp->private_data;
+ /* file_operations.poll: register on the handler's wait queue, then ask the handler for its event mask. */
+ poll_wait(filp, pfs->wait, wait);
+
+ if (pfs->fops->poll)
+ ret = pfs->fops->poll(pfs->data);
+ else
+ ret = POLLIN; /* no handler poll: always report readable (pfs_open() rejects such ops, so this is a fallback) */
+
+ return ret;
+}
+
+static int pfs_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+ struct pfs_file *pfs = filp->private_data;
+ /* file_operations.mmap: forward to the handler's optional mmap callback. */
+ return (pfs->fops->mmap) ? pfs->fops->mmap(pfs->data, vma) : -ENODEV; /* -ENODEV when the handler has no mmap */
+}
+
+static int pfs_release(struct inode *inode, struct file *filp)
+{
+ struct pfs_file *pfs = filp->private_data;
+ /* file_operations.release: hand the handler state back to its release callback; inode is unused. */
+ return pfs->fops->release(pfs->data); /* NOTE(review): called without a NULL check on ->release */
+}
+
+static const struct file_operations pfs_fops = { /* installed on every file created by pfs_open() */
+ .poll = pfs_poll,
+ .mmap = pfs_mmap,
+ .read = pfs_read,
+ .write = pfs_write,
+ .release = pfs_release
+};
+
+long pfs_open(struct pfs_file *pfs)
+{
+ int fd;
+ struct file *filp;
+ const struct pfs_operations *fops = pfs->fops;
+ /* Bind pfs to a new file on the pollfs mount and install it; returns the new fd or a negative errno. */
+ if (IS_ERR(pfs_mnt))
+ return -ENOSYS; /* the internal mount failed in pollfs_init() */
+
+ if (!fops->poll || !fops->release || !fops->read || !fops->write) /* pfs_release() calls ->release unconditionally */
+ return -EINVAL;
+
+ fd = get_unused_fd();
+ if (fd < 0)
+ return fd; /* propagate the allocator's own errno instead of masking it as -ENFILE */
+
+ filp = get_empty_filp();
+ if (!filp) {
+ put_unused_fd(fd);
+ return -ENFILE;
+ }
+
+ filp->f_op = &pfs_fops;
+ filp->f_path.mnt = mntget(pfs_mnt);
+ filp->f_path.dentry = dget(pfs_mnt->mnt_root);
+ filp->f_mapping = filp->f_path.dentry->d_inode->i_mapping;
+ filp->f_mode = 0;
+ filp->f_flags = 0;
+ filp->private_data = pfs; /* only referenced, not copied: pfs must stay valid until ->release runs */
+
+ if (fops->read) {
+ filp->f_flags = O_RDONLY;
+ filp->f_mode |= FMODE_READ;
+ }
+
+ if (fops->write) {
+ filp->f_flags = O_WRONLY;
+ filp->f_mode |= FMODE_WRITE;
+ }
+
+ if (fops->write && fops->read)
+ filp->f_flags = O_RDWR; /* always the final value here, since both callbacks are mandatory above */
+
+ fd_install(fd, filp);
+
+ return fd;
+}
+
+EXPORT_SYMBOL(pfs_open);
+
+static int pfs_get_sb(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *data, struct vfsmount *mnt)
+{ /* get_sb hook: delegates to get_sb_pseudo(); flags, dev_name and data are unused */
+ return get_sb_pseudo(fs_type, "pollfs", NULL, POLLFS_MAGIC, mnt);
+}
+
+static struct file_system_type pollfs_type = { /* registered and mounted by pollfs_init() */
+ .name = "pollfs",
+ .get_sb = pfs_get_sb,
+ .kill_sb = kill_anon_super
+};
+
+static int __init pollfs_init(void)
+{
+ int ret;
+ /* Register the pollfs type and create the kernel-internal mount that pfs_open() attaches files to. */
+ ret = register_filesystem(&pollfs_type);
+ if (ret)
+ return ret;
+
+ pfs_mnt = kern_mount(&pollfs_type);
+ if (IS_ERR(pfs_mnt)) {
+ ret = PTR_ERR(pfs_mnt); /* pfs_mnt keeps the error pointer; pfs_open() tests it with IS_ERR() */
+ unregister_filesystem(&pollfs_type);
+ }
+
+ return ret; /* 0 on success, otherwise the registration or mount error */
+}
+
+__initcall(pollfs_init);
Index: linux-2.6/init/Kconfig
===================================================================
--- linux-2.6.orig/init/Kconfig
+++ linux-2.6/init/Kconfig
@@ -463,6 +463,12 @@ config EPOLL
Disabling this option will cause the kernel to be built without
support for epoll family of system calls.
+config POLLFS
+ bool "Enable pollfs support" if EMBEDDED
+ default y
+ help
+ Pollfs support
+
config SHMEM
bool "Use full shmem filesystem" if EMBEDDED
default y
Index: linux-2.6/fs/Makefile
===================================================================
--- linux-2.6.orig/fs/Makefile
+++ linux-2.6/fs/Makefile
@@ -114,3 +114,4 @@ obj-$(CONFIG_HPPFS) += hppfs/
obj-$(CONFIG_DEBUG_FS) += debugfs/
obj-$(CONFIG_OCFS2_FS) += ocfs2/
obj-$(CONFIG_GFS2_FS) += gfs2/
+obj-$(CONFIG_POLLFS) += pollfs/
Index: linux-2.6/fs/pollfs/Makefile
===================================================================
--- /dev/null
+++ linux-2.6/fs/pollfs/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_POLLFS) += pollfs.o
+pollfs-y := file.o
--
^ permalink raw reply [flat|nested] 71+ messages in thread
* [patch 03/22] pollfs: asynchronously wait for a signal
2007-05-02 5:22 [patch 00/22] pollfs: filesystem abstraction for pollable objects Davi Arnaut
2007-05-02 5:22 ` [patch 01/22] pollfs: kernel-side API header Davi Arnaut
2007-05-02 5:22 ` [patch 02/22] pollfs: file system operations Davi Arnaut
@ 2007-05-02 5:22 ` Davi Arnaut
2007-05-02 5:22 ` [patch 04/22] pollfs: pollable signal Davi Arnaut
` (19 subsequent siblings)
22 siblings, 0 replies; 71+ messages in thread
From: Davi Arnaut @ 2007-05-02 5:22 UTC (permalink / raw)
To: Andrew Morton; +Cc: Davide Libenzi, Linus Torvalds, Linux Kernel Mailing List
[-- Attachment #1: pollfs-signal-wakeup.patch --]
[-- Type: text/plain, Size: 3033 bytes --]
Add a wait queue to the task_struct in order to be able to
associate (wait for) a signal with other resources.
Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
---
include/linux/init_task.h | 1 +
include/linux/sched.h | 1 +
kernel/fork.c | 1 +
kernel/signal.c | 5 +++++
4 files changed, 8 insertions(+)
Index: linux-2.6/include/linux/sched.h
===================================================================
--- linux-2.6.orig/include/linux/sched.h
+++ linux-2.6/include/linux/sched.h
@@ -939,6 +939,7 @@ struct task_struct {
sigset_t blocked, real_blocked;
sigset_t saved_sigmask; /* To be restored with TIF_RESTORE_SIGMASK */
struct sigpending pending;
+ wait_queue_head_t sigwait;
unsigned long sas_ss_sp;
size_t sas_ss_size;
Index: linux-2.6/include/linux/init_task.h
===================================================================
--- linux-2.6.orig/include/linux/init_task.h
+++ linux-2.6/include/linux/init_task.h
@@ -134,6 +134,7 @@ extern struct group_info init_groups;
.list = LIST_HEAD_INIT(tsk.pending.list), \
.signal = {{0}}}, \
.blocked = {{0}}, \
+ .sigwait = __WAIT_QUEUE_HEAD_INITIALIZER(tsk.sigwait), \
.alloc_lock = __SPIN_LOCK_UNLOCKED(tsk.alloc_lock), \
.journal_info = NULL, \
.cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \
Index: linux-2.6/kernel/fork.c
===================================================================
--- linux-2.6.orig/kernel/fork.c
+++ linux-2.6/kernel/fork.c
@@ -1034,6 +1034,7 @@ static struct task_struct *copy_process(
clear_tsk_thread_flag(p, TIF_SIGPENDING);
init_sigpending(&p->pending);
+ init_waitqueue_head(&p->sigwait);
p->utime = cputime_zero;
p->stime = cputime_zero;
Index: linux-2.6/kernel/signal.c
===================================================================
--- linux-2.6.orig/kernel/signal.c
+++ linux-2.6/kernel/signal.c
@@ -224,6 +224,8 @@ fastcall void recalc_sigpending_tsk(stru
set_tsk_thread_flag(t, TIF_SIGPENDING);
else
clear_tsk_thread_flag(t, TIF_SIGPENDING);
+
+ wake_up_interruptible_sync(&t->sigwait);
}
void recalc_sigpending(void)
@@ -759,6 +761,7 @@ static int send_signal(int sig, struct s
info->si_code >= 0)));
if (q) {
list_add_tail(&q->list, &signals->list);
+ wake_up_interruptible_sync(&t->sigwait);
switch ((unsigned long) info) {
case (unsigned long) SEND_SIG_NOINFO:
q->info.si_signo = sig;
@@ -1404,6 +1407,7 @@ int send_sigqueue(int sig, struct sigque
list_add_tail(&q->list, &p->pending.list);
sigaddset(&p->pending.signal, sig);
+ wake_up_interruptible_sync(&p->sigwait);
if (!sigismember(&p->blocked, sig))
signal_wake_up(p, sig == SIGKILL);
@@ -1453,6 +1457,7 @@ send_group_sigqueue(int sig, struct sigq
list_add_tail(&q->list, &p->signal->shared_pending.list);
sigaddset(&p->signal->shared_pending.signal, sig);
+ wake_up_interruptible_sync(&p->sigwait);
__group_complete_signal(sig, p);
out:
spin_unlock_irqrestore(&p->sighand->siglock, flags);
--
^ permalink raw reply [flat|nested] 71+ messages in thread
* [patch 04/22] pollfs: pollable signal
2007-05-02 5:22 [patch 00/22] pollfs: filesystem abstraction for pollable objects Davi Arnaut
` (2 preceding siblings ...)
2007-05-02 5:22 ` [patch 03/22] pollfs: asynchronously wait for a signal Davi Arnaut
@ 2007-05-02 5:22 ` Davi Arnaut
2007-05-02 5:22 ` [patch 05/22] pollfs: pollable signal compat code Davi Arnaut
` (18 subsequent siblings)
22 siblings, 0 replies; 71+ messages in thread
From: Davi Arnaut @ 2007-05-02 5:22 UTC (permalink / raw)
To: Andrew Morton; +Cc: Davide Libenzi, Linus Torvalds, Linux Kernel Mailing List
[-- Attachment #1: pollfs-signal.patch --]
[-- Type: text/plain, Size: 4313 bytes --]
Retrieve multiple per-process signals through a file descriptor. The mask
of signals can be changed at any time. Also, the compat code can be kept
very simple.
Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
---
fs/pollfs/Makefile | 2
fs/pollfs/signal.c | 144 +++++++++++++++++++++++++++++++++++++++++++++++++++++
init/Kconfig | 7 ++
3 files changed, 153 insertions(+)
Index: linux-2.6/fs/pollfs/signal.c
===================================================================
--- /dev/null
+++ linux-2.6/fs/pollfs/signal.c
@@ -0,0 +1,144 @@
+/*
+ * sigtimedwait4, retrieve multiple signals with one call.
+ *
+ * Copyright (C) 2007 Davi E. M. Arnaut
+ *
+ * Licensed under the GNU GPL. See the file COPYING for details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/wait.h>
+#include <asm/uaccess.h>
+#include <linux/poll.h>
+#include <linux/pollfs_fs.h>
+#include <linux/signal.h>
+
+struct pfs_signal { /* state behind one pollable-signal file descriptor */
+ sigset_t set; /* mask passed to dequeue_signal_lock(); stored after sigset_adjust() */
+ spinlock_t lock; /* taken by write() and poll() around accesses to set */
+ struct task_struct *task; /* task whose signals are read; reference held until release() */
+ struct pfs_file file; /* handed to pfs_open() */
+};
+
+static void inline sigset_adjust(sigset_t *set)
+{ /* Normalize a user-supplied signal set, in place, into the form stored in pfs_signal.set. */
+ /* SIGKILL and SIGSTOP cannot be caught, blocked, or ignored */
+ sigdelsetmask(set, sigmask(SIGKILL) | sigmask(SIGSTOP));
+
+ /* Signals we don't want to dequeue */
+ signotset(set); /* presumably complements the set in place — TODO confirm */
+}
+
+static ssize_t read(struct pfs_signal *evs, siginfo_t __user *infoup)
+{
+ int signo;
+ siginfo_t info;
+ /* pfs read callback: dequeue one signal from evs->task and copy its siginfo to user space. */
+ signo = dequeue_signal_lock(evs->task, &evs->set, &info); /* NOTE(review): evs->set is read without evs->lock, which write() holds while updating it */
+ if (!signo)
+ return -EAGAIN; /* nothing was dequeued */
+
+ if (copy_siginfo_to_user(infoup, &info))
+ return -EFAULT; /* NOTE(review): the signal is already dequeued here, so a fault loses it */
+
+ return 0;
+}
+
+static ssize_t write(struct pfs_signal *evs, const sigset_t __user *uset)
+{
+ sigset_t set;
+ /* pfs write callback: replace the stored signal mask with a new one read from user space. */
+ if (copy_from_user(&set, uset, sizeof(sigset_t)))
+ return -EFAULT;
+
+ sigset_adjust(&set);
+
+ spin_lock_irq(&evs->lock);
+ sigemptyset(&evs->set);
+ sigorsets(&evs->set, &evs->set, &set); /* OR into the just-emptied set: net effect is evs->set = set */
+ spin_unlock_irq(&evs->lock);
+
+ return 0;
+}
+
+static int poll(struct pfs_signal *evs)
+{
+ int ret = 0;
+ sigset_t pending;
+ unsigned long flags;
+ /* pfs poll callback: POLLIN when a pending signal of evs->task survives the filter in evs->set, else 0. */
+ rcu_read_lock();
+
+ if (!lock_task_sighand(evs->task, &flags))
+ goto out_unlock; /* sighand could not be locked: report nothing ready */
+
+ sigorsets(&pending, &evs->task->pending.signal,
+ &evs->task->signal->shared_pending.signal); /* union of the task-private and shared pending sets */
+
+ unlock_task_sighand(evs->task, &flags);
+
+ spin_lock_irqsave(&evs->lock, flags);
+ signandsets(&pending, &pending, &evs->set); /* filter by the stored mask (presumably pending & ~set — TODO confirm) */
+ spin_unlock_irqrestore(&evs->lock, flags);
+
+ if (!sigisemptyset(&pending))
+ ret = POLLIN;
+
+out_unlock:
+ rcu_read_unlock();
+
+ return ret;
+}
+
+static int release(struct pfs_signal *evs)
+{ /* pfs release callback, also used on the pfs_open() failure path: undo sys_plsignal()'s setup */
+ put_task_struct(evs->task); /* pairs with get_task_struct() in sys_plsignal() */
+ kfree(evs);
+
+ return 0;
+}
+
+static const struct pfs_operations signal_ops = { /* native-ABI handler table used by sys_plsignal() */
+ .read = PFS_READ(read, struct pfs_signal, siginfo_t),
+ .write = PFS_WRITE(write, struct pfs_signal, sigset_t),
+ .poll = PFS_POLL(poll, struct pfs_signal),
+ .release = PFS_RELEASE(release, struct pfs_signal),
+ .rsize = sizeof(siginfo_t), /* one siginfo_t per object read */
+ .wsize = sizeof(sigset_t), /* one sigset_t per object written */
+};
+
+asmlinkage long sys_plsignal(const sigset_t __user *uset)
+{
+ long error;
+ struct pfs_signal *evs;
+ /* Create a pollable signal file for the calling task; returns the new fd or a negative errno. */
+ evs = kmalloc(sizeof(*evs), GFP_KERNEL);
+ if (!evs)
+ return -ENOMEM;
+
+ if (copy_from_user(&evs->set, uset, sizeof(sigset_t))) {
+ kfree(evs); /* no task reference taken yet, so a plain kfree() is enough */
+ return -EFAULT;
+ }
+
+ spin_lock_init(&evs->lock);
+
+ evs->task = current;
+ get_task_struct(current); /* dropped again in release() */
+
+ sigset_adjust(&evs->set);
+
+ evs->file.data = evs;
+ evs->file.fops = &signal_ops;
+ evs->file.wait = &evs->task->sigwait; /* poll and blocking reads wait on the task's sigwait queue */
+
+ error = pfs_open(&evs->file);
+ if (error < 0)
+ release(evs); /* drops the task reference and frees evs */
+
+ return error;
+}
Index: linux-2.6/fs/pollfs/Makefile
===================================================================
--- linux-2.6.orig/fs/pollfs/Makefile
+++ linux-2.6/fs/pollfs/Makefile
@@ -1,2 +1,4 @@
obj-$(CONFIG_POLLFS) += pollfs.o
pollfs-y := file.o
+
+pollfs-$(CONFIG_POLLFS_SIGNAL) += signal.o
Index: linux-2.6/init/Kconfig
===================================================================
--- linux-2.6.orig/init/Kconfig
+++ linux-2.6/init/Kconfig
@@ -469,6 +469,13 @@ config POLLFS
help
Pollfs support
+config POLLFS_SIGNAL
+ bool "Enable pollfs signal" if EMBEDDED
+ default y
+ depends on POLLFS
+ help
+ Pollable signal support
+
config SHMEM
bool "Use full shmem filesystem" if EMBEDDED
default y
--
^ permalink raw reply [flat|nested] 71+ messages in thread
* [patch 05/22] pollfs: pollable signal compat code
2007-05-02 5:22 [patch 00/22] pollfs: filesystem abstraction for pollable objects Davi Arnaut
` (3 preceding siblings ...)
2007-05-02 5:22 ` [patch 04/22] pollfs: pollable signal Davi Arnaut
@ 2007-05-02 5:22 ` Davi Arnaut
2007-05-02 5:22 ` [patch 06/22] pollfs: export the plsignal system call Davi Arnaut
` (17 subsequent siblings)
22 siblings, 0 replies; 71+ messages in thread
From: Davi Arnaut @ 2007-05-02 5:22 UTC (permalink / raw)
To: Andrew Morton; +Cc: Davide Libenzi, Linus Torvalds, Linux Kernel Mailing List
[-- Attachment #1: pollfs-signal-compat.patch --]
[-- Type: text/plain, Size: 2904 bytes --]
Compat handlers for the pollable signal operations. Later the compat operations
can operate on a per call basis.
Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
---
fs/pollfs/signal.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 85 insertions(+)
Index: linux-2.6/fs/pollfs/signal.c
===================================================================
--- linux-2.6.orig/fs/pollfs/signal.c
+++ linux-2.6/fs/pollfs/signal.c
@@ -16,6 +16,7 @@
#include <linux/poll.h>
#include <linux/pollfs_fs.h>
#include <linux/signal.h>
+#include <linux/compat.h>
struct pfs_signal {
sigset_t set;
@@ -48,6 +49,24 @@ static ssize_t read(struct pfs_signal *e
return 0;
}
+#ifdef CONFIG_COMPAT
+static ssize_t compat_read(struct pfs_signal *evs,
+ struct compat_siginfo __user *infoup)
+{
+ int signo;
+ siginfo_t info;
+ /* Compat variant of read(): same dequeue, but the siginfo is copied out with copy_siginfo_to_user32(). */
+ signo = dequeue_signal_lock(evs->task, &evs->set, &info);
+ if (!signo)
+ return -EAGAIN; /* nothing was dequeued */
+
+ if (copy_siginfo_to_user32(infoup, &info))
+ return -EFAULT; /* NOTE(review): the signal is already dequeued here, so a fault loses it */
+
+ return 0;
+}
+#endif
+
static ssize_t write(struct pfs_signal *evs, const sigset_t __user *uset)
{
sigset_t set;
@@ -65,6 +84,28 @@ static ssize_t write(struct pfs_signal *
return 0;
}
+#ifdef CONFIG_COMPAT
+static ssize_t compat_write(struct pfs_signal *evs,
+ const compat_sigset_t __user *uset)
+{
+ sigset_t set;
+ compat_sigset_t cset;
+ /* Compat variant of write(): read a compat_sigset_t, convert it, then replace the stored mask. */
+ if (copy_from_user(&cset, uset, sizeof(compat_sigset_t)))
+ return -EFAULT;
+
+ sigset_from_compat(&set, &cset);
+ sigset_adjust(&set);
+
+ spin_lock_irq(&evs->lock);
+ sigemptyset(&evs->set);
+ sigorsets(&evs->set, &evs->set, &set); /* OR into the just-emptied set: net effect is evs->set = set */
+ spin_unlock_irq(&evs->lock);
+
+ return 0;
+}
+#endif
+
static int poll(struct pfs_signal *evs)
{
int ret = 0;
@@ -142,3 +183,47 @@ asmlinkage long sys_plsignal(const sigse
return error;
}
+
+#ifdef CONFIG_COMPAT
+static const struct pfs_operations compat_signal_ops = { /* compat-ABI handler table used by compat_plsignal() */
+ /* .read = PFS_READ(compat_read, struct pfs_signal, struct compat_siginfo), */ /* NOTE(review): pfs_open() returns -EINVAL for ops without .read */
+ .write = PFS_WRITE(compat_write, struct pfs_signal, compat_sigset_t),
+ .poll = PFS_POLL(poll, struct pfs_signal),
+ .release = PFS_RELEASE(release, struct pfs_signal),
+ /* .rsize = sizeof(compat_siginfo_t), */
+ .wsize = sizeof(compat_sigset_t) /* must match the bytes compat_write() consumes per object */
+};
+
+asmlinkage long compat_plsignal(const compat_sigset_t __user *uset)
+{
+ long error;
+ compat_sigset_t cset;
+ struct pfs_signal *evs;
+ /* Compat counterpart of sys_plsignal(): takes a compat_sigset_t and installs compat_signal_ops. */
+ if (copy_from_user(&cset, uset, sizeof(compat_sigset_t)))
+ return -EFAULT;
+
+ evs = kmalloc(sizeof(*evs), GFP_KERNEL);
+ if (!evs)
+ return -ENOMEM;
+
+ spin_lock_init(&evs->lock);
+
+ evs->task = current;
+ get_task_struct(current); /* dropped again in release() */
+
+ sigset_from_compat(&evs->set, &cset);
+ sigset_adjust(&evs->set);
+
+ evs->file.data = evs;
+ evs->file.fops = &compat_signal_ops;
+ evs->file.wait = &evs->task->sigwait;
+
+ error = pfs_open(&evs->file); /* NOTE(review): compat_signal_ops has no .read, which pfs_open() rejects with -EINVAL */
+
+ if (error < 0)
+ release(evs); /* drops the task reference and frees evs */
+
+ return error;
+}
+#endif
--
^ permalink raw reply [flat|nested] 71+ messages in thread
* [patch 06/22] pollfs: export the plsignal system call
2007-05-02 5:22 [patch 00/22] pollfs: filesystem abstraction for pollable objects Davi Arnaut
` (4 preceding siblings ...)
2007-05-02 5:22 ` [patch 05/22] pollfs: pollable signal compat code Davi Arnaut
@ 2007-05-02 5:22 ` Davi Arnaut
2007-05-02 5:22 ` [patch 07/22] pollfs: x86, wire up " Davi Arnaut
` (16 subsequent siblings)
22 siblings, 0 replies; 71+ messages in thread
From: Davi Arnaut @ 2007-05-02 5:22 UTC (permalink / raw)
To: Andrew Morton; +Cc: Davide Libenzi, Linus Torvalds, Linux Kernel Mailing List
[-- Attachment #1: pollfs-signal-syscall.patch --]
[-- Type: text/plain, Size: 1070 bytes --]
Export the new plsignal syscall prototype. While there, make it "conditional".
Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
---
include/linux/syscalls.h | 2 ++
kernel/sys_ni.c | 1 +
2 files changed, 3 insertions(+)
Index: linux-2.6/include/linux/syscalls.h
===================================================================
--- linux-2.6.orig/include/linux/syscalls.h
+++ linux-2.6/include/linux/syscalls.h
@@ -605,4 +605,6 @@ asmlinkage long sys_getcpu(unsigned __us
int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
+asmlinkage long sys_plsignal(const sigset_t __user * set);
+
#endif
Index: linux-2.6/kernel/sys_ni.c
===================================================================
--- linux-2.6.orig/kernel/sys_ni.c
+++ linux-2.6/kernel/sys_ni.c
@@ -112,6 +112,7 @@ cond_syscall(sys_vm86old);
cond_syscall(sys_vm86);
cond_syscall(compat_sys_ipc);
cond_syscall(compat_sys_sysctl);
+cond_syscall(sys_plsignal);
/* arch-specific weak syscall entries */
cond_syscall(sys_pciconfig_read);
--
^ permalink raw reply [flat|nested] 71+ messages in thread
* [patch 07/22] pollfs: x86, wire up the plsignal system call
2007-05-02 5:22 [patch 00/22] pollfs: filesystem abstraction for pollable objects Davi Arnaut
` (5 preceding siblings ...)
2007-05-02 5:22 ` [patch 06/22] pollfs: export the plsignal system call Davi Arnaut
@ 2007-05-02 5:22 ` Davi Arnaut
2007-05-02 5:22 ` [patch 08/22] pollfs: x86_64, " Davi Arnaut
` (15 subsequent siblings)
22 siblings, 0 replies; 71+ messages in thread
From: Davi Arnaut @ 2007-05-02 5:22 UTC (permalink / raw)
To: Andrew Morton; +Cc: Davide Libenzi, Linus Torvalds, Linux Kernel Mailing List
[-- Attachment #1: pollfs-signal-i386-syscall.patch --]
[-- Type: text/plain, Size: 1096 bytes --]
Make the plsignal syscall available to user-space on x86.
Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
---
arch/i386/kernel/syscall_table.S | 1 +
include/asm-i386/unistd.h | 3 ++-
2 files changed, 3 insertions(+), 1 deletion(-)
Index: linux-2.6/include/asm-i386/unistd.h
===================================================================
--- linux-2.6.orig/include/asm-i386/unistd.h
+++ linux-2.6/include/asm-i386/unistd.h
@@ -325,10 +325,11 @@
#define __NR_move_pages 317
#define __NR_getcpu 318
#define __NR_epoll_pwait 319
+#define __NR_plsignal 320
#ifdef __KERNEL__
-#define NR_syscalls 320
+#define NR_syscalls 321
#define __ARCH_WANT_IPC_PARSE_VERSION
#define __ARCH_WANT_OLD_READDIR
Index: linux-2.6/arch/i386/kernel/syscall_table.S
===================================================================
--- linux-2.6.orig/arch/i386/kernel/syscall_table.S
+++ linux-2.6/arch/i386/kernel/syscall_table.S
@@ -319,3 +319,4 @@ ENTRY(sys_call_table)
.long sys_move_pages
.long sys_getcpu
.long sys_epoll_pwait
+ .long sys_plsignal /* 320 */
--
^ permalink raw reply [flat|nested] 71+ messages in thread
* [patch 08/22] pollfs: x86_64, wire up the plsignal system call
2007-05-02 5:22 [patch 00/22] pollfs: filesystem abstraction for pollable objects Davi Arnaut
` (6 preceding siblings ...)
2007-05-02 5:22 ` [patch 07/22] pollfs: x86, wire up " Davi Arnaut
@ 2007-05-02 5:22 ` Davi Arnaut
2007-05-02 5:22 ` [patch 09/22] pollfs: pollable hrtimers Davi Arnaut
` (14 subsequent siblings)
22 siblings, 0 replies; 71+ messages in thread
From: Davi Arnaut @ 2007-05-02 5:22 UTC (permalink / raw)
To: Andrew Morton; +Cc: Davide Libenzi, Linus Torvalds, Linux Kernel Mailing List
[-- Attachment #1: pollfs-signal-x86_64-syscall.patch --]
[-- Type: text/plain, Size: 1371 bytes --]
Make the plsignal syscall available to user-space on x86_64.
Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
---
arch/x86_64/ia32/ia32entry.S | 3 ++-
include/asm-x86_64/unistd.h | 4 +++-
2 files changed, 5 insertions(+), 2 deletions(-)
Index: linux-2.6/include/asm-x86_64/unistd.h
===================================================================
--- linux-2.6.orig/include/asm-x86_64/unistd.h
+++ linux-2.6/include/asm-x86_64/unistd.h
@@ -619,8 +619,10 @@ __SYSCALL(__NR_sync_file_range, sys_sync
__SYSCALL(__NR_vmsplice, sys_vmsplice)
#define __NR_move_pages 279
__SYSCALL(__NR_move_pages, sys_move_pages)
+#define __NR_plsignal 280
+__SYSCALL(__NR_plsignal, sys_plsignal)
-#define __NR_syscall_max __NR_move_pages
+#define __NR_syscall_max __NR_plsignal
#ifndef __NO_STUBS
#define __ARCH_WANT_OLD_READDIR
Index: linux-2.6/arch/x86_64/ia32/ia32entry.S
===================================================================
--- linux-2.6.orig/arch/x86_64/ia32/ia32entry.S
+++ linux-2.6/arch/x86_64/ia32/ia32entry.S
@@ -714,9 +714,10 @@ ia32_sys_call_table:
.quad compat_sys_get_robust_list
.quad sys_splice
.quad sys_sync_file_range
- .quad sys_tee
+ .quad sys_tee /* 315 */
.quad compat_sys_vmsplice
.quad compat_sys_move_pages
.quad sys_getcpu
.quad sys_epoll_pwait
+ .quad sys_plsignal /* 320 */
ia32_syscall_end:
--
^ permalink raw reply [flat|nested] 71+ messages in thread
* [patch 09/22] pollfs: pollable hrtimers
2007-05-02 5:22 [patch 00/22] pollfs: filesystem abstraction for pollable objects Davi Arnaut
` (7 preceding siblings ...)
2007-05-02 5:22 ` [patch 08/22] pollfs: x86_64, " Davi Arnaut
@ 2007-05-02 5:22 ` Davi Arnaut
2007-05-02 21:16 ` Thomas Gleixner
2007-05-02 5:22 ` [patch 10/22] pollfs: export the pltimer system call Davi Arnaut
` (13 subsequent siblings)
22 siblings, 1 reply; 71+ messages in thread
From: Davi Arnaut @ 2007-05-02 5:22 UTC (permalink / raw)
To: Andrew Morton; +Cc: Davide Libenzi, Linus Torvalds, Linux Kernel Mailing List
[-- Attachment #1: pollfs-timer.patch --]
[-- Type: text/plain, Size: 5604 bytes --]
Per file descriptor high-resolution timers. A classic unix file interface for
the POSIX timer_(create|settime|gettime|delete) family of functions.
Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
---
fs/pollfs/Makefile | 1
fs/pollfs/timer.c | 198 +++++++++++++++++++++++++++++++++++++++++++++++++++++
init/Kconfig | 7 +
3 files changed, 206 insertions(+)
Index: linux-2.6/fs/pollfs/timer.c
===================================================================
--- /dev/null
+++ linux-2.6/fs/pollfs/timer.c
@@ -0,0 +1,198 @@
+/*
+ * pollable timers
+ *
+ * Copyright (C) 2007 Davi E. M. Arnaut
+ *
+ * Licensed under the GNU GPL. See the file COPYING for details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/wait.h>
+#include <linux/poll.h>
+#include <linux/pollfs_fs.h>
+#include <linux/hrtimer.h>
+
+struct pfs_timer { /* state of one pollable timer object, allocated by sys_pltimer() */
+ wait_queue_head_t wait; /* woken by timer_fn(); exported as file.wait */
+ ktime_t interval; /* set from it_interval in rearm_timer() */
+ spinlock_t lock; /* held by read(), timer_fn() and rearm_timer() */
+ unsigned long overruns; /* incremented per expiry by timer_fn(); non-zero means POLLIN */
+ struct hrtimer timer; /* underlying hrtimer; its callback is timer_fn() */
+ struct pfs_file file; /* handle passed to pfs_open() */
+};
+
+struct hrtimerspec { /* record copied from userspace by write() to (re)arm the timer */
+ int flags; /* TIMER_ABSTIME selects HRTIMER_MODE_ABS in rearm_timer() */
+ clockid_t clock; /* must be CLOCK_REALTIME or CLOCK_MONOTONIC, see spec_invalid() */
+ struct itimerspec expr; /* it_value becomes the expiry, it_interval becomes evs->interval */
+};
+
+static ssize_t read(struct pfs_timer *evs, struct itimerspec __user *uspec)
+{ /* .read handler: copy the time left and the interval to *uspec */
+ int ret = -EAGAIN; /* returned when no expiry has been counted */
+ ktime_t remaining = {};
+ unsigned long overruns = 0;
+ struct itimerspec spec = {}; /* it_value stays zero unless time remains */
+ struct hrtimer *timer = &evs->timer;
+
+ spin_lock_irq(&evs->lock);
+
+ if (!evs->overruns)
+ goto out_unlock;
+
+ if (hrtimer_active(timer))
+ remaining = hrtimer_get_remaining(timer);
+ else if (evs->interval.tv64 > 0) /* inactive timer that has an interval */
+ overruns = hrtimer_forward(timer, hrtimer_cb_get_time(timer),
+ evs->interval);
+
+ ret = -EOVERFLOW; /* stays set if the check below fails */
+ if (overruns > (ULONG_MAX - evs->overruns)) /* the sum would wrap */
+ goto out_unlock;
+ else
+ evs->overruns += overruns;
+
+ if (remaining.tv64 > 0)
+ spec.it_value = ktime_to_timespec(remaining);
+
+ spec.it_interval = ktime_to_timespec(evs->interval);
+
+ ret = 0; /* NOTE(review): evs->overruns is neither cleared nor reported here -- confirm */
+
+out_unlock:
+ spin_unlock_irq(&evs->lock);
+
+ if (ret)
+ return ret;
+
+ if (copy_to_user(uspec, &spec, sizeof(spec))) /* done after dropping the spinlock */
+ return -EFAULT;
+
+ return 0;
+}
+
+static enum hrtimer_restart timer_fn(struct hrtimer *timer)
+{ /* hrtimer callback: count one expiry and wake waiters if it is the first */
+ struct pfs_timer *evs = container_of(timer, struct pfs_timer, timer);
+ unsigned long flags;
+
+ spin_lock_irqsave(&evs->lock, flags);
+ /* timer tick, interval has elapsed */
+ if (!evs->overruns++) /* wake only on the 0 -> 1 transition */
+ wake_up_all(&evs->wait);
+ spin_unlock_irqrestore(&evs->lock, flags);
+
+ return HRTIMER_NORESTART; /* the callback never re-queues the timer itself */
+}
+
+static inline void rearm_timer(struct pfs_timer *evs, struct hrtimerspec *spec)
+{ /* stop the timer, then reprogram clock, expiry and interval from *spec */
+ struct hrtimer *timer = &evs->timer;
+ enum hrtimer_mode mode = HRTIMER_MODE_REL; /* relative unless TIMER_ABSTIME is set */
+
+ if (spec->flags & TIMER_ABSTIME)
+ mode = HRTIMER_MODE_ABS;
+
+ do { /* retry while hrtimer_try_to_cancel() returns a negative value */
+ spin_lock_irq(&evs->lock);
+ if (hrtimer_try_to_cancel(timer) >= 0)
+ break; /* leaves the loop with evs->lock still held */
+ spin_unlock_irq(&evs->lock); /* drop the lock before trying again */
+ cpu_relax();
+ } while (1);
+
+ hrtimer_init(timer, spec->clock, mode); /* NOTE(review): evs->overruns is left untouched by a rearm -- confirm */
+
+ timer->function = timer_fn;
+ timer->expires = timespec_to_ktime(spec->expr.it_value);
+ evs->interval = timespec_to_ktime(spec->expr.it_interval);
+
+ if (timer->expires.tv64) /* a zero it_value leaves the timer stopped */
+ hrtimer_start(timer, timer->expires, mode);
+
+ spin_unlock_irq(&evs->lock); /* releases the lock taken in the loop above */
+}
+
+static inline int spec_invalid(const struct hrtimerspec *spec)
+{ /* returns 1 if *spec must be rejected, 0 if it is usable */
+ if (spec->clock != CLOCK_REALTIME && spec->clock != CLOCK_MONOTONIC) /* only these two clocks are accepted */
+ return 1;
+
+ if (!timespec_valid(&spec->expr.it_value) || /* both timespecs must pass timespec_valid() */
+ !timespec_valid(&spec->expr.it_interval))
+ return 1;
+
+ return 0;
+}
+
+static ssize_t write(struct pfs_timer *evs,
+ const struct hrtimerspec __user *uspec)
+{ /* .write handler: copy in a hrtimerspec, validate it and (re)arm the timer */
+ struct hrtimerspec spec;
+
+ if (copy_from_user(&spec, uspec, sizeof(spec)))
+ return -EFAULT;
+
+ if (spec_invalid(&spec)) /* bad clock id or invalid timespec */
+ return -EINVAL;
+
+ rearm_timer(evs, &spec);
+
+ return 0;
+}
+
+static int poll(struct pfs_timer *evs)
+{ /* .poll handler: POLLIN once at least one expiry has been counted */
+ int ret;
+
+ ret = evs->overruns ? POLLIN : 0; /* read without taking evs->lock */
+
+ return ret;
+}
+
+static int release(struct pfs_timer *evs)
+{ /* .release handler; also called by sys_pltimer() when pfs_open() fails */
+ hrtimer_cancel(&evs->timer); /* cancel the timer before freeing the object that embeds it */
+ kfree(evs);
+
+ return 0;
+}
+
+static const struct pfs_operations timer_ops = { /* pollfs callbacks for timer objects */
+ .read = PFS_READ(read, struct pfs_timer, struct itimerspec),
+ .write = PFS_WRITE(write, struct pfs_timer, struct hrtimerspec),
+ .poll = PFS_POLL(poll, struct pfs_timer),
+ .release = PFS_RELEASE(release, struct pfs_timer),
+ .rsize = sizeof(struct itimerspec), /* presumably the fixed record size for read -- see pollfs_fs.h */
+ .wsize = sizeof(struct hrtimerspec), /* presumably the fixed record size for write -- see pollfs_fs.h */
+};
+
+asmlinkage long sys_pltimer(void)
+{ /* allocate a timer object and hand it to pfs_open() */
+ long error;
+ struct pfs_timer *evs;
+
+ evs = kmalloc(sizeof(*evs), GFP_KERNEL); /* not zeroed; evs->interval is first set in rearm_timer() */
+ if (!evs)
+ return -ENOMEM;
+
+ evs->overruns = 0;
+ spin_lock_init(&evs->lock);
+ init_waitqueue_head(&evs->wait);
+ hrtimer_init(&evs->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); /* placeholder setup; rearm_timer() re-initialises on write */
+
+ evs->file.data = evs;
+ evs->file.fops = &timer_ops;
+ evs->file.wait = &evs->wait;
+
+ error = pfs_open(&evs->file); /* presumably the new fd or a negative errno -- confirm in pollfs_fs.h */
+
+ if (error < 0)
+ release(evs); /* cancels the timer and frees evs */
+
+ return error;
+}
Index: linux-2.6/fs/pollfs/Makefile
===================================================================
--- linux-2.6.orig/fs/pollfs/Makefile
+++ linux-2.6/fs/pollfs/Makefile
@@ -2,3 +2,4 @@ obj-$(CONFIG_POLLFS) += pollfs.o
pollfs-y := file.o
pollfs-$(CONFIG_POLLFS_SIGNAL) += signal.o
+pollfs-$(CONFIG_POLLFS_TIMER) += timer.o
Index: linux-2.6/init/Kconfig
===================================================================
--- linux-2.6.orig/init/Kconfig
+++ linux-2.6/init/Kconfig
@@ -476,6 +476,13 @@ config POLLFS_SIGNAL
help
Pollable signal support
+config POLLFS_TIMER
+ bool "Enable pollfs timer" if EMBEDDED
+ default y
+ depends on POLLFS
+ help
+ Pollable timer support
+
config SHMEM
bool "Use full shmem filesystem" if EMBEDDED
default y
--
^ permalink raw reply [flat|nested] 71+ messages in thread
* [patch 10/22] pollfs: export the pltimer system call
2007-05-02 5:22 [patch 00/22] pollfs: filesystem abstraction for pollable objects Davi Arnaut
` (8 preceding siblings ...)
2007-05-02 5:22 ` [patch 09/22] pollfs: pollable hrtimers Davi Arnaut
@ 2007-05-02 5:22 ` Davi Arnaut
2007-05-02 5:22 ` [patch 11/22] pollfs: x86, wire up " Davi Arnaut
` (12 subsequent siblings)
22 siblings, 0 replies; 71+ messages in thread
From: Davi Arnaut @ 2007-05-02 5:22 UTC (permalink / raw)
To: Andrew Morton; +Cc: Davide Libenzi, Linus Torvalds, Linux Kernel Mailing List
[-- Attachment #1: pollfs-timer-syscall.patch --]
[-- Type: text/plain, Size: 1023 bytes --]
Export the new pltimer syscall prototype. While there, make it "conditional".
Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
---
include/linux/syscalls.h | 2 ++
kernel/sys_ni.c | 1 +
2 files changed, 3 insertions(+)
Index: linux-2.6/include/linux/syscalls.h
===================================================================
--- linux-2.6.orig/include/linux/syscalls.h
+++ linux-2.6/include/linux/syscalls.h
@@ -607,4 +607,6 @@ int kernel_execve(const char *filename,
asmlinkage long sys_plsignal(const sigset_t __user * set);
+asmlinkage long sys_pltimer(void);
+
#endif
Index: linux-2.6/kernel/sys_ni.c
===================================================================
--- linux-2.6.orig/kernel/sys_ni.c
+++ linux-2.6/kernel/sys_ni.c
@@ -113,6 +113,7 @@ cond_syscall(sys_vm86);
cond_syscall(compat_sys_ipc);
cond_syscall(compat_sys_sysctl);
cond_syscall(sys_plsignal);
+cond_syscall(sys_pltimer);
/* arch-specific weak syscall entries */
cond_syscall(sys_pciconfig_read);
--
^ permalink raw reply [flat|nested] 71+ messages in thread
* [patch 11/22] pollfs: x86, wire up the pltimer system call
2007-05-02 5:22 [patch 00/22] pollfs: filesystem abstraction for pollable objects Davi Arnaut
` (9 preceding siblings ...)
2007-05-02 5:22 ` [patch 10/22] pollfs: export the pltimer system call Davi Arnaut
@ 2007-05-02 5:22 ` Davi Arnaut
2007-05-02 5:22 ` [patch 12/22] pollfs: x86_64, " Davi Arnaut
` (11 subsequent siblings)
22 siblings, 0 replies; 71+ messages in thread
From: Davi Arnaut @ 2007-05-02 5:22 UTC (permalink / raw)
To: Andrew Morton; +Cc: Davide Libenzi, Linus Torvalds, Linux Kernel Mailing List
[-- Attachment #1: pollfs-timer-i386-syscall.patch --]
[-- Type: text/plain, Size: 1089 bytes --]
Make the pltimer syscall available to user-space on x86.
Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
---
arch/i386/kernel/syscall_table.S | 1 +
include/asm-i386/unistd.h | 3 ++-
2 files changed, 3 insertions(+), 1 deletion(-)
Index: linux-2.6/include/asm-i386/unistd.h
===================================================================
--- linux-2.6.orig/include/asm-i386/unistd.h
+++ linux-2.6/include/asm-i386/unistd.h
@@ -326,10 +326,11 @@
#define __NR_getcpu 318
#define __NR_epoll_pwait 319
#define __NR_plsignal 320
+#define __NR_pltimer 321
#ifdef __KERNEL__
-#define NR_syscalls 321
+#define NR_syscalls 322
#define __ARCH_WANT_IPC_PARSE_VERSION
#define __ARCH_WANT_OLD_READDIR
Index: linux-2.6/arch/i386/kernel/syscall_table.S
===================================================================
--- linux-2.6.orig/arch/i386/kernel/syscall_table.S
+++ linux-2.6/arch/i386/kernel/syscall_table.S
@@ -320,3 +320,4 @@ ENTRY(sys_call_table)
.long sys_getcpu
.long sys_epoll_pwait
.long sys_plsignal /* 320 */
+ .long sys_pltimer
--
^ permalink raw reply [flat|nested] 71+ messages in thread
* [patch 12/22] pollfs: x86_64, wire up the pltimer system call
2007-05-02 5:22 [patch 00/22] pollfs: filesystem abstraction for pollable objects Davi Arnaut
` (10 preceding siblings ...)
2007-05-02 5:22 ` [patch 11/22] pollfs: x86, wire up " Davi Arnaut
@ 2007-05-02 5:22 ` Davi Arnaut
2007-05-02 5:22 ` [patch 13/22] pollfs: asynchronous futex wait Davi Arnaut
` (10 subsequent siblings)
22 siblings, 0 replies; 71+ messages in thread
From: Davi Arnaut @ 2007-05-02 5:22 UTC (permalink / raw)
To: Andrew Morton; +Cc: Davide Libenzi, Linus Torvalds, Linux Kernel Mailing List
[-- Attachment #1: pollfs-timer-x86_64-syscall.patch --]
[-- Type: text/plain, Size: 1192 bytes --]
Make the pltimer syscall available to user-space on x86_64.
Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
---
arch/x86_64/ia32/ia32entry.S | 1 +
include/asm-x86_64/unistd.h | 4 +++-
2 files changed, 4 insertions(+), 1 deletion(-)
Index: linux-2.6/arch/x86_64/ia32/ia32entry.S
===================================================================
--- linux-2.6.orig/arch/x86_64/ia32/ia32entry.S
+++ linux-2.6/arch/x86_64/ia32/ia32entry.S
@@ -720,4 +720,5 @@ ia32_sys_call_table:
.quad sys_getcpu
.quad sys_epoll_pwait
.quad sys_plsignal /* 320 */
+ .quad sys_pltimer
ia32_syscall_end:
Index: linux-2.6/include/asm-x86_64/unistd.h
===================================================================
--- linux-2.6.orig/include/asm-x86_64/unistd.h
+++ linux-2.6/include/asm-x86_64/unistd.h
@@ -621,8 +621,10 @@ __SYSCALL(__NR_vmsplice, sys_vmsplice)
__SYSCALL(__NR_move_pages, sys_move_pages)
#define __NR_plsignal 280
__SYSCALL(__NR_plsignal, sys_plsignal)
+#define __NR_pltimer 281
+__SYSCALL(__NR_pltimer, sys_pltimer)
-#define __NR_syscall_max __NR_plsignal
+#define __NR_syscall_max __NR_pltimer
#ifndef __NO_STUBS
#define __ARCH_WANT_OLD_READDIR
--
^ permalink raw reply [flat|nested] 71+ messages in thread
* [patch 13/22] pollfs: asynchronous futex wait
2007-05-02 5:22 [patch 00/22] pollfs: filesystem abstraction for pollable objects Davi Arnaut
` (11 preceding siblings ...)
2007-05-02 5:22 ` [patch 12/22] pollfs: x86_64, " Davi Arnaut
@ 2007-05-02 5:22 ` Davi Arnaut
2007-05-02 5:22 ` [patch 14/22] pollfs: pollable futex Davi Arnaut
` (9 subsequent siblings)
22 siblings, 0 replies; 71+ messages in thread
From: Davi Arnaut @ 2007-05-02 5:22 UTC (permalink / raw)
To: Andrew Morton; +Cc: Davide Libenzi, Linus Torvalds, Linux Kernel Mailing List
[-- Attachment #1: pollfs-futex-async-wait.patch --]
[-- Type: text/plain, Size: 7950 bytes --]
Break apart and export the futex_wait function in order to be able to
associate (wait for) a futex with other resources.
Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
---
include/linux/futex.h | 80 ++++++++++++++++++++++++++++++
kernel/futex.c | 130 ++++++++++++++------------------------------------
2 files changed, 118 insertions(+), 92 deletions(-)
Index: linux-2.6/kernel/futex.c
===================================================================
--- linux-2.6.orig/kernel/futex.c
+++ linux-2.6/kernel/futex.c
@@ -55,81 +55,6 @@
#define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)
/*
- * Futexes are matched on equal values of this key.
- * The key type depends on whether it's a shared or private mapping.
- * Don't rearrange members without looking at hash_futex().
- *
- * offset is aligned to a multiple of sizeof(u32) (== 4) by definition.
- * We set bit 0 to indicate if it's an inode-based key.
- */
-union futex_key {
- struct {
- unsigned long pgoff;
- struct inode *inode;
- int offset;
- } shared;
- struct {
- unsigned long address;
- struct mm_struct *mm;
- int offset;
- } private;
- struct {
- unsigned long word;
- void *ptr;
- int offset;
- } both;
-};
-
-/*
- * Priority Inheritance state:
- */
-struct futex_pi_state {
- /*
- * list of 'owned' pi_state instances - these have to be
- * cleaned up in do_exit() if the task exits prematurely:
- */
- struct list_head list;
-
- /*
- * The PI object:
- */
- struct rt_mutex pi_mutex;
-
- struct task_struct *owner;
- atomic_t refcount;
-
- union futex_key key;
-};
-
-/*
- * We use this hashed waitqueue instead of a normal wait_queue_t, so
- * we can wake only the relevant ones (hashed queues may be shared).
- *
- * A futex_q has a woken state, just like tasks have TASK_RUNNING.
- * It is considered woken when list_empty(&q->list) || q->lock_ptr == 0.
- * The order of wakup is always to make the first condition true, then
- * wake up q->waiters, then make the second condition true.
- */
-struct futex_q {
- struct list_head list;
- wait_queue_head_t waiters;
-
- /* Which hash list lock to use: */
- spinlock_t *lock_ptr;
-
- /* Key which the futex is hashed on: */
- union futex_key key;
-
- /* For fd, sigio sent using these: */
- int fd;
- struct file *filp;
-
- /* Optional priority inheritance state: */
- struct futex_pi_state *pi_state;
- struct task_struct *task;
-};
-
-/*
* Split the global futex_lock into every hash list lock.
*/
struct futex_hash_bucket {
@@ -904,8 +829,6 @@ queue_lock(struct futex_q *q, int fd, st
q->fd = fd;
q->filp = filp;
- init_waitqueue_head(&q->waiters);
-
get_key_refs(&q->key);
hb = hash_futex(&q->key);
q->lock_ptr = &hb->lock;
@@ -938,6 +861,7 @@ static void queue_me(struct futex_q *q,
{
struct futex_hash_bucket *hb;
+ init_waitqueue_head(&q->waiters);
hb = queue_lock(q, fd, filp);
__queue_me(q, hb);
}
@@ -1002,24 +926,22 @@ static void unqueue_me_pi(struct futex_q
drop_key_refs(&q->key);
}
-static int futex_wait(u32 __user *uaddr, u32 val, unsigned long time)
+int futex_wait_queue(struct futex_q *q, u32 __user *uaddr, u32 val)
{
struct task_struct *curr = current;
- DECLARE_WAITQUEUE(wait, curr);
struct futex_hash_bucket *hb;
- struct futex_q q;
u32 uval;
int ret;
- q.pi_state = NULL;
+ q->pi_state = NULL;
retry:
down_read(&curr->mm->mmap_sem);
- ret = get_futex_key(uaddr, &q.key);
+ ret = get_futex_key(uaddr, &q->key);
if (unlikely(ret != 0))
goto out_release_sem;
- hb = queue_lock(&q, -1, NULL);
+ hb = queue_lock(q, -1, NULL);
/*
* Access the page AFTER the futex is queued.
@@ -1044,7 +966,7 @@ static int futex_wait(u32 __user *uaddr,
ret = get_futex_value_locked(&uval, uaddr);
if (unlikely(ret)) {
- queue_unlock(&q, hb);
+ queue_unlock(q, hb);
/*
* If we would have faulted, release mmap_sem, fault it in and
@@ -1063,14 +985,37 @@ static int futex_wait(u32 __user *uaddr,
goto out_unlock_release_sem;
/* Only actually queue if *uaddr contained val. */
- __queue_me(&q, hb);
+ __queue_me(q, hb);
/*
* Now the futex is queued and we have checked the data, we
- * don't want to hold mmap_sem while we sleep.
+ * don't want to hold mmap_sem while we (might) sleep.
*/
up_read(&curr->mm->mmap_sem);
+ return 0;
+
+ out_unlock_release_sem:
+ queue_unlock(q, hb);
+
+ out_release_sem:
+ up_read(&curr->mm->mmap_sem);
+ return ret;
+}
+
+static int futex_wait(u32 __user *uaddr, u32 val, unsigned long time)
+{
+ int ret;
+ struct futex_q q;
+ DECLARE_WAITQUEUE(wait, current);
+
+ init_waitqueue_head(&q.waiters);
+
+ ret = futex_wait_queue(&q, uaddr, val);
+
+ if (ret)
+ return ret;
+
/*
* There might have been scheduling since the queue_me(), as we
* cannot hold a spinlock across the get_user() in case it
@@ -1106,13 +1051,12 @@ static int futex_wait(u32 __user *uaddr,
* have handled it for us already.
*/
return -EINTR;
+}
- out_unlock_release_sem:
- queue_unlock(&q, hb);
-
- out_release_sem:
- up_read(&curr->mm->mmap_sem);
- return ret;
+/* Return 1 if we were still queued, 0 means we were woken. */
+int futex_wait_unqueue(struct futex_q *q)
+{
+ return unqueue_me(q);
}
/*
@@ -1142,6 +1086,8 @@ static int futex_lock_pi(u32 __user *uad
}
q.pi_state = NULL;
+
+ init_waitqueue_head(&q.waiters);
retry:
down_read(&curr->mm->mmap_sem);
Index: linux-2.6/include/linux/futex.h
===================================================================
--- linux-2.6.orig/include/linux/futex.h
+++ linux-2.6/include/linux/futex.h
@@ -94,12 +94,92 @@ struct robust_list_head {
#define ROBUST_LIST_LIMIT 2048
#ifdef __KERNEL__
+
+#include <linux/rtmutex.h>
+
+/*
+ * Futexes are matched on equal values of this key.
+ * The key type depends on whether it's a shared or private mapping.
+ * Don't rearrange members without looking at hash_futex().
+ *
+ * offset is aligned to a multiple of sizeof(u32) (== 4) by definition.
+ * We set bit 0 to indicate if it's an inode-based key.
+ */
+union futex_key {
+ struct {
+ unsigned long pgoff;
+ struct inode *inode;
+ int offset;
+ } shared;
+ struct {
+ unsigned long address;
+ struct mm_struct *mm;
+ int offset;
+ } private;
+ struct {
+ unsigned long word;
+ void *ptr;
+ int offset;
+ } both;
+};
+
+/*
+ * Priority Inheritance state:
+ */
+struct futex_pi_state {
+ /*
+ * list of 'owned' pi_state instances - these have to be
+ * cleaned up in do_exit() if the task exits prematurely:
+ */
+ struct list_head list;
+
+ /*
+ * The PI object:
+ */
+ struct rt_mutex pi_mutex;
+
+ struct task_struct *owner;
+ atomic_t refcount;
+
+ union futex_key key;
+};
+
+/*
+ * We use this hashed waitqueue instead of a normal wait_queue_t, so
+ * we can wake only the relevant ones (hashed queues may be shared).
+ *
+ * A futex_q has a woken state, just like tasks have TASK_RUNNING.
+ * It is considered woken when list_empty(&q->list) || q->lock_ptr == 0.
+ * The order of wakup is always to make the first condition true, then
+ * wake up q->waiters, then make the second condition true.
+ */
+struct futex_q {
+ struct list_head list;
+ wait_queue_head_t waiters;
+
+ /* Which hash list lock to use: */
+ spinlock_t *lock_ptr;
+
+ /* Key which the futex is hashed on: */
+ union futex_key key;
+
+ /* For fd, sigio sent using these: */
+ int fd;
+ struct file *filp;
+
+ /* Optional priority inheritance state: */
+ struct futex_pi_state *pi_state;
+ struct task_struct *task;
+};
long do_futex(u32 __user *uaddr, int op, u32 val, unsigned long timeout,
u32 __user *uaddr2, u32 val2, u32 val3);
extern int
handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi);
+extern int futex_wait_queue(struct futex_q *q, u32 __user *uaddr, u32 val);
+extern int futex_wait_unqueue(struct futex_q *q);
+
#ifdef CONFIG_FUTEX
extern void exit_robust_list(struct task_struct *curr);
extern void exit_pi_state_list(struct task_struct *curr);
--
^ permalink raw reply [flat|nested] 71+ messages in thread
* [patch 14/22] pollfs: pollable futex
2007-05-02 5:22 [patch 00/22] pollfs: filesystem abstraction for pollable objects Davi Arnaut
` (12 preceding siblings ...)
2007-05-02 5:22 ` [patch 13/22] pollfs: asynchronous futex wait Davi Arnaut
@ 2007-05-02 5:22 ` Davi Arnaut
2007-05-02 5:54 ` Eric Dumazet
2007-05-02 7:40 ` Ulrich Drepper
2007-05-02 5:22 ` [patch 15/22] pollfs: export the plfutex system call Davi Arnaut
` (8 subsequent siblings)
22 siblings, 2 replies; 71+ messages in thread
From: Davi Arnaut @ 2007-05-02 5:22 UTC (permalink / raw)
To: Andrew Morton; +Cc: Davide Libenzi, Linus Torvalds, Linux Kernel Mailing List
[-- Attachment #1: pollfs-futex.patch --]
[-- Type: text/plain, Size: 4428 bytes --]
Asynchronously wait for a FUTEX_WAKE operation on a futex if it still contains
a given value. There can be only one futex wait per file descriptor. However,
it can be rearmed (possibly at a different address) anytime.
The pollable futex approach is far superior (send and receive events from
userspace or kernel) to eventfd and fixes (supersedes) FUTEX_FD at the same time.
Building block for pollable semaphores and user-defined events.
Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
---
fs/pollfs/Makefile | 1
fs/pollfs/futex.c | 154 +++++++++++++++++++++++++++++++++++++++++++++++++++++
init/Kconfig | 7 ++
3 files changed, 162 insertions(+)
Index: linux-2.6/fs/pollfs/Makefile
===================================================================
--- linux-2.6.orig/fs/pollfs/Makefile
+++ linux-2.6/fs/pollfs/Makefile
@@ -3,3 +3,4 @@ pollfs-y := file.o
pollfs-$(CONFIG_POLLFS_SIGNAL) += signal.o
pollfs-$(CONFIG_POLLFS_TIMER) += timer.o
+pollfs-$(CONFIG_POLLFS_FUTEX) += futex.o
Index: linux-2.6/fs/pollfs/futex.c
===================================================================
--- /dev/null
+++ linux-2.6/fs/pollfs/futex.c
@@ -0,0 +1,154 @@
+/*
+ * pollable futex
+ *
+ * Copyright (C) 2007 Davi E. M. Arnaut
+ *
+ * Licensed under the GNU GPL. See the file COPYING for details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/wait.h>
+#include <linux/poll.h>
+#include <linux/pollfs_fs.h>
+#include <linux/futex.h>
+
+struct futex_event { /* record exchanged with userspace by read() and write() */
+ union { /* the u64 member gives the pointer slot a fixed 64-bit size */
+ void __user *addr; /* passed as uaddr to futex_wait_queue() */
+ u64 padding;
+ };
+ int val; /* passed as val to futex_wait_queue() */
+};
+
+struct pfs_futex { /* state of one pollable futex object, allocated by sys_plfutex() */
+ struct futex_q q; /* futex queue entry; q.waiters is exported as file.wait */
+ struct futex_event fevt; /* last event successfully queued by write() */
+ struct mutex mutex; /* taken by read(), write() and poll() */
+ unsigned volatile queued; /* set to 1 by a successful write(), cleared in read() or a failed write() */
+ struct pfs_file file; /* handle passed to pfs_open() */
+};
+
+static ssize_t read(struct pfs_futex *evs, struct futex_event __user *ufevt)
+{ /* .read handler: consume a wake-up and copy the armed event to *ufevt */
+ int ret;
+ struct futex_event fevt;
+
+ mutex_lock(&evs->mutex);
+
+ fevt = evs->fevt; /* snapshot under the mutex; copied out after unlock */
+
+ ret = -EAGAIN; /* kept when q is armed and still on its list */
+
+ if (!evs->queued) /* nothing armed by write() */
+ ret = -EINVAL;
+ else if (list_empty(&evs->q.list)) /* same "woken" test as poll() */
+ ret = futex_wait_unqueue(&evs->q); /* 1 if still queued, 0 if woken */
+
+ switch (ret) { /* negative errno values match no case */
+ case 1:
+ ret = -EAGAIN; /* fall through -- NOTE(review): this path also clears queued; confirm intended */
+ case 0:
+ evs->queued = 0;
+ }
+
+ mutex_unlock(&evs->mutex);
+
+ if (ret < 0)
+ return ret;
+
+ if (copy_to_user(ufevt, &fevt, sizeof(fevt)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static ssize_t write(struct pfs_futex *evs,
+ const struct futex_event __user *ufevt)
+{ /* .write handler: (re)arm the wait on fevt.addr with expected value fevt.val */
+ int ret;
+ struct futex_event fevt;
+
+ if (copy_from_user(&fevt, ufevt, sizeof(fevt)))
+ return -EFAULT;
+
+ mutex_lock(&evs->mutex);
+
+ if (evs->queued) /* drop the previous wait before arming a new one */
+ futex_wait_unqueue(&evs->q);
+
+ ret = futex_wait_queue(&evs->q, fevt.addr, fevt.val); /* returns 0 once q has been queued */
+
+ if (ret)
+ evs->queued = 0; /* the old wait is gone and the new one failed */
+ else {
+ evs->queued = 1;
+ evs->fevt = fevt; /* remembered so read() can report it */
+ }
+
+ mutex_unlock(&evs->mutex);
+
+ return ret;
+}
+
+static int poll(struct pfs_futex *evs)
+{ /* .poll handler: POLLIN when an armed wait is no longer on its list */
+ int ret;
+
+ while (!mutex_trylock(&evs->mutex)) /* busy-waits for the mutex rather than sleeping in mutex_lock() */
+ cpu_relax();
+
+ ret = evs->queued && list_empty(&evs->q.list) ? POLLIN : 0;
+
+ mutex_unlock(&evs->mutex);
+
+ return ret;
+}
+
+static int release(struct pfs_futex *evs)
+{ /* .release handler; also called by sys_plfutex() when pfs_open() fails */
+ if (evs->queued) /* remove q from the futex queue before freeing it */
+ futex_wait_unqueue(&evs->q);
+
+ mutex_destroy(&evs->mutex);
+
+ kfree(evs);
+
+ return 0;
+}
+
+static const struct pfs_operations futex_ops = { /* pollfs callbacks for futex objects */
+ .read = PFS_READ(read, struct pfs_futex, struct futex_event),
+ .write = PFS_WRITE(write, struct pfs_futex, struct futex_event),
+ .poll = PFS_POLL(poll, struct pfs_futex),
+ .release = PFS_RELEASE(release, struct pfs_futex),
+ .rsize = sizeof(struct futex_event), /* presumably the fixed record size for read -- see pollfs_fs.h */
+ .wsize = sizeof(struct futex_event), /* presumably the fixed record size for write -- see pollfs_fs.h */
+};
+
+asmlinkage long sys_plfutex(void)
+{ /* allocate a futex object and hand it to pfs_open() */
+ long error;
+ struct pfs_futex *evs;
+
+ evs = kzalloc(sizeof(*evs), GFP_KERNEL); /* zeroed, so evs->queued starts at 0 */
+ if (!evs)
+ return -ENOMEM;
+
+ mutex_init(&evs->mutex);
+ init_waitqueue_head(&evs->q.waiters); /* initialised once here; write() does not re-initialise it */
+
+ evs->file.data = evs;
+ evs->file.fops = &futex_ops;
+ evs->file.wait = &evs->q.waiters;
+
+ error = pfs_open(&evs->file); /* presumably the new fd or a negative errno -- confirm in pollfs_fs.h */
+
+ if (error < 0)
+ release(evs); /* destroys the mutex and frees evs */
+
+ return error;
+}
Index: linux-2.6/init/Kconfig
===================================================================
--- linux-2.6.orig/init/Kconfig
+++ linux-2.6/init/Kconfig
@@ -483,6 +483,13 @@ config POLLFS_TIMER
help
Pollable timer support
+config POLLFS_FUTEX
+ bool "Enable pollfs futex" if EMBEDDED
+ default y
+ depends on POLLFS && FUTEX
+ help
+ Pollable futex support
+
config SHMEM
bool "Use full shmem filesystem" if EMBEDDED
default y
--
^ permalink raw reply [flat|nested] 71+ messages in thread
* [patch 15/22] pollfs: export the plfutex system call
2007-05-02 5:22 [patch 00/22] pollfs: filesystem abstraction for pollable objects Davi Arnaut
` (13 preceding siblings ...)
2007-05-02 5:22 ` [patch 14/22] pollfs: pollable futex Davi Arnaut
@ 2007-05-02 5:22 ` Davi Arnaut
2007-05-02 5:22 ` [patch 16/22] pollfs: x86, wire up " Davi Arnaut
` (7 subsequent siblings)
22 siblings, 0 replies; 71+ messages in thread
From: Davi Arnaut @ 2007-05-02 5:22 UTC (permalink / raw)
To: Andrew Morton; +Cc: Davide Libenzi, Linus Torvalds, Linux Kernel Mailing List
[-- Attachment #1: pollfs-futex-syscall.patch --]
[-- Type: text/plain, Size: 1002 bytes --]
Export the new plfutex syscall prototype. While there, make it "conditional".
Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
---
include/linux/syscalls.h | 2 ++
kernel/sys_ni.c | 1 +
2 files changed, 3 insertions(+)
Index: linux-2.6/include/linux/syscalls.h
===================================================================
--- linux-2.6.orig/include/linux/syscalls.h
+++ linux-2.6/include/linux/syscalls.h
@@ -609,4 +609,6 @@ asmlinkage long sys_plsignal(const sigse
asmlinkage long sys_pltimer(void);
+asmlinkage long sys_plfutex(void);
+
#endif
Index: linux-2.6/kernel/sys_ni.c
===================================================================
--- linux-2.6.orig/kernel/sys_ni.c
+++ linux-2.6/kernel/sys_ni.c
@@ -114,6 +114,7 @@ cond_syscall(compat_sys_ipc);
cond_syscall(compat_sys_sysctl);
cond_syscall(sys_plsignal);
cond_syscall(sys_pltimer);
+cond_syscall(sys_plfutex);
/* arch-specific weak syscall entries */
cond_syscall(sys_pciconfig_read);
--
^ permalink raw reply [flat|nested] 71+ messages in thread
* [patch 16/22] pollfs: x86, wire up the plfutex system call
2007-05-02 5:22 [patch 00/22] pollfs: filesystem abstraction for pollable objects Davi Arnaut
` (14 preceding siblings ...)
2007-05-02 5:22 ` [patch 15/22] pollfs: export the plfutex system call Davi Arnaut
@ 2007-05-02 5:22 ` Davi Arnaut
2007-05-02 5:22 ` [patch 17/22] pollfs: x86_64, " Davi Arnaut
` (6 subsequent siblings)
22 siblings, 0 replies; 71+ messages in thread
From: Davi Arnaut @ 2007-05-02 5:22 UTC (permalink / raw)
To: Andrew Morton; +Cc: Davide Libenzi, Linus Torvalds, Linux Kernel Mailing List
[-- Attachment #1: pollfs-futex-i386-syscall.patch --]
[-- Type: text/plain, Size: 1091 bytes --]
Make the plfutex syscall available to user-space on x86.
Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
---
arch/i386/kernel/syscall_table.S | 1 +
include/asm-i386/unistd.h | 3 ++-
2 files changed, 3 insertions(+), 1 deletion(-)
Index: linux-2.6/include/asm-i386/unistd.h
===================================================================
--- linux-2.6.orig/include/asm-i386/unistd.h
+++ linux-2.6/include/asm-i386/unistd.h
@@ -327,10 +327,11 @@
#define __NR_epoll_pwait 319
#define __NR_plsignal 320
#define __NR_pltimer 321
+#define __NR_plfutex 322
#ifdef __KERNEL__
-#define NR_syscalls 322
+#define NR_syscalls 323
#define __ARCH_WANT_IPC_PARSE_VERSION
#define __ARCH_WANT_OLD_READDIR
Index: linux-2.6/arch/i386/kernel/syscall_table.S
===================================================================
--- linux-2.6.orig/arch/i386/kernel/syscall_table.S
+++ linux-2.6/arch/i386/kernel/syscall_table.S
@@ -321,3 +321,4 @@ ENTRY(sys_call_table)
.long sys_epoll_pwait
.long sys_plsignal /* 320 */
.long sys_pltimer
+ .long sys_plfutex
--
^ permalink raw reply [flat|nested] 71+ messages in thread
* [patch 17/22] pollfs: x86_64, wire up the plfutex system call
2007-05-02 5:22 [patch 00/22] pollfs: filesystem abstraction for pollable objects Davi Arnaut
` (15 preceding siblings ...)
2007-05-02 5:22 ` [patch 16/22] pollfs: x86, wire up " Davi Arnaut
@ 2007-05-02 5:22 ` Davi Arnaut
2007-05-02 5:22 ` [patch 18/22] pollfs: check if a AIO event ring is empty Davi Arnaut
` (5 subsequent siblings)
22 siblings, 0 replies; 71+ messages in thread
From: Davi Arnaut @ 2007-05-02 5:22 UTC (permalink / raw)
To: Andrew Morton; +Cc: Davide Libenzi, Linus Torvalds, Linux Kernel Mailing List
[-- Attachment #1: pollfs-futex-x86_64-syscall.patch --]
[-- Type: text/plain, Size: 1187 bytes --]
Make the plfutex syscall available to user-space on x86_64.
Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
---
arch/x86_64/ia32/ia32entry.S | 1 +
include/asm-x86_64/unistd.h | 4 +++-
2 files changed, 4 insertions(+), 1 deletion(-)
Index: linux-2.6/arch/x86_64/ia32/ia32entry.S
===================================================================
--- linux-2.6.orig/arch/x86_64/ia32/ia32entry.S
+++ linux-2.6/arch/x86_64/ia32/ia32entry.S
@@ -721,4 +721,5 @@ ia32_sys_call_table:
.quad sys_epoll_pwait
.quad sys_plsignal /* 320 */
.quad sys_pltimer
+ .quad sys_plfutex
ia32_syscall_end:
Index: linux-2.6/include/asm-x86_64/unistd.h
===================================================================
--- linux-2.6.orig/include/asm-x86_64/unistd.h
+++ linux-2.6/include/asm-x86_64/unistd.h
@@ -623,8 +623,10 @@ __SYSCALL(__NR_move_pages, sys_move_page
__SYSCALL(__NR_plsignal, sys_plsignal)
#define __NR_pltimer 281
__SYSCALL(__NR_pltimer, sys_pltimer)
+#define __NR_plfutex 282
+__SYSCALL(__NR_plfutex, sys_plfutex)
-#define __NR_syscall_max __NR_pltimer
+#define __NR_syscall_max __NR_plfutex
#ifndef __NO_STUBS
#define __ARCH_WANT_OLD_READDIR
--
^ permalink raw reply [flat|nested] 71+ messages in thread
* [patch 18/22] pollfs: check if a AIO event ring is empty
2007-05-02 5:22 [patch 00/22] pollfs: filesystem abstraction for pollable objects Davi Arnaut
` (16 preceding siblings ...)
2007-05-02 5:22 ` [patch 17/22] pollfs: x86_64, " Davi Arnaut
@ 2007-05-02 5:22 ` Davi Arnaut
2007-05-02 5:22 ` [patch 19/22] pollfs: pollable aio Davi Arnaut
` (4 subsequent siblings)
22 siblings, 0 replies; 71+ messages in thread
From: Davi Arnaut @ 2007-05-02 5:22 UTC (permalink / raw)
To: Andrew Morton; +Cc: Davide Libenzi, Linus Torvalds, Linux Kernel Mailing List
[-- Attachment #1: pollfs-aio-ring-empty.patch --]
[-- Type: text/plain, Size: 1604 bytes --]
The aio_ring_empty() function returns true if the AIO event ring has no
elements, false otherwise.
Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
---
fs/aio.c | 17 +++++++++++++++++
include/linux/aio.h | 1 +
2 files changed, 18 insertions(+)
Index: linux-2.6/fs/aio.c
===================================================================
--- linux-2.6.orig/fs/aio.c
+++ linux-2.6/fs/aio.c
@@ -1004,6 +1004,23 @@ put_rq:
return ret;
}
+int fastcall aio_ring_empty(struct kioctx *ioctx)
+{ /* returns 1 if the event ring's head equals its tail, 0 otherwise */
+ struct aio_ring_info *info = &ioctx->ring_info;
+ struct aio_ring *ring;
+ unsigned long flags;
+ int ret = 0;
+
+ spin_lock_irqsave(&ioctx->ctx_lock, flags); /* head and tail are compared under ctx_lock */
+ ring = kmap_atomic(info->ring_pages[0], KM_IRQ1); /* head and tail are read through the first ring page */
+ if (ring->head == ring->tail)
+ ret = 1;
+ kunmap_atomic(ring, KM_IRQ1);
+ spin_unlock_irqrestore(&ioctx->ctx_lock, flags);
+
+ return ret;
+}
+
/* aio_read_evt
* Pull an event off of the ioctx's event ring. Returns the number of
* events fetched (0 or 1 ;-)
Index: linux-2.6/include/linux/aio.h
===================================================================
--- linux-2.6.orig/include/linux/aio.h
+++ linux-2.6/include/linux/aio.h
@@ -202,6 +202,7 @@ extern unsigned aio_max_size;
extern ssize_t FASTCALL(wait_on_sync_kiocb(struct kiocb *iocb));
extern int FASTCALL(aio_put_req(struct kiocb *iocb));
+extern int FASTCALL(aio_ring_empty(struct kioctx *ioctx));
extern void FASTCALL(kick_iocb(struct kiocb *iocb));
extern int FASTCALL(aio_complete(struct kiocb *iocb, long res, long res2));
extern void FASTCALL(__put_ioctx(struct kioctx *ctx));
--
^ permalink raw reply [flat|nested] 71+ messages in thread
* [patch 19/22] pollfs: pollable aio
2007-05-02 5:22 [patch 00/22] pollfs: filesystem abstraction for pollable objects Davi Arnaut
` (17 preceding siblings ...)
2007-05-02 5:22 ` [patch 18/22] pollfs: check if a AIO event ring is empty Davi Arnaut
@ 2007-05-02 5:22 ` Davi Arnaut
2007-05-02 5:22 ` [patch 20/22] pollfs: export the plaio system call Davi Arnaut
` (3 subsequent siblings)
22 siblings, 0 replies; 71+ messages in thread
From: Davi Arnaut @ 2007-05-02 5:22 UTC (permalink / raw)
To: Andrew Morton; +Cc: Davide Libenzi, Linus Torvalds, Linux Kernel Mailing List
[-- Attachment #1: pollfs-aio.patch --]
[-- Type: text/plain, Size: 3414 bytes --]
Submit, retrieve, or poll aio requests for completion through a
file descriptor. User supplies an aio_context_t that is used to
fetch a reference to the kioctx. Once the file descriptor is
closed, the reference is decremented.
Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
---
fs/pollfs/Makefile | 1
fs/pollfs/aio.c | 103 +++++++++++++++++++++++++++++++++++++++++++++++++++++
init/Kconfig | 7 +++
3 files changed, 111 insertions(+)
Index: linux-2.6/fs/pollfs/Makefile
===================================================================
--- linux-2.6.orig/fs/pollfs/Makefile
+++ linux-2.6/fs/pollfs/Makefile
@@ -4,3 +4,4 @@ pollfs-y := file.o
pollfs-$(CONFIG_POLLFS_SIGNAL) += signal.o
pollfs-$(CONFIG_POLLFS_TIMER) += timer.o
pollfs-$(CONFIG_POLLFS_FUTEX) += futex.o
+pollfs-$(CONFIG_POLLFS_AIO) += aio.o
Index: linux-2.6/fs/pollfs/aio.c
===================================================================
--- /dev/null
+++ linux-2.6/fs/pollfs/aio.c
@@ -0,0 +1,103 @@
+/*
+ * pollable aio
+ *
+ * Copyright (C) 2007 Davi E. M. Arnaut
+ *
+ * Licensed under the GNU GPL. See the file COPYING for details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/wait.h>
+#include <linux/poll.h>
+#include <linux/pollfs_fs.h>
+#include <linux/aio.h>
+#include <linux/syscalls.h>
+
+struct pfs_aio {
+ struct kioctx *ioctx;
+ struct pfs_file file;
+};
+
+static ssize_t read(struct pfs_aio *evs, struct io_event __user *uioevt)
+{
+ int ret;
+
+ ret = sys_io_getevents(evs->ioctx->user_id, 0, 1, uioevt, NULL);
+
+ if (!ret)
+ ret = -EAGAIN;
+ else if (ret > 0)
+ ret = 0;
+
+ return ret;
+}
+
+static ssize_t write(struct pfs_aio *evs, const struct iocb __user *uiocb)
+{
+ struct iocb iocb;
+
+ if (copy_from_user(&iocb, uiocb, sizeof(iocb)))
+ return -EFAULT;
+
+ return io_submit_one(evs->ioctx, uiocb, &iocb);
+}
+
+static int poll(struct pfs_aio *evs)
+{
+ int ret;
+
+ ret = aio_ring_empty(evs->ioctx) ? 0 : POLLIN;
+
+ return ret;
+}
+
+static int release(struct pfs_aio *evs)
+{
+ put_ioctx(evs->ioctx);
+
+ kfree(evs);
+
+ return 0;
+}
+
+static const struct pfs_operations aio_ops = {
+ .read = PFS_READ(read, struct pfs_aio, struct io_event),
+ .write = PFS_WRITE(write, struct pfs_aio, struct iocb),
+ .poll = PFS_POLL(poll, struct pfs_aio),
+ .release = PFS_RELEASE(release, struct pfs_aio),
+ .rsize = sizeof(struct io_event),
+ .wsize = sizeof(struct iocb),
+};
+
+asmlinkage long sys_plaio(aio_context_t ctx)
+{
+ long error;
+ struct pfs_aio *evs;
+ struct kioctx *ioctx = lookup_ioctx(ctx);
+
+ if (!ioctx)
+ return -EINVAL;
+
+ evs = kzalloc(sizeof(*evs), GFP_KERNEL);
+ if (!evs) {
+ put_ioctx(ioctx);
+ return -ENOMEM;
+ }
+
+ evs->ioctx = ioctx;
+
+ evs->file.data = evs;
+ evs->file.fops = &aio_ops;
+ evs->file.wait = &ioctx->wait;
+
+ error = pfs_open(&evs->file);
+
+ if (error < 0)
+ release(evs);
+
+ return error;
+}
Index: linux-2.6/init/Kconfig
===================================================================
--- linux-2.6.orig/init/Kconfig
+++ linux-2.6/init/Kconfig
@@ -490,6 +490,13 @@ config POLLFS_FUTEX
help
Pollable futex support
+config POLLFS_AIO
+ bool "Enable pollfs aio" if EMBEDDED
+ default y
+ depends on POLLFS
+ help
+ Pollable aio support
+
config SHMEM
bool "Use full shmem filesystem" if EMBEDDED
default y
--
^ permalink raw reply [flat|nested] 71+ messages in thread
* [patch 20/22] pollfs: export the plaio system call
2007-05-02 5:22 [patch 00/22] pollfs: filesystem abstraction for pollable objects Davi Arnaut
` (18 preceding siblings ...)
2007-05-02 5:22 ` [patch 19/22] pollfs: pollable aio Davi Arnaut
@ 2007-05-02 5:22 ` Davi Arnaut
2007-05-02 5:22 ` [patch 21/22] pollfs: x86, wire up " Davi Arnaut
` (2 subsequent siblings)
22 siblings, 0 replies; 71+ messages in thread
From: Davi Arnaut @ 2007-05-02 5:22 UTC (permalink / raw)
To: Andrew Morton; +Cc: Davide Libenzi, Linus Torvalds, Linux Kernel Mailing List
[-- Attachment #1: pollfs-aio-syscall.patch --]
[-- Type: text/plain, Size: 1000 bytes --]
Export the new plaio syscall prototype. While there, make it "conditional".
Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
---
include/linux/syscalls.h | 2 ++
kernel/sys_ni.c | 1 +
2 files changed, 3 insertions(+)
Index: linux-2.6/include/linux/syscalls.h
===================================================================
--- linux-2.6.orig/include/linux/syscalls.h
+++ linux-2.6/include/linux/syscalls.h
@@ -611,4 +611,6 @@ asmlinkage long sys_pltimer(void);
asmlinkage long sys_plfutex(void);
+asmlinkage long sys_plaio(aio_context_t ctx);
+
#endif
Index: linux-2.6/kernel/sys_ni.c
===================================================================
--- linux-2.6.orig/kernel/sys_ni.c
+++ linux-2.6/kernel/sys_ni.c
@@ -115,6 +115,7 @@ cond_syscall(compat_sys_sysctl);
cond_syscall(sys_plsignal);
cond_syscall(sys_pltimer);
cond_syscall(sys_plfutex);
+cond_syscall(sys_plaio);
/* arch-specific weak syscall entries */
cond_syscall(sys_pciconfig_read);
--
^ permalink raw reply [flat|nested] 71+ messages in thread
* [patch 21/22] pollfs: x86, wire up the plaio system call
2007-05-02 5:22 [patch 00/22] pollfs: filesystem abstraction for pollable objects Davi Arnaut
` (19 preceding siblings ...)
2007-05-02 5:22 ` [patch 20/22] pollfs: export the plaio system call Davi Arnaut
@ 2007-05-02 5:22 ` Davi Arnaut
2007-05-02 5:22 ` [patch 22/22] pollfs: x86_64, " Davi Arnaut
2007-05-02 6:05 ` [patch 00/22] pollfs: filesystem abstraction for pollable objects Andrew Morton
22 siblings, 0 replies; 71+ messages in thread
From: Davi Arnaut @ 2007-05-02 5:22 UTC (permalink / raw)
To: Andrew Morton; +Cc: Davide Libenzi, Linus Torvalds, Linux Kernel Mailing List
[-- Attachment #1: pollfs-aio-i386-syscall.patch --]
[-- Type: text/plain, Size: 1078 bytes --]
Make the plaio syscall available to user-space on x86.
Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
---
arch/i386/kernel/syscall_table.S | 1 +
include/asm-i386/unistd.h | 3 ++-
2 files changed, 3 insertions(+), 1 deletion(-)
Index: linux-2.6/include/asm-i386/unistd.h
===================================================================
--- linux-2.6.orig/include/asm-i386/unistd.h
+++ linux-2.6/include/asm-i386/unistd.h
@@ -328,10 +328,11 @@
#define __NR_plsignal 320
#define __NR_pltimer 321
#define __NR_plfutex 322
+#define __NR_plaio 323
#ifdef __KERNEL__
-#define NR_syscalls 323
+#define NR_syscalls 324
#define __ARCH_WANT_IPC_PARSE_VERSION
#define __ARCH_WANT_OLD_READDIR
Index: linux-2.6/arch/i386/kernel/syscall_table.S
===================================================================
--- linux-2.6.orig/arch/i386/kernel/syscall_table.S
+++ linux-2.6/arch/i386/kernel/syscall_table.S
@@ -322,3 +322,4 @@ ENTRY(sys_call_table)
.long sys_plsignal /* 320 */
.long sys_pltimer
.long sys_plfutex
+ .long sys_plaio
--
^ permalink raw reply [flat|nested] 71+ messages in thread
* [patch 22/22] pollfs: x86_64, wire up the plaio system call
2007-05-02 5:22 [patch 00/22] pollfs: filesystem abstraction for pollable objects Davi Arnaut
` (20 preceding siblings ...)
2007-05-02 5:22 ` [patch 21/22] pollfs: x86, wire up " Davi Arnaut
@ 2007-05-02 5:22 ` Davi Arnaut
2007-05-02 6:05 ` [patch 00/22] pollfs: filesystem abstraction for pollable objects Andrew Morton
22 siblings, 0 replies; 71+ messages in thread
From: Davi Arnaut @ 2007-05-02 5:22 UTC (permalink / raw)
To: Andrew Morton; +Cc: Davide Libenzi, Linus Torvalds, Linux Kernel Mailing List
[-- Attachment #1: pollfs-aio-x86_64-syscall.patch --]
[-- Type: text/plain, Size: 1167 bytes --]
Make the plaio syscall available to user-space on x86_64.
Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
---
arch/x86_64/ia32/ia32entry.S | 1 +
include/asm-x86_64/unistd.h | 4 +++-
2 files changed, 4 insertions(+), 1 deletion(-)
Index: linux-2.6/arch/x86_64/ia32/ia32entry.S
===================================================================
--- linux-2.6.orig/arch/x86_64/ia32/ia32entry.S
+++ linux-2.6/arch/x86_64/ia32/ia32entry.S
@@ -722,4 +722,5 @@ ia32_sys_call_table:
.quad sys_plsignal /* 320 */
.quad sys_pltimer
.quad sys_plfutex
+ .quad sys_plaio
ia32_syscall_end:
Index: linux-2.6/include/asm-x86_64/unistd.h
===================================================================
--- linux-2.6.orig/include/asm-x86_64/unistd.h
+++ linux-2.6/include/asm-x86_64/unistd.h
@@ -625,8 +625,10 @@ __SYSCALL(__NR_plsignal, sys_plsignal)
__SYSCALL(__NR_pltimer, sys_pltimer)
#define __NR_plfutex 282
__SYSCALL(__NR_plfutex, sys_plfutex)
+#define __NR_plaio 283
+__SYSCALL(__NR_plaio, sys_plaio)
-#define __NR_syscall_max __NR_plfutex
+#define __NR_syscall_max __NR_plaio
#ifndef __NO_STUBS
#define __ARCH_WANT_OLD_READDIR
--
^ permalink raw reply [flat|nested] 71+ messages in thread
* Re: [patch 14/22] pollfs: pollable futex
2007-05-02 5:22 ` [patch 14/22] pollfs: pollable futex Davi Arnaut
@ 2007-05-02 5:54 ` Eric Dumazet
2007-05-02 6:16 ` Davi Arnaut
2007-05-02 7:40 ` Ulrich Drepper
1 sibling, 1 reply; 71+ messages in thread
From: Eric Dumazet @ 2007-05-02 5:54 UTC (permalink / raw)
To: Davi Arnaut
Cc: Andrew Morton, Davide Libenzi, Linus Torvalds, Linux Kernel Mailing List
Davi Arnaut a écrit :
> Asynchronously wait for FUTEX_WAKE operation on a futex if it still contains
> a given value. There can be only one futex wait per file descriptor. However,
> it can be rearmed (possibly at a different address) anytime.
>
> The pollable futex approach is far superior (send and receive events from
> userspace or kernel) to eventfd and fixes (supercedes) FUTEX_FD at the same time.
>
> Building block for pollable semaphores and user-defined events.
>
> Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
>
> ---
> fs/pollfs/Makefile | 1
> fs/pollfs/futex.c | 154 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> init/Kconfig | 7 ++
> 3 files changed, 162 insertions(+)
>
> Index: linux-2.6/fs/pollfs/Makefile
> ===================================================================
> --- linux-2.6.orig/fs/pollfs/Makefile
> +++ linux-2.6/fs/pollfs/Makefile
> @@ -3,3 +3,4 @@ pollfs-y := file.o
>
> pollfs-$(CONFIG_POLLFS_SIGNAL) += signal.o
> pollfs-$(CONFIG_POLLFS_TIMER) += timer.o
> +pollfs-$(CONFIG_POLLFS_FUTEX) += futex.o
> Index: linux-2.6/fs/pollfs/futex.c
> ===================================================================
> --- /dev/null
> +++ linux-2.6/fs/pollfs/futex.c
> @@ -0,0 +1,154 @@
> +/*
> + * pollable futex
> + *
> + * Copyright (C) 2007 Davi E. M. Arnaut
> + *
> + * Licensed under the GNU GPL. See the file COPYING for details.
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/sched.h>
> +#include <linux/module.h>
> +#include <linux/slab.h>
> +#include <linux/err.h>
> +#include <linux/wait.h>
> +#include <linux/poll.h>
> +#include <linux/pollfs_fs.h>
> +#include <linux/futex.h>
> +
> +struct futex_event {
> + union {
> + void __user *addr;
> + u64 padding;
> + };
> + int val;
> +};
Hum... Here we might have a problem with 64 bit futexes, or private futexes
So I believe this interface is not well defined and not expandable: in case of
future additions to futexes, an old application compiled with an old pollable
futex_event type might fail.
^ permalink raw reply [flat|nested] 71+ messages in thread
* Re: [patch 00/22] pollfs: filesystem abstraction for pollable objects
2007-05-02 5:22 [patch 00/22] pollfs: filesystem abstraction for pollable objects Davi Arnaut
` (21 preceding siblings ...)
2007-05-02 5:22 ` [patch 22/22] pollfs: x86_64, " Davi Arnaut
@ 2007-05-02 6:05 ` Andrew Morton
2007-05-02 17:28 ` Davide Libenzi
22 siblings, 1 reply; 71+ messages in thread
From: Andrew Morton @ 2007-05-02 6:05 UTC (permalink / raw)
To: Davi Arnaut; +Cc: Davide Libenzi, Linus Torvalds, Linux Kernel Mailing List
On Wed, 02 May 2007 02:22:35 -0300 Davi Arnaut <davi@haxent.com.br> wrote:
> This patch set introduces a new file system for the delivery of pollable
> events through file descriptors. To the detriment of debugability, pollable
> objects are a nice adjunct to nonblocking/epoll/event-based servers.
>
> The pollfs filesystem abstraction provides better mechanisms needed for
> creating and maintaining pollable objects. Also the pollable futex approach
> is far superior (send and receive events from userspace or kernel) to eventfd
> and fixes (supercedes) FUTEX_FD at the same time.
>
> The (non) blocking and object size (user <-> kernel) semantics and are handled
> internally, decoupling the core filesystem from the "subsystems" (mere push and
> pop operations).
>
> Currently implemented waitable "objects" are: signals, futexes, ai/o blocks and
> timers.
Well that throws a spanner in the signalfd works. The code _looks_ nice
and simple and clean from a quick scan.
David, could you provide some feedback please? The patches are stunningly
free of comments, but you used to do that to me pretty often so my sympathy
is limited ;)
^ permalink raw reply [flat|nested] 71+ messages in thread
* Re: [patch 14/22] pollfs: pollable futex
2007-05-02 5:54 ` Eric Dumazet
@ 2007-05-02 6:16 ` Davi Arnaut
2007-05-02 6:39 ` Eric Dumazet
0 siblings, 1 reply; 71+ messages in thread
From: Davi Arnaut @ 2007-05-02 6:16 UTC (permalink / raw)
To: Eric Dumazet
Cc: Andrew Morton, Davide Libenzi, Linus Torvalds, Linux Kernel Mailing List
Eric Dumazet wrote:
> Davi Arnaut a écrit :
>> Asynchronously wait for FUTEX_WAKE operation on a futex if it still contains
>> a given value. There can be only one futex wait per file descriptor. However,
>> it can be rearmed (possibly at a different address) anytime.
>>
>> The pollable futex approach is far superior (send and receive events from
>> userspace or kernel) to eventfd and fixes (supercedes) FUTEX_FD at the same time.
>>
>> Building block for pollable semaphores and user-defined events.
>>
>> Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
>>
>> ---
>> fs/pollfs/Makefile | 1
>> fs/pollfs/futex.c | 154 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>> init/Kconfig | 7 ++
>> 3 files changed, 162 insertions(+)
>>
>> Index: linux-2.6/fs/pollfs/Makefile
>> ===================================================================
>> --- linux-2.6.orig/fs/pollfs/Makefile
>> +++ linux-2.6/fs/pollfs/Makefile
>> @@ -3,3 +3,4 @@ pollfs-y := file.o
>>
>> pollfs-$(CONFIG_POLLFS_SIGNAL) += signal.o
>> pollfs-$(CONFIG_POLLFS_TIMER) += timer.o
>> +pollfs-$(CONFIG_POLLFS_FUTEX) += futex.o
>> Index: linux-2.6/fs/pollfs/futex.c
>> ===================================================================
>> --- /dev/null
>> +++ linux-2.6/fs/pollfs/futex.c
>> @@ -0,0 +1,154 @@
>> +/*
>> + * pollable futex
>> + *
>> + * Copyright (C) 2007 Davi E. M. Arnaut
>> + *
>> + * Licensed under the GNU GPL. See the file COPYING for details.
>> + */
>> +
>> +#include <linux/kernel.h>
>> +#include <linux/sched.h>
>> +#include <linux/module.h>
>> +#include <linux/slab.h>
>> +#include <linux/err.h>
>> +#include <linux/wait.h>
>> +#include <linux/poll.h>
>> +#include <linux/pollfs_fs.h>
>> +#include <linux/futex.h>
>> +
>> +struct futex_event {
>> + union {
>> + void __user *addr;
>> + u64 padding;
>> + };
>> + int val;
>> +};
>
> Hum... Here we might have a problem with 64 bit futexes, or private futexes
>
> So I believe this interface is not well defined and not expandable: in case of
> future additions to futexes, an old application compiled with an old pollable
> futex_event type might fail.
>
Hmm, how about:
struct futex_event {
union {
void __user *addr;
u64 padding;
};
union {
int val;
s64 val64;
};
/* whatever room is necessary for future improvements */
};
I haven't been keeping up with 64 bit or private futexes. What else
could probably go wrong?
--
Davi Arnaut
^ permalink raw reply [flat|nested] 71+ messages in thread
* Re: [patch 14/22] pollfs: pollable futex
2007-05-02 6:16 ` Davi Arnaut
@ 2007-05-02 6:39 ` Eric Dumazet
2007-05-02 6:54 ` Davi Arnaut
2007-05-02 7:11 ` Davi Arnaut
0 siblings, 2 replies; 71+ messages in thread
From: Eric Dumazet @ 2007-05-02 6:39 UTC (permalink / raw)
To: Davi Arnaut
Cc: Andrew Morton, Davide Libenzi, Linus Torvalds, Linux Kernel Mailing List
Davi Arnaut a écrit :
> Eric Dumazet wrote:
>> Davi Arnaut a écrit :
>>> Asynchronously wait for FUTEX_WAKE operation on a futex if it still contains
>>> a given value. There can be only one futex wait per file descriptor. However,
>>> it can be rearmed (possibly at a different address) anytime.
>>>
>>> The pollable futex approach is far superior (send and receive events from
>>> userspace or kernel) to eventfd and fixes (supercedes) FUTEX_FD at the same time.
>>>
>>> Building block for pollable semaphores and user-defined events.
>>>
>>> Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
>>>
>>> ---
>>> fs/pollfs/Makefile | 1
>>> fs/pollfs/futex.c | 154 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>> init/Kconfig | 7 ++
>>> 3 files changed, 162 insertions(+)
>>>
>>> Index: linux-2.6/fs/pollfs/Makefile
>>> ===================================================================
>>> --- linux-2.6.orig/fs/pollfs/Makefile
>>> +++ linux-2.6/fs/pollfs/Makefile
>>> @@ -3,3 +3,4 @@ pollfs-y := file.o
>>>
>>> pollfs-$(CONFIG_POLLFS_SIGNAL) += signal.o
>>> pollfs-$(CONFIG_POLLFS_TIMER) += timer.o
>>> +pollfs-$(CONFIG_POLLFS_FUTEX) += futex.o
>>> Index: linux-2.6/fs/pollfs/futex.c
>>> ===================================================================
>>> --- /dev/null
>>> +++ linux-2.6/fs/pollfs/futex.c
>>> @@ -0,0 +1,154 @@
>>> +/*
>>> + * pollable futex
>>> + *
>>> + * Copyright (C) 2007 Davi E. M. Arnaut
>>> + *
>>> + * Licensed under the GNU GPL. See the file COPYING for details.
>>> + */
>>> +
>>> +#include <linux/kernel.h>
>>> +#include <linux/sched.h>
>>> +#include <linux/module.h>
>>> +#include <linux/slab.h>
>>> +#include <linux/err.h>
>>> +#include <linux/wait.h>
>>> +#include <linux/poll.h>
>>> +#include <linux/pollfs_fs.h>
>>> +#include <linux/futex.h>
>>> +
>>> +struct futex_event {
>>> + union {
>>> + void __user *addr;
>>> + u64 padding;
>>> + };
>>> + int val;
>>> +};
>> Hum... Here we might have a problem with 64 bit futexes, or private futexes
>>
>> So I believe this interface is not well defined and not expandable: in case of
>> future additions to futexes, an old application compiled with an old pollable
>> futex_event type might fail.
>>
>
> Hmm, how about:
>
> struct futex_event {
> union {
> void __user *addr;
> u64 padding;
> };
> union {
> int val;
> s64 val64;
> };
> /* whatever room is necessary for future improvements */
> };
>
> I haven't been keeping up with 64 bit or private futexes. What else
> could probably go wrong?
Well, that's the point : This interface is like an ioctl() one : pretty bad if
not properly designed :)
You probably need to stick one field containing one command or version number,
something like that.
struct futex_event {
int type;
union {
void __user *addr;
u64 padding;
};
union {
int val;
s64 val64;
};
};
#define FUTEX_EVENT_SHARED32 1
#define FUTEX_EVENT_SHARED64 2
#define FUTEX_EVENT_PRIVATE32 (128|1)
#define FUTEX_EVENT_PRIVATE64 (128|2)
...
Also, you should take care of alignment constraints (a 32-bit user program
might run on a 64-bit kernel)
^ permalink raw reply [flat|nested] 71+ messages in thread
* Re: [patch 14/22] pollfs: pollable futex
2007-05-02 6:39 ` Eric Dumazet
@ 2007-05-02 6:54 ` Davi Arnaut
2007-05-02 7:11 ` Davi Arnaut
1 sibling, 0 replies; 71+ messages in thread
From: Davi Arnaut @ 2007-05-02 6:54 UTC (permalink / raw)
To: Eric Dumazet
Cc: Andrew Morton, Davide Libenzi, Linus Torvalds, Linux Kernel Mailing List
Eric Dumazet wrote:
> Davi Arnaut a écrit :
>> Eric Dumazet wrote:
>>> Davi Arnaut a écrit :
>>>> Asynchronously wait for FUTEX_WAKE operation on a futex if it still contains
>>>> a given value. There can be only one futex wait per file descriptor. However,
>>>> it can be rearmed (possibly at a different address) anytime.
>>>>
>>>> The pollable futex approach is far superior (send and receive events from
>>>> userspace or kernel) to eventfd and fixes (supercedes) FUTEX_FD at the same time.
>>>>
>>>> Building block for pollable semaphores and user-defined events.
>>>>
>>>> Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
>>>>
<snip>
>>>> +
>>>> +struct futex_event {
>>>> + union {
>>>> + void __user *addr;
>>>> + u64 padding;
>>>> + };
>>>> + int val;
>>>> +};
>>> Hum... Here we might have a problem with 64 bit futexes, or private futexes
>>>
>>> So I believe this interface is not well defined and not expandable: in case of
>>> future additions to futexes, an old application compiled with an old pollable
>>> futex_event type might fail.
>>>
>> Hmm, how about:
>>
>> struct futex_event {
>> union {
>> void __user *addr;
>> u64 padding;
>> };
>> union {
>> int val;
>> s64 val64;
>> };
>> /* whatever room is necessary for future improvements */
>> };
>>
>> I haven't been keeping up with 64 bit or private futexes. What else
>> could probably go wrong?
>
> Well, that's the point : This interface is like an ioctl() one : pretty bad if
> not properly designed :)
I was merely mirroring the futex syscall arguments for FUTEX_WAIT. Will
those change? I hope not :)
> You probably need to stick one field containing one command or version number,
> something like that.
I'm a bit skeptical that we need versioning for such a simple operation
(command) as FUTEX_WAIT that takes an address and a value.
>
>
> struct futex_event {
> int type;
> union {
> void __user *addr;
> u64 padding;
> };
> union {
> int val;
> s64 val64;
> };
> };
>
> #define FUTEX_EVENT_SHARED32 1
> #define FUTEX_EVENT_SHARED64 2
> #define FUTEX_EVENT_PRIVATE32 (128|1)
> #define FUTEX_EVENT_PRIVATE64 (128|2)
I will take a look at the private futexes patches before commenting further.
> ...
>
> Also, you should take care of alignements constraints (a 32bit user program
> might run on a 64bit kernel)
>
Compat code? Or futex alignment constraints?
--
Davi Arnaut
^ permalink raw reply [flat|nested] 71+ messages in thread
* Re: [patch 14/22] pollfs: pollable futex
2007-05-02 6:39 ` Eric Dumazet
2007-05-02 6:54 ` Davi Arnaut
@ 2007-05-02 7:11 ` Davi Arnaut
1 sibling, 0 replies; 71+ messages in thread
From: Davi Arnaut @ 2007-05-02 7:11 UTC (permalink / raw)
To: Eric Dumazet
Cc: Andrew Morton, Davide Libenzi, Linus Torvalds, Linux Kernel Mailing List
Eric Dumazet wrote:
> Davi Arnaut a écrit :
>> Eric Dumazet wrote:
>>> Davi Arnaut a écrit :
>>>> Asynchronously wait for FUTEX_WAKE operation on a futex if it still contains
>>>> a given value. There can be only one futex wait per file descriptor. However,
>>>> it can be rearmed (possibly at a different address) anytime.
>>>>
>>>> The pollable futex approach is far superior (send and receive events from
>>>> userspace or kernel) to eventfd and fixes (supercedes) FUTEX_FD at the same time.
>>>>
>>>> Building block for pollable semaphores and user-defined events.
>>>>
>>>> Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
>>>>
>>>> ---
>>>> fs/pollfs/Makefile | 1
>>>> fs/pollfs/futex.c | 154 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>> init/Kconfig | 7 ++
>>>> 3 files changed, 162 insertions(+)
>>>>
>>>> Index: linux-2.6/fs/pollfs/Makefile
>>>> ===================================================================
>>>> --- linux-2.6.orig/fs/pollfs/Makefile
>>>> +++ linux-2.6/fs/pollfs/Makefile
>>>> @@ -3,3 +3,4 @@ pollfs-y := file.o
>>>>
>>>> pollfs-$(CONFIG_POLLFS_SIGNAL) += signal.o
>>>> pollfs-$(CONFIG_POLLFS_TIMER) += timer.o
>>>> +pollfs-$(CONFIG_POLLFS_FUTEX) += futex.o
>>>> Index: linux-2.6/fs/pollfs/futex.c
>>>> ===================================================================
>>>> --- /dev/null
>>>> +++ linux-2.6/fs/pollfs/futex.c
>>>> @@ -0,0 +1,154 @@
>>>> +/*
>>>> + * pollable futex
>>>> + *
>>>> + * Copyright (C) 2007 Davi E. M. Arnaut
>>>> + *
>>>> + * Licensed under the GNU GPL. See the file COPYING for details.
>>>> + */
>>>> +
>>>> +#include <linux/kernel.h>
>>>> +#include <linux/sched.h>
>>>> +#include <linux/module.h>
>>>> +#include <linux/slab.h>
>>>> +#include <linux/err.h>
>>>> +#include <linux/wait.h>
>>>> +#include <linux/poll.h>
>>>> +#include <linux/pollfs_fs.h>
>>>> +#include <linux/futex.h>
>>>> +
>>>> +struct futex_event {
>>>> + union {
>>>> + void __user *addr;
>>>> + u64 padding;
>>>> + };
>>>> + int val;
>>>> +};
>>> Hum... Here we might have a problem with 64 bit futexes, or private futexes
>>>
>>> So I believe this interface is not well defined and not expandable: in case of
>>> future additions to futexes, an old application compiled with an old pollable
>>> futex_event type might fail.
>>>
>> Hmm, how about:
>>
>> struct futex_event {
>> union {
>> void __user *addr;
>> u64 padding;
>> };
>> union {
>> int val;
>> s64 val64;
>> };
>> /* whatever room is necessary for future improvements */
>> };
>>
>> I haven't been keeping up with 64 bit or private futexes. What else
>> could probably go wrong?
>
> Well, that's the point : This interface is like an ioctl() one : pretty bad if
> not properly designed :)
>
> You probably need to stick one field containing one command or version number,
> something like that.
>
>
> struct futex_event {
> int type;
> union {
> void __user *addr;
> u64 padding;
> };
> union {
> int val;
> s64 val64;
> };
> };
>
> #define FUTEX_EVENT_SHARED32 1
> #define FUTEX_EVENT_SHARED64 2
> #define FUTEX_EVENT_PRIVATE32 (128|1)
> #define FUTEX_EVENT_PRIVATE64 (128|2)
>
I'm changing the structure to:
struct futex_event {
union {
void __user *addr;
u64 addr64;
};
union {
int val;
s64 val64;
};
union {
s32 flags;
s64 flags64;
};
};
Plenty of room for future FUTEX_WAIT growth?
--
Davi Arnaut
^ permalink raw reply [flat|nested] 71+ messages in thread
* Re: [patch 14/22] pollfs: pollable futex
2007-05-02 5:22 ` [patch 14/22] pollfs: pollable futex Davi Arnaut
2007-05-02 5:54 ` Eric Dumazet
@ 2007-05-02 7:40 ` Ulrich Drepper
2007-05-02 7:55 ` Eric Dumazet
` (2 more replies)
1 sibling, 3 replies; 71+ messages in thread
From: Ulrich Drepper @ 2007-05-02 7:40 UTC (permalink / raw)
To: Davi Arnaut
Cc: Andrew Morton, Davide Libenzi, Linus Torvalds, Linux Kernel Mailing List
On 5/1/07, Davi Arnaut <davi@haxent.com.br> wrote:
> The pollable futex approach is far superior (send and receive events from
> userspace or kernel) to eventfd and fixes (supercedes) FUTEX_FD at the same time.
> [...]
You have to explain in detail how these interfaces are supposed to
work. At first sight, and without understanding (all of) it, it seems
it's far from useful.
Pollable futexes are useful, but any solution which gets implemented
must be sufficiently useful for all the uses we might have.
- the trivial case is that you have a futex and you are just interested in
seeing it change. The same as FUTEX_WAIT. I cannot figure out how all this
works in your code. Does your read() call (that's the one to wait, yes?)
work with O_NONBLOCK, or how else do you get that behavior?
- more complicated case: I have to wait for multiple futexes and lock
them all at the same time or don't return at all. This is possible with
SysV semaphores and generally useful and needed. How can this be
implemented with your scheme?
- how does it work with PI futexes?
- can I use a futex at the same time through this mechanism and using the normal
FUTEX_WAIT operation? This is a killer if it's not the case.
- if you have multiple threads polling a futex and the waker wakes up
one, what happens? It is simply not acceptable to have more than one
thread return from the poll() call, this would waste too many cycles,
just to put all threads but one back to sleep.
^ permalink raw reply [flat|nested] 71+ messages in thread
* Re: [patch 14/22] pollfs: pollable futex
2007-05-02 7:40 ` Ulrich Drepper
@ 2007-05-02 7:55 ` Eric Dumazet
2007-05-02 8:08 ` Ulrich Drepper
2007-05-02 12:20 ` Davi Arnaut
2007-05-02 12:39 ` Davi Arnaut
2 siblings, 1 reply; 71+ messages in thread
From: Eric Dumazet @ 2007-05-02 7:55 UTC (permalink / raw)
To: Ulrich Drepper
Cc: Davi Arnaut, Andrew Morton, Davide Libenzi, Linus Torvalds,
Linux Kernel Mailing List
On Wed, 2 May 2007 00:40:17 -0700
"Ulrich Drepper" <drepper@gmail.com> wrote:
> - if you have multiple threads polling a futex and the waker wakes up
> one, what happens?
> It is simply not acceptable to have more than one thread return from
> the poll() call, this
> would waste too many cycles, just to put all threads but one back to sleep.
>
Well, poll() level edge semantic is well defined, you cannot cheat or change it.
If many threads call poll() on the same end point, they should *all* return POLLIN/whatever status.
This is why programs usually use one thread to dispatch events to workers, or at least don't queue XXXX threads calling poll() on one fd.
Only system calls that actually return a 'work_done' can avoid waking all waiting threads and putting them back in the queue. Examples of such system calls are accept() or read()
^ permalink raw reply [flat|nested] 71+ messages in thread
* Re: [patch 14/22] pollfs: pollable futex
2007-05-02 7:55 ` Eric Dumazet
@ 2007-05-02 8:08 ` Ulrich Drepper
2007-05-02 8:49 ` Eric Dumazet
0 siblings, 1 reply; 71+ messages in thread
From: Ulrich Drepper @ 2007-05-02 8:08 UTC (permalink / raw)
To: Eric Dumazet
Cc: Davi Arnaut, Andrew Morton, Davide Libenzi, Linus Torvalds,
Linux Kernel Mailing List
On 5/2/07, Eric Dumazet <dada1@cosmosbay.com> wrote:
> Well, poll() level edge semantic is well defined, you cannot cheat or change it.
>
> If many threads call poll() on the same end point, they should *all* return POLLIN/whatever status.
This means to me it's the wrong abstraction for this. We had a nice
solution for this with Evgeniy's kevent interfaces. It worked without
forcing futexes into this inflexible poll() interface.
> This is why programs usually use one thread to dispatch events to workers, or at least dont queue XXXX threads calling poll() on one fd.
No. This is why programs are forced to waste cycles by doing this.
Ideally this would not happen. Ideally you'd park all worker threads
in the same place and have them woken up one by one. Again, Evgeniy's
code was able to do this. This approach seems to be a big step
backward.
^ permalink raw reply [flat|nested] 71+ messages in thread
* Re: [patch 14/22] pollfs: pollable futex
2007-05-02 8:08 ` Ulrich Drepper
@ 2007-05-02 8:49 ` Eric Dumazet
2007-05-02 16:39 ` Ulrich Drepper
0 siblings, 1 reply; 71+ messages in thread
From: Eric Dumazet @ 2007-05-02 8:49 UTC (permalink / raw)
To: Ulrich Drepper
Cc: Davi Arnaut, Andrew Morton, Davide Libenzi, Linus Torvalds,
Linux Kernel Mailing List
On Wed, 2 May 2007 01:08:26 -0700
"Ulrich Drepper" <drepper@gmail.com> wrote:
> On 5/2/07, Eric Dumazet <dada1@cosmosbay.com> wrote:
> > Well, poll() level edge semantic is well defined, you cannot cheat or change it.
> >
> > If many threads call poll() on the same end point, they should *all* return POLLIN/whatever status.
>
> This means to me it's the wrong abstraction for this. We had a nice
> solution for this with Evgeniy's kevent interfaces. It worked without
> forcing futexes is this inflexible poll() interface.
poll() is a generalist interface. Not the *perfect* one, but widespread on other OSes as well.
>
>
>
> > This is why programs usually use one thread to dispatch events to workers, or at least dont queue XXXX threads calling poll() on one fd.
>
> No. This is why programs are forced to waste cycles by doing this.
> Ideally this would not happen. Ideally you'd park all worker thread
> in the same place and have them woken up one by one. Again, Evgeniy's
> code was able to do this. This approach seems to be a big step
> backward.
I understand your concerns, but *this* patch bundle extends poll()/select()/epoll, and is not an alternative to kevent or other work in progress, (and linux centered)
Are you suggesting poll() system call should be deprecated ?
Most programs still use the archaic select() thing you know ...
^ permalink raw reply [flat|nested] 71+ messages in thread
* Re: [patch 14/22] pollfs: pollable futex
2007-05-02 7:40 ` Ulrich Drepper
2007-05-02 7:55 ` Eric Dumazet
@ 2007-05-02 12:20 ` Davi Arnaut
2007-05-02 12:39 ` Davi Arnaut
2 siblings, 0 replies; 71+ messages in thread
From: Davi Arnaut @ 2007-05-02 12:20 UTC (permalink / raw)
To: Ulrich Drepper
Cc: Andrew Morton, Davide Libenzi, Linus Torvalds, Linux Kernel Mailing List
Ulrich Drepper wrote:
> On 5/1/07, Davi Arnaut <davi@haxent.com.br> wrote:
>> The pollable futex approach is far superior (send and receive events from
>> userspace or kernel) to eventfd and fixes (supercedes) FUTEX_FD at the same time.
>> [...]
>
> You have to explain in detail how these interfaces are supposed to
> work. From first sight and without understanding (all) the it seems
> it's far from useful.
It's basically an asynchronous FUTEX_WAIT with notification delivery
through a file descriptor.
> Pollable futexes are useful, but any solution which gets implemented
> must be sufficiently useful for all the uses we might have.
It's very useful for asynchronous event notification libraries
(libevent, liboop, libivykis, etc) because it integrates nicely with
their (e)poll main loops.
Usage scenario: you have 10 worker threads (and 10 futexes) for disk
i/o (or whatever) and one manager thread which is a state machine
serving many clients (epoll loop).
In this scenario the worker threads have only two possible ways of
notifying the manager thread once a job is done: signals and pipe tricks.
For libraries, signals sux. They don't integrate well with poll() loops,
may have overflow issues (RT), and signal numbers may clash with other
libraries/code. The self-pipe trick wastes resources (mostly unused pipe
buffer).
By using pollable futexes, all the manager thread has to do is to
associate each of these futexes with a file descriptor (plfutex) and
epoll() for their completion. Once the futex is signaled, epoll()
returns POLLIN for the file descriptor and the manager thread may
dequeue the notification status from anywhere.
> - the trivial is that you have a futex and you are just interest in
> seeing it change. The same as FUTEX_WAIT.
I'm just interested in seeing a FUTEX_WAKE. Yes, same as FUTEX_WAIT.
> I cannot figure out how all this works in your code.
Every futex has a wait queue (q->waiters) which is used to track
processes waiting on the futex. When the futex receives a FUTEX_WAKE it
wakes up all waiters on the wait queue. Also, a futex is considered
woken when its wait queue is empty (or lock_ptr == NULL).
When you register a file descriptor with select(), poll() or epoll() a
callback is queued into the futex wait queue. When the futex receives a
FUTEX_WAKE every callback is called and the event is registered within
each select(), poll() or epoll() table. This initiates a chain reaction
waking up all processes sleeping on poll()/whatever.
> Does your read() call (that's the one to wait, yes?) work with O_NONBLOCK
> or how else do you get that behavior?
If the fd is marked O_NONBLOCK and the futex is not woken yet, it simply
returns -EAGAIN (pfs_read_nonblock). If O_NONBLOCK is not set, it waits
synchronously (pfs_read_block/wait_event_interruptible) on the futex
wait queue.
> - more complicated case: I have to wait for multiple futexes and lock
> them all at the same time or don't return at all. This is possible with
> SysV semaphores and generally useful and needed. How can this be
> implemented with your scheme?
Remember, it's only about FUTEX_WAIT.
> - how does it work with PI futexes?
It doesn't work. AFAICS PI futexes don't use FUTEX_WAKE.
> - can I use a futex at the same time through this mechanism and using the normal
> FUTEX_WAIT operation? This is a killer if it's not the case.
Yes.
> - if you have multiple threads polling a futex and the waker wakes up
> one, what happens? It is simply not acceptable to have more than one
> thread return from the poll() call, this would waste too many cycles,
> just to put all threads but one back to sleep.
Only one is woken up (whatever matches first on the futex hashed bucket).
--
Davi Arnaut
^ permalink raw reply [flat|nested] 71+ messages in thread
* Re: [patch 14/22] pollfs: pollable futex
2007-05-02 7:40 ` Ulrich Drepper
2007-05-02 7:55 ` Eric Dumazet
2007-05-02 12:20 ` Davi Arnaut
@ 2007-05-02 12:39 ` Davi Arnaut
2007-05-02 16:46 ` Ulrich Drepper
2 siblings, 1 reply; 71+ messages in thread
From: Davi Arnaut @ 2007-05-02 12:39 UTC (permalink / raw)
To: Ulrich Drepper
Cc: Andrew Morton, Davide Libenzi, Linus Torvalds, Linux Kernel Mailing List
Ulrich Drepper wrote:
> On 5/1/07, Davi Arnaut <davi@haxent.com.br> wrote:
>> The pollable futex approach is far superior (send and receive events from
>> userspace or kernel) to eventfd and fixes (supercedes) FUTEX_FD at the same time.
>> [...]
>
<snip>
>
> - more complicated case: I have to wait for multiple futexes and lock
> them all at the same time or don't return at all. This is possible with
> SysV semaphores and generally useful and needed.
> How can this be implemented with your scheme?
It's quite easy to implement this scheme by write()ing the futexes all
at once but that would break the one futex per fd association. For
atomicity: if one of the futexes can't be queued, we would rollback
(unqueue) the others.
Sounds sane?
--
Davi Arnaut
^ permalink raw reply [flat|nested] 71+ messages in thread
* Re: [patch 14/22] pollfs: pollable futex
2007-05-02 8:49 ` Eric Dumazet
@ 2007-05-02 16:39 ` Ulrich Drepper
2007-05-02 16:59 ` Davi Arnaut
0 siblings, 1 reply; 71+ messages in thread
From: Ulrich Drepper @ 2007-05-02 16:39 UTC (permalink / raw)
To: Eric Dumazet
Cc: Davi Arnaut, Andrew Morton, Davide Libenzi, Linus Torvalds,
Linux Kernel Mailing List
On 5/2/07, Eric Dumazet <dada1@cosmosbay.com> wrote:
> I understand your concerns, but *this* patch bundle extends poll()/select()/epoll, and is not an alternative to kevent or other work in progress, (and linux centered)
It is adding huge amounts of complexity and at the same time is not
future-safe. I consider this enough reason to reject this approach.
You never can get rid of the interface. It's much cleaner and safer
to do it right instead of piling on more and more workarounds for
special situations.
^ permalink raw reply [flat|nested] 71+ messages in thread
* Re: [patch 14/22] pollfs: pollable futex
2007-05-02 12:39 ` Davi Arnaut
@ 2007-05-02 16:46 ` Ulrich Drepper
2007-05-02 17:05 ` Davi Arnaut
0 siblings, 1 reply; 71+ messages in thread
From: Ulrich Drepper @ 2007-05-02 16:46 UTC (permalink / raw)
To: Davi Arnaut
Cc: Andrew Morton, Davide Libenzi, Linus Torvalds, Linux Kernel Mailing List
On 5/2/07, Davi Arnaut <davi@haxent.com.br> wrote:
> It's quite easy to implement this scheme by write()ing the futexes all
> at once but that would break the one futex per fd association. For
> atomicity: if one of the futexes can't be queued, we would rollback
> (unqueue) the others.
>
> Sounds sane?
I don't know how you use "unqueue" in this context. If a queued futex
is one which is /locked/ by the call, then yes, this is the semantics
needed. Atomically locking a number of futexes means that if one of
the set cannot be locked all operations done to lock the others have
to be undone. It's an all-or-nothing situation.
Locking is not as easy as you might think, though. For non-PI futexes
there is deliberately no protocol in place describing what "locked"
means. The locking operation has to be customizable. This is what
the FUTEX_OP_* stuff is about.
And you wrote that currently each futex needs its own file descriptor.
So this would have to be changed, too.
^ permalink raw reply [flat|nested] 71+ messages in thread
* Re: [patch 14/22] pollfs: pollable futex
2007-05-02 16:39 ` Ulrich Drepper
@ 2007-05-02 16:59 ` Davi Arnaut
2007-05-02 17:10 ` Ulrich Drepper
0 siblings, 1 reply; 71+ messages in thread
From: Davi Arnaut @ 2007-05-02 16:59 UTC (permalink / raw)
To: Ulrich Drepper
Cc: Eric Dumazet, Andrew Morton, Davide Libenzi, Linus Torvalds,
Linux Kernel Mailing List
Ulrich Drepper wrote:
> On 5/2/07, Eric Dumazet <dada1@cosmosbay.com> wrote:
>> I understand your concerns, but *this* patch bundle extends
>> poll()/select()/epoll, and is not an alternative to kevent or other
>> work in progress, (and linux centered)
>
> It is adding huge amounts of complexity and at the same time is not
> future-safe. I consider this enough reason to reject this approach.
Huge amounts of complexity? It just _moves_ some futex code around!
The intended use is not for locking, but for event signaling. Why can't
it be future-safe? It just needs an address and a value! Pseudocode:
thread A:
int fd = plfutex(addr, 0);
do
poll(fdset+fd);
process network events
queue obj to thread B
if fd:
job processed
thread B:
wait_job();
process_job();
raise_event(addr);
> You never can get rid of the interface. It's much cleaner and safer
> to do it right instead of piling on more and more workarounds for
> special situations.
It's simple as is, there is no need to overdesign.
--
Davi Arnaut
^ permalink raw reply [flat|nested] 71+ messages in thread
* Re: [patch 14/22] pollfs: pollable futex
2007-05-02 16:46 ` Ulrich Drepper
@ 2007-05-02 17:05 ` Davi Arnaut
0 siblings, 0 replies; 71+ messages in thread
From: Davi Arnaut @ 2007-05-02 17:05 UTC (permalink / raw)
To: Ulrich Drepper
Cc: Andrew Morton, Davide Libenzi, Linus Torvalds, Linux Kernel Mailing List
Ulrich Drepper wrote:
> On 5/2/07, Davi Arnaut <davi@haxent.com.br> wrote:
>> It's quite easy to implement this scheme by write()ing the futexes all
>> at once but that would break the one futex per fd association. For
>> atomicity: if one of the futexes can't be queued, we would rollback
>> (unqueue) the others.
>>
>> Sounds sane?
>
> I don't know how you use "unqueue" in this context. If a queued futex
> is one which is /locked/ by te call, then yes, this is the semantics
> needed. Atomically locking a number of futexes means that if one of
> the set cannot be locked all operations done to lock the others have
> to be undone. It's an all-or-nothing situation.
The waits are queued, thus they can be "unqueued". It's quite simple to
extend futex_wait_queue() to support this, but again you are thinking of
locks while what I want is fast events.
> Locking is not as easy as you might think, though. For non-PI futexes
> there is deliberately no protocol in place describing what "locked"
> means. The locking operation has to be customizable. This is what
> the FUTEX_OP_* stuff is about.
Events are simple. An event is either signaled or not. A futex value 0 means
not signaled, 1+ signaled.
> And you wrote that currently each futex needs its own file descriptor.
> So this would have to be changed, too.
If it's really worth it, I have no problem with it.
--
Davi Arnaut
^ permalink raw reply [flat|nested] 71+ messages in thread
* Re: [patch 14/22] pollfs: pollable futex
2007-05-02 16:59 ` Davi Arnaut
@ 2007-05-02 17:10 ` Ulrich Drepper
2007-05-02 17:29 ` Davide Libenzi
2007-05-02 17:37 ` Davi Arnaut
0 siblings, 2 replies; 71+ messages in thread
From: Ulrich Drepper @ 2007-05-02 17:10 UTC (permalink / raw)
To: Davi Arnaut
Cc: Eric Dumazet, Andrew Morton, Davide Libenzi, Linus Torvalds,
Linux Kernel Mailing List
On 5/2/07, Davi Arnaut <davi@haxent.com.br> wrote:
> thread A:
> int fd = plfutex(addr, 0);
> do
> poll(fdset+fd);
> process network events
> queue obj to thread B
> if fd:
> job processed
>
> thread B:
> wait_job();
> process_job();
> raise_event(addr);
This is not the model you can implement with your changes. Because
every single waiter is woken you need one thread listening for the
jobs and then distribute the work. Otherwise you have thundering
herds of threads and only one gets to do some work.
> It simple as is, there is no need to overdesign.
There is no reason to go with a limited, too-simple minded design if
we've already identified a much better design. The fact that poll is
used today does not excuse piling on more and more code which makes
additional functions which don't fit into the poll framework barely
work. Plus, poll/epoll itself is a problem.
And you cannot talk about little changes and no "overdesign". You
have 22 patches for all this. It's not just limited to futexes, it's
the whole thing which IMO is unnecessary ballast going forward.
^ permalink raw reply [flat|nested] 71+ messages in thread
* Re: [patch 00/22] pollfs: filesystem abstraction for pollable objects
2007-05-02 6:05 ` [patch 00/22] pollfs: filesystem abstraction for pollable objects Andrew Morton
@ 2007-05-02 17:28 ` Davide Libenzi
2007-05-02 17:47 ` Davi Arnaut
0 siblings, 1 reply; 71+ messages in thread
From: Davide Libenzi @ 2007-05-02 17:28 UTC (permalink / raw)
To: Andrew Morton; +Cc: Davi Arnaut, Linus Torvalds, Linux Kernel Mailing List
On Tue, 1 May 2007, Andrew Morton wrote:
> David, could you provide some feedback please? The patches are stunningly
> free of comments, but you used to do that to me pretty often so my sympathy
> is limited ;)
You bastard! :)
Ok, from a brief look ...
[general]
The code adds an extra indirection over the already existing
file_operations, that IMO already sufficiently abstract a file.
The compat code, if I read it correctly, does not support files crossing
32/64 bits boundaries (exec or SCM_RIGHTS).
[timers]
Returns a structure instead of a 32 bit counter (ala timerfd), and needs
extra compat code.
[signal]
All the discussions that went on for signalfd have been lost. It pins the
task struct and it does not handle process detach signaling.
[aio]
I prefer a signaling-only approach like the 20 lines patch I posted, and
use the standard AIO calls to fetch results. This code simply wraps AIO
calls. This is just my opinions of course, and others may disagree.
[futex]
I intentionally stayed out of there after all the talks about futexfd
being killed due to the impossibility of getting an interface right (did
not follow the talk, so I can't tell the level of BS contained in the
previous statement). We also have a nice printk in there, with the first
kill-on deadline that ever made mainline :)
printk(KERN_WARNING "Process `%s' used FUTEX_FD, which "
"will be removed from the kernel in June 2007\n",
current->comm);
- Davide
^ permalink raw reply [flat|nested] 71+ messages in thread
* Re: [patch 14/22] pollfs: pollable futex
2007-05-02 17:10 ` Ulrich Drepper
@ 2007-05-02 17:29 ` Davide Libenzi
2007-05-02 17:53 ` Ulrich Drepper
2007-05-02 17:37 ` Davi Arnaut
1 sibling, 1 reply; 71+ messages in thread
From: Davide Libenzi @ 2007-05-02 17:29 UTC (permalink / raw)
To: Ulrich Drepper
Cc: Davi Arnaut, Eric Dumazet, Andrew Morton, Linus Torvalds,
Linux Kernel Mailing List
On Wed, 2 May 2007, Ulrich Drepper wrote:
> > It simple as is, there is no need to overdesign.
>
> There is no reason to go with a limited, too-simple minded design if
> we've already identified a much better design. The fact that poll is
> used today does not excuse piling on more and more code which makes
> additional functions which don't fit into the poll framework barely
> work. Plus, poll/epoll itself is a problem.
Is it? Please do tell me more...
- Davide
^ permalink raw reply [flat|nested] 71+ messages in thread
* Re: [patch 14/22] pollfs: pollable futex
2007-05-02 17:10 ` Ulrich Drepper
2007-05-02 17:29 ` Davide Libenzi
@ 2007-05-02 17:37 ` Davi Arnaut
2007-05-02 17:49 ` Ulrich Drepper
1 sibling, 1 reply; 71+ messages in thread
From: Davi Arnaut @ 2007-05-02 17:37 UTC (permalink / raw)
To: Ulrich Drepper
Cc: Eric Dumazet, Andrew Morton, Davide Libenzi, Linus Torvalds,
Linux Kernel Mailing List
Ulrich Drepper wrote:
> On 5/2/07, Davi Arnaut <davi@haxent.com.br> wrote:
>> thread A:
>> int fd = plfutex(addr, 0);
>> do
>> poll(fdset+fd);
>> process network events
>> queue obj to thread B
>> if fd:
>> job processed
>>
>> thread B:
>> wait_job();
>> process_job();
>> raise_event(addr);
>
> This is not the model you can implement with your changes. Because
> every single waiter is woken you need one thread listening for the
> jobs and then distribute the work. Otherwise you have thundering
> herds of threads and only one gets to do some work.
>
NO! Every single waiter of the _file descriptor_ is waked, not of the futex.
One can associate N fds with a single futex address. FUTEX_WAKE with
nproc = 1 will only wake one of the file descriptors. It's up to the user
to decide if he wants a broadcast or not.
Have you seen the email where I told you exactly this?
>> It simple as is, there is no need to overdesign.
>
> There is no reason to go with a limited, too-simple minded design if
> we've already identified a much better design. The fact that poll is
> used today does not excuse piling on more and more code which makes
> additional functions which don't fit into the poll framework barely
> work. Plus, poll/epoll itself is a problem.
>
epoll itself is a problem?! sorry, but i didn't know that. Care to
elaborate?
I really need some guidance here. I just want to unify the epoll for various
event sources. It seems a lot of people like this, just look at the popularity
of libevent and other "unifying" event loops.
I don't think we need another epoll clone.
> And you cannot talk about little changes and no "overdesign". You
> have 22 patches for all this. It's not just limited to futexes, it's
> the whole thing which IMO is unnecessary ballast going forward.
davi@karmic:~/git/linux-2.6$ find patches/ -name *.patch |grep -v
syscall | wc -l
10
davi@karmic:~/git/linux-2.6$ find patches/ -name *.patch |grep -v
syscall |grep futex
patches/pollfs-futex-async-wait.patch
patches/pollfs-futex.patch
--
Davi Arnaut
^ permalink raw reply [flat|nested] 71+ messages in thread
* Re: [patch 00/22] pollfs: filesystem abstraction for pollable objects
2007-05-02 17:28 ` Davide Libenzi
@ 2007-05-02 17:47 ` Davi Arnaut
2007-05-02 18:23 ` Davide Libenzi
0 siblings, 1 reply; 71+ messages in thread
From: Davi Arnaut @ 2007-05-02 17:47 UTC (permalink / raw)
To: Davide Libenzi; +Cc: Andrew Morton, Linus Torvalds, Linux Kernel Mailing List
Davide Libenzi wrote:
> On Tue, 1 May 2007, Andrew Morton wrote:
>
>
>> David, could you provide some feedback please? The patches are stunningly
>> free of comments, but you used to do that to me pretty often so my sympathy
>> is limited ;)
>>
>
> You bastard! :)
> Ok, from a brief look ...
>
> [general]
> The code adds an extra indirection over the already existing
> file_operations, that IMO already sufficently abstract a file.
> The compat code, if I read it correctly, does not support files crossing
> 32/64 bits boundaries (exec or SCM_RIGHTS).
>
>
The compat code is not yet finished, I plan to address compat
code on the next version.
> [timers]
> Returns a structure instead of a 32 bit counter (ala timerfd), and needs
> extra compat code.
>
Yes, but the compat code will be quite small.
> [signal]
> All the discussions that went on for signalfd has been lost. It pins the
> task struct and it does not handle process detach signaling.
>
No, I just went into a different direction.
> [aio]
> I prefer a signaling-only approach like the 20 lines patch I posted, and
> use the standard AIO calls to fetch results. This code simply wraps AIO
> calls. This is just my opinions of course, and others may disagree.
>
The write()/read() wraps are just for the sake of completeness. The important
part is the poll().
> [futex]
> I intentionally stayed out of there after all the talks about futexfd
> being killed due to the impossibility of getting an interface right (did
> not follow the talk, so I can't tell the level of BS contained in the
> previous statement). We also have a nice printk in there, with the first
> kill-on deadline that ever made mainline :)
>
>
Yes, i was more daring (or crazy). The plfutex is so simple, i don't
know why we can't get it right. It's just an _event_ (address/value).
--
Davi Arnaut
^ permalink raw reply [flat|nested] 71+ messages in thread
* Re: [patch 14/22] pollfs: pollable futex
2007-05-02 17:37 ` Davi Arnaut
@ 2007-05-02 17:49 ` Ulrich Drepper
2007-05-02 18:05 ` Davi Arnaut
0 siblings, 1 reply; 71+ messages in thread
From: Ulrich Drepper @ 2007-05-02 17:49 UTC (permalink / raw)
To: Davi Arnaut
Cc: Eric Dumazet, Andrew Morton, Davide Libenzi, Linus Torvalds,
Linux Kernel Mailing List
On 5/2/07, Davi Arnaut <davi@haxent.com.br> wrote:
> NO! Every single waiter of the _file descriptor_ is waked, not of the futex.
And how is this better? In this world of yours a program must have
one file descriptor for each single futex which is used like this *per
thread*. There can be hundreds, thousands of threads. And there can
be large numbers of futexes, too.
This is not going to fly. You reach the file descriptor limit just
with this. And this in many processes on the system.
> davi@karmic:~/git/linux-2.6$ find patches/ -name *.patch |grep -v
> syscall | wc -l
> 10
>
> davi@karmic:~/git/linux-2.6$ find patches/ -name *.patch |grep -v
> syscall |grep futex
> patches/pollfs-futex-async-wait.patch
> patches/pollfs-futex.patch
I don't know what you want to show here. Your 10 new syscalls? You
have two patches alone modifying futexes? And 22 patches in total.
That's not "a lot"?
^ permalink raw reply [flat|nested] 71+ messages in thread
* Re: [patch 14/22] pollfs: pollable futex
2007-05-02 17:29 ` Davide Libenzi
@ 2007-05-02 17:53 ` Ulrich Drepper
2007-05-02 18:21 ` Davide Libenzi
0 siblings, 1 reply; 71+ messages in thread
From: Ulrich Drepper @ 2007-05-02 17:53 UTC (permalink / raw)
To: Davide Libenzi
Cc: Davi Arnaut, Eric Dumazet, Andrew Morton, Linus Torvalds,
Linux Kernel Mailing List
On 5/2/07, Davide Libenzi <davidel@xmailserver.org> wrote:
> Is it? Please do tell me more...
Come on, we went through all this. Having to do syscalls for event
retrieval plus the limited channel available for feedback (the POLL*
bits) is too limiting. This is where the kevent stuff innovated and
really fixed the problems. Userlevel ring buffers are more efficient.
Yes, a unifying event loop is what is wanted. But it does not have to
be poll based. Given the right abstraction you can fit in the kevent
technology or similar things.
And seeing all these requirements of this approach: kevent is also
much more resource efficient. No "one file descriptor per thread per
object". These are important factors.
^ permalink raw reply [flat|nested] 71+ messages in thread
* Re: [patch 14/22] pollfs: pollable futex
2007-05-02 17:49 ` Ulrich Drepper
@ 2007-05-02 18:05 ` Davi Arnaut
2007-05-03 13:40 ` Ulrich Drepper
0 siblings, 1 reply; 71+ messages in thread
From: Davi Arnaut @ 2007-05-02 18:05 UTC (permalink / raw)
To: Ulrich Drepper
Cc: Eric Dumazet, Andrew Morton, Davide Libenzi, Linus Torvalds,
Linux Kernel Mailing List
Ulrich Drepper wrote:
> On 5/2/07, Davi Arnaut <davi@haxent.com.br> wrote:
>> NO! Every single waiter of the _file descriptor_ is waked, not of the
>> futex.
>
> And how is this better? In this world of yours a program must have
> one file descriptor for each single futex which is used like this *per
> thread*. There can be hundreds, thousands of threads. And there can
> be large numbers of futexes, too.
>
The usage cases of yours are quite different from mine. We don't use a
single file descriptor to manage various resources. The worker threads
are _not going_ to have a file descriptor, _only_ the event dispatching (poll)
thread. The worker threads are just going to increase the futex value and
call FUTEX_WAKE in case the previous value was 0.
A pollable futex is even more useful for _single_ threaded programs that
don't want to go into lengthy hacks to monitor events coming from the
outside world.
I, at least, don't want to port my epoll applications to yet another event
notification facility.
> This is not going to fly. You reach the file descriptor limit just
> with this. And this in many processes on the system.
>
>
>> davi@karmic:~/git/linux-2.6$ find patches/ -name *.patch |grep -v
>> syscall | wc -l
>> 10
>>
>> davi@karmic:~/git/linux-2.6$ find patches/ -name *.patch |grep -v
>> syscall |grep futex
>> patches/pollfs-futex-async-wait.patch
>> patches/pollfs-futex.patch
>
> I don't know what you want to show here. You 10 new syscalls? You
> have two patches alone modifying futexes? And 22 patches in total.
> That's not "a lot"?
No. 12 patches are for i386/x86_64 obligatory syscall housekeeping. I don't
want to sound rude, but have you actually looked at the patches?
--
Davi Arnaut
^ permalink raw reply [flat|nested] 71+ messages in thread
* Re: [patch 14/22] pollfs: pollable futex
2007-05-02 17:53 ` Ulrich Drepper
@ 2007-05-02 18:21 ` Davide Libenzi
2007-05-03 13:46 ` Ulrich Drepper
0 siblings, 1 reply; 71+ messages in thread
From: Davide Libenzi @ 2007-05-02 18:21 UTC (permalink / raw)
To: Ulrich Drepper
Cc: Davi Arnaut, Eric Dumazet, Andrew Morton, Linus Torvalds,
Linux Kernel Mailing List
On Wed, 2 May 2007, Ulrich Drepper wrote:
> On 5/2/07, Davide Libenzi <davidel@xmailserver.org> wrote:
> > Is it? Please do tell me more...
>
> Come on, we went through all this. Having to do syscalls for event
> retrieval plus the limited channel available for feedback (the POLL*
> bits) is to limiting. This is where the kevent stuff innovated and
> really fixed the problems. Userlevel ring buffers are more efficient.
>
> Yes, a unifying event look is what is wanted. But it does not have to
> be poll based. Given the right abstraction you can fit in the kevent
> technology or similar things.
>
> And seeing all these requirements of this approach: kevent is also
> much more resource efficient. No "one file desriptor per thread per
> object". These are important factors.
99% of the fds you'll find inside an event loop you care to scale about,
are *already* fd based. The handful of the remaining ones (signals,
timers, AIO signaling, ??) will likely account for a *very limited* number
of fds. On top of that, those fds are very cheap in terms of memory
(they're basically wakeup targets), since the new code shares the inode
for them. So we have a limited number of fds, using a pretty limited
amount of memory each.
And this approach is not bound to a completely new and monolithic interface.
All these things need to basically deliver notifications of completion,
and being able to read results. Things that the existing f_op->poll and
f_op->read are already able to give us. Is that really a strange concept
to base it on? Because, to me, it seems pretty natural.
- Davide
^ permalink raw reply [flat|nested] 71+ messages in thread
* Re: [patch 00/22] pollfs: filesystem abstraction for pollable objects
2007-05-02 17:47 ` Davi Arnaut
@ 2007-05-02 18:23 ` Davide Libenzi
2007-05-02 18:50 ` Davi Arnaut
0 siblings, 1 reply; 71+ messages in thread
From: Davide Libenzi @ 2007-05-02 18:23 UTC (permalink / raw)
To: Davi Arnaut; +Cc: Andrew Morton, Linus Torvalds, Linux Kernel Mailing List
On Wed, 2 May 2007, Davi Arnaut wrote:
> Davide Libenzi wrote:
> > On Tue, 1 May 2007, Andrew Morton wrote:
> >
> >
> > > David, could you provide some feedback please? The patches are stunningly
> > > free of comments, but you used to do that to me pretty often so my
> > > sympathy
> > > is limited ;)
> > >
> >
> > You bastard! :)
> > Ok, from a brief look ...
> >
> > [general]
> > The code adds an extra indirection over the already existing
> > file_operations, that IMO already sufficently abstract a file.
> > The compat code, if I read it correctly, does not support files crossing
> > 32/64 bits boundaries (exec or SCM_RIGHTS).
> >
> >
>
> The compat code is not already finished, I plan to address compat
> code on the next version.
How? Compat on sys_read/sys_write?
> > [timers]
> > Returns a structure instead of a 32 bit counter (ala timerfd), and needs
> > extra compat code.
> >
>
> Yes, but the compat code will be quite small.
Why would that be even justified?
> > [signal]
> > All the discussions that went on for signalfd has been lost. It pins the
> > task struct and it does not handle process detach signaling.
> >
>
> No, I just went into a different direction.
I'd say wrong, because signalfd addressed valid concerns of quite a few
ppl.
- Davide
^ permalink raw reply [flat|nested] 71+ messages in thread
* Re: [patch 00/22] pollfs: filesystem abstraction for pollable objects
2007-05-02 18:23 ` Davide Libenzi
@ 2007-05-02 18:50 ` Davi Arnaut
2007-05-02 19:42 ` Davide Libenzi
0 siblings, 1 reply; 71+ messages in thread
From: Davi Arnaut @ 2007-05-02 18:50 UTC (permalink / raw)
To: Davide Libenzi; +Cc: Andrew Morton, Linus Torvalds, Linux Kernel Mailing List
Davide Libenzi wrote:
> On Wed, 2 May 2007, Davi Arnaut wrote:
>
>
>> Davide Libenzi wrote:
>>
>>> On Tue, 1 May 2007, Andrew Morton wrote:
>>>
>>>
>>>
>>>> David, could you provide some feedback please? The patches are stunningly
>>>> free of comments, but you used to do that to me pretty often so my
>>>> sympathy
>>>> is limited ;)
>>>>
>>>>
>>> You bastard! :)
>>> Ok, from a brief look ...
>>>
>>> [general]
>>> The code adds an extra indirection over the already existing
>>> file_operations, that IMO already sufficently abstract a file.
>>> The compat code, if I read it correctly, does not support files crossing
>>> 32/64 bits boundaries (exec or SCM_RIGHTS).
>>>
>>>
>>>
>> The compat code is not already finished, I plan to address compat
>> code on the next version.
>>
>
> How? Compat on sys_read/sys_write?
>
>
Yes. More on that later.
>>> [timers]
>>> Returns a structure instead of a 32 bit counter (ala timerfd), and needs
>>> extra compat code.
>>>
>>>
>> Yes, but the compat code will be quite small.
>>
>
> Why would that be even justified?
>
>
>
Because the developer may need it.
>>> [signal]
>>> All the discussions that went on for signalfd has been lost. It pins the
>>> task struct and it does not handle process detach signaling.
>>>
>>>
>> No, I just went into a different direction.
>>
>
> I'd say wrong, because signalfd addressed valid concerns of quite a few
> ppl.
>
>
So in this case I may borrow some signalfd code :-) I really like the
signalfd approach, but IMHO the code is quite ugly and duplicates
a lot of hairy code.
--
Davi Arnaut
^ permalink raw reply [flat|nested] 71+ messages in thread
* Re: [patch 00/22] pollfs: filesystem abstraction for pollable objects
2007-05-02 18:50 ` Davi Arnaut
@ 2007-05-02 19:42 ` Davide Libenzi
2007-05-02 20:11 ` Davi Arnaut
0 siblings, 1 reply; 71+ messages in thread
From: Davide Libenzi @ 2007-05-02 19:42 UTC (permalink / raw)
To: Davi Arnaut; +Cc: Andrew Morton, Linus Torvalds, Linux Kernel Mailing List
On Wed, 2 May 2007, Davi Arnaut wrote:
> So in this case I may borrow some signalfd code :-) I really like the
> signalfd approach, but IMHO the code is quite ugly and duplicates
> a lot of hairy code.
Ugly, really? Please ...
+ while (!mutex_trylock(&evs->mutex))
+ cpu_relax();
So, let's see. The whole things adds an unneeded (read "empty") extra
layer of indirections over the existing file_operations. Timerfd added the
return of the structure (because "developer may need it"), that wants
extra compat too. Your signal code does not handle things that is supposed
to be handling (easy to write clean code when the code does not actually
do what is supposed to be doing), and you'll be sucking code from
signalfd. On top of that, it returns a structure that needs compat too.
AIO code added an extra unnecessary wrapper over AIO native calls (by
literally calling sys_io_*). And the compat on sys_read/sys_write I
guess will be funny to watch. So what does your code effectively add that
is useful in some way? Undocumented and testcase-less code?
As I see it, your code badly copies the existing one, and adds unneeded
infrastructure/functionality.
See, I really wouldn't care, if you'd keep the "ugly" word for yourself.
Actually, I really care only about the functionality, so I leave the whole
ball into Andrew's hands ;)
- Davide
^ permalink raw reply [flat|nested] 71+ messages in thread
* Re: [patch 00/22] pollfs: filesystem abstraction for pollable objects
2007-05-02 19:42 ` Davide Libenzi
@ 2007-05-02 20:11 ` Davi Arnaut
0 siblings, 0 replies; 71+ messages in thread
From: Davi Arnaut @ 2007-05-02 20:11 UTC (permalink / raw)
To: Davide Libenzi; +Cc: Andrew Morton, Linus Torvalds, Linux Kernel Mailing List
Davide Libenzi wrote:
> On Wed, 2 May 2007, Davi Arnaut wrote:
>
>
>> So in this case I may borrow some signalfd code :-) I really like the
>> signalfd approach, but IMHO the code is quite ugly and duplicates
>> a lot of hairy code.
>>
>
> Ugly, really? Please ...
>
> + while (!mutex_trylock(&evs->mutex))
> + cpu_relax();
>
>
I would call that a "creative way to spin a mutex" .
> So, let's see. The whole things adds an unneeded (read "empty") extra
> layer of indirections over the existing file_operations.
"Empty" ? It handles the non-blocking code, event waiting, filp
setup. It's an affordable extra thin layer.
> Timerfd added the
> return of the structure (because "developer may need it"), that wants
> extra compat too.
I bet the pltimer/plsignal compat code will be smaller/cleaner than
the timerfd/signalfd. Will post soon.
> Your signal code does not handle things that is supposed
> to be handling (easy to write clean code when the code does not actually
> do what is supposed to be doing), and you'll be sucking code from
> signalfd.
As if sucking code from signalfd is a bad thing.
> On top of that, it returns a structure that needs compat too.
> AIO code added an extra unnecessary wrapper over AIO native calls (by
> literally calling sys_io_*). And the compat on sys_read/sys_write I
> guess will be funny to watch.
I hope you enjoy it :-)
> So what does your code effectively adds that
> is useful in some way? Undocumented and testcase-less code?
> As I see it, your code badly copies exiting one, and adds uneeded
> infrastrcture/functionality.
>
In case you don't remember, I mailed you the patches back on 03/08/2007.
It's definitely not a copy.
> See, I really wouldn't care, if you'd keep the "ugly" word for yourself.
>
It's a matter of taste and I'm entitled to a bad one every once in a
while :-)
> Actually, I really care only about the functionality, so I leave the whole
> ball into Andrew's hands ;)
>
Anyway, I didn't expect to move forward with your blessing but your comments
are appreciated.
Andrew, if you see fit, go ahead with signalfd. Anyway, I will cook up
a new version
for tomorrow.
--
Davi Arnaut
^ permalink raw reply [flat|nested] 71+ messages in thread
* Re: [patch 09/22] pollfs: pollable hrtimers
2007-05-02 5:22 ` [patch 09/22] pollfs: pollable hrtimers Davi Arnaut
@ 2007-05-02 21:16 ` Thomas Gleixner
2007-05-02 23:00 ` Davi Arnaut
0 siblings, 1 reply; 71+ messages in thread
From: Thomas Gleixner @ 2007-05-02 21:16 UTC (permalink / raw)
To: Davi Arnaut
Cc: Andrew Morton, Davide Libenzi, Linus Torvalds, Linux Kernel Mailing List
On Wed, 2007-05-02 at 02:22 -0300, Davi Arnaut wrote:
> plain text document attachment (pollfs-timer.patch)
> Per file descriptor high-resolution timers. A classic unix file interface for
> the POSIX timer_(create|settime|gettime|delete) family of functions.
>
> Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
Nacked-by-me.
Aside of the fact, that it is a bad clone of the timerfd code, it is
simply broken and untested.
> +
> +struct hrtimerspec {
> + int flags;
> + clockid_t clock;
> + struct itimerspec expr;
> +};
How exactly does userspace know what a struct hrtimerspec is? Is the C file
exported as a header?
> +static ssize_t read(struct pfs_timer *evs, struct itimerspec __user *uspec)
> +{
> + int ret = -EAGAIN;
> + ktime_t remaining = {};
> + unsigned long overruns = 0;
> + struct itimerspec spec = {};
> + struct hrtimer *timer = &evs->timer;
> +
> + spin_lock_irq(&evs->lock);
> +
> + if (!evs->overruns)
> + goto out_unlock;
> +
> + if (hrtimer_active(timer))
> + remaining = hrtimer_get_remaining(timer);
> + else if (evs->interval.tv64 > 0)
> + overruns = hrtimer_forward(timer, hrtimer_cb_get_time(timer),
> + evs->interval);
Where is the logic here ?
If no overrun, return remaining time = 0
If active, return the real remaining time. This path is never hit, as
the timer is nowhere restarted.
If not active, return remaining time = 0. How does the caller know how
many events are missed ?
> + ret = -EOVERFLOW;
> + if (overruns > (ULONG_MAX - evs->overruns))
> + goto out_unlock;
> + else
> + evs->overruns += overruns;
Interesting feature. evs->overruns is adding up forever and then limited
to ULONG_MAX
> +static enum hrtimer_restart timer_fn(struct hrtimer *timer)
> +{
> + struct pfs_timer *evs = container_of(timer, struct pfs_timer, timer);
> + unsigned long flags;
> +
> + spin_lock_irqsave(&evs->lock, flags);
> + /* timer tick, interval has elapsed */
> + if (!evs->overruns++)
> + wake_up_all(&evs->wait);
Cool. Waiters, which came after the first event are stuck. Simply
because there is no second event.
> +static ssize_t write(struct pfs_timer *evs,
> + const struct hrtimerspec __user *uspec)
> +{
> + struct hrtimerspec spec;
See first comment !
> + if (copy_from_user(&spec, uspec, sizeof(spec)))
> + return -EFAULT;
> +
> + if (spec_invalid(&spec))
> + return -EINVAL;
> +
> + rearm_timer(evs, &spec);
> +
> + return 0;
> +}
> +
> +static int poll(struct pfs_timer *evs)
> +{
> + int ret;
> +
> + ret = evs->overruns ? POLLIN : 0;
> +
> + return ret;
> +}
Creative lockless programming style with 4 lines overhead and a
guaranteed return POLLIN after the first timer event. This is really
cute as it covers the missing timer restart and guarantees 100% CPU load
for ever. Hmm, maybe it's correct: polling should loop for ever,
shouldn't it ?
> +static const struct pfs_operations timer_ops = {
> + .read = PFS_READ(read, struct pfs_timer, struct itimerspec),
> + .write = PFS_WRITE(write, struct pfs_timer, struct hrtimerspec),
> + .poll = PFS_POLL(poll, struct pfs_timer),
> + .release = PFS_RELEASE(release, struct pfs_timer),
> + .rsize = sizeof(struct itimerspec),
> + .wsize = sizeof(struct hrtimerspec),
See first comment !
tglx
^ permalink raw reply [flat|nested] 71+ messages in thread
* Re: [patch 09/22] pollfs: pollable hrtimers
2007-05-02 21:16 ` Thomas Gleixner
@ 2007-05-02 23:00 ` Davi Arnaut
0 siblings, 0 replies; 71+ messages in thread
From: Davi Arnaut @ 2007-05-02 23:00 UTC (permalink / raw)
To: tglx
Cc: Andrew Morton, Davide Libenzi, Linus Torvalds, Linux Kernel Mailing List
Thomas Gleixner wrote:
> On Wed, 2007-05-02 at 02:22 -0300, Davi Arnaut wrote:
>> plain text document attachment (pollfs-timer.patch)
>> Per file descriptor high-resolution timers. A classic unix file interface for
>> the POSIX timer_(create|settime|gettime|delete) family of functions.
>>
>> Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
>
> Nacked-by-me.
>
> Aside of the fact, that it is a bad clone of the timerfd code, it is
> simply broken and untested.
I wrote it around the same time as timerfd; I even sent it to Davide and
the list. "Clone" is a bit of an overstatement; timerfd is not as buggy as this :)
>> +
>> +struct hrtimerspec {
>> + int flags;
>> + clockid_t clock;
>> + struct itimerspec expr;
>> +};
>
> How exactly knows userspace what a struct hrtimerspec is ? Is the c file
> exported as a header ?
Will move them all to another header later.
>> +static ssize_t read(struct pfs_timer *evs, struct itimerspec __user *uspec)
>> +{
>> + int ret = -EAGAIN;
>> + ktime_t remaining = {};
>> + unsigned long overruns = 0;
>> + struct itimerspec spec = {};
>> + struct hrtimer *timer = &evs->timer;
>> +
>> + spin_lock_irq(&evs->lock);
>> +
>> + if (!evs->overruns)
>> + goto out_unlock;
>> +
>> + if (hrtimer_active(timer))
>> + remaining = hrtimer_get_remaining(timer);
>> + else if (evs->interval.tv64 > 0)
>> + overruns = hrtimer_forward(timer, hrtimer_cb_get_time(timer),
>> + evs->interval);
>
> Where is the logic here ?
Return the remaining time until the timer fires, or rearm the timer. And it's
pretty broken because of the first if, and I forgot to reset overruns.
> If no overrun, return remaining time = 0
>
> If active, return the real remaining time. This path is never hit, as
> the timer is nowhere restarted.
>
> If not active, return remanining time = 0. How does the caller know how
> many events are missed ?
>
>> + ret = -EOVERFLOW;
>> + if (overruns > (ULONG_MAX - evs->overruns))
>> + goto out_unlock;
>> + else
>> + evs->overruns += overruns;
>
> Interesting feature. evs->overruns is adding up forever and then limited
> to ULONG_MAX
See third comment!
>> +static enum hrtimer_restart timer_fn(struct hrtimer *timer)
>> +{
>> + struct pfs_timer *evs = container_of(timer, struct pfs_timer, timer);
>> + unsigned long flags;
>> +
>> + spin_lock_irqsave(&evs->lock, flags);
>> + /* timer tick, interval has elapsed */
>> + if (!evs->overruns++)
>> + wake_up_all(&evs->wait);
>
> Cool. Waiters, which came after the first event are stuck. Simply
> because there is no second event.
See third comment!
>> +static ssize_t write(struct pfs_timer *evs,
>> + const struct hrtimerspec __user *uspec)
>> +{
>> + struct hrtimerspec spec;
>
> See first comment !
>
>> + if (copy_from_user(&spec, uspec, sizeof(spec)))
>> + return -EFAULT;
>> +
>> + if (spec_invalid(&spec))
>> + return -EINVAL;
>> +
>> + rearm_timer(evs, &spec);
>> +
>> + return 0;
>> +}
>> +
>> +static int poll(struct pfs_timer *evs)
>> +{
>> + int ret;
>> +
>> + ret = evs->overruns ? POLLIN : 0;
>> +
>> + return ret;
>> +}
>
> Creative lockless programming style with 4 lines overhead and a
> guaranteed return POLLIN after the first timer event. This is really
> cute as it covers the missing timer restart and guarantees 100% CPU load
> for ever. Hmm, maybe it's correct: polling should loop for ever,
> shouldn't it ?
See third comment! -- It will remain lockless, as reading and setting
this data type is guaranteed to happen atomically.
>> +static const struct pfs_operations timer_ops = {
>> + .read = PFS_READ(read, struct pfs_timer, struct itimerspec),
>> + .write = PFS_WRITE(write, struct pfs_timer, struct hrtimerspec),
>> + .poll = PFS_POLL(poll, struct pfs_timer),
>> + .release = PFS_RELEASE(release, struct pfs_timer),
>> + .rsize = sizeof(struct itimerspec),
>> + .wsize = sizeof(struct hrtimerspec),
>
> See first comment !
This has nothing to do with user space, or you got lost in the comment
references.
--
Davi Arnaut
^ permalink raw reply [flat|nested] 71+ messages in thread
* Re: [patch 14/22] pollfs: pollable futex
2007-05-02 18:05 ` Davi Arnaut
@ 2007-05-03 13:40 ` Ulrich Drepper
0 siblings, 0 replies; 71+ messages in thread
From: Ulrich Drepper @ 2007-05-03 13:40 UTC (permalink / raw)
To: Davi Arnaut
Cc: Eric Dumazet, Andrew Morton, Davide Libenzi, Linus Torvalds,
Linux Kernel Mailing List
On 5/2/07, Davi Arnaut <davi@haxent.com.br> wrote:
> The usage cases of yours are quite different from mine. We don't use a
> single file descriptor to manage various resources. The worker threads
> are _not going_ to have a file descriptor, _only_ the event dispatching
> (poll)
> thread.
A model which doesn't scale well.
> A pollable futex is even more useful for _single_ threaded programs that
> don't want to go into lengthy hacks to monitor events coming from the
> outside
> world.
There is nothing here that cannot be done with a more complete model
for event handling. It's Linus decision whether he wants to add yet
more code, yet more possible problems, yet more maintenance
overhead/nightmare for an interim solution which isn't necessary,
which cannot solve all the problems, and which is not as scalable as
other proposed methods.
I can only say that I would be strictly against it. It just makes no sense.
^ permalink raw reply [flat|nested] 71+ messages in thread
* Re: [patch 14/22] pollfs: pollable futex
2007-05-02 18:21 ` Davide Libenzi
@ 2007-05-03 13:46 ` Ulrich Drepper
2007-05-03 18:24 ` Davide Libenzi
0 siblings, 1 reply; 71+ messages in thread
From: Ulrich Drepper @ 2007-05-03 13:46 UTC (permalink / raw)
To: Davide Libenzi
Cc: Davi Arnaut, Eric Dumazet, Andrew Morton, Linus Torvalds,
Linux Kernel Mailing List
On 5/2/07, Davide Libenzi <davidel@xmailserver.org> wrote:
> 99% of the fds you'll find inside an event loop you care to scale about,
> are *already* fd based.
You are missing the point. To get acceptable behavior of the wakeup
it is necessary with this approach to open one descriptor _per thread_
for a futex. Otherwise all threads get woken upon FUTEX_WAKE.
This also means you need individual epoll sets for each thread. You
cannot share them anymore among all the threads in the process.
> On top of that, those fds are very cheap in terms of memory
They might be when they are counted in dozens. But here we are
talking about the possible need to use thousands of additional file
descriptors. If they are so cheap to allow thousands of descriptors
with ease, why would the rlimit for files default to a small number
(1024 on Fedora right now)?
> And this approach is not bound to a completely new and monolitic interface.
So? It's still additional, new code for an approach which will have to
be superseded real soon. That's just pure overhead to me.
^ permalink raw reply [flat|nested] 71+ messages in thread
* Re: [patch 14/22] pollfs: pollable futex
2007-05-03 13:46 ` Ulrich Drepper
@ 2007-05-03 18:24 ` Davide Libenzi
2007-05-03 19:03 ` Ulrich Drepper
0 siblings, 1 reply; 71+ messages in thread
From: Davide Libenzi @ 2007-05-03 18:24 UTC (permalink / raw)
To: Ulrich Drepper
Cc: Davi Arnaut, Eric Dumazet, Andrew Morton, Linus Torvalds,
Linux Kernel Mailing List
I thought you were talking about the poll/epoll interface in general, and
the approach on how to extend it for the very few cases that ppl ask for,
but I see we're focusing on futexes ...
On Thu, 3 May 2007, Ulrich Drepper wrote:
> On 5/2/07, Davide Libenzi <davidel@xmailserver.org> wrote:
> > 99% of the fds you'll find inside an event loop you care to scale about,
> > are *already* fd based.
>
> You are missing the point. To get acceptable behavior of the wakeup
> it is necessary with this approach to open one descriptor _per thread_
> for a futex. Otherwise all threads get woken upon FUTEX_WAKE.
>
> This also means you need individual epoll sets for each thread. You
> cannot share them anymore among all the threads in the process.
I'm not sure if futexes are the best approach to do that, but a way for
the user to signal an event into a main event loop is needed.
> > On top of that, those fds are very cheap in terms of memory
>
> They might be when they are counted in dozens. But here we are
> talking about the possible need to use thousands of additional file
> descriptors. If they are so cheap to allow thousands of descriptors
> with ease, why would the rlimit for files default to a small number
> (1024 on Fedora right now)?
Right now, ppl do that using pipes. That costs 2 file descriptors and at
least 4KB of kernel data (plus an inode, a dentry and a file). This just
to have a way to signal to an event loop dispatcher. The patches I posted
a few weeks ago introduce an eventfd, that reduces the amount of kernel
memory to basically a dentry and a file (plus it uses only one file
descriptor), and it's 2-3 times faster than pipes. Add to that cost about
200 lines of code in fs/eventfd.c.
> > And this approach is not bound to a completely new and monolitic interface.
>
> So? It's stil additional, new code for an approach which will have to
> be superceded real soon. That's just pure overhead to me.
IMO it is better to leave futexes alone. They are great for synchronizing
MT apps, but do not properly fit an fd-based solution. For that, something
like eventfd is enough.
- Davide
^ permalink raw reply [flat|nested] 71+ messages in thread
* Re: [patch 14/22] pollfs: pollable futex
2007-05-03 18:24 ` Davide Libenzi
@ 2007-05-03 19:03 ` Ulrich Drepper
2007-05-03 22:14 ` Davide Libenzi
0 siblings, 1 reply; 71+ messages in thread
From: Ulrich Drepper @ 2007-05-03 19:03 UTC (permalink / raw)
To: Davide Libenzi
Cc: Davi Arnaut, Eric Dumazet, Andrew Morton, Linus Torvalds,
Linux Kernel Mailing List
On 5/3/07, Davide Libenzi <davidel@xmailserver.org> wrote:
>
> I thought you were talking about the poll/epoll interface in general, and
> the approach on how to extend it for the very few cases that ppl asks for.
> but I see we're focusing on futexes ...
Futexes must be part of the whole approach. If they cannot sanely be
integrated the whole approach is more than questionable IMO.
> I'm not sure if futexes are the best approach to do that, but a way for
> the user to signal an event into a main event loop is needed.
I haven't necessarily seen much of this demand and, as you pointed out
yourself, there is already a completely valid and POSIX compliant way
to achieve that. The situation would be very different if you
couldn't reliably implement this.
I don't suggest this as a long term solution, it's neither nice nor
fast. But it is a way to achieve the goal until a real solution comes
along. Signals cannot serve as a justification for introducing these
new concepts.
> IMO it is better to leave futexes alone. They are great for syncronizing
> MT apps, but do not properly fit an fd-based solution. For that, something
> like eventfd is enough.
That's ridiculously short-sighted. All objects upon which one can
wait must be unified. This is possible. The kevent interface gives
enough flexibility.
Let's just finish the design and implementation of the real solution.
Be it kevent (modified to meet the last comments, I think I still have
some myself), or something completely different which you can propose.
Then all programs which really care about performance can use that
code.
If a program doesn't care about performance then they might just as
well use pipes in signal handlers.
^ permalink raw reply [flat|nested] 71+ messages in thread
* Re: [patch 14/22] pollfs: pollable futex
2007-05-03 19:03 ` Ulrich Drepper
@ 2007-05-03 22:14 ` Davide Libenzi
2007-05-04 15:28 ` Ulrich Drepper
0 siblings, 1 reply; 71+ messages in thread
From: Davide Libenzi @ 2007-05-03 22:14 UTC (permalink / raw)
To: Ulrich Drepper
Cc: Davi Arnaut, Eric Dumazet, Andrew Morton, Linus Torvalds,
Linux Kernel Mailing List
On Thu, 3 May 2007, Ulrich Drepper wrote:
> On 5/3/07, Davide Libenzi <davidel@xmailserver.org> wrote:
> >
> > I thought you were talking about the poll/epoll interface in general, and
> > the approach on how to extend it for the very few cases that ppl asks for.
> > but I see we're focusing on futexes ...
>
> Futexes must be part of the whole approach. If they cannot sanely be
> integrated the whole approach is more than questionable IMO.
Why is it that futexes *must* be part of the "whole solution"? Ppl need
solutions to specific problems, not a bloated interface that, like a
giant blob, includes everything just because it exists.
> > I'm not sure if futexes are the best approach to do that, but a way for
> > the user to signal an event into a main event loop is needed.
>
> I haven't necessarily seen much of this demand and, as you pointed out
> yourself, there is already a completely valid and POSIX compliant way
> to achieve that. The situation would be very different if you
> couldn't reliably implement this.
Before you try to bash a solution because it's costly, then you bounce
back from another angle, and say that a solution (pipes) that uses 2
descriptors, one file, one inode, one dentry and 4KB of kernel memory for
each instance, is a perfectly legal solution.
The 1024 file cap is a bogus problem. If you decided to leave the POSIX
compatibility (poll/select) for your code, to use something like epoll, it
means already that your application is handling quite a huge amount of
files and the 1024 cap must be out of the way. And here the cost
associated with each file is already pretty big (inode, dentry, file, and
buffers - for each one of them). We cannot change that cost.
> I don't suggest this as a long term solution, it's neither nice nor
> fast. But it is a way to achieve the goal until a real soution comes
> along. Signals cannot serve as a justification for introducing these
> new concepts.
Fast, I think we have that pretty much covered with Ingo pointing out a few
flaws in the numbers posted previously. Nice, I'll leave that out.
Monolithic and interface-centric solutions, or better, solutions in search
of a problem, do not fit the "nice" category IMO.
So, let's leave hand-waving and ugly/nice BS out of the picture, and let's
see what is currently missing.
Epoll scales and already covers a large amount of things you may be
interested in receiving events from. Basically everything that has a
working f_op->poll.
The other big piece is AIO. Now you can have *another* layer on top of
AIO, that is included in your blob interface, but why? The AIO API is
already defined, and all you need is a way to signal the main loop that
AIO events will be ready to be spilled out from the AIO context. And at
that point you use the *already existing* AIO API for it. Why do you want
to add another layer on top? What you end up doing, is pushing userspace
code into the kernel.
The 20 lines AIO patch I posted, simply signals to an eventfd when the
AIO context has something to be fetched.
Then we have signals and timers, covered in the other two patches. And all
this works without being bound to an interface. Your application can just
use poll if it does not have scalability problems.
- Davide
^ permalink raw reply [flat|nested] 71+ messages in thread
* Re: [patch 14/22] pollfs: pollable futex
2007-05-03 22:14 ` Davide Libenzi
@ 2007-05-04 15:28 ` Ulrich Drepper
2007-05-04 19:15 ` Davide Libenzi
0 siblings, 1 reply; 71+ messages in thread
From: Ulrich Drepper @ 2007-05-04 15:28 UTC (permalink / raw)
To: Davide Libenzi
Cc: Davi Arnaut, Eric Dumazet, Andrew Morton, Linus Torvalds,
Linux Kernel Mailing List
On 5/3/07, Davide Libenzi <davidel@xmailserver.org> wrote:
> Why is that futexes *must* be part of the "whole solution"? Ppl needs
> solutions to specific problems, not an bloated interface that, like a
> giant blob, includes everything just because it exists.
Sync objects are essential parts of many programs today and most
programs tomorrow. Currently you cannot efficiently implement working
on multiple independent areas which are protected through some sync
object (mutex, condvar, ...). You have to create a separate thread
for each. Looping with the nonlocking mutex, for instance, is no
possibility. This is solved by being able to get events for the
availability of the sync object.
And before you start and claim that this is not a common case, take a
look at the waitformultipleobjects (with studly caps somewhere) for
windows' API. The actual interface is horrible, but the concept is
sound (it comes from VMS). This is the basis of many programs on that
platform. Basically, the central loop contains such a call.
Currently programs would have to be completely redesigned when ported
to Linux if they use any object which cannot be waited on.
There is much more. As I tried to point out in last year's OLS paper,
central loops around such a call are the perfect scalability mechanism
and this is what is needed for the processors from today and tomorrow.
> Before you try to bash a solution becuase it's costly, then you bounce
> back from another angle, and say that a solution (pipes) that uses 2
> descriptors, one file, one inode, one dentry and 4KB of kernel memory for
> each instance, is a perfectly legal solution.
Stop. I call the proposed code costly in terms of the code added to
the kernel which must be maintained and kept in mind when writing the
real next-gen event mechanism. Not having this code in the kernel
certainly would make a difference.
> Fast, I think we have that pretty much covered with Ingo poiting out a few
> flaws in the numbers posted previously. Nice, I'll leave that out.
You again miss the context. I was talking about the pipe-based
solution using a signal handler.
> Epoll scales and already covers a large amount of things you may be
> interested in receiving events from. Basically everything that have a
> working f_op->poll.
epoll doesn't scale if every thread needs its own epoll set. Besides
the overhead this also has huge program design problems: how do you
atomically remove a file descriptor from a collection of epoll sets?
> The other big piece is AIO. Now you can have *another* layer on top of
> AIO, that is included in your blob interface, but why?
I don't know how you arrive at AIO now. kevent itself is independent
of the AIO code which was done at the same time by the same person.
It was just one kernel service which uses the event functionality.
The two must be judged independently.
^ permalink raw reply [flat|nested] 71+ messages in thread
* Re: [patch 14/22] pollfs: pollable futex
2007-05-04 15:28 ` Ulrich Drepper
@ 2007-05-04 19:15 ` Davide Libenzi
2007-05-04 19:20 ` 2.6.20.4 / 2.6.21.1 AT91SAM9260-EK oops Ryan Ordway
2007-05-04 23:38 ` [patch 14/22] pollfs: pollable futex Ulrich Drepper
0 siblings, 2 replies; 71+ messages in thread
From: Davide Libenzi @ 2007-05-04 19:15 UTC (permalink / raw)
To: Ulrich Drepper
Cc: Davi Arnaut, Eric Dumazet, Andrew Morton, Linus Torvalds,
Linux Kernel Mailing List
On Fri, 4 May 2007, Ulrich Drepper wrote:
> On 5/3/07, Davide Libenzi <davidel@xmailserver.org> wrote:
> > Why is that futexes *must* be part of the "whole solution"? Ppl needs
> > solutions to specific problems, not an bloated interface that, like a
> > giant blob, includes everything just because it exists.
>
> Sync objects are essential parts of many programs today and most
> programs tomorrow. Currently you cannot efficiently implement working
> on multiple independent areas which are protected through some sync
> object (mutex, condvar, ...). You have to create a separate thread
> for each. Looping with the nonlocking mutex, for instance, is no
> possibility. This is solved by being able to get events for the
> availability of the sync object.
>
> And before you start and claim that this is no common cases take a
> look at the waitformultipleobjects (with studdly caps somewhere) for
> windows' API. The actual interface is horrible, but the concept is
> sound (it comes from VMS). This is the basis of many programs on that
> platform. Basically, the central loop contains such a call.
> Currently programs would have to be completely redesigned when ported
> to Linux if they use any object which cannot be waited on.
>
> There is much more. As I tried to point out in last year's OLS paper,
> central loops around such a call are the perfect scalability mechanism
> and this is what is needed for the processors from today and tomorrow.
This is a pretty specific case, that is not very typical to find in the
usual common event loop dispatch application design.
But strange you went even there, because, as you know, WaitForMultipleObjects
works with HANDLEs, that are the closest thing to the Unix file you can
find. They can be read (ReadFile/read), written (WriteFile/write), closed
(CloseHandle/close), duplicated (DuplicateHandle/dup) and waited
(WaitForMultipleObjects/poll), with a common interface.
And if you *really* want your truly generic WaitForMultipleObjects
implementation, your only way is to base it on files. Files are our almost
perfect match to HANDLEs in our world. We have the basic infrastructure
already there.
- Davide
^ permalink raw reply [flat|nested] 71+ messages in thread
* 2.6.20.4 / 2.6.21.1 AT91SAM9260-EK oops
2007-05-04 19:15 ` Davide Libenzi
@ 2007-05-04 19:20 ` Ryan Ordway
2007-05-04 23:38 ` [patch 14/22] pollfs: pollable futex Ulrich Drepper
1 sibling, 0 replies; 71+ messages in thread
From: Ryan Ordway @ 2007-05-04 19:20 UTC (permalink / raw)
To: Linux Kernel Mailing List
I am having issues getting both a working kernel and rootfs going on an
Atmel AT91SAM9260-EK board. I can boot the Atmel-provided 2.6.18-rc4 kernel
and my rootfs image created with buildroot. But when I try to boot my own
2.6.20.4 or 2.6.21.1 kernels, I get an oops as below. Forgive the
formatting... Below that is my kernel config.
Any ideas why the kernel might be dying trying to open and initialize a TTY?
Thanks!
Ryan
Uncompressing
Linux.............................................................
................... done, booting the kernel.
Linux version 2.6.21.1-pml1 (rordway@alpha.powermand.com) (gcc version
4.1.2) #2
Thu May 3 13:47:43 PDT 2007
CPU: ARM926EJ-S [41069265] revision 5 (ARMv5TEJ), cr=00053177
Machine: Atmel AT91SAM9260-EK
Memory policy: ECC disabled, Data cache writeback
Clocks: CPU 198 MHz, master 99 MHz, main 18.432 MHz
CPU0: D VIVT write-back cache
CPU0: I cache: 8192 bytes, associativity 4, 32 byte lines, 64 sets
CPU0: D cache: 8192 bytes, associativity 4, 32 byte lines, 64 sets
Built 1 zonelists. Total pages: 16256
Kernel command line: ram=64M console=ttyS0,115200 initrd=0x21000000
root=/dev/ram0 init=/linuxrc rw
AT91: 96 gpio irqs in 3 banks
PID hash table entries: 256 (order: 8, 1024 bytes)
Console: colour dummy device 80x30
Dentry cache hash table entries: 8192 (order: 3, 32768 bytes)
Inode-cache hash table entries: 4096 (order: 2, 16384 bytes)
Memory: 64MB = 64MB total
Memory: 59312KB available (2224K code, 330K data, 104K init)
Mount-cache hash table entries: 512
CPU: Testing write buffer coherency: ok
NET: Registered protocol family 16
Generic PHY: Registered new driver
usbcore: registered new interface driver usbfs
usbcore: registered new interface driver hub
usbcore: registered new device driver usb
NET: Registered protocol family 2
IP route cache hash table entries: 1024 (order: 0, 4096 bytes)
TCP established hash table entries: 2048 (order: 3, 40960 bytes)
TCP bind hash table entries: 2048 (order: 3, 40960 bytes)
TCP: Hash tables configured (established 2048 bind 2048)
TCP reno registered
checking if image is initramfs...it isn't (no cpio magic); looks like an
initrd
Freeing initrd memory: 2888K
NetWinder Floating Point Emulator V0.97 (extended precision)
JFFS2 version 2.2. (NAND) (C) 2001-2006 Red Hat, Inc.
io scheduler noop registered (default)
Serial: 8250/16550 driver $Revision: 1.90 $ 4 ports, IRQ sharing enabled
RAMDISK driver initialized: 16 RAM disks of 16384K size 1024 blocksize
loop: loaded (max 8 devices)
Davicom DM9161E: Registered new driver
Davicom DM9131: Registered new driver
dm9000 Ethernet Driver
macb macb: detected PHY at address 0 (ID 0181:b8a0)
eth0: Atmel MACB at 0xfffc4000 irq 21 (02:03:04:05:06:07)
NFTL driver: nftlcore.c $Revision: 1.98 $, nftlmount.c $Revision: 1.41 $
SSFDC read-only Flash Translation layer
NAND device: Manufacturer ID: 0xec, Chip ID: 0xda (Samsung NAND 256MiB 3,3V 8-bit)
NAND bus width 16 instead 8 bit
No NAND device found!!!
at91_ohci at91_ohci: AT91 OHCI
at91_ohci at91_ohci: new USB bus registered, assigned bus number 1
at91_ohci at91_ohci: irq 20, io mem 0x00500000
usb usb1: configuration #1 chosen from 1 choice
hub 1-0:1.0: USB hub found
hub 1-0:1.0: 2 ports detected
usbcore: registered new interface driver usbserial
drivers/usb/serial/usb-serial.c: USB Serial support registered for generic
usbcore: registered new interface driver usbserial_generic
drivers/usb/serial/usb-serial.c: USB Serial Driver core
drivers/usb/serial/usb-serial.c: USB Serial support registered for cp2101
usbcore: registered new interface driver cp2101
drivers/usb/serial/cp2101.c: Silicon Labs CP2101/CP2102 RS232 serial adaptor driver v0.07
udc: at91_udc version 3 May 2006
ether gadget: using random self ethernet address
ether gadget: using random host ethernet address
usb0: Ethernet Gadget, version: May Day 2005
usb0: using at91_udc, OUT ep2 IN ep1 STATUS ep4
usb0: MAC da:af:79:f3:97:1e
usb0: HOST MAC ee:9f:94:c8:5d:22
mice: PS/2 mouse device common for all mice
AT91 MMC: 4 wire bus mode not supported by this driver - using 1 wire
TCP cubic registered
Initializing XFRM netlink socket
NET: Registered protocol family 1
NET: Registered protocol family 17
NET: Registered protocol family 15
drivers/rtc/hctosys.c: unable to open rtc device (rtc0)
RAMDISK: Compressed image found at block 0
VFS: Mounted root (ext2 filesystem).
Freeing init memory: 104K
Unable to handle kernel NULL pointer dereference at virtual address 00000000
pgd = c0004000
[00000000] *pgd=00000000
Internal error: Oops: 5 [#1]
Modules linked in:
CPU: 0
PC is at init_dev+0x28/0x4e8
LR is at tty_open+0x120/0x304
pc : [<c00f8a18>] lr : [<c00fbce4>] Not tainted
sp : c12e1e08 ip : c12e1e50 fp : c12e1e4c
r10: 00000002 r9 : 00000000 r8 : c12e0000
r7 : 00000001 r6 : c1237c00 r5 : c0299c38 r4 : c12fb8a0
r3 : 00000000 r2 : c12e1e54 r1 : 00000000 r0 : c1237c00
Flags: nZCv IRQs on FIQs on Mode SVC_32 Segment kernel
Control: 5317F
Table: 20004000 DAC: 00000017
Process swapper (pid: 1, stack limit = 0xc12e0258)
Stack: (0xc12e1e08 to 0xc12e2000)
1e00: c12e1e54 c025fce8 c12e1e50 c1237c00 00000000
c12e0000
1e20: c12e1e4c c12fb8a0 c0299c38 00500001 00000001 c12e0000 00000000
00000002
1e40: c12e1e7c c12e1e50 c00fbce4 c00f8a00 00000000 c3d78a40 00000000
c0299c38
1e60: c3d78a40 c12fb8a0 c1257bc8 00000000 c12e1ea4 c12e1e80 c0080d7c
c00fbbd4
1e80: c12e1ea4 00000000 c12fb8a0 c3d78a40 c0080c2c c12d3220 c12e1ecc
c12e1ea8
1ea0: c007c758 c0080c3c c12fb8a0 c12e1ef8 c03a4000 00000000 ffffff9c
00000000
1ec0: c12e1eec c12e1ed0 c007c8f4 c007c674 00000000 ffffff9c 00000000
00000002
1ee0: c12e1f5c c12e1ef0 c007c950 c007c8d0 c12e1ef8 c00ef830 c1257bc8
c12d3220
1f00: c0065970 00000002 c12e0000 00000101 00000001 00000000 c12e1f34
c12e1f28
1f20: c01d74f0 c00ef830 c12e1f5c c12e1f38 c007c658 c01d74f0 00000002
00000003
1f40: 00000000 c12fb8a0 00000002 00000000 c12e1f84 c12e1f60 c007c9ac
c007c91c
1f60: c0029be8 c0269f98 c001f87c c12e0000 00000000 00000000 c12e1f94
c12e1f88
1f80: c007ca28 c007c968 c12e1fac c12e1f98 c002205c c007ca14 c0269f98
c0269f98
1fa0: c12e1ff4 c12e1fb0 c000886c c002203c 00000000 00000000 c00086fc
c00406bc
1fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000
00000000
1fe0: 00000000 00000000 00000000 c12e1ff8 c00406bc c000870c 00220008
00020015
Backtrace:
[<c00f89f0>] (init_dev+0x0/0x4e8) from [<c00fbce4>] (tty_open+0x120/0x304)
[<c00fbbc4>] (tty_open+0x0/0x304) from [<c0080d7c>]
(chrdev_open+0x150/0x1a0)
[<c0080c2c>] (chrdev_open+0x0/0x1a0) from [<c007c758>]
(__dentry_open+0xf4/0x1e4)
r7 = C12D3220 r6 = C0080C2C r5 = C3D78A40 r4 = C12FB8A0
[<c007c664>] (__dentry_open+0x0/0x1e4) from [<c007c8f4>]
(nameidata_to_filp+0x34/0x4c)
[<c007c8c0>] (nameidata_to_filp+0x0/0x4c) from [<c007c950>]
(do_filp_open+0x44/0x4c)
r4 = 00000002
[<c007c90c>] (do_filp_open+0x0/0x4c) from [<c007c9ac>]
(do_sys_open+0x54/0x98)
r5 = 00000000 r4 = 00000002
[<c007c958>] (do_sys_open+0x0/0x98) from [<c007ca28>] (sys_open+0x24/0x28)
r8 = 00000000 r7 = 00000000 r6 = C12E0000 r5 = C001F87C
r4 = C0269F98
[<c007ca04>] (sys_open+0x0/0x28) from [<c002205c>] (init_post+0x30/0xe8)
[<c002202c>] (init_post+0x0/0xe8) from [<c000886c>] (init+0x170/0x1b8)
r4 = C0269F98
[<c00086fc>] (init+0x0/0x1b8) from [<c00406bc>] (do_exit+0x0/0x808)
r7 = 00000000 r6 = 00000000 r5 = 00000000 r4 = 00000000
Code: e3130010 059030c8 e1a06000 e1a09001 (07935101)
Kernel panic - not syncing: Attempted to kill init!
#
# Automatically generated make config: don't edit
# Linux kernel version: 2.6.20.4
# Fri May 4 11:13:33 2007
#
CONFIG_ARM=y
# CONFIG_GENERIC_TIME is not set
CONFIG_MMU=y
CONFIG_GENERIC_HARDIRQS=y
CONFIG_TRACE_IRQFLAGS_SUPPORT=y
CONFIG_HARDIRQS_SW_RESEND=y
CONFIG_GENERIC_IRQ_PROBE=y
CONFIG_RWSEM_GENERIC_SPINLOCK=y
# CONFIG_ARCH_HAS_ILOG2_U32 is not set
# CONFIG_ARCH_HAS_ILOG2_U64 is not set
CONFIG_GENERIC_HWEIGHT=y
CONFIG_GENERIC_CALIBRATE_DELAY=y
CONFIG_VECTORS_BASE=0xffff0000
CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
#
# Code maturity level options
#
CONFIG_EXPERIMENTAL=y
CONFIG_BROKEN_ON_SMP=y
CONFIG_INIT_ENV_ARG_LIMIT=32
#
# General setup
#
CONFIG_LOCALVERSION="-pml1"
CONFIG_LOCALVERSION_AUTO=y
# CONFIG_SWAP is not set
CONFIG_SYSVIPC=y
# CONFIG_IPC_NS is not set
CONFIG_POSIX_MQUEUE=y
# CONFIG_BSD_PROCESS_ACCT is not set
# CONFIG_TASKSTATS is not set
# CONFIG_UTS_NS is not set
# CONFIG_AUDIT is not set
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_SYSFS_DEPRECATED=y
# CONFIG_RELAY is not set
CONFIG_INITRAMFS_SOURCE=""
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
CONFIG_SYSCTL=y
# CONFIG_EMBEDDED is not set
CONFIG_UID16=y
CONFIG_SYSCTL_SYSCALL=y
CONFIG_KALLSYMS=y
# CONFIG_KALLSYMS_ALL is not set
CONFIG_KALLSYMS_EXTRA_PASS=y
CONFIG_HOTPLUG=y
CONFIG_PRINTK=y
CONFIG_BUG=y
CONFIG_ELF_CORE=y
CONFIG_BASE_FULL=y
CONFIG_FUTEX=y
CONFIG_EPOLL=y
CONFIG_SHMEM=y
CONFIG_SLAB=y
CONFIG_VM_EVENT_COUNTERS=y
CONFIG_RT_MUTEXES=y
# CONFIG_TINY_SHMEM is not set
CONFIG_BASE_SMALL=0
# CONFIG_SLOB is not set
#
# Loadable module support
#
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_MODULE_FORCE_UNLOAD is not set
CONFIG_MODVERSIONS=y
# CONFIG_MODULE_SRCVERSION_ALL is not set
CONFIG_KMOD=y
#
# Block layer
#
CONFIG_BLOCK=y
# CONFIG_LBD is not set
# CONFIG_BLK_DEV_IO_TRACE is not set
# CONFIG_LSF is not set
#
# IO Schedulers
#
CONFIG_IOSCHED_NOOP=y
# CONFIG_IOSCHED_AS is not set
# CONFIG_IOSCHED_DEADLINE is not set
# CONFIG_IOSCHED_CFQ is not set
# CONFIG_DEFAULT_AS is not set
# CONFIG_DEFAULT_DEADLINE is not set
# CONFIG_DEFAULT_CFQ is not set
CONFIG_DEFAULT_NOOP=y
CONFIG_DEFAULT_IOSCHED="noop"
#
# System Type
#
# CONFIG_ARCH_AAEC2000 is not set
# CONFIG_ARCH_INTEGRATOR is not set
# CONFIG_ARCH_REALVIEW is not set
# CONFIG_ARCH_VERSATILE is not set
CONFIG_ARCH_AT91=y
# CONFIG_ARCH_CLPS7500 is not set
# CONFIG_ARCH_CLPS711X is not set
# CONFIG_ARCH_CO285 is not set
# CONFIG_ARCH_EBSA110 is not set
# CONFIG_ARCH_EP93XX is not set
# CONFIG_ARCH_FOOTBRIDGE is not set
# CONFIG_ARCH_NETX is not set
# CONFIG_ARCH_H720X is not set
# CONFIG_ARCH_IMX is not set
# CONFIG_ARCH_IOP32X is not set
# CONFIG_ARCH_IOP33X is not set
# CONFIG_ARCH_IOP13XX is not set
# CONFIG_ARCH_IXP4XX is not set
# CONFIG_ARCH_IXP2000 is not set
# CONFIG_ARCH_IXP23XX is not set
# CONFIG_ARCH_L7200 is not set
# CONFIG_ARCH_PNX4008 is not set
# CONFIG_ARCH_PXA is not set
# CONFIG_ARCH_RPC is not set
# CONFIG_ARCH_SA1100 is not set
# CONFIG_ARCH_S3C2410 is not set
# CONFIG_ARCH_SHARK is not set
# CONFIG_ARCH_LH7A40X is not set
# CONFIG_ARCH_OMAP is not set
#
# Atmel AT91 System-on-Chip
#
# CONFIG_ARCH_AT91RM9200 is not set
CONFIG_ARCH_AT91SAM9260=y
# CONFIG_ARCH_AT91SAM9261 is not set
# CONFIG_ARCH_AT91SAM9263 is not set
#
# AT91SAM9260 Variants
#
# CONFIG_ARCH_AT91SAM9260_SAM9XE is not set
#
# AT91SAM9260 / AT91SAM9XE Board Type
#
CONFIG_MACH_AT91SAM9260EK=y
#
# AT91 Board Options
#
CONFIG_MTD_AT91_DATAFLASH_CARD=y
CONFIG_MTD_NAND_AT91_BUSWIDTH_16=y
#
# AT91 Feature Selections
#
CONFIG_AT91_PROGRAMMABLE_CLOCKS=y
#
# Processor Type
#
CONFIG_CPU_32=y
CONFIG_CPU_ARM926T=y
CONFIG_CPU_32v5=y
CONFIG_CPU_ABRT_EV5TJ=y
CONFIG_CPU_CACHE_VIVT=y
CONFIG_CPU_COPY_V4WB=y
CONFIG_CPU_TLB_V4WBI=y
CONFIG_CPU_CP15=y
CONFIG_CPU_CP15_MMU=y
#
# Processor Features
#
CONFIG_ARM_THUMB=y
# CONFIG_CPU_ICACHE_DISABLE is not set
# CONFIG_CPU_DCACHE_DISABLE is not set
# CONFIG_CPU_DCACHE_WRITETHROUGH is not set
# CONFIG_CPU_CACHE_ROUND_ROBIN is not set
#
# Bus support
#
#
# PCCARD (PCMCIA/CardBus) support
#
# CONFIG_PCCARD is not set
#
# Kernel Features
#
# CONFIG_PREEMPT is not set
# CONFIG_NO_IDLE_HZ is not set
CONFIG_HZ=100
CONFIG_AEABI=y
CONFIG_OABI_COMPAT=y
# CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set
CONFIG_SELECT_MEMORY_MODEL=y
CONFIG_FLATMEM_MANUAL=y
# CONFIG_DISCONTIGMEM_MANUAL is not set
# CONFIG_SPARSEMEM_MANUAL is not set
CONFIG_FLATMEM=y
CONFIG_FLAT_NODE_MEM_MAP=y
# CONFIG_SPARSEMEM_STATIC is not set
CONFIG_SPLIT_PTLOCK_CPUS=4096
CONFIG_RESOURCES_64BIT=y
CONFIG_LEDS=y
CONFIG_LEDS_TIMER=y
CONFIG_LEDS_CPU=y
CONFIG_ALIGNMENT_TRAP=y
#
# Boot options
#
CONFIG_ZBOOT_ROM_TEXT=0
CONFIG_ZBOOT_ROM_BSS=0
CONFIG_CMDLINE=""
# CONFIG_XIP_KERNEL is not set
#
# Floating point emulation
#
#
# At least one emulation must be selected
#
CONFIG_FPE_NWFPE=y
# CONFIG_FPE_NWFPE_XP is not set
# CONFIG_FPE_FASTFPE is not set
# CONFIG_VFP is not set
#
# Userspace binary formats
#
CONFIG_BINFMT_ELF=y
# CONFIG_BINFMT_AOUT is not set
CONFIG_BINFMT_MISC=y
#
# Power management options
#
# CONFIG_PM is not set
# CONFIG_APM is not set
#
# Networking
#
CONFIG_NET=y
#
# Networking options
#
# CONFIG_NETDEBUG is not set
CONFIG_PACKET=y
CONFIG_PACKET_MMAP=y
CONFIG_UNIX=y
CONFIG_XFRM=y
# CONFIG_XFRM_USER is not set
# CONFIG_XFRM_SUB_POLICY is not set
CONFIG_NET_KEY=y
CONFIG_INET=y
# CONFIG_IP_MULTICAST is not set
# CONFIG_IP_ADVANCED_ROUTER is not set
CONFIG_IP_FIB_HASH=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
# CONFIG_IP_PNP_BOOTP is not set
# CONFIG_IP_PNP_RARP is not set
# CONFIG_NET_IPIP is not set
# CONFIG_NET_IPGRE is not set
# CONFIG_ARPD is not set
# CONFIG_SYN_COOKIES is not set
# CONFIG_INET_AH is not set
# CONFIG_INET_ESP is not set
# CONFIG_INET_IPCOMP is not set
# CONFIG_INET_XFRM_TUNNEL is not set
# CONFIG_INET_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_INET_DIAG is not set
# CONFIG_TCP_CONG_ADVANCED is not set
CONFIG_TCP_CONG_CUBIC=y
CONFIG_DEFAULT_TCP_CONG="cubic"
# CONFIG_TCP_MD5SIG is not set
# CONFIG_IPV6 is not set
# CONFIG_INET6_XFRM_TUNNEL is not set
# CONFIG_INET6_TUNNEL is not set
# CONFIG_NETWORK_SECMARK is not set
# CONFIG_NETFILTER is not set
#
# DCCP Configuration (EXPERIMENTAL)
#
# CONFIG_IP_DCCP is not set
#
# SCTP Configuration (EXPERIMENTAL)
#
# CONFIG_IP_SCTP is not set
#
# TIPC Configuration (EXPERIMENTAL)
#
# CONFIG_TIPC is not set
# CONFIG_ATM is not set
# CONFIG_BRIDGE is not set
# CONFIG_VLAN_8021Q is not set
# CONFIG_DECNET is not set
# CONFIG_LLC2 is not set
# CONFIG_IPX is not set
# CONFIG_ATALK is not set
# CONFIG_X25 is not set
# CONFIG_LAPB is not set
# CONFIG_ECONET is not set
# CONFIG_WAN_ROUTER is not set
#
# QoS and/or fair queueing
#
# CONFIG_NET_SCHED is not set
#
# Network testing
#
# CONFIG_NET_PKTGEN is not set
# CONFIG_HAMRADIO is not set
# CONFIG_IRDA is not set
# CONFIG_BT is not set
# CONFIG_IEEE80211 is not set
#
# Device Drivers
#
#
# Generic Driver Options
#
CONFIG_STANDALONE=y
CONFIG_PREVENT_FIRMWARE_BUILD=y
CONFIG_FW_LOADER=y
# CONFIG_DEBUG_DRIVER is not set
# CONFIG_SYS_HYPERVISOR is not set
#
# Connector - unified userspace <-> kernelspace linker
#
# CONFIG_CONNECTOR is not set
#
# Memory Technology Devices (MTD)
#
CONFIG_MTD=y
# CONFIG_MTD_DEBUG is not set
CONFIG_MTD_CONCAT=y
CONFIG_MTD_PARTITIONS=y
CONFIG_MTD_REDBOOT_PARTS=y
CONFIG_MTD_REDBOOT_DIRECTORY_BLOCK=-1
# CONFIG_MTD_REDBOOT_PARTS_UNALLOCATED is not set
# CONFIG_MTD_REDBOOT_PARTS_READONLY is not set
# CONFIG_MTD_CMDLINE_PARTS is not set
# CONFIG_MTD_AFS_PARTS is not set
#
# User Modules And Translation Layers
#
CONFIG_MTD_CHAR=y
CONFIG_MTD_BLKDEVS=y
CONFIG_MTD_BLOCK=y
CONFIG_FTL=y
CONFIG_NFTL=y
CONFIG_NFTL_RW=y
# CONFIG_INFTL is not set
# CONFIG_RFD_FTL is not set
CONFIG_SSFDC=y
#
# RAM/ROM/Flash chip drivers
#
CONFIG_MTD_CFI=y
CONFIG_MTD_JEDECPROBE=y
CONFIG_MTD_GEN_PROBE=y
# CONFIG_MTD_CFI_ADV_OPTIONS is not set
CONFIG_MTD_MAP_BANK_WIDTH_1=y
CONFIG_MTD_MAP_BANK_WIDTH_2=y
CONFIG_MTD_MAP_BANK_WIDTH_4=y
# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set
# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set
# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set
CONFIG_MTD_CFI_I1=y
CONFIG_MTD_CFI_I2=y
# CONFIG_MTD_CFI_I4 is not set
# CONFIG_MTD_CFI_I8 is not set
CONFIG_MTD_CFI_INTELEXT=y
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_CFI_STAA=y
CONFIG_MTD_CFI_UTIL=y
CONFIG_MTD_RAM=y
CONFIG_MTD_ROM=y
CONFIG_MTD_ABSENT=y
# CONFIG_MTD_OBSOLETE_CHIPS is not set
#
# Mapping drivers for chip access
#
CONFIG_MTD_COMPLEX_MAPPINGS=y
# CONFIG_MTD_PHYSMAP is not set
# CONFIG_MTD_ARM_INTEGRATOR is not set
# CONFIG_MTD_IMPA7 is not set
# CONFIG_MTD_PLATRAM is not set
#
# Self-contained MTD device drivers
#
CONFIG_MTD_DATAFLASH=y
# CONFIG_MTD_M25P80 is not set
# CONFIG_MTD_SLRAM is not set
# CONFIG_MTD_PHRAM is not set
# CONFIG_MTD_MTDRAM is not set
# CONFIG_MTD_BLOCK2MTD is not set
#
# Disk-On-Chip Device Drivers
#
# CONFIG_MTD_DOC2000 is not set
# CONFIG_MTD_DOC2001 is not set
# CONFIG_MTD_DOC2001PLUS is not set
#
# NAND Flash Device Drivers
#
CONFIG_MTD_NAND=y
# CONFIG_MTD_NAND_VERIFY_WRITE is not set
# CONFIG_MTD_NAND_ECC_SMC is not set
CONFIG_MTD_NAND_IDS=y
# CONFIG_MTD_NAND_DISKONCHIP is not set
CONFIG_MTD_NAND_AT91=y
# CONFIG_MTD_NAND_NANDSIM is not set
#
# OneNAND Flash Device Drivers
#
# CONFIG_MTD_ONENAND is not set
#
# Parallel port support
#
# CONFIG_PARPORT is not set
#
# Plug and Play support
#
#
# Block devices
#
# CONFIG_BLK_DEV_COW_COMMON is not set
CONFIG_BLK_DEV_LOOP=y
# CONFIG_BLK_DEV_CRYPTOLOOP is not set
# CONFIG_BLK_DEV_NBD is not set
# CONFIG_BLK_DEV_UB is not set
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_COUNT=16
CONFIG_BLK_DEV_RAM_SIZE=16384
CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024
CONFIG_BLK_DEV_INITRD=y
# CONFIG_CDROM_PKTCDVD is not set
# CONFIG_ATA_OVER_ETH is not set
#
# SCSI device support
#
# CONFIG_RAID_ATTRS is not set
# CONFIG_SCSI is not set
# CONFIG_SCSI_NETLINK is not set
#
# Serial ATA (prod) and Parallel ATA (experimental) drivers
#
# CONFIG_ATA is not set
#
# Multi-device support (RAID and LVM)
#
# CONFIG_MD is not set
#
# Fusion MPT device support
#
# CONFIG_FUSION is not set
#
# IEEE 1394 (FireWire) support
#
#
# I2O device support
#
#
# Network device support
#
CONFIG_NETDEVICES=y
CONFIG_DUMMY=y
# CONFIG_BONDING is not set
# CONFIG_EQUALIZER is not set
# CONFIG_TUN is not set
#
# PHY device support
#
CONFIG_PHYLIB=y
#
# MII PHY device drivers
#
# CONFIG_MARVELL_PHY is not set
CONFIG_DAVICOM_PHY=y
# CONFIG_QSEMI_PHY is not set
# CONFIG_LXT_PHY is not set
# CONFIG_CICADA_PHY is not set
# CONFIG_VITESSE_PHY is not set
# CONFIG_SMSC_PHY is not set
# CONFIG_BROADCOM_PHY is not set
# CONFIG_FIXED_PHY is not set
#
# Ethernet (10 or 100Mbit)
#
CONFIG_NET_ETHERNET=y
CONFIG_MII=y
CONFIG_MACB=y
# CONFIG_SMC91X is not set
CONFIG_DM9000=y
#
# Ethernet (1000 Mbit)
#
#
# Ethernet (10000 Mbit)
#
#
# Token Ring devices
#
#
# Wireless LAN (non-hamradio)
#
# CONFIG_NET_RADIO is not set
#
# Wan interfaces
#
# CONFIG_WAN is not set
# CONFIG_PPP is not set
# CONFIG_SLIP is not set
# CONFIG_SHAPER is not set
# CONFIG_NETCONSOLE is not set
# CONFIG_NETPOLL is not set
# CONFIG_NET_POLL_CONTROLLER is not set
#
# ISDN subsystem
#
# CONFIG_ISDN is not set
#
# Input device support
#
CONFIG_INPUT=y
# CONFIG_INPUT_FF_MEMLESS is not set
#
# Userland interfaces
#
CONFIG_INPUT_MOUSEDEV=y
# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
# CONFIG_INPUT_JOYDEV is not set
# CONFIG_INPUT_TSDEV is not set
# CONFIG_INPUT_EVDEV is not set
# CONFIG_INPUT_EVBUG is not set
#
# Input Device Drivers
#
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_INPUT_JOYSTICK is not set
# CONFIG_INPUT_TOUCHSCREEN is not set
# CONFIG_INPUT_MISC is not set
#
# Hardware I/O ports
#
CONFIG_SERIO=y
CONFIG_SERIO_SERPORT=y
# CONFIG_SERIO_RAW is not set
# CONFIG_GAMEPORT is not set
#
# Character devices
#
CONFIG_VT=y
CONFIG_VT_CONSOLE=y
CONFIG_HW_CONSOLE=y
# CONFIG_VT_HW_CONSOLE_BINDING is not set
# CONFIG_SERIAL_NONSTANDARD is not set
#
# Serial drivers
#
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_8250_NR_UARTS=4
CONFIG_SERIAL_8250_RUNTIME_UARTS=4
CONFIG_SERIAL_8250_EXTENDED=y
# CONFIG_SERIAL_8250_MANY_PORTS is not set
CONFIG_SERIAL_8250_SHARE_IRQ=y
CONFIG_SERIAL_8250_DETECT_IRQ=y
CONFIG_SERIAL_8250_RSA=y
#
# Non-8250 serial port support
#
CONFIG_SERIAL_ATMEL=y
CONFIG_SERIAL_ATMEL_CONSOLE=y
# CONFIG_SERIAL_ATMEL_TTYAT is not set
CONFIG_SERIAL_CORE=y
CONFIG_SERIAL_CORE_CONSOLE=y
CONFIG_UNIX98_PTYS=y
# CONFIG_LEGACY_PTYS is not set
#
# IPMI
#
# CONFIG_IPMI_HANDLER is not set
#
# Watchdog Cards
#
# CONFIG_WATCHDOG is not set
# CONFIG_HW_RANDOM is not set
# CONFIG_NVRAM is not set
# CONFIG_DTLK is not set
# CONFIG_R3964 is not set
# CONFIG_RAW_DRIVER is not set
#
# TPM devices
#
# CONFIG_TCG_TPM is not set
#
# I2C support
#
CONFIG_I2C=y
CONFIG_I2C_CHARDEV=y
#
# I2C Algorithms
#
# CONFIG_I2C_ALGOBIT is not set
# CONFIG_I2C_ALGOPCF is not set
# CONFIG_I2C_ALGOPCA is not set
#
# I2C Hardware Bus support
#
# CONFIG_I2C_ATMELTWI is not set
CONFIG_I2C_AT91=y
CONFIG_I2C_AT91_CLOCKRATE=100000
# CONFIG_I2C_OCORES is not set
# CONFIG_I2C_PARPORT_LIGHT is not set
# CONFIG_I2C_STUB is not set
# CONFIG_I2C_PCA_ISA is not set
#
# Miscellaneous I2C Chip support
#
# CONFIG_SENSORS_DS1337 is not set
# CONFIG_SENSORS_DS1374 is not set
# CONFIG_SENSORS_EEPROM is not set
# CONFIG_SENSORS_PCF8574 is not set
# CONFIG_SENSORS_PCA9539 is not set
# CONFIG_SENSORS_PCF8591 is not set
# CONFIG_SENSORS_MAX6875 is not set
# CONFIG_I2C_DEBUG_CORE is not set
# CONFIG_I2C_DEBUG_ALGO is not set
# CONFIG_I2C_DEBUG_BUS is not set
# CONFIG_I2C_DEBUG_CHIP is not set
#
# SPI support
#
CONFIG_SPI=y
# CONFIG_SPI_DEBUG is not set
CONFIG_SPI_MASTER=y
#
# SPI Master Controller Drivers
#
CONFIG_SPI_ATMEL=y
# CONFIG_SPI_BITBANG is not set
#
# SPI Protocol Masters
#
#
# Dallas's 1-wire bus
#
# CONFIG_W1 is not set
#
# Hardware Monitoring support
#
# CONFIG_HWMON is not set
# CONFIG_HWMON_VID is not set
#
# Misc devices
#
# CONFIG_TIFM_CORE is not set
#
# LED devices
#
CONFIG_NEW_LEDS=y
CONFIG_LEDS_CLASS=y
#
# LED drivers
#
#
# LED Triggers
#
CONFIG_LEDS_TRIGGERS=y
CONFIG_LEDS_TRIGGER_TIMER=y
CONFIG_LEDS_TRIGGER_HEARTBEAT=y
#
# Multimedia devices
#
# CONFIG_VIDEO_DEV is not set
#
# Digital Video Broadcasting Devices
#
# CONFIG_DVB is not set
# CONFIG_USB_DABUSB is not set
#
# Graphics support
#
# CONFIG_FIRMWARE_EDID is not set
# CONFIG_FB is not set
#
# Console display driver support
#
# CONFIG_VGA_CONSOLE is not set
CONFIG_DUMMY_CONSOLE=y
# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
#
# Sound
#
# CONFIG_SOUND is not set
#
# HID Devices
#
CONFIG_HID=y
#
# USB support
#
CONFIG_USB_ARCH_HAS_HCD=y
CONFIG_USB_ARCH_HAS_OHCI=y
# CONFIG_USB_ARCH_HAS_EHCI is not set
CONFIG_USB=y
# CONFIG_USB_DEBUG is not set
#
# Miscellaneous USB options
#
CONFIG_USB_DEVICEFS=y
# CONFIG_USB_BANDWIDTH is not set
# CONFIG_USB_DYNAMIC_MINORS is not set
# CONFIG_USB_OTG is not set
#
# USB Host Controller Drivers
#
# CONFIG_USB_ISP116X_HCD is not set
CONFIG_USB_OHCI_HCD=y
# CONFIG_USB_OHCI_BIG_ENDIAN is not set
CONFIG_USB_OHCI_LITTLE_ENDIAN=y
# CONFIG_USB_SL811_HCD is not set
#
# USB Device Class drivers
#
# CONFIG_USB_ACM is not set
# CONFIG_USB_PRINTER is not set
#
# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
#
#
# may also be needed; see USB_STORAGE Help for more information
#
# CONFIG_USB_LIBUSUAL is not set
#
# USB Input Devices
#
CONFIG_USB_HID=y
# CONFIG_USB_HIDINPUT_POWERBOOK is not set
# CONFIG_HID_FF is not set
# CONFIG_USB_HIDDEV is not set
# CONFIG_USB_AIPTEK is not set
# CONFIG_USB_WACOM is not set
# CONFIG_USB_ACECAD is not set
# CONFIG_USB_KBTAB is not set
# CONFIG_USB_POWERMATE is not set
# CONFIG_USB_TOUCHSCREEN is not set
# CONFIG_USB_YEALINK is not set
# CONFIG_USB_XPAD is not set
# CONFIG_USB_ATI_REMOTE is not set
# CONFIG_USB_ATI_REMOTE2 is not set
# CONFIG_USB_KEYSPAN_REMOTE is not set
# CONFIG_USB_APPLETOUCH is not set
#
# USB Imaging devices
#
# CONFIG_USB_MDC800 is not set
#
# USB Network Adapters
#
# CONFIG_USB_CATC is not set
# CONFIG_USB_KAWETH is not set
# CONFIG_USB_PEGASUS is not set
# CONFIG_USB_RTL8150 is not set
# CONFIG_USB_USBNET_MII is not set
# CONFIG_USB_USBNET is not set
# CONFIG_USB_MON is not set
#
# USB port drivers
#
#
# USB Serial Converter support
#
CONFIG_USB_SERIAL=y
# CONFIG_USB_SERIAL_CONSOLE is not set
CONFIG_USB_SERIAL_GENERIC=y
# CONFIG_USB_SERIAL_AIRCABLE is not set
# CONFIG_USB_SERIAL_AIRPRIME is not set
# CONFIG_USB_SERIAL_ARK3116 is not set
# CONFIG_USB_SERIAL_BELKIN is not set
# CONFIG_USB_SERIAL_WHITEHEAT is not set
# CONFIG_USB_SERIAL_DIGI_ACCELEPORT is not set
CONFIG_USB_SERIAL_CP2101=y
# CONFIG_USB_SERIAL_CYPRESS_M8 is not set
# CONFIG_USB_SERIAL_EMPEG is not set
# CONFIG_USB_SERIAL_FTDI_SIO is not set
# CONFIG_USB_SERIAL_FUNSOFT is not set
# CONFIG_USB_SERIAL_VISOR is not set
# CONFIG_USB_SERIAL_IPAQ is not set
# CONFIG_USB_SERIAL_IR is not set
# CONFIG_USB_SERIAL_EDGEPORT is not set
# CONFIG_USB_SERIAL_EDGEPORT_TI is not set
# CONFIG_USB_SERIAL_GARMIN is not set
# CONFIG_USB_SERIAL_IPW is not set
# CONFIG_USB_SERIAL_KEYSPAN_PDA is not set
# CONFIG_USB_SERIAL_KEYSPAN is not set
# CONFIG_USB_SERIAL_KLSI is not set
# CONFIG_USB_SERIAL_KOBIL_SCT is not set
# CONFIG_USB_SERIAL_MCT_U232 is not set
# CONFIG_USB_SERIAL_MOS7720 is not set
# CONFIG_USB_SERIAL_MOS7840 is not set
# CONFIG_USB_SERIAL_NAVMAN is not set
# CONFIG_USB_SERIAL_PL2303 is not set
# CONFIG_USB_SERIAL_HP4X is not set
# CONFIG_USB_SERIAL_SAFE is not set
# CONFIG_USB_SERIAL_SIERRAWIRELESS is not set
# CONFIG_USB_SERIAL_TI is not set
# CONFIG_USB_SERIAL_CYBERJACK is not set
# CONFIG_USB_SERIAL_XIRCOM is not set
# CONFIG_USB_SERIAL_OPTION is not set
# CONFIG_USB_SERIAL_OMNINET is not set
# CONFIG_USB_SERIAL_DEBUG is not set
#
# USB Miscellaneous drivers
#
# CONFIG_USB_EMI62 is not set
# CONFIG_USB_EMI26 is not set
# CONFIG_USB_ADUTUX is not set
# CONFIG_USB_AUERSWALD is not set
# CONFIG_USB_RIO500 is not set
# CONFIG_USB_LEGOTOWER is not set
# CONFIG_USB_LCD is not set
# CONFIG_USB_LED is not set
# CONFIG_USB_CYPRESS_CY7C63 is not set
# CONFIG_USB_CYTHERM is not set
# CONFIG_USB_PHIDGET is not set
# CONFIG_USB_IDMOUSE is not set
# CONFIG_USB_FTDI_ELAN is not set
# CONFIG_USB_APPLEDISPLAY is not set
# CONFIG_USB_LD is not set
# CONFIG_USB_TRANCEVIBRATOR is not set
# CONFIG_USB_TEST is not set
#
# USB DSL modem support
#
#
# USB Gadget Support
#
CONFIG_USB_GADGET=y
# CONFIG_USB_GADGET_DEBUG_FILES is not set
CONFIG_USB_GADGET_SELECTED=y
# CONFIG_USB_GADGET_NET2280 is not set
# CONFIG_USB_GADGET_PXA2XX is not set
# CONFIG_USB_GADGET_GOKU is not set
# CONFIG_USB_GADGET_LH7A40X is not set
# CONFIG_USB_GADGET_HUSB2DEV is not set
# CONFIG_USB_GADGET_OMAP is not set
CONFIG_USB_GADGET_AT91=y
CONFIG_USB_AT91=y
# CONFIG_USB_GADGET_DUMMY_HCD is not set
# CONFIG_USB_GADGET_DUALSPEED is not set
# CONFIG_USB_ZERO is not set
CONFIG_USB_ETH=y
# CONFIG_USB_ETH_RNDIS is not set
# CONFIG_USB_GADGETFS is not set
# CONFIG_USB_FILE_STORAGE is not set
# CONFIG_USB_G_SERIAL is not set
# CONFIG_USB_MIDI_GADGET is not set
#
# MMC/SD Card support
#
CONFIG_MMC=y
# CONFIG_MMC_DEBUG is not set
CONFIG_MMC_BLOCK=y
CONFIG_MMC_AT91=y
# CONFIG_MMC_TIFM_SD is not set
#
# Real Time Clock
#
CONFIG_RTC_LIB=y
# CONFIG_RTC_CLASS is not set
#
# File systems
#
CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT2_FS_SECURITY=y
# CONFIG_EXT2_FS_XIP is not set
CONFIG_EXT3_FS=y
CONFIG_EXT3_FS_XATTR=y
# CONFIG_EXT3_FS_POSIX_ACL is not set
# CONFIG_EXT3_FS_SECURITY is not set
# CONFIG_EXT4DEV_FS is not set
CONFIG_JBD=y
# CONFIG_JBD_DEBUG is not set
CONFIG_FS_MBCACHE=y
# CONFIG_REISERFS_FS is not set
# CONFIG_JFS_FS is not set
CONFIG_FS_POSIX_ACL=y
# CONFIG_XFS_FS is not set
# CONFIG_GFS2_FS is not set
# CONFIG_OCFS2_FS is not set
# CONFIG_MINIX_FS is not set
# CONFIG_ROMFS_FS is not set
# CONFIG_INOTIFY is not set
# CONFIG_QUOTA is not set
CONFIG_DNOTIFY=y
# CONFIG_AUTOFS_FS is not set
# CONFIG_AUTOFS4_FS is not set
# CONFIG_FUSE_FS is not set
#
# CD-ROM/DVD Filesystems
#
# CONFIG_ISO9660_FS is not set
# CONFIG_UDF_FS is not set
#
# DOS/FAT/NT Filesystems
#
# CONFIG_MSDOS_FS is not set
# CONFIG_VFAT_FS is not set
# CONFIG_NTFS_FS is not set
#
# Pseudo filesystems
#
CONFIG_PROC_FS=y
CONFIG_PROC_SYSCTL=y
CONFIG_SYSFS=y
CONFIG_TMPFS=y
# CONFIG_TMPFS_POSIX_ACL is not set
# CONFIG_HUGETLB_PAGE is not set
CONFIG_RAMFS=y
CONFIG_CONFIGFS_FS=y
#
# Miscellaneous filesystems
#
# CONFIG_ADFS_FS is not set
# CONFIG_AFFS_FS is not set
# CONFIG_HFS_FS is not set
# CONFIG_HFSPLUS_FS is not set
# CONFIG_BEFS_FS is not set
# CONFIG_BFS_FS is not set
# CONFIG_EFS_FS is not set
CONFIG_JFFS2_FS=y
CONFIG_JFFS2_FS_DEBUG=0
CONFIG_JFFS2_FS_WRITEBUFFER=y
# CONFIG_JFFS2_SUMMARY is not set
# CONFIG_JFFS2_FS_XATTR is not set
# CONFIG_JFFS2_COMPRESSION_OPTIONS is not set
CONFIG_JFFS2_ZLIB=y
CONFIG_JFFS2_RTIME=y
# CONFIG_JFFS2_RUBIN is not set
CONFIG_CRAMFS=y
# CONFIG_VXFS_FS is not set
# CONFIG_HPFS_FS is not set
# CONFIG_QNX4FS_FS is not set
# CONFIG_SYSV_FS is not set
# CONFIG_UFS_FS is not set
#
# Network File Systems
#
# CONFIG_NFS_FS is not set
# CONFIG_NFSD is not set
# CONFIG_SMB_FS is not set
# CONFIG_CIFS is not set
# CONFIG_NCP_FS is not set
# CONFIG_CODA_FS is not set
# CONFIG_AFS_FS is not set
# CONFIG_9P_FS is not set
#
# Partition Types
#
# CONFIG_PARTITION_ADVANCED is not set
CONFIG_MSDOS_PARTITION=y
#
# Native Language Support
#
# CONFIG_NLS is not set
#
# Distributed Lock Manager
#
# CONFIG_DLM is not set
#
# Profiling support
#
# CONFIG_PROFILING is not set
#
# Kernel hacking
#
# CONFIG_PRINTK_TIME is not set
CONFIG_ENABLE_MUST_CHECK=y
CONFIG_MAGIC_SYSRQ=y
# CONFIG_UNUSED_SYMBOLS is not set
# CONFIG_DEBUG_FS is not set
# CONFIG_HEADERS_CHECK is not set
CONFIG_DEBUG_KERNEL=y
CONFIG_LOG_BUF_SHIFT=17
CONFIG_DETECT_SOFTLOCKUP=y
# CONFIG_SCHEDSTATS is not set
# CONFIG_DEBUG_SLAB is not set
# CONFIG_DEBUG_RT_MUTEXES is not set
# CONFIG_RT_MUTEX_TESTER is not set
CONFIG_DEBUG_SPINLOCK=y
# CONFIG_DEBUG_MUTEXES is not set
# CONFIG_DEBUG_RWSEMS is not set
CONFIG_DEBUG_SPINLOCK_SLEEP=y
# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
# CONFIG_DEBUG_KOBJECT is not set
CONFIG_DEBUG_BUGVERBOSE=y
# CONFIG_DEBUG_INFO is not set
# CONFIG_DEBUG_VM is not set
# CONFIG_DEBUG_LIST is not set
CONFIG_FRAME_POINTER=y
CONFIG_FORCED_INLINING=y
# CONFIG_RCU_TORTURE_TEST is not set
# CONFIG_DEBUG_USER is not set
# CONFIG_DEBUG_ERRORS is not set
# CONFIG_DEBUG_LL is not set
#
# Security options
#
# CONFIG_KEYS is not set
# CONFIG_SECURITY is not set
#
# Cryptographic options
#
CONFIG_CRYPTO=y
CONFIG_CRYPTO_ALGAPI=y
CONFIG_CRYPTO_HASH=y
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_HMAC=y
# CONFIG_CRYPTO_XCBC is not set
# CONFIG_CRYPTO_NULL is not set
# CONFIG_CRYPTO_MD4 is not set
CONFIG_CRYPTO_MD5=y
# CONFIG_CRYPTO_SHA1 is not set
# CONFIG_CRYPTO_SHA256 is not set
# CONFIG_CRYPTO_SHA512 is not set
# CONFIG_CRYPTO_WP512 is not set
# CONFIG_CRYPTO_TGR192 is not set
# CONFIG_CRYPTO_GF128MUL is not set
# CONFIG_CRYPTO_ECB is not set
# CONFIG_CRYPTO_CBC is not set
# CONFIG_CRYPTO_LRW is not set
# CONFIG_CRYPTO_DES is not set
# CONFIG_CRYPTO_BLOWFISH is not set
# CONFIG_CRYPTO_TWOFISH is not set
# CONFIG_CRYPTO_SERPENT is not set
# CONFIG_CRYPTO_AES is not set
# CONFIG_CRYPTO_CAST5 is not set
# CONFIG_CRYPTO_CAST6 is not set
# CONFIG_CRYPTO_TEA is not set
# CONFIG_CRYPTO_ARC4 is not set
# CONFIG_CRYPTO_KHAZAD is not set
# CONFIG_CRYPTO_ANUBIS is not set
# CONFIG_CRYPTO_DEFLATE is not set
# CONFIG_CRYPTO_MICHAEL_MIC is not set
# CONFIG_CRYPTO_CRC32C is not set
# CONFIG_CRYPTO_TEST is not set
#
# Hardware crypto devices
#
#
# Library routines
#
CONFIG_BITREVERSE=y
CONFIG_CRC_CCITT=y
CONFIG_CRC16=y
CONFIG_CRC32=y
CONFIG_LIBCRC32C=y
CONFIG_ZLIB_INFLATE=y
CONFIG_ZLIB_DEFLATE=y
CONFIG_PLIST=y
CONFIG_IOMAP_COPY=y
--
Ryan Ordway E-mail: rordway@oregonstate.edu
Unix Systems Administrator rordway@library.oregonstate.edu
OSU Libraries, Corvallis, OR 97370 Office: Valley Library #4657
^ permalink raw reply [flat|nested] 71+ messages in thread
* Re: [patch 14/22] pollfs: pollable futex
2007-05-04 19:15 ` Davide Libenzi
2007-05-04 19:20 ` 2.6.20.4 / 2.6.21.1 AT91SAM9260-EK oops Ryan Ordway
@ 2007-05-04 23:38 ` Ulrich Drepper
2007-05-05 18:54 ` Davide Libenzi
1 sibling, 1 reply; 71+ messages in thread
From: Ulrich Drepper @ 2007-05-04 23:38 UTC (permalink / raw)
To: Davide Libenzi
Cc: Davi Arnaut, Eric Dumazet, Andrew Morton, Linus Torvalds,
Linux Kernel Mailing List
On 5/4/07, Davide Libenzi <davidel@xmailserver.org> wrote:
> This is a pretty specific case, that is not very typical to find in the
> usual common event loop dispatch application design.
This is where you are very wrong. Yes, it's rare in the Unix world
because non-trivial programs cannot implement this in most cases with
the available infrastructure. But it is very common in other places
and what is more, it makes a lot of sense. It gives you scalability
with the size of the machines at no cost associated to reorganizing
the program.
> And if you *really* want your truly generic WaitForMultipleObjects
> implementation, your only way is to base it on files. Files are our almost
> perfect match to HANDLEs in our world. We have the basic infrastructure
> already there.
"basic", but not complete. And I never said that the implementation
they have is perfect, far from it. The concept is good and if we now
can implement it, with all the event sources available, using an
efficient event delivery mechanism we are far ahead of their design.
The proposal now on the table doesn't bring us there all the way and
it has the potential to make future work in the area of event delivery
harder just because there is more legacy code to be kept happy. This
is why I propose to not consider these changes and instead go for the
gold, i.e., the full solution.
^ permalink raw reply [flat|nested] 71+ messages in thread
* Re: [patch 14/22] pollfs: pollable futex
2007-05-04 23:38 ` [patch 14/22] pollfs: pollable futex Ulrich Drepper
@ 2007-05-05 18:54 ` Davide Libenzi
2007-05-06 7:50 ` Ulrich Drepper
0 siblings, 1 reply; 71+ messages in thread
From: Davide Libenzi @ 2007-05-05 18:54 UTC (permalink / raw)
To: Ulrich Drepper
Cc: Davi Arnaut, Eric Dumazet, Andrew Morton, Linus Torvalds,
Linux Kernel Mailing List
On Fri, 4 May 2007, Ulrich Drepper wrote:
> On 5/4/07, Davide Libenzi <davidel@xmailserver.org> wrote:
> > This is a pretty specific case, that is not very typical to find in the
> > usual common event loop dispatch application design.
>
> This is where you are very wrong. Yes, it's rare in the Unix world
> because non-trivial programs cannot implement this in most cases with
> the available infrastructure. But it is very common in other places
> and what is more, it makes a lot of sense. It gives you scalability
> with the size of the machines at no cost associated to reorganizing
> the program.
But we have our own *sane* version of WaitForMultipleObjects, and it's
called poll(2).
> > And if you *really* want your truly generic WaitForMultipleObjects
> > implementation, your only way is to base it on files. Files are our almost
> > perfect match to HANDLEs in our world. We have the basic infrastructure
> > already there.
>
> "basic", but not complete. And I never said that the implementation
> thye have is perfect, far from it. The concept is good and if we now
> can implement it, with all the event sources available, using an
> efficient event delivery mechanism we are far ahead of their design.
>
> The proposal now on the table doesn't bring us there all the way and
> it has the potential to make future work in the area of event delivery
> harder just because there is more legacy code to be kept happy. This
> is why I propose to not consider these changes and instead go for the
> gold, i.e., the full solution.
So, on one side we have a proposal made by a set of new modular objects
that fits our own infrastructure (internal - kernel, and external - POSIX)
and that are not bound to a specific interface.
On the other side we have a completely new, monolithic interface, whose
objects are strictly bound to it and are usable only inside the
interface itself.
Now, considering that POSIX is the backbone of Linux (and *nix in
general), and considering that we certainly cannot drop existing POSIX
semantics, where will the legacy code come from?
I really do not understand your point. You're too smart to not appreciate
the beauty and the symmetry of objects that respond to a common interface
(our files, win32 handles), and that fits our existing kernel infrastructure.
- Davide
^ permalink raw reply [flat|nested] 71+ messages in thread
* Re: [patch 14/22] pollfs: pollable futex
2007-05-05 18:54 ` Davide Libenzi
@ 2007-05-06 7:50 ` Ulrich Drepper
2007-05-06 19:47 ` Davide Libenzi
2007-05-06 19:54 ` Andrew Morton
0 siblings, 2 replies; 71+ messages in thread
From: Ulrich Drepper @ 2007-05-06 7:50 UTC (permalink / raw)
To: Davide Libenzi
Cc: Davi Arnaut, Eric Dumazet, Andrew Morton, Linus Torvalds,
Linux Kernel Mailing List
On 5/5/07, Davide Libenzi <davidel@xmailserver.org> wrote:
> But we have our own *sane* version of WaitForMultipleObjects, and it's
> called poll(2).
No, we don't. Don't start all over again. The interface of poll is
too primitive. See the kevent code, each record is, IIRC, 16 bytes in
size to return more data. For poll you only have bits.
> Now, considering that POSIX is the backbone of Linux (and *nix in
> general), and considering that we certainly cannot drop existing POSIX
> semantics, where the lagacy code will come from?
The legacy part comes from all this extra "make into a file
descriptor" stuff which is new, not needed now and especially not when
a full solution is available.
> I really do not understand your point. You're too smart to not appreciate
> the beauty and the simmetry of objects that responds to a common interface
> (our files, win32 handles), and that fits our existing kernel infrastructure.
You're blinded by this symmetry. Not everything that looks like a
good fit is a good idea. This is one case. Get over it, poll is not
powerful enough to serve as the unifying event mechanism.
^ permalink raw reply [flat|nested] 71+ messages in thread
* Re: [patch 14/22] pollfs: pollable futex
2007-05-06 7:50 ` Ulrich Drepper
@ 2007-05-06 19:47 ` Davide Libenzi
2007-05-06 19:54 ` Andrew Morton
1 sibling, 0 replies; 71+ messages in thread
From: Davide Libenzi @ 2007-05-06 19:47 UTC (permalink / raw)
To: Ulrich Drepper
Cc: Davi Arnaut, Eric Dumazet, Andrew Morton, Linus Torvalds,
Linux Kernel Mailing List
On Sun, 6 May 2007, Ulrich Drepper wrote:
> On 5/5/07, Davide Libenzi <davidel@xmailserver.org> wrote:
> > But we have our own *sane* version of WaitForMultipleObjects, and it's
> > called poll(2).
>
> No, we don't. Don't start all over again. The interface of poll it
> to primitive. See the kevent code, each record is, IIRC, 16 bytes in
> size to return more data. For poll you only have bits.
Yes, event bits plus opaque token are enough for most of it. Then you use
POSIX read/write to fetch/store the data. All the files (sockets, pipes, ...)
work this way. Signals you fetch a siginfo-like structure, through POSIX
read. Timers, you fetch a counter, through POSIX read. AIO, you use the
native AIO API (that I'd prefer, or you can choose to have a POSIX read
too). All these through isolated POSIX read semantics.
Now let's see how it'd look with a monolithic kevent-like interface. You'll
have a monster-union ala siginfo_t, with multiple nested structures, and
every time you need to extend it, you'll go through pain. Come on, that's
beyond ugly. With a file-like interface, each new addition comes to a
separate isolated interface, with separate POSIX read/write ABI.
Do you realise that to justify your all new bulk interface,
you had to pull out of the hat a Windows WaitForMultipleObjects?
Please drop the BS. I made you a full list of things that are readily and
POSIX-friendly handled/signaled with file-like interfaces.
Any sockets, pipes, all devices, signals, timers, AIO, and I'm probably
forgetting something.
You pulled "it's slow". False.
You pulled "it's memory expensive". It's not.
So far, I did not hear a single valid reason to go with a new, monolithic
interface. WaitForMultipleObjects? Please ...
- Davide
^ permalink raw reply [flat|nested] 71+ messages in thread
* Re: [patch 14/22] pollfs: pollable futex
2007-05-06 7:50 ` Ulrich Drepper
2007-05-06 19:47 ` Davide Libenzi
@ 2007-05-06 19:54 ` Andrew Morton
2007-05-06 20:18 ` Davide Libenzi
` (3 more replies)
1 sibling, 4 replies; 71+ messages in thread
From: Andrew Morton @ 2007-05-06 19:54 UTC (permalink / raw)
To: Ulrich Drepper
Cc: Davide Libenzi, Davi Arnaut, Eric Dumazet, Linus Torvalds,
Linux Kernel Mailing List
On Sun, 6 May 2007 00:50:47 -0700 "Ulrich Drepper" <drepper@gmail.com> wrote:
> > I really do not understand your point. You're too smart to not appreciate
> > the beauty and the simmetry of objects that responds to a common interface
> > (our files, win32 handles), and that fits our existing kernel infrastructure.
>
> You're blinded by this symmetry. Not everything that looks like a
> good fit is a good idea. This is one case. Get over it, poll is not
> powerful enough to serve as the unifying event mechanism.
What is your position on the timerfd/signalfd/etc patches?
Seems to me that if we were to have fancy new event-delivery machinery
like kevent then the timerfd/signalfd work is heading in the other
direction and ultimately would prove to have been unneeded?
^ permalink raw reply [flat|nested] 71+ messages in thread
* Re: [patch 14/22] pollfs: pollable futex
2007-05-06 19:54 ` Andrew Morton
@ 2007-05-06 20:18 ` Davide Libenzi
2007-05-06 21:57 ` Davi Arnaut
` (2 subsequent siblings)
3 siblings, 0 replies; 71+ messages in thread
From: Davide Libenzi @ 2007-05-06 20:18 UTC (permalink / raw)
To: Andrew Morton
Cc: Ulrich Drepper, Davi Arnaut, Eric Dumazet, Linus Torvalds,
Linux Kernel Mailing List
On Sun, 6 May 2007, Andrew Morton wrote:
> On Sun, 6 May 2007 00:50:47 -0700 "Ulrich Drepper" <drepper@gmail.com> wrote:
>
> > > I really do not understand your point. You're too smart to not appreciate
> > > the beauty and the simmetry of objects that responds to a common interface
> > > (our files, win32 handles), and that fits our existing kernel infrastructure.
> >
> > You're blinded by this symmetry. Not everything that looks like a
> > good fit is a good idea. This is one case. Get over it, poll is not
> > powerful enough to serve as the unifying event mechanism.
>
> What is your position on the timerfd/signalfd/etc patches?
>
> Seems to me that if we were to have fancy new event-delivery machinery
> like kevent then the timerfd/signalfd work is heading in the other
> direction and ultimately would prove to have been unneeded?
Yes, of course. If we're heading to yet-another monolithic interface, we're
heading there with no valid reasons given other than some handwaving. While
there are quite a few (modularity, compatibility, plus the other ones that
came to my mind and that I explained in the way-too-many emails) to back a
file-based approach.
Conversation with Uli, as often happens when arguing about software, got
stuck. And since no one else seems interested in bringing valid points in
one way or another, I'll leave the discussion as is, and I'll let you sort
it out.
- Davide
^ permalink raw reply [flat|nested] 71+ messages in thread
* Re: [patch 14/22] pollfs: pollable futex
2007-05-06 19:54 ` Andrew Morton
2007-05-06 20:18 ` Davide Libenzi
@ 2007-05-06 21:57 ` Davi Arnaut
2007-05-07 5:33 ` Ulrich Drepper
2007-05-07 5:46 ` Ulrich Drepper
3 siblings, 0 replies; 71+ messages in thread
From: Davi Arnaut @ 2007-05-06 21:57 UTC (permalink / raw)
To: Andrew Morton
Cc: Ulrich Drepper, Davide Libenzi, Eric Dumazet, Linus Torvalds,
Linux Kernel Mailing List
Andrew Morton wrote:
> On Sun, 6 May 2007 00:50:47 -0700 "Ulrich Drepper" <drepper@gmail.com> wrote:
>
>>> I really do not understand your point. You're too smart to not appreciate
>>> the beauty and the simmetry of objects that responds to a common interface
>>> (our files, win32 handles), and that fits our existing kernel infrastructure.
>> You're blinded by this symmetry. Not everything that looks like a
>> good fit is a good idea. This is one case. Get over it, poll is not
>> powerful enough to serve as the unifying event mechanism.
>
> What is your position on the timerfd/signalfd/etc patches?
>
> Seems to me that if we were to have fancy new event-delivery machinery
> like kevent then the timerfd/signalfd work is heading in the other
> direction and ultimately would prove to have been unneeded?
IMHO, I thought we had already gone down the *fd road with inotify,
posix message queue, and _hundreds_ of other file objects with poll methods.
I also think that inotify+(e)poll proves how well the fd/epoll model
fits together, scales, and that a new fancy event-delivery machinery is
not necessary. And it makes me wonder why I hadn't followed its "watch"
approach for futexes:
futex_init(); // Davide's anon fd
futex_add_watch(int fd, void *addr, int val, uint32_t mask);
futex_rm_watch(int fd, uint32_t wd);
Anyway, this unifying event machinery can be built, if needed, in user
space by libevent and others.
--
Davi Arnaut
^ permalink raw reply [flat|nested] 71+ messages in thread
* Re: [patch 14/22] pollfs: pollable futex
2007-05-06 19:54 ` Andrew Morton
2007-05-06 20:18 ` Davide Libenzi
2007-05-06 21:57 ` Davi Arnaut
@ 2007-05-07 5:33 ` Ulrich Drepper
2007-05-07 5:46 ` Ulrich Drepper
3 siblings, 0 replies; 71+ messages in thread
From: Ulrich Drepper @ 2007-05-07 5:33 UTC (permalink / raw)
To: Andrew Morton
Cc: Davide Libenzi, Davi Arnaut, Eric Dumazet, Linus Torvalds,
Linux Kernel Mailing List
On 5/6/07, Andrew Morton <akpm@linux-foundation.org> wrote:
> What is your position on the timerfd/signalfd/etc patches?
>
> Seems to me that if we were to have fancy new event-delivery machinery
> like kevent then the timerfd/signalfd work is heading in the other
> direction and ultimately would prove to have been unneeded?
That's my point. I think we ultimately have to have something like
kevent and then all this *fd() work is unnecessary and just adds code
to the kernel which has to be kept around and which might hinder
further work in this area.
^ permalink raw reply [flat|nested] 71+ messages in thread
* Re: [patch 14/22] pollfs: pollable futex
2007-05-06 19:54 ` Andrew Morton
` (2 preceding siblings ...)
2007-05-07 5:33 ` Ulrich Drepper
@ 2007-05-07 5:46 ` Ulrich Drepper
3 siblings, 0 replies; 71+ messages in thread
From: Ulrich Drepper @ 2007-05-07 5:46 UTC (permalink / raw)
To: Andrew Morton
Cc: Davide Libenzi, Davi Arnaut, Eric Dumazet, Linus Torvalds,
Linux Kernel Mailing List, David S. Miller
On 5/6/07, Andrew Morton <akpm@linux-foundation.org> wrote:
> What is your position on the timerfd/signalfd/etc patches?
One more thing: recently in a network-related discussion with DaveM
et.al. we came across a situation where we want events from the
kernel. The requirement is for fast event notification (or
non-blocking polling) and the event is only implicitly needed. Having
a file descriptor open is not an option. The possibilities are
- open a file in /proc or /sys or a socket for every call to the
function to check for events
- have a memory-mapped interface like kevent which does not keep file
descriptors open
File descriptors are problematic when it comes to implicit uses in the
runtime. This is, for instance, why we have MAP_ANON instead of
keeping a /dev/null file descriptor open all the time.
^ permalink raw reply [flat|nested] 71+ messages in thread
end of thread, other threads:[~2007-05-07 5:46 UTC | newest]
Thread overview: 71+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-05-02 5:22 [patch 00/22] pollfs: filesystem abstraction for pollable objects Davi Arnaut
2007-05-02 5:22 ` [patch 01/22] pollfs: kernel-side API header Davi Arnaut
2007-05-02 5:22 ` [patch 02/22] pollfs: file system operations Davi Arnaut
2007-05-02 5:22 ` [patch 03/22] pollfs: asynchronously wait for a signal Davi Arnaut
2007-05-02 5:22 ` [patch 04/22] pollfs: pollable signal Davi Arnaut
2007-05-02 5:22 ` [patch 05/22] pollfs: pollable signal compat code Davi Arnaut
2007-05-02 5:22 ` [patch 06/22] pollfs: export the plsignal system call Davi Arnaut
2007-05-02 5:22 ` [patch 07/22] pollfs: x86, wire up " Davi Arnaut
2007-05-02 5:22 ` [patch 08/22] pollfs: x86_64, " Davi Arnaut
2007-05-02 5:22 ` [patch 09/22] pollfs: pollable hrtimers Davi Arnaut
2007-05-02 21:16 ` Thomas Gleixner
2007-05-02 23:00 ` Davi Arnaut
2007-05-02 5:22 ` [patch 10/22] pollfs: export the pltimer system call Davi Arnaut
2007-05-02 5:22 ` [patch 11/22] pollfs: x86, wire up " Davi Arnaut
2007-05-02 5:22 ` [patch 12/22] pollfs: x86_64, " Davi Arnaut
2007-05-02 5:22 ` [patch 13/22] pollfs: asynchronous futex wait Davi Arnaut
2007-05-02 5:22 ` [patch 14/22] pollfs: pollable futex Davi Arnaut
2007-05-02 5:54 ` Eric Dumazet
2007-05-02 6:16 ` Davi Arnaut
2007-05-02 6:39 ` Eric Dumazet
2007-05-02 6:54 ` Davi Arnaut
2007-05-02 7:11 ` Davi Arnaut
2007-05-02 7:40 ` Ulrich Drepper
2007-05-02 7:55 ` Eric Dumazet
2007-05-02 8:08 ` Ulrich Drepper
2007-05-02 8:49 ` Eric Dumazet
2007-05-02 16:39 ` Ulrich Drepper
2007-05-02 16:59 ` Davi Arnaut
2007-05-02 17:10 ` Ulrich Drepper
2007-05-02 17:29 ` Davide Libenzi
2007-05-02 17:53 ` Ulrich Drepper
2007-05-02 18:21 ` Davide Libenzi
2007-05-03 13:46 ` Ulrich Drepper
2007-05-03 18:24 ` Davide Libenzi
2007-05-03 19:03 ` Ulrich Drepper
2007-05-03 22:14 ` Davide Libenzi
2007-05-04 15:28 ` Ulrich Drepper
2007-05-04 19:15 ` Davide Libenzi
2007-05-04 19:20 ` 2.6.20.4 / 2.6.21.1 AT91SAM9260-EK oops Ryan Ordway
2007-05-04 23:38 ` [patch 14/22] pollfs: pollable futex Ulrich Drepper
2007-05-05 18:54 ` Davide Libenzi
2007-05-06 7:50 ` Ulrich Drepper
2007-05-06 19:47 ` Davide Libenzi
2007-05-06 19:54 ` Andrew Morton
2007-05-06 20:18 ` Davide Libenzi
2007-05-06 21:57 ` Davi Arnaut
2007-05-07 5:33 ` Ulrich Drepper
2007-05-07 5:46 ` Ulrich Drepper
2007-05-02 17:37 ` Davi Arnaut
2007-05-02 17:49 ` Ulrich Drepper
2007-05-02 18:05 ` Davi Arnaut
2007-05-03 13:40 ` Ulrich Drepper
2007-05-02 12:20 ` Davi Arnaut
2007-05-02 12:39 ` Davi Arnaut
2007-05-02 16:46 ` Ulrich Drepper
2007-05-02 17:05 ` Davi Arnaut
2007-05-02 5:22 ` [patch 15/22] pollfs: export the plfutex system call Davi Arnaut
2007-05-02 5:22 ` [patch 16/22] pollfs: x86, wire up " Davi Arnaut
2007-05-02 5:22 ` [patch 17/22] pollfs: x86_64, " Davi Arnaut
2007-05-02 5:22 ` [patch 18/22] pollfs: check if a AIO event ring is empty Davi Arnaut
2007-05-02 5:22 ` [patch 19/22] pollfs: pollable aio Davi Arnaut
2007-05-02 5:22 ` [patch 20/22] pollfs: export the plaio system call Davi Arnaut
2007-05-02 5:22 ` [patch 21/22] pollfs: x86, wire up " Davi Arnaut
2007-05-02 5:22 ` [patch 22/22] pollfs: x86_64, " Davi Arnaut
2007-05-02 6:05 ` [patch 00/22] pollfs: filesystem abstraction for pollable objects Andrew Morton
2007-05-02 17:28 ` Davide Libenzi
2007-05-02 17:47 ` Davi Arnaut
2007-05-02 18:23 ` Davide Libenzi
2007-05-02 18:50 ` Davi Arnaut
2007-05-02 19:42 ` Davide Libenzi
2007-05-02 20:11 ` Davi Arnaut
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).