* [patch 01/12] pollfs: kernel-side API header
2007-04-01 15:58 [patch 00/12] pollfs: a naive filesystem for pollable objects davi
@ 2007-04-01 15:58 ` davi
2007-04-01 15:58 ` [patch 02/12] pollfs: file system operations davi
` (10 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: davi @ 2007-04-01 15:58 UTC (permalink / raw)
To: Linux Kernel Mailing List; +Cc: Davide Libenzi, Linus Torvalds, Andrew Morton
[-- Attachment #1: pollfs-api.patch --]
[-- Type: text/plain, Size: 1787 bytes --]
Add pollfs_fs.h header which contains the kernel-side declarations
and auxiliary macros for type safety checks.
Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
---
Index: linux-2.6/include/linux/pollfs_fs.h
===================================================================
--- /dev/null
+++ linux-2.6/include/linux/pollfs_fs.h
@@ -0,0 +1,57 @@
+/*
+ * pollfs, a naive filesystem for pollable (waitable) files (objects)
+ *
+ * Copyright (C) 2007 Davi E. M. Arnaut
+ *
+ */
+
+#ifndef _LINUX_POLL_FS_H
+#define _LINUX_POLL_FS_H
+
+#ifdef __KERNEL__
+
+#include <linux/types.h>
+#include <linux/dcache.h>
+#include <linux/fs.h>
+
+#define PFS_CHECK_CALLBACK_1(f, a) (void*) \
+ (sizeof((f)((typeof(a *))0)))
+
+#define PFS_CHECK_CALLBACK_2(f, a, b) (void*) \
+ (sizeof((f)((typeof(a *))0, (typeof(b*))0)))
+
+#define PFS_WRITE(func, type, utype) \
+ (ssize_t (*)(void *, const void __user *)) \
+ (0 ? PFS_CHECK_CALLBACK_2(func, type, utype) : func)
+
+#define PFS_READ(func, type, utype) \
+ (ssize_t (*)(void *, void __user *)) \
+ (0 ? PFS_CHECK_CALLBACK_2(func, type, utype) : func)
+
+#define PFS_POLL(func, type) \
+ (int (*)(void *))(0 ? PFS_CHECK_CALLBACK_1(func, type) : func)
+
+#define PFS_RELEASE(func, type) \
+ (int (*)(void *))(0 ? PFS_CHECK_CALLBACK_1(func, type) : func)
+
+struct pfs_operations {
+ ssize_t (*read)(void *, void __user *);
+ ssize_t (*write)(void *, const void __user *);
+ int (*mmap)(void *, struct vm_area_struct *);
+ int (*poll)(void *);
+ int (*release)(void *);
+ size_t rsize;
+ size_t wsize;
+};
+
+struct pfs_file {
+ void *data;
+ wait_queue_head_t *wait;
+ const struct pfs_operations *fops;
+};
+
+long pfs_open(struct pfs_file *pfs);
+
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_POLL_FS_H */
--
^ permalink raw reply [flat|nested] 13+ messages in thread
* [patch 02/12] pollfs: file system operations
2007-04-01 15:58 [patch 00/12] pollfs: a naive filesystem for pollable objects davi
2007-04-01 15:58 ` [patch 01/12] pollfs: kernel-side API header davi
@ 2007-04-01 15:58 ` davi
2007-04-01 15:58 ` [patch 03/12] pollfs: asynchronously wait for a signal davi
` (9 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: davi @ 2007-04-01 15:58 UTC (permalink / raw)
To: Linux Kernel Mailing List; +Cc: Davide Libenzi, Linus Torvalds, Andrew Morton
[-- Attachment #1: pollfs-core.patch --]
[-- Type: text/plain, Size: 6437 bytes --]
The key feature of the pollfs file operations is to internally handle
pollable (waitable) resources as files without exporting complex and
bug-prone underlying (VFS) implementation details.
All resource handlers are required to implement the read, write, poll,
release operations and must not block.
Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
---
Index: linux-2.6/fs/pollfs/file.c
===================================================================
--- /dev/null
+++ linux-2.6/fs/pollfs/file.c
@@ -0,0 +1,238 @@
+/*
+ * Copyright (C) 2007 Davi E. M. Arnaut
+ *
+ * Licensed under the GNU GPL. See the file COPYING for details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/wait.h>
+#include <asm/uaccess.h>
+#include <linux/poll.h>
+#include <linux/file.h>
+#include <linux/mount.h>
+#include <linux/pollfs_fs.h>
+
+#define POLLFS_MAGIC 0x9a6afcd
+
+MODULE_LICENSE("GPL");
+
+/* pollfs vfsmount entry */
+static struct vfsmount *pfs_mnt;
+
+/* pollfs file operations */
+static const struct file_operations pfs_fops;
+
+static inline ssize_t
+pfs_read_nonblock(const struct pfs_operations *fops, void *data,
+ void __user *obj, size_t nr)
+{
+ ssize_t count = 0, res = 0;
+
+ do {
+ res = fops->read(data, obj);
+ if (res)
+ break;
+ count++;
+ obj += fops->rsize;
+ } while (--nr);
+
+ if (count)
+ return count * fops->rsize;
+ else if (res)
+ return res;
+ else
+ return -EAGAIN;
+}
+
+static inline ssize_t
+pfs_read_block(const struct pfs_operations *fops, void *data,
+ wait_queue_head_t *wait, void __user *obj, size_t nr)
+{
+ ssize_t count;
+
+ do {
+ count = pfs_read_nonblock(fops, data, obj, nr);
+ if (count != -EAGAIN)
+ break;
+ count = wait_event_interruptible((*wait), fops->poll(data));
+ } while (!count);
+
+ return count;
+}
+
+static ssize_t pfs_read(struct file *filp, char __user * buf,
+ size_t count, loff_t * pos)
+{
+ size_t nevents = count;
+ struct pfs_file *pfs = filp->private_data;
+ const struct pfs_operations *fops = pfs->fops;
+
+ if (fops->rsize)
+ nevents /= fops->rsize;
+ else
+ nevents = 1;
+
+ if (!nevents)
+ return -EINVAL;
+
+ if (filp->f_flags & O_NONBLOCK)
+ return pfs_read_nonblock(fops, pfs->data, buf, nevents);
+ else
+ return pfs_read_block(fops, pfs->data, pfs->wait, buf, nevents);
+}
+
+static ssize_t pfs_write(struct file *filp, const char __user * buf,
+ size_t count, loff_t * ppos)
+{
+ ssize_t res = 0;
+ size_t nevents = count;
+ struct pfs_file *pfs = filp->private_data;
+ const struct pfs_operations *fops = pfs->fops;
+
+ if (fops->wsize)
+ nevents /= fops->wsize;
+ else
+ nevents = 1;
+
+ if (!nevents)
+ return -EINVAL;
+
+ count = 0;
+
+ do {
+ res = fops->write(pfs->data, buf);
+ if (res)
+ break;
+ count++;
+ buf += fops->wsize;
+ } while (--nevents);
+
+ if (count)
+ return count * fops->wsize;
+ else if (res)
+ return res;
+ else
+ return 0;
+}
+
+static unsigned int pfs_poll(struct file *filp, struct poll_table_struct *wait)
+{
+ int ret = 0;
+ struct pfs_file *pfs = filp->private_data;
+
+ poll_wait(filp, pfs->wait, wait);
+
+ if (pfs->fops->poll)
+ ret = pfs->fops->poll(pfs->data);
+ else
+ ret = POLLIN;
+
+ return ret;
+}
+
+static int pfs_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+ struct pfs_file *pfs = filp->private_data;
+
+ return (pfs->fops->mmap) ? pfs->fops->mmap(pfs->data, vma) : -ENODEV;
+}
+
+static int pfs_release(struct inode *inode, struct file *filp)
+{
+ struct pfs_file *pfs = filp->private_data;
+
+ return pfs->fops->release(pfs->data);
+}
+
+static const struct file_operations pfs_fops = {
+ .poll = pfs_poll,
+ .mmap = pfs_mmap,
+ .read = pfs_read,
+ .write = pfs_write,
+ .release = pfs_release
+};
+
+long pfs_open(struct pfs_file *pfs)
+{
+ int fd;
+ struct file *filp;
+ const struct pfs_operations *fops = pfs->fops;
+
+ if (IS_ERR(pfs_mnt))
+ return -ENOSYS;
+
+ if (!fops->poll || (!fops->read || !fops->write))
+ return -EINVAL;
+
+ fd = get_unused_fd();
+ if (fd < 0)
+ return -ENFILE;
+
+ filp = get_empty_filp();
+ if (!filp) {
+ put_unused_fd(fd);
+ return -ENFILE;
+ }
+
+ filp->f_op = &pfs_fops;
+ filp->f_path.mnt = mntget(pfs_mnt);
+ filp->f_path.dentry = dget(pfs_mnt->mnt_root);
+ filp->f_mapping = filp->f_path.dentry->d_inode->i_mapping;
+ filp->f_mode = 0;
+ filp->f_flags = 0;
+ filp->private_data = pfs;
+
+ if (fops->read) {
+ filp->f_flags = O_RDONLY;
+ filp->f_mode |= FMODE_READ;
+ }
+
+ if (fops->write) {
+ filp->f_flags = O_WRONLY;
+ filp->f_mode |= FMODE_WRITE;
+ }
+
+ if (fops->write && fops->read)
+ filp->f_flags = O_RDWR;
+
+ fd_install(fd, filp);
+
+ return fd;
+}
+
+EXPORT_SYMBOL(pfs_open);
+
+static int pfs_get_sb(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *data, struct vfsmount *mnt)
+{
+ return get_sb_pseudo(fs_type, "pollfs", NULL, POLLFS_MAGIC, mnt);
+}
+
+static struct file_system_type pollfs_type = {
+ .name = "pollfs",
+ .get_sb = pfs_get_sb,
+ .kill_sb = kill_anon_super
+};
+
+static int __init pollfs_init(void)
+{
+ int ret;
+
+ ret = register_filesystem(&pollfs_type);
+ if (ret)
+ return ret;
+
+ pfs_mnt = kern_mount(&pollfs_type);
+ if (IS_ERR(pfs_mnt)) {
+ ret = PTR_ERR(pfs_mnt);
+ unregister_filesystem(&pollfs_type);
+ }
+
+ return ret;
+}
+
+__initcall(pollfs_init);
Index: linux-2.6/init/Kconfig
===================================================================
--- linux-2.6.orig/init/Kconfig
+++ linux-2.6/init/Kconfig
@@ -463,6 +463,12 @@ config EPOLL
Disabling this option will cause the kernel to be built without
support for epoll family of system calls.
+config POLLFS
+ bool "Enable pollfs support" if EMBEDDED
+ default y
+ help
+ Pollfs support
+
config SHMEM
bool "Use full shmem filesystem" if EMBEDDED
default y
Index: linux-2.6/fs/Makefile
===================================================================
--- linux-2.6.orig/fs/Makefile
+++ linux-2.6/fs/Makefile
@@ -114,3 +114,4 @@ obj-$(CONFIG_HPPFS) += hppfs/
obj-$(CONFIG_DEBUG_FS) += debugfs/
obj-$(CONFIG_OCFS2_FS) += ocfs2/
obj-$(CONFIG_GFS2_FS) += gfs2/
+obj-$(CONFIG_POLLFS) += pollfs/
Index: linux-2.6/fs/pollfs/Makefile
===================================================================
--- /dev/null
+++ linux-2.6/fs/pollfs/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_POLLFS) += pollfs.o
+pollfs-y := file.o
--
^ permalink raw reply [flat|nested] 13+ messages in thread
* [patch 03/12] pollfs: asynchronously wait for a signal
2007-04-01 15:58 [patch 00/12] pollfs: a naive filesystem for pollable objects davi
2007-04-01 15:58 ` [patch 01/12] pollfs: kernel-side API header davi
2007-04-01 15:58 ` [patch 02/12] pollfs: file system operations davi
@ 2007-04-01 15:58 ` davi
2007-04-01 15:58 ` [patch 04/12] pollfs: pollable signal davi
` (8 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: davi @ 2007-04-01 15:58 UTC (permalink / raw)
To: Linux Kernel Mailing List; +Cc: Davide Libenzi, Linus Torvalds, Andrew Morton
[-- Attachment #1: pollfs-signal-wakeup.patch --]
[-- Type: text/plain, Size: 2851 bytes --]
Add a wait queue to the task_struct in order to be able to
associate (wait for) a signal with other resources.
Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
---
Index: linux-2.6/include/linux/sched.h
===================================================================
--- linux-2.6.orig/include/linux/sched.h
+++ linux-2.6/include/linux/sched.h
@@ -939,6 +939,7 @@ struct task_struct {
sigset_t blocked, real_blocked;
sigset_t saved_sigmask; /* To be restored with TIF_RESTORE_SIGMASK */
struct sigpending pending;
+ wait_queue_head_t sigwait;
unsigned long sas_ss_sp;
size_t sas_ss_size;
Index: linux-2.6/include/linux/init_task.h
===================================================================
--- linux-2.6.orig/include/linux/init_task.h
+++ linux-2.6/include/linux/init_task.h
@@ -134,6 +134,7 @@ extern struct group_info init_groups;
.list = LIST_HEAD_INIT(tsk.pending.list), \
.signal = {{0}}}, \
.blocked = {{0}}, \
+ .sigwait = __WAIT_QUEUE_HEAD_INITIALIZER(tsk.sigwait), \
.alloc_lock = __SPIN_LOCK_UNLOCKED(tsk.alloc_lock), \
.journal_info = NULL, \
.cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \
Index: linux-2.6/kernel/fork.c
===================================================================
--- linux-2.6.orig/kernel/fork.c
+++ linux-2.6/kernel/fork.c
@@ -1034,6 +1034,7 @@ static struct task_struct *copy_process(
clear_tsk_thread_flag(p, TIF_SIGPENDING);
init_sigpending(&p->pending);
+ init_waitqueue_head(&p->sigwait);
p->utime = cputime_zero;
p->stime = cputime_zero;
Index: linux-2.6/kernel/signal.c
===================================================================
--- linux-2.6.orig/kernel/signal.c
+++ linux-2.6/kernel/signal.c
@@ -224,6 +224,8 @@ fastcall void recalc_sigpending_tsk(stru
set_tsk_thread_flag(t, TIF_SIGPENDING);
else
clear_tsk_thread_flag(t, TIF_SIGPENDING);
+
+ wake_up_interruptible_sync(&t->sigwait);
}
void recalc_sigpending(void)
@@ -759,6 +761,7 @@ static int send_signal(int sig, struct s
info->si_code >= 0)));
if (q) {
list_add_tail(&q->list, &signals->list);
+ wake_up_interruptible_sync(&t->sigwait);
switch ((unsigned long) info) {
case (unsigned long) SEND_SIG_NOINFO:
q->info.si_signo = sig;
@@ -1404,6 +1407,7 @@ int send_sigqueue(int sig, struct sigque
list_add_tail(&q->list, &p->pending.list);
sigaddset(&p->pending.signal, sig);
+ wake_up_interruptible_sync(&p->sigwait);
if (!sigismember(&p->blocked, sig))
signal_wake_up(p, sig == SIGKILL);
@@ -1453,6 +1457,7 @@ send_group_sigqueue(int sig, struct sigq
list_add_tail(&q->list, &p->signal->shared_pending.list);
sigaddset(&p->signal->shared_pending.signal, sig);
+ wake_up_interruptible_sync(&p->sigwait);
__group_complete_signal(sig, p);
out:
spin_unlock_irqrestore(&p->sighand->siglock, flags);
--
^ permalink raw reply [flat|nested] 13+ messages in thread
* [patch 04/12] pollfs: pollable signal
2007-04-01 15:58 [patch 00/12] pollfs: a naive filesystem for pollable objects davi
` (2 preceding siblings ...)
2007-04-01 15:58 ` [patch 03/12] pollfs: asynchronously wait for a signal davi
@ 2007-04-01 15:58 ` davi
2007-04-01 15:58 ` [patch 05/12] pollfs: pollable signal compat code davi
` (7 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: davi @ 2007-04-01 15:58 UTC (permalink / raw)
To: Linux Kernel Mailing List; +Cc: Davide Libenzi, Linus Torvalds, Andrew Morton
[-- Attachment #1: pollfs-signal.patch --]
[-- Type: text/plain, Size: 4043 bytes --]
Retrieve multiple per-process signals through a file descriptor.
Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
---
Index: linux-2.6/fs/pollfs/signal.c
===================================================================
--- /dev/null
+++ linux-2.6/fs/pollfs/signal.c
@@ -0,0 +1,144 @@
+/*
+ * sigtimedwait4, retrieve multiple signals with one call.
+ *
+ * Copyright (C) 2007 Davi E. M. Arnaut
+ *
+ * Licensed under the GNU GPL. See the file COPYING for details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/wait.h>
+#include <asm/uaccess.h>
+#include <linux/poll.h>
+#include <linux/pollfs_fs.h>
+#include <linux/signal.h>
+
+struct pfs_signal {
+ sigset_t set;
+ spinlock_t lock;
+ struct task_struct *task;
+ struct pfs_file file;
+};
+
+static void inline sigset_adjust(sigset_t *set)
+{
+ /* SIGKILL and SIGSTOP cannot be caught, blocked, or ignored */
+ sigdelsetmask(set, sigmask(SIGKILL) | sigmask(SIGSTOP));
+
+ /* Signals we don't want to dequeue */
+ signotset(set);
+}
+
+static ssize_t read(struct pfs_signal *evs, siginfo_t __user *infoup)
+{
+ int signo;
+ siginfo_t info;
+
+ signo = dequeue_signal_lock(evs->task, &evs->set, &info);
+ if (!signo)
+ return -EAGAIN;
+
+ if (copy_siginfo_to_user(infoup, &info))
+ return -EFAULT;
+
+ return 0;
+}
+
+static ssize_t write(struct pfs_signal *evs, const sigset_t __user *uset)
+{
+ sigset_t set;
+
+ if (copy_from_user(&set, uset, sizeof(sigset_t)))
+ return -EFAULT;
+
+ sigset_adjust(&set);
+
+ spin_lock_irq(&evs->lock);
+ sigemptyset(&evs->set);
+ sigorsets(&evs->set, &evs->set, &set);
+ spin_unlock_irq(&evs->lock);
+
+ return 0;
+}
+
+static int poll(struct pfs_signal *evs)
+{
+ int ret = 0;
+ sigset_t pending;
+ unsigned long flags;
+
+ rcu_read_lock();
+
+ if (!lock_task_sighand(evs->task, &flags))
+ goto out_unlock;
+
+ sigorsets(&pending, &evs->task->pending.signal,
+ &evs->task->signal->shared_pending.signal);
+
+ unlock_task_sighand(evs->task, &flags);
+
+ spin_lock_irqsave(&evs->lock, flags);
+ signandsets(&pending, &pending, &evs->set);
+ spin_unlock_irqrestore(&evs->lock, flags);
+
+ if (!sigisemptyset(&pending))
+ ret = POLLIN;
+
+out_unlock:
+ rcu_read_unlock();
+
+ return ret;
+}
+
+static int release(struct pfs_signal *evs)
+{
+ put_task_struct(evs->task);
+ kfree(evs);
+
+ return 0;
+}
+
+static const struct pfs_operations signal_ops = {
+ .read = PFS_READ(read, struct pfs_signal, siginfo_t),
+ .write = PFS_WRITE(write, struct pfs_signal, sigset_t),
+ .poll = PFS_POLL(poll, struct pfs_signal),
+ .release = PFS_RELEASE(release, struct pfs_signal),
+ .rsize = sizeof(siginfo_t),
+ .wsize = sizeof(sigset_t),
+};
+
+asmlinkage long sys_plsignal(const sigset_t __user *uset)
+{
+ long error;
+ struct pfs_signal *evs;
+
+ evs = kmalloc(sizeof(*evs), GFP_KERNEL);
+ if (!evs)
+ return -ENOMEM;
+
+ if (copy_from_user(&evs->set, uset, sizeof(sigset_t))) {
+ kfree(evs);
+ return -EFAULT;
+ }
+
+ spin_lock_init(&evs->lock);
+
+ evs->task = current;
+ get_task_struct(current);
+
+ sigset_adjust(&evs->set);
+
+ evs->file.data = evs;
+ evs->file.fops = &signal_ops;
+ evs->file.wait = &evs->task->sigwait;
+
+ error = pfs_open(&evs->file);
+ if (error < 0)
+ release(evs);
+
+ return error;
+}
Index: linux-2.6/fs/pollfs/Makefile
===================================================================
--- linux-2.6.orig/fs/pollfs/Makefile
+++ linux-2.6/fs/pollfs/Makefile
@@ -1,2 +1,4 @@
obj-$(CONFIG_POLLFS) += pollfs.o
pollfs-y := file.o
+
+pollfs-$(CONFIG_POLLFS_SIGNAL) += signal.o
Index: linux-2.6/init/Kconfig
===================================================================
--- linux-2.6.orig/init/Kconfig
+++ linux-2.6/init/Kconfig
@@ -469,6 +469,13 @@ config POLLFS
help
Pollfs support
+config POLLFS_SIGNAL
+ bool "Enable pollfs signal" if EMBEDDED
+ default y
+ depends on POLLFS
+ help
+ Pollable signal support
+
config SHMEM
bool "Use full shmem filesystem" if EMBEDDED
default y
--
^ permalink raw reply [flat|nested] 13+ messages in thread
* [patch 05/12] pollfs: pollable signal compat code
2007-04-01 15:58 [patch 00/12] pollfs: a naive filesystem for pollable objects davi
` (3 preceding siblings ...)
2007-04-01 15:58 ` [patch 04/12] pollfs: pollable signal davi
@ 2007-04-01 15:58 ` davi
2007-04-01 15:58 ` [patch 06/12] pollfs: pollable hrtimers davi
` (6 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: davi @ 2007-04-01 15:58 UTC (permalink / raw)
To: Linux Kernel Mailing List; +Cc: Davide Libenzi, Linus Torvalds, Andrew Morton
[-- Attachment #1: pollfs-signal-compat.patch --]
[-- Type: text/plain, Size: 2727 bytes --]
Compat handlers for the pollable signal operations.
Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
---
Index: linux-2.6/fs/pollfs/signal.c
===================================================================
--- linux-2.6.orig/fs/pollfs/signal.c
+++ linux-2.6/fs/pollfs/signal.c
@@ -16,6 +16,7 @@
#include <linux/poll.h>
#include <linux/pollfs_fs.h>
#include <linux/signal.h>
+#include <linux/compat.h>
struct pfs_signal {
sigset_t set;
@@ -48,6 +49,24 @@ static ssize_t read(struct pfs_signal *e
return 0;
}
+#ifdef CONFIG_COMPAT
+static ssize_t compat_read(struct pfs_signal *evs,
+ struct compat_siginfo __user *infoup)
+{
+ int signo;
+ siginfo_t info;
+
+ signo = dequeue_signal_lock(evs->task, &evs->set, &info);
+ if (!signo)
+ return -EAGAIN;
+
+ if (copy_siginfo_to_user32(infoup, &info))
+ return -EFAULT;
+
+ return 0;
+}
+#endif
+
static ssize_t write(struct pfs_signal *evs, const sigset_t __user *uset)
{
sigset_t set;
@@ -65,6 +84,28 @@ static ssize_t write(struct pfs_signal *
return 0;
}
+#ifdef CONFIG_COMPAT
+static ssize_t compat_write(struct pfs_signal *evs,
+ const compat_sigset_t __user *uset)
+{
+ sigset_t set;
+ compat_sigset_t cset;
+
+ if (copy_from_user(&cset, uset, sizeof(compat_sigset_t)))
+ return -EFAULT;
+
+ sigset_from_compat(&set, &cset);
+ sigset_adjust(&set);
+
+ spin_lock_irq(&evs->lock);
+ sigemptyset(&evs->set);
+ sigorsets(&evs->set, &evs->set, &set);
+ spin_unlock_irq(&evs->lock);
+
+ return 0;
+}
+#endif
+
static int poll(struct pfs_signal *evs)
{
int ret = 0;
@@ -142,3 +183,47 @@ asmlinkage long sys_plsignal(const sigse
return error;
}
+
+#ifdef CONFIG_COMPAT
+static const struct pfs_operations compat_signal_ops = {
+ /* .read = PFS_READ(compat_read, struct pfs_signal, struct compat_siginfo), */
+ .write = PFS_WRITE(compat_write, struct pfs_signal, compat_sigset_t),
+ .poll = PFS_POLL(poll, struct pfs_signal),
+ .release = PFS_RELEASE(release, struct pfs_signal),
+ /* .rsize = sizeof(compat_siginfo_t), */
+ .wsize = sizeof(sigset_t)
+};
+
+asmlinkage long compat_plsignal(const compat_sigset_t __user *uset)
+{
+ long error;
+ compat_sigset_t cset;
+ struct pfs_signal *evs;
+
+ if (copy_from_user(&cset, uset, sizeof(compat_sigset_t)))
+ return -EFAULT;
+
+ evs = kmalloc(sizeof(*evs), GFP_KERNEL);
+ if (!evs)
+ return -ENOMEM;
+
+ spin_lock_init(&evs->lock);
+
+ evs->task = current;
+ get_task_struct(current);
+
+ sigset_from_compat(&evs->set, &cset);
+ sigset_adjust(&evs->set);
+
+ evs->file.data = evs;
+ evs->file.fops = &compat_signal_ops;
+ evs->file.wait = &evs->task->sigwait;
+
+ error = pfs_open(&evs->file);
+
+ if (error < 0)
+ release(evs);
+
+ return error;
+}
+#endif
--
^ permalink raw reply [flat|nested] 13+ messages in thread
* [patch 06/12] pollfs: pollable hrtimers
2007-04-01 15:58 [patch 00/12] pollfs: a naive filesystem for pollable objects davi
` (4 preceding siblings ...)
2007-04-01 15:58 ` [patch 05/12] pollfs: pollable signal compat code davi
@ 2007-04-01 15:58 ` davi
2007-04-01 15:58 ` [patch 07/12] pollfs: asynchronous futex wait davi
` (5 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: davi @ 2007-04-01 15:58 UTC (permalink / raw)
To: Linux Kernel Mailing List; +Cc: Davide Libenzi, Linus Torvalds, Andrew Morton
[-- Attachment #1: pollfs-timer.patch --]
[-- Type: text/plain, Size: 5329 bytes --]
Per file descriptor high-resolution timers. A classic unix file interface for
the POSIX timer_(create|settime|gettime|delete) family of functions.
Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
---
Index: linux-2.6/fs/pollfs/timer.c
===================================================================
--- /dev/null
+++ linux-2.6/fs/pollfs/timer.c
@@ -0,0 +1,191 @@
+/*
+ * pollable timers
+ *
+ * Copyright (C) 2007 Davi E. M. Arnaut
+ *
+ * Licensed under the GNU GPL. See the file COPYING for details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/wait.h>
+#include <linux/poll.h>
+#include <linux/pollfs_fs.h>
+#include <linux/hrtimer.h>
+
+struct pfs_timer {
+ wait_queue_head_t wait;
+ ktime_t interval;
+ spinlock_t lock;
+ atomic_t counter;
+ struct hrtimer timer;
+ struct pfs_file file;
+};
+
+struct hrtimerspec {
+ int flags;
+ clockid_t clock;
+ struct itimerspec expr;
+};
+
+static ssize_t read(struct pfs_timer *evs, struct itimerspec __user * uspec)
+{
+ ktime_t remaining = {};
+ struct itimerspec spec = {};
+ struct hrtimer *timer = &evs->timer;
+
+ /* atomic_dec_not_zero */
+ if (!atomic_add_unless(&evs->counter, -1, 0))
+ return -EAGAIN;
+
+ spin_lock_irq(&evs->lock);
+
+ if (hrtimer_active(timer))
+ remaining = hrtimer_get_remaining(timer);
+
+ if (remaining.tv64 > 0)
+ spec.it_value = ktime_to_timespec(remaining);
+
+ spec.it_interval = ktime_to_timespec(evs->interval);
+
+ spin_unlock_irq(&evs->lock);
+
+ if (copy_to_user(uspec, &spec, sizeof(spec)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static enum hrtimer_restart timer_fn(struct hrtimer *timer)
+{
+ unsigned long flags;
+ enum hrtimer_restart ret = HRTIMER_RESTART;
+ struct pfs_timer *evs = container_of(timer, struct pfs_timer, timer);
+
+ spin_lock_irqsave(&evs->lock, flags);
+
+ if (evs->interval.tv64 > 0)
+ hrtimer_forward(timer, hrtimer_cb_get_time(timer),
+ evs->interval);
+ else
+ ret = HRTIMER_NORESTART;
+
+ spin_unlock_irqrestore(&evs->lock, flags);
+
+ /* timer tick, interval has elapsed */
+ atomic_inc(&evs->counter);
+ wake_up_all(&evs->wait);
+
+ return ret;
+}
+
+static inline void rearm_timer(struct pfs_timer *evs, struct hrtimerspec *spec)
+{
+ struct hrtimer *timer = &evs->timer;
+ enum hrtimer_mode mode = HRTIMER_MODE_REL;
+
+ if (spec->flags & TIMER_ABSTIME)
+ mode = HRTIMER_MODE_ABS;
+
+ do {
+ spin_lock_irq(&evs->lock);
+ if (hrtimer_try_to_cancel(timer) >= 0)
+ break;
+ spin_unlock_irq(&evs->lock);
+ cpu_relax();
+ } while (1);
+
+ hrtimer_init(timer, spec->clock, mode);
+
+ timer->function = timer_fn;
+ timer->expires = timespec_to_ktime(spec->expr.it_value);
+ evs->interval = timespec_to_ktime(spec->expr.it_interval);
+
+ if (timer->expires.tv64)
+ hrtimer_start(timer, timer->expires, mode);
+
+ spin_unlock_irq(&evs->lock);
+}
+
+static inline int spec_invalid(const struct hrtimerspec *spec)
+{
+ if (spec->clock != CLOCK_REALTIME && spec->clock != CLOCK_MONOTONIC)
+ return 1;
+
+ if (!timespec_valid(&spec->expr.it_value) ||
+ !timespec_valid(&spec->expr.it_interval))
+ return 1;
+
+ return 0;
+}
+
+static ssize_t write(struct pfs_timer *evs,
+ const struct hrtimerspec __user *uspec)
+{
+ struct hrtimerspec spec;
+
+ if (copy_from_user(&spec, uspec, sizeof(spec)))
+ return -EFAULT;
+
+ if (spec_invalid(&spec))
+ return -EINVAL;
+
+ rearm_timer(evs, &spec);
+
+ return 0;
+}
+
+static int poll(struct pfs_timer *evs)
+{
+ int ret;
+
+ ret = atomic_read(&evs->counter) ? POLLIN : 0;
+
+ return ret;
+}
+
+static int release(struct pfs_timer *evs)
+{
+ hrtimer_cancel(&evs->timer);
+ kfree(evs);
+
+ return 0;
+}
+
+static const struct pfs_operations timer_ops = {
+ .read = PFS_READ(read, struct pfs_timer, struct itimerspec),
+ .write = PFS_WRITE(write, struct pfs_timer, struct hrtimerspec),
+ .poll = PFS_POLL(poll, struct pfs_timer),
+ .release = PFS_RELEASE(release, struct pfs_timer),
+ .rsize = sizeof(struct itimerspec),
+ .wsize = sizeof(struct hrtimerspec),
+};
+
+asmlinkage long sys_pltimer(void)
+{
+ long error;
+ struct pfs_timer *evs;
+
+ evs = kmalloc(sizeof(*evs), GFP_KERNEL);
+ if (!evs)
+ return -ENOMEM;
+
+ spin_lock_init(&evs->lock);
+ atomic_set(&evs->counter, 0);
+ init_waitqueue_head(&evs->wait);
+ hrtimer_init(&evs->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+
+ evs->file.data = evs;
+ evs->file.fops = &timer_ops;
+ evs->file.wait = &evs->wait;
+
+ error = pfs_open(&evs->file);
+
+ if (error < 0)
+ release(evs);
+
+ return error;
+}
Index: linux-2.6/fs/pollfs/Makefile
===================================================================
--- linux-2.6.orig/fs/pollfs/Makefile
+++ linux-2.6/fs/pollfs/Makefile
@@ -2,3 +2,4 @@ obj-$(CONFIG_POLLFS) += pollfs.o
pollfs-y := file.o
pollfs-$(CONFIG_POLLFS_SIGNAL) += signal.o
+pollfs-$(CONFIG_POLLFS_TIMER) += timer.o
Index: linux-2.6/init/Kconfig
===================================================================
--- linux-2.6.orig/init/Kconfig
+++ linux-2.6/init/Kconfig
@@ -476,6 +476,13 @@ config POLLFS_SIGNAL
help
Pollable signal support
+config POLLFS_TIMER
+ bool "Enable pollfs timer" if EMBEDDED
+ default y
+ depends on POLLFS
+ help
+ Pollable timer support
+
config SHMEM
bool "Use full shmem filesystem" if EMBEDDED
default y
--
^ permalink raw reply [flat|nested] 13+ messages in thread
* [patch 07/12] pollfs: asynchronous futex wait
2007-04-01 15:58 [patch 00/12] pollfs: a naive filesystem for pollable objects davi
` (5 preceding siblings ...)
2007-04-01 15:58 ` [patch 06/12] pollfs: pollable hrtimers davi
@ 2007-04-01 15:58 ` davi
2007-04-01 15:58 ` [patch 08/12] pollfs: pollable futex davi
` (4 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: davi @ 2007-04-01 15:58 UTC (permalink / raw)
To: Linux Kernel Mailing List; +Cc: Davide Libenzi, Linus Torvalds, Andrew Morton
[-- Attachment #1: pollfs-futex-async-wait.patch --]
[-- Type: text/plain, Size: 7755 bytes --]
Break apart and export the futex_wait function in order to be able to
associate (wait for) a futex with other resources.
Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
---
Index: linux-2.6/kernel/futex.c
===================================================================
--- linux-2.6.orig/kernel/futex.c
+++ linux-2.6/kernel/futex.c
@@ -55,81 +55,6 @@
#define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)
/*
- * Futexes are matched on equal values of this key.
- * The key type depends on whether it's a shared or private mapping.
- * Don't rearrange members without looking at hash_futex().
- *
- * offset is aligned to a multiple of sizeof(u32) (== 4) by definition.
- * We set bit 0 to indicate if it's an inode-based key.
- */
-union futex_key {
- struct {
- unsigned long pgoff;
- struct inode *inode;
- int offset;
- } shared;
- struct {
- unsigned long address;
- struct mm_struct *mm;
- int offset;
- } private;
- struct {
- unsigned long word;
- void *ptr;
- int offset;
- } both;
-};
-
-/*
- * Priority Inheritance state:
- */
-struct futex_pi_state {
- /*
- * list of 'owned' pi_state instances - these have to be
- * cleaned up in do_exit() if the task exits prematurely:
- */
- struct list_head list;
-
- /*
- * The PI object:
- */
- struct rt_mutex pi_mutex;
-
- struct task_struct *owner;
- atomic_t refcount;
-
- union futex_key key;
-};
-
-/*
- * We use this hashed waitqueue instead of a normal wait_queue_t, so
- * we can wake only the relevant ones (hashed queues may be shared).
- *
- * A futex_q has a woken state, just like tasks have TASK_RUNNING.
- * It is considered woken when list_empty(&q->list) || q->lock_ptr == 0.
- * The order of wakup is always to make the first condition true, then
- * wake up q->waiters, then make the second condition true.
- */
-struct futex_q {
- struct list_head list;
- wait_queue_head_t waiters;
-
- /* Which hash list lock to use: */
- spinlock_t *lock_ptr;
-
- /* Key which the futex is hashed on: */
- union futex_key key;
-
- /* For fd, sigio sent using these: */
- int fd;
- struct file *filp;
-
- /* Optional priority inheritance state: */
- struct futex_pi_state *pi_state;
- struct task_struct *task;
-};
-
-/*
* Split the global futex_lock into every hash list lock.
*/
struct futex_hash_bucket {
@@ -904,8 +829,6 @@ queue_lock(struct futex_q *q, int fd, st
q->fd = fd;
q->filp = filp;
- init_waitqueue_head(&q->waiters);
-
get_key_refs(&q->key);
hb = hash_futex(&q->key);
q->lock_ptr = &hb->lock;
@@ -938,6 +861,7 @@ static void queue_me(struct futex_q *q,
{
struct futex_hash_bucket *hb;
+ init_waitqueue_head(&q->waiters);
hb = queue_lock(q, fd, filp);
__queue_me(q, hb);
}
@@ -1002,24 +926,22 @@ static void unqueue_me_pi(struct futex_q
drop_key_refs(&q->key);
}
-static int futex_wait(u32 __user *uaddr, u32 val, unsigned long time)
+int futex_wait_queue(struct futex_q *q, u32 __user *uaddr, u32 val)
{
struct task_struct *curr = current;
- DECLARE_WAITQUEUE(wait, curr);
struct futex_hash_bucket *hb;
- struct futex_q q;
u32 uval;
int ret;
- q.pi_state = NULL;
+ q->pi_state = NULL;
retry:
down_read(&curr->mm->mmap_sem);
- ret = get_futex_key(uaddr, &q.key);
+ ret = get_futex_key(uaddr, &q->key);
if (unlikely(ret != 0))
goto out_release_sem;
- hb = queue_lock(&q, -1, NULL);
+ hb = queue_lock(q, -1, NULL);
/*
* Access the page AFTER the futex is queued.
@@ -1044,7 +966,7 @@ static int futex_wait(u32 __user *uaddr,
ret = get_futex_value_locked(&uval, uaddr);
if (unlikely(ret)) {
- queue_unlock(&q, hb);
+ queue_unlock(q, hb);
/*
* If we would have faulted, release mmap_sem, fault it in and
@@ -1063,14 +985,37 @@ static int futex_wait(u32 __user *uaddr,
goto out_unlock_release_sem;
/* Only actually queue if *uaddr contained val. */
- __queue_me(&q, hb);
+ __queue_me(q, hb);
/*
* Now the futex is queued and we have checked the data, we
- * don't want to hold mmap_sem while we sleep.
+ * don't want to hold mmap_sem while we (might) sleep.
*/
up_read(&curr->mm->mmap_sem);
+ return 0;
+
+ out_unlock_release_sem:
+ queue_unlock(q, hb);
+
+ out_release_sem:
+ up_read(&curr->mm->mmap_sem);
+ return ret;
+}
+
+static int futex_wait(u32 __user *uaddr, u32 val, unsigned long time)
+{
+ int ret;
+ struct futex_q q;
+ DECLARE_WAITQUEUE(wait, current);
+
+ init_waitqueue_head(&q.waiters);
+
+ ret = futex_wait_queue(&q, uaddr, val);
+
+ if (ret)
+ return ret;
+
/*
* There might have been scheduling since the queue_me(), as we
* cannot hold a spinlock across the get_user() in case it
@@ -1106,13 +1051,12 @@ static int futex_wait(u32 __user *uaddr,
* have handled it for us already.
*/
return -EINTR;
+}
- out_unlock_release_sem:
- queue_unlock(&q, hb);
-
- out_release_sem:
- up_read(&curr->mm->mmap_sem);
- return ret;
+/* Return 1 if we were still queued, 0 means we were woken. */
+int futex_wait_unqueue(struct futex_q *q)
+{
+ return unqueue_me(q);
}
/*
@@ -1142,6 +1086,8 @@ static int futex_lock_pi(u32 __user *uad
}
q.pi_state = NULL;
+
+ init_waitqueue_head(&q.waiters);
retry:
down_read(&curr->mm->mmap_sem);
Index: linux-2.6/include/linux/futex.h
===================================================================
--- linux-2.6.orig/include/linux/futex.h
+++ linux-2.6/include/linux/futex.h
@@ -94,12 +94,92 @@ struct robust_list_head {
#define ROBUST_LIST_LIMIT 2048
#ifdef __KERNEL__
+
+#include <linux/rtmutex.h>
+
+/*
+ * Futexes are matched on equal values of this key.
+ * The key type depends on whether it's a shared or private mapping.
+ * Don't rearrange members without looking at hash_futex().
+ *
+ * offset is aligned to a multiple of sizeof(u32) (== 4) by definition.
+ * We set bit 0 to indicate if it's an inode-based key.
+ */
+union futex_key {
+ struct {
+ unsigned long pgoff;
+ struct inode *inode;
+ int offset;
+ } shared;
+ struct {
+ unsigned long address;
+ struct mm_struct *mm;
+ int offset;
+ } private;
+ struct {
+ unsigned long word;
+ void *ptr;
+ int offset;
+ } both;
+};
+
+/*
+ * Priority Inheritance state:
+ */
+struct futex_pi_state {
+ /*
+ * list of 'owned' pi_state instances - these have to be
+ * cleaned up in do_exit() if the task exits prematurely:
+ */
+ struct list_head list;
+
+ /*
+ * The PI object:
+ */
+ struct rt_mutex pi_mutex;
+
+ struct task_struct *owner;
+ atomic_t refcount;
+
+ union futex_key key;
+};
+
+/*
+ * We use this hashed waitqueue instead of a normal wait_queue_t, so
+ * we can wake only the relevant ones (hashed queues may be shared).
+ *
+ * A futex_q has a woken state, just like tasks have TASK_RUNNING.
+ * It is considered woken when list_empty(&q->list) || q->lock_ptr == 0.
+ * The order of wakeup is always to make the first condition true, then
+ * wake up q->waiters, then make the second condition true.
+ */
+struct futex_q {
+ struct list_head list;
+ wait_queue_head_t waiters;
+
+ /* Which hash list lock to use: */
+ spinlock_t *lock_ptr;
+
+ /* Key which the futex is hashed on: */
+ union futex_key key;
+
+ /* For fd, sigio sent using these: */
+ int fd;
+ struct file *filp;
+
+ /* Optional priority inheritance state: */
+ struct futex_pi_state *pi_state;
+ struct task_struct *task;
+};
long do_futex(u32 __user *uaddr, int op, u32 val, unsigned long timeout,
u32 __user *uaddr2, u32 val2, u32 val3);
extern int
handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi);
+extern int futex_wait_queue(struct futex_q *q, u32 __user *uaddr, u32 val);
+extern int futex_wait_unqueue(struct futex_q *q);
+
#ifdef CONFIG_FUTEX
extern void exit_robust_list(struct task_struct *curr);
extern void exit_pi_state_list(struct task_struct *curr);
--
^ permalink raw reply [flat|nested] 13+ messages in thread
* [patch 08/12] pollfs: pollable futex
2007-04-01 15:58 [patch 00/12] pollfs: a naive filesystem for pollable objects davi
` (6 preceding siblings ...)
2007-04-01 15:58 ` [patch 07/12] pollfs: asynchronous futex wait davi
@ 2007-04-01 15:58 ` davi
2007-04-01 15:58 ` [patch 09/12] pollfs: check if an AIO event ring is empty davi
` (3 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: davi @ 2007-04-01 15:58 UTC (permalink / raw)
To: Linux Kernel Mailing List; +Cc: Davide Libenzi, Linus Torvalds, Andrew Morton
[-- Attachment #1: pollfs-futex.patch --]
[-- Type: text/plain, Size: 4127 bytes --]
Asynchronously wait for FUTEX_WAKE operation on a futex if it still contains
a given value. There can be only one futex wait per file descriptor. However,
it can be rearmed (possibly at a different address) anytime.
Building block for pollable semaphores and user-defined events.
Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
---
Index: linux-2.6/fs/pollfs/Makefile
===================================================================
--- linux-2.6.orig/fs/pollfs/Makefile
+++ linux-2.6/fs/pollfs/Makefile
@@ -3,3 +3,4 @@ pollfs-y := file.o
pollfs-$(CONFIG_POLLFS_SIGNAL) += signal.o
pollfs-$(CONFIG_POLLFS_TIMER) += timer.o
+pollfs-$(CONFIG_POLLFS_FUTEX) += futex.o
Index: linux-2.6/fs/pollfs/futex.c
===================================================================
--- /dev/null
+++ linux-2.6/fs/pollfs/futex.c
@@ -0,0 +1,155 @@
+/*
+ * pollable futex
+ *
+ * Copyright (C) 2007 Davi E. M. Arnaut
+ *
+ * Licensed under the GNU GPL. See the file COPYING for details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/wait.h>
+#include <linux/poll.h>
+#include <linux/pollfs_fs.h>
+#include <linux/futex.h>
+
+struct futex_event {
+ union {
+ void __user *addr;
+ u64 padding;
+ };
+ int val;
+};
+
+struct pfs_futex {
+ struct futex_q q;
+ struct futex_event fevt;
+ struct mutex mutex;
+ unsigned volatile queued;
+ struct pfs_file file;
+};
+
+static ssize_t read(struct pfs_futex *evs, struct futex_event __user *ufevt)
+{
+ int ret;
+ struct futex_event fevt;
+
+ mutex_lock(&evs->mutex);
+
+ fevt = evs->fevt;
+
+ ret = -EAGAIN;
+
+ if (!evs->queued)
+ ret = -EINVAL;
+ else if (list_empty(&evs->q.list))
+ ret = futex_wait_unqueue(&evs->q);
+
+ switch (ret) {
+ case 1:
+ ret = -EAGAIN;
+ case 0:
+ evs->queued = 0;
+ }
+
+ mutex_unlock(&evs->mutex);
+
+ if (ret < 0)
+ return ret;
+
+ if (copy_to_user(ufevt, &fevt, sizeof(fevt)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static ssize_t write(struct pfs_futex *evs,
+ const struct futex_event __user *ufevt)
+{
+ int ret;
+ struct futex_event fevt;
+
+ if (copy_from_user(&fevt, ufevt, sizeof(fevt)))
+ return -EFAULT;
+
+ mutex_lock(&evs->mutex);
+
+ if (evs->queued)
+ futex_wait_unqueue(&evs->q);
+
+ ret = futex_wait_queue(&evs->q, fevt.addr, fevt.val);
+
+ if (!ret) {
+ evs->queued = 1;
+ evs->fevt = fevt;
+ } else {
+ evs->queued = 0;
+ evs->fevt.addr = NULL;
+ }
+
+ mutex_unlock(&evs->mutex);
+
+ return ret;
+}
+
+static int poll(struct pfs_futex *evs)
+{
+ int ret;
+
+ while (!mutex_trylock(&evs->mutex))
+ cpu_relax();
+
+ ret = evs->queued && list_empty(&evs->q.list) ? POLLIN : 0;
+
+ mutex_unlock(&evs->mutex);
+
+ return ret;
+}
+
+static int release(struct pfs_futex *evs)
+{
+ if (evs->queued)
+ futex_wait_unqueue(&evs->q);
+
+ mutex_destroy(&evs->mutex);
+
+ kfree(evs);
+
+ return 0;
+}
+
+static const struct pfs_operations futex_ops = {
+ .read = PFS_READ(read, struct pfs_futex, struct futex_event),
+ .write = PFS_WRITE(write, struct pfs_futex, struct futex_event),
+ .poll = PFS_POLL(poll, struct pfs_futex),
+ .release = PFS_RELEASE(release, struct pfs_futex),
+ .rsize = sizeof(struct futex_event),
+ .wsize = sizeof(struct futex_event),
+};
+
+asmlinkage long sys_plfutex(void)
+{
+ long error;
+ struct pfs_futex *evs;
+
+ evs = kzalloc(sizeof(*evs), GFP_KERNEL);
+ if (!evs)
+ return -ENOMEM;
+
+ mutex_init(&evs->mutex);
+ init_waitqueue_head(&evs->q.waiters);
+
+ evs->file.data = evs;
+ evs->file.fops = &futex_ops;
+ evs->file.wait = &evs->q.waiters;
+
+ error = pfs_open(&evs->file);
+
+ if (error < 0)
+ release(evs);
+
+ return error;
+}
Index: linux-2.6/init/Kconfig
===================================================================
--- linux-2.6.orig/init/Kconfig
+++ linux-2.6/init/Kconfig
@@ -483,6 +483,13 @@ config POLLFS_TIMER
help
Pollable timer support
+config POLLFS_FUTEX
+ bool "Enable pollfs futex" if EMBEDDED
+ default y
+ depends on POLLFS && FUTEX
+ help
+ Pollable futex support
+
config SHMEM
bool "Use full shmem filesystem" if EMBEDDED
default y
--
^ permalink raw reply [flat|nested] 13+ messages in thread
* [patch 09/12] pollfs: check if an AIO event ring is empty
2007-04-01 15:58 [patch 00/12] pollfs: a naive filesystem for pollable objects davi
` (7 preceding siblings ...)
2007-04-01 15:58 ` [patch 08/12] pollfs: pollable futex davi
@ 2007-04-01 15:58 ` davi
2007-04-01 15:58 ` [patch 10/12] pollfs: pollable aio davi
` (2 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: davi @ 2007-04-01 15:58 UTC (permalink / raw)
To: Linux Kernel Mailing List; +Cc: Davide Libenzi, Linus Torvalds, Andrew Morton
[-- Attachment #1: pollfs-aio-ring-empty.patch --]
[-- Type: text/plain, Size: 1493 bytes --]
The aio_ring_empty() function returns true if the AIO event ring has no
elements, false otherwise.
Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
---
Index: linux-2.6/fs/aio.c
===================================================================
--- linux-2.6.orig/fs/aio.c
+++ linux-2.6/fs/aio.c
@@ -1004,6 +1004,23 @@ put_rq:
return ret;
}
+int fastcall aio_ring_empty(struct kioctx *ioctx)
+{
+ struct aio_ring_info *info = &ioctx->ring_info;
+ struct aio_ring *ring;
+ unsigned long flags;
+ int ret = 0;
+
+ spin_lock_irqsave(&ioctx->ctx_lock, flags);
+ ring = kmap_atomic(info->ring_pages[0], KM_IRQ1);
+ if (ring->head == ring->tail)
+ ret = 1;
+ kunmap_atomic(ring, KM_IRQ1);
+ spin_unlock_irqrestore(&ioctx->ctx_lock, flags);
+
+ return ret;
+}
+
/* aio_read_evt
* Pull an event off of the ioctx's event ring. Returns the number of
* events fetched (0 or 1 ;-)
Index: linux-2.6/include/linux/aio.h
===================================================================
--- linux-2.6.orig/include/linux/aio.h
+++ linux-2.6/include/linux/aio.h
@@ -202,6 +202,7 @@ extern unsigned aio_max_size;
extern ssize_t FASTCALL(wait_on_sync_kiocb(struct kiocb *iocb));
extern int FASTCALL(aio_put_req(struct kiocb *iocb));
+extern int FASTCALL(aio_ring_empty(struct kioctx *ioctx));
extern void FASTCALL(kick_iocb(struct kiocb *iocb));
extern int FASTCALL(aio_complete(struct kiocb *iocb, long res, long res2));
extern void FASTCALL(__put_ioctx(struct kioctx *ctx));
--
^ permalink raw reply [flat|nested] 13+ messages in thread
* [patch 10/12] pollfs: pollable aio
2007-04-01 15:58 [patch 00/12] pollfs: a naive filesystem for pollable objects davi
` (8 preceding siblings ...)
2007-04-01 15:58 ` [patch 09/12] pollfs: check if an AIO event ring is empty davi
@ 2007-04-01 15:58 ` davi
2007-04-01 15:58 ` [patch 11/12] pollfs: asynchronous workqueue davi
2007-04-01 15:58 ` [patch 12/12] pollfs: pollable fsync davi
11 siblings, 0 replies; 13+ messages in thread
From: davi @ 2007-04-01 15:58 UTC (permalink / raw)
To: Linux Kernel Mailing List; +Cc: Davide Libenzi, Linus Torvalds, Andrew Morton
[-- Attachment #1: pollfs-aio.patch --]
[-- Type: text/plain, Size: 3103 bytes --]
Submit, retrieve, or poll aio requests for completion through a
file descriptor. Untested.
Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
---
Index: linux-2.6/fs/pollfs/Makefile
===================================================================
--- linux-2.6.orig/fs/pollfs/Makefile
+++ linux-2.6/fs/pollfs/Makefile
@@ -4,3 +4,4 @@ pollfs-y := file.o
pollfs-$(CONFIG_POLLFS_SIGNAL) += signal.o
pollfs-$(CONFIG_POLLFS_TIMER) += timer.o
pollfs-$(CONFIG_POLLFS_FUTEX) += futex.o
+pollfs-$(CONFIG_POLLFS_AIO) += aio.o
Index: linux-2.6/fs/pollfs/aio.c
===================================================================
--- /dev/null
+++ linux-2.6/fs/pollfs/aio.c
@@ -0,0 +1,103 @@
+/*
+ * pollable aio
+ *
+ * Copyright (C) 2007 Davi E. M. Arnaut
+ *
+ * Licensed under the GNU GPL. See the file COPYING for details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/wait.h>
+#include <linux/poll.h>
+#include <linux/pollfs_fs.h>
+#include <linux/aio.h>
+#include <linux/syscalls.h>
+
+struct pfs_aio {
+ struct kioctx *ioctx;
+ struct pfs_file file;
+};
+
+static ssize_t read(struct pfs_aio *evs, struct io_event __user *uioevt)
+{
+ int ret;
+
+ ret = sys_io_getevents(evs->ioctx->user_id, 0, 1, uioevt, NULL);
+
+ if (!ret)
+ ret = -EAGAIN;
+ else if (ret > 0)
+ ret = 0;
+
+ return ret;
+}
+
+static ssize_t write(struct pfs_aio *evs, const struct iocb __user *uiocb)
+{
+ struct iocb iocb;
+
+ if (copy_from_user(&iocb, uiocb, sizeof(iocb)))
+ return -EFAULT;
+
+ return io_submit_one(evs->ioctx, uiocb, &iocb);
+}
+
+static int poll(struct pfs_aio *evs)
+{
+ int ret;
+
+ ret = aio_ring_empty(evs->ioctx) ? 0 : POLLIN;
+
+ return ret;
+}
+
+static int release(struct pfs_aio *evs)
+{
+ put_ioctx(evs->ioctx);
+
+ kfree(evs);
+
+ return 0;
+}
+
+static const struct pfs_operations aio_ops = {
+ .read = PFS_READ(read, struct pfs_aio, struct io_event),
+ .write = PFS_WRITE(write, struct pfs_aio, struct iocb),
+ .poll = PFS_POLL(poll, struct pfs_aio),
+ .release = PFS_RELEASE(release, struct pfs_aio),
+ .rsize = sizeof(struct io_event),
+ .wsize = sizeof(struct iocb),
+};
+
+asmlinkage long sys_plaio(aio_context_t ctx)
+{
+ long error;
+ struct pfs_aio *evs;
+ struct kioctx *ioctx = lookup_ioctx(ctx);
+
+ if (!ioctx)
+ return -EINVAL;
+
+ evs = kzalloc(sizeof(*evs), GFP_KERNEL);
+ if (!evs) {
+ put_ioctx(ioctx);
+ return -ENOMEM;
+ }
+
+ evs->ioctx = ioctx;
+
+ evs->file.data = evs;
+ evs->file.fops = &aio_ops;
+ evs->file.wait = &ioctx->wait;
+
+ error = pfs_open(&evs->file);
+
+ if (error < 0)
+ release(evs);
+
+ return error;
+}
Index: linux-2.6/init/Kconfig
===================================================================
--- linux-2.6.orig/init/Kconfig
+++ linux-2.6/init/Kconfig
@@ -490,6 +490,13 @@ config POLLFS_FUTEX
help
Pollable futex support
+config POLLFS_AIO
+ bool "Enable pollfs aio" if EMBEDDED
+ default y
+ depends on POLLFS
+ help
+ Pollable aio support
+
config SHMEM
bool "Use full shmem filesystem" if EMBEDDED
default y
--
^ permalink raw reply [flat|nested] 13+ messages in thread
* [patch 11/12] pollfs: asynchronous workqueue
2007-04-01 15:58 [patch 00/12] pollfs: a naive filesystem for pollable objects davi
` (9 preceding siblings ...)
2007-04-01 15:58 ` [patch 10/12] pollfs: pollable aio davi
@ 2007-04-01 15:58 ` davi
2007-04-01 15:58 ` [patch 12/12] pollfs: pollable fsync davi
11 siblings, 0 replies; 13+ messages in thread
From: davi @ 2007-04-01 15:58 UTC (permalink / raw)
To: Linux Kernel Mailing List; +Cc: Davide Libenzi, Linus Torvalds, Andrew Morton
[-- Attachment #1: pollfs-async-workqueue.patch --]
[-- Type: text/plain, Size: 8960 bytes --]
Asynchronously run work items.
If the worker thread blocks while the kernel executes the work function
call, a new worker thread is created (if one is not available) to handle
the remaining workqueue items.
Various errors and resource limitations are not yet handled.
Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
---
Index: linux-2.6/include/linux/workqueue.h
===================================================================
--- linux-2.6.orig/include/linux/workqueue.h
+++ linux-2.6/include/linux/workqueue.h
@@ -25,7 +25,8 @@ struct work_struct {
atomic_long_t data;
#define WORK_STRUCT_PENDING 0 /* T if work item pending execution */
#define WORK_STRUCT_NOAUTOREL 1 /* F if work item automatically released on exec */
-#define WORK_STRUCT_FLAG_MASK (3UL)
+#define WORK_STRUCT_ASYNC 2 /* T if work item can be executed asynchronously */
+#define WORK_STRUCT_FLAG_MASK (7UL)
#define WORK_STRUCT_WQ_DATA_MASK (~WORK_STRUCT_FLAG_MASK)
struct list_head entry;
work_func_t func;
@@ -171,6 +172,7 @@ extern int FASTCALL(queue_work(struct wo
extern int FASTCALL(queue_delayed_work(struct workqueue_struct *wq, struct delayed_work *work, unsigned long delay));
extern int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
struct delayed_work *work, unsigned long delay);
+extern int FASTCALL(queue_async_work(struct workqueue_struct *wq, struct work_struct *work));
extern void FASTCALL(flush_workqueue(struct workqueue_struct *wq));
extern int FASTCALL(schedule_work(struct work_struct *work));
Index: linux-2.6/kernel/workqueue.c
===================================================================
--- linux-2.6.orig/kernel/workqueue.c
+++ linux-2.6/kernel/workqueue.c
@@ -14,6 +14,7 @@
* Theodore Ts'o <tytso@mit.edu>
*
* Made to use alloc_percpu by Christoph Lameter <clameter@sgi.com>.
+ * Asynchronous workqueue by Davi E. M. Arnaut <davi.arnaut@gmail.com>
*/
#include <linux/module.h>
@@ -60,6 +61,8 @@ struct cpu_workqueue_struct {
int run_depth; /* Detect run_workqueue() recursion depth */
int freezeable; /* Freeze the thread during suspend */
+
+ struct list_head threadlist;
} ____cacheline_aligned;
/*
@@ -297,9 +300,27 @@ int queue_delayed_work_on(int cpu, struc
}
EXPORT_SYMBOL_GPL(queue_delayed_work_on);
+/**
+ * queue_async_work - queue an asynchronous work on a workqueue
+ * @wq: workqueue to use
+ * @work: work to queue
+ *
+ * Returns 0 if @work was already on a queue, non-zero otherwise.
+ *
+ * We queue the work to the CPU it was submitted, but there is no
+ * guarantee that it will be processed by that CPU.
+ */
+int fastcall queue_async_work(struct workqueue_struct *wq, struct work_struct *work)
+{
+ set_bit(WORK_STRUCT_ASYNC, work_data_bits(work));
+
+ return queue_work(wq, work);
+}
+EXPORT_SYMBOL_GPL(queue_async_work);
+
static void run_workqueue(struct cpu_workqueue_struct *cwq)
{
- unsigned long flags;
+ unsigned long flags, async;
/*
* Keep taking off work from the queue until
@@ -324,8 +345,18 @@ static void run_workqueue(struct cpu_wor
BUG_ON(get_wq_data(work) != cwq);
if (!test_bit(WORK_STRUCT_NOAUTOREL, work_data_bits(work)))
work_release(work);
+
+ async = test_bit(WORK_STRUCT_ASYNC, work_data_bits(work));
+ if (unlikely(async))
+ current->cwq = cwq;
+
f(work);
+ if (current->cwq)
+ current->cwq = NULL;
+ else if (async)
+ async++;
+
if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
printk(KERN_ERR "BUG: workqueue leaked lock or atomic: "
"%s/0x%08x/%d\n",
@@ -340,6 +371,17 @@ static void run_workqueue(struct cpu_wor
spin_lock_irqsave(&cwq->lock, flags);
cwq->remove_sequence++;
wake_up(&cwq->work_done);
+
+ if (async > 1) {
+ if (cwq->thread) {
+ list_add_tail(&current->cwq_entry, &cwq->threadlist);
+ spin_unlock_irqrestore(&cwq->lock, flags);
+ schedule();
+ spin_lock_irqsave(&cwq->lock, flags);
+ }
+ else
+ cwq->thread = current;
+ }
}
cwq->run_depth--;
spin_unlock_irqrestore(&cwq->lock, flags);
@@ -467,6 +509,7 @@ static struct task_struct *create_workqu
cwq->remove_sequence = 0;
cwq->freezeable = freezeable;
INIT_LIST_HEAD(&cwq->worklist);
+ INIT_LIST_HEAD(&cwq->threadlist);
init_waitqueue_head(&cwq->more_work);
init_waitqueue_head(&cwq->work_done);
@@ -534,15 +577,19 @@ static void cleanup_workqueue_thread(str
{
struct cpu_workqueue_struct *cwq;
unsigned long flags;
- struct task_struct *p;
+ struct task_struct *p, *tmp;
+ LIST_HEAD(threadlist);
cwq = per_cpu_ptr(wq->cpu_wq, cpu);
spin_lock_irqsave(&cwq->lock, flags);
p = cwq->thread;
cwq->thread = NULL;
+ list_splice_init(&cwq->threadlist, &threadlist);
spin_unlock_irqrestore(&cwq->lock, flags);
if (p)
kthread_stop(p);
+ list_for_each_entry_safe(p, tmp, &threadlist, cwq_entry)
+ kthread_stop(p);
}
/**
@@ -811,6 +858,68 @@ static int __devinit workqueue_cpu_callb
return NOTIFY_OK;
}
+static void create_cpu_worker(struct cpu_workqueue_struct *cwq)
+{
+ unsigned long flags;
+ struct task_struct *p;
+ struct workqueue_struct *wq = cwq->wq;
+ int cpu = first_cpu(current->cpus_allowed);
+
+ mutex_lock(&workqueue_mutex);
+ if (is_single_threaded(wq))
+ p = kthread_create(worker_thread, cwq, "%s", wq->name);
+ else
+ p = kthread_create(worker_thread, cwq, "%s/%d", wq->name, cpu);
+
+ if (IS_ERR(p))
+ /* oh well, there isn't much we can do anyway. */
+ goto unlock;
+
+ kthread_bind(p, cpu);
+
+ spin_lock_irqsave(&cwq->lock, flags);
+ if (!cwq->thread)
+ wake_up_process(p);
+ else
+ list_add_tail(&p->cwq_entry, &cwq->threadlist);
+ spin_unlock_irqrestore(&cwq->lock, flags);
+
+unlock:
+ mutex_unlock(&workqueue_mutex);
+}
+
+static inline void wake_up_cpu_worker(struct cpu_workqueue_struct *cwq)
+{
+ struct task_struct *worker = list_entry(cwq->threadlist.next,
+ struct task_struct, cwq_entry);
+
+ list_del_init(cwq->threadlist.next);
+
+ cwq->thread = worker;
+
+ wake_up_process(worker);
+}
+
+void schedule_workqueue(struct task_struct *task)
+{
+ struct cpu_workqueue_struct *cwq = task->cwq;
+ unsigned long flags;
+
+ task->cwq = NULL;
+
+ spin_lock_irqsave(&cwq->lock, flags);
+ if (cwq->thread == task) {
+ if (!list_empty(&cwq->threadlist))
+ wake_up_cpu_worker(cwq);
+ else
+ task = cwq->thread = NULL;
+ }
+ spin_unlock_irqrestore(&cwq->lock, flags);
+
+ if (!task)
+ create_cpu_worker(cwq);
+}
+
void init_workqueues(void)
{
singlethread_cpu = first_cpu(cpu_possible_map);
Index: linux-2.6/include/linux/sched.h
===================================================================
--- linux-2.6.orig/include/linux/sched.h
+++ linux-2.6/include/linux/sched.h
@@ -843,6 +843,9 @@ struct task_struct {
struct mm_struct *mm, *active_mm;
+ /* (asynchronous) cpu workqueue */
+ void *cwq;
+ struct list_head cwq_entry;
/* task state */
struct linux_binfmt *binfmt;
long exit_state;
@@ -1409,6 +1412,7 @@ extern int disallow_signal(int);
extern int do_execve(char *, char __user * __user *, char __user * __user *, struct pt_regs *);
extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *);
struct task_struct *fork_idle(int);
+extern void schedule_workqueue(struct task_struct *);
extern void set_task_comm(struct task_struct *tsk, char *from);
extern void get_task_comm(char *to, struct task_struct *tsk);
Index: linux-2.6/kernel/sched.c
===================================================================
--- linux-2.6.orig/kernel/sched.c
+++ linux-2.6/kernel/sched.c
@@ -3305,6 +3305,12 @@ asmlinkage void __sched schedule(void)
}
profile_hit(SCHED_PROFILING, __builtin_return_address(0));
+ /* asynchronous queue worker */
+ if (unlikely(current->cwq))
+ /* only if it's a voluntary sleep */
+ if (!(preempt_count() & PREEMPT_ACTIVE) && current->state != TASK_RUNNING)
+ schedule_workqueue(current);
+
need_resched:
preempt_disable();
prev = current;
Index: linux-2.6/include/linux/init_task.h
===================================================================
--- linux-2.6.orig/include/linux/init_task.h
+++ linux-2.6/include/linux/init_task.h
@@ -112,6 +112,7 @@ extern struct group_info init_groups;
.tasks = LIST_HEAD_INIT(tsk.tasks), \
.ptrace_children= LIST_HEAD_INIT(tsk.ptrace_children), \
.ptrace_list = LIST_HEAD_INIT(tsk.ptrace_list), \
+ .cwq_entry = LIST_HEAD_INIT(tsk.cwq_entry), \
.real_parent = &tsk, \
.parent = &tsk, \
.children = LIST_HEAD_INIT(tsk.children), \
Index: linux-2.6/kernel/fork.c
===================================================================
--- linux-2.6.orig/kernel/fork.c
+++ linux-2.6/kernel/fork.c
@@ -1173,6 +1173,7 @@ static struct task_struct *copy_process(
INIT_LIST_HEAD(&p->thread_group);
INIT_LIST_HEAD(&p->ptrace_children);
INIT_LIST_HEAD(&p->ptrace_list);
+ INIT_LIST_HEAD(&p->cwq_entry);
/* Perform scheduler related setup. Assign this task to a CPU. */
sched_fork(p, clone_flags);
--
^ permalink raw reply [flat|nested] 13+ messages in thread
* [patch 12/12] pollfs: pollable fsync
2007-04-01 15:58 [patch 00/12] pollfs: a naive filesystem for pollable objects davi
` (10 preceding siblings ...)
2007-04-01 15:58 ` [patch 11/12] pollfs: asynchronous workqueue davi
@ 2007-04-01 15:58 ` davi
11 siblings, 0 replies; 13+ messages in thread
From: davi @ 2007-04-01 15:58 UTC (permalink / raw)
To: Linux Kernel Mailing List; +Cc: Davide Libenzi, Linus Torvalds, Andrew Morton
[-- Attachment #1: pollfs-sync-file.patch --]
[-- Type: text/plain, Size: 4473 bytes --]
Pollable asynchronous fsync() using a global workqueue. Maybe a sync_file_range
in the future.
Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
---
Index: linux-2.6/fs/pollfs/Makefile
===================================================================
--- linux-2.6.orig/fs/pollfs/Makefile
+++ linux-2.6/fs/pollfs/Makefile
@@ -5,3 +5,4 @@ pollfs-$(CONFIG_POLLFS_SIGNAL) += signal
pollfs-$(CONFIG_POLLFS_TIMER) += timer.o
pollfs-$(CONFIG_POLLFS_FUTEX) += futex.o
pollfs-$(CONFIG_POLLFS_AIO) += aio.o
+pollfs-$(CONFIG_POLLFS_SYNC) += sync.o
Index: linux-2.6/fs/pollfs/sync.c
===================================================================
--- /dev/null
+++ linux-2.6/fs/pollfs/sync.c
@@ -0,0 +1,173 @@
+/*
+ * pollable fsync
+ *
+ * Copyright (C) 2007 Davi E. M. Arnaut
+ *
+ * Licensed under the GNU GPL. See the file COPYING for details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/wait.h>
+#include <linux/poll.h>
+#include <linux/pollfs_fs.h>
+#include <linux/workqueue.h>
+#include <linux/file.h>
+
+struct sync_file {
+ int fd;
+ int datasync;
+ long result;
+};
+
+struct pfs_sync {
+ struct file *filp;
+ struct sync_file sync;
+ struct work_struct work;
+ struct mutex mutex;
+ enum {
+ WORK_REST,
+ WORK_BUSY,
+ WORK_DONE,
+ } status;
+ wait_queue_head_t wait;
+ struct pfs_file file;
+};
+
+static struct workqueue_struct *sync_wq;
+
+static void sync_file_work(struct work_struct *work)
+{
+ struct pfs_sync *evs = container_of(work, struct pfs_sync, work);
+
+ evs->sync.result = do_fsync(evs->filp, evs->sync.datasync);
+
+ fput(evs->filp);
+ evs->status = WORK_DONE;
+
+ wake_up_all(&evs->wait);
+}
+
+static ssize_t read(struct pfs_sync *evs, struct sync_file __user *usync)
+{
+ int ret = 0;
+ struct sync_file sync = {};
+
+ mutex_lock(&evs->mutex);
+ switch (evs->status) {
+ case WORK_REST:
+ ret = -EINVAL; break;
+ case WORK_BUSY:
+ ret = -EAGAIN; break;
+ case WORK_DONE:
+ evs->status = WORK_REST;
+ sync = evs->sync;
+ break;
+ }
+ mutex_unlock(&evs->mutex);
+
+ if (ret)
+ return ret;
+
+ if (copy_to_user(usync, &sync, sizeof(sync)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static ssize_t write(struct pfs_sync *evs, const struct sync_file __user *usync)
+{
+ int ret = 0;
+ struct file *filp;
+ struct sync_file sync;
+
+ if (copy_from_user(&sync, usync, sizeof(sync)))
+ return -EFAULT;
+
+ filp = fget(sync.fd);
+ if (!filp)
+ return -EINVAL;
+
+ mutex_lock(&evs->mutex);
+ if (evs->status != WORK_REST)
+ ret = -EAGAIN;
+ else {
+ evs->filp = filp;
+ evs->status = WORK_BUSY;
+ queue_async_work(sync_wq, &evs->work);
+ }
+ mutex_unlock(&evs->mutex);
+
+ return ret;
+}
+
+static int poll(struct pfs_sync *evs)
+{
+ int ret = 0;
+
+ if (evs->status == WORK_DONE)
+ ret = POLLIN;
+ else if (evs->status == WORK_REST)
+ ret = POLLOUT;
+
+ return ret;
+}
+
+static int release(struct pfs_sync *evs)
+{
+ wait_event(evs->wait, evs->status != WORK_BUSY);
+
+ kfree(evs);
+
+ return 0;
+}
+
+static const struct pfs_operations sync_ops = {
+ .read = PFS_READ(read, struct pfs_sync, struct sync_file),
+ .write = PFS_WRITE(write, struct pfs_sync, struct sync_file),
+ .poll = PFS_POLL(poll, struct pfs_sync),
+ .release = PFS_RELEASE(release, struct pfs_sync),
+ .rsize = sizeof(struct sync_file),
+ .wsize = sizeof(struct sync_file),
+};
+
+asmlinkage long sys_plsync(void)
+{
+ long error;
+ struct pfs_sync *evs;
+
+ if (!sync_wq)
+ return -ENOSYS;
+
+ evs = kzalloc(sizeof(*evs), GFP_KERNEL);
+ if (!evs)
+ return -ENOMEM;
+
+ evs->status = WORK_REST;
+ mutex_init(&evs->mutex);
+ init_waitqueue_head(&evs->wait);
+ INIT_WORK(&evs->work, sync_file_work);
+
+ evs->file.data = evs;
+ evs->file.fops = &sync_ops;
+ evs->file.wait = &evs->wait;
+
+ error = pfs_open(&evs->file);
+
+ if (error < 0)
+ release(evs);
+
+ return error;
+}
+
+static int __init init(void)
+{
+ sync_wq = create_workqueue("syncd");
+ WARN_ON(!sync_wq);
+ return 0;
+}
+
+__initcall(init);
Index: linux-2.6/init/Kconfig
===================================================================
--- linux-2.6.orig/init/Kconfig
+++ linux-2.6/init/Kconfig
@@ -497,6 +497,13 @@ config POLLFS_AIO
help
Pollable aio support
+config POLLFS_SYNC
+ bool "Enable pollfs file sync" if EMBEDDED
+ default y
+ depends on POLLFS
+ help
+ Pollable file sync support
+
config SHMEM
bool "Use full shmem filesystem" if EMBEDDED
default y
--
^ permalink raw reply [flat|nested] 13+ messages in thread