LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [PATCH 2.6.21-rc4-mm1 0/4] Futexes functionalities and improvements
@ 2007-03-21 9:54 Pierre.Peiffer
2007-03-21 9:54 ` [PATCH 2.6.21-rc4-mm1 1/4] futex priority based wakeup Pierre.Peiffer
` (3 more replies)
0 siblings, 4 replies; 12+ messages in thread
From: Pierre.Peiffer @ 2007-03-21 9:54 UTC (permalink / raw)
To: akpm; +Cc: mingo, drepper, linux-kernel, jean-pierre.dion
Hi Andrew,
This is a re-send of a series of patches concerning futexes (here
after is a short description).
I have reworked the patches to take into account the last changes
about futex, and this series should apply cleanly on -mm tree (the changes
mostly affect patch 2 "futex_wait uses hrtimer")
I also took into account the remark of Peter Zijlstra in patch 3
concerning futex_requeue_pi.
Could you consider them (again) for inclusion in the -mm tree?
All of them have already been discussed in January and have already
been included in -rt for a while. I think that we agreed to potentially
include them in the -mm tree.
And, again, Ulrich is especially interested in sys_futex64.
There are:
* futex uses prio list : allows RT-threads to be woken in priority order
instead of FIFO order.
* futex_wait uses hrtimer : allows the use of finer timer resolution.
* futex_requeue_pi functionality : allows use of requeue optimization for
PI-mutexes/PI-futexes.
* futex64 syscall : allows use of 64-bit futexes instead of 32-bit.
Thanks,
--
Pierre P.
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH 2.6.21-rc4-mm1 1/4] futex priority based wakeup
2007-03-21 9:54 [PATCH 2.6.21-rc4-mm1 0/4] Futexes functionalities and improvements Pierre.Peiffer
@ 2007-03-21 9:54 ` Pierre.Peiffer
2007-03-21 9:54 ` [PATCH 2.6.21-rc4-mm1 2/4] Make futex_wait() use an hrtimer for timeout Pierre.Peiffer
` (2 subsequent siblings)
3 siblings, 0 replies; 12+ messages in thread
From: Pierre.Peiffer @ 2007-03-21 9:54 UTC (permalink / raw)
To: akpm
Cc: mingo, drepper, linux-kernel, jean-pierre.dion, Sebastien Dugue,
Pierre Peiffer
[-- Attachment #1: futex-use-prio-list.diff --]
[-- Type: text/plain, Size: 7985 bytes --]
Today, all threads waiting for a given futex are woken in FIFO order (first
waiter woken first) instead of priority order.
This patch makes use of plist (priority ordered lists) instead of simple list in
futex_hash_bucket.
All non-RT threads are stored with priority MAX_RT_PRIO, causing them to be
woken last, in FIFO order (RT-threads are woken first, in priority order).
Signed-off-by: Sebastien Dugue <sebastien.dugue@bull.net>
Signed-off-by: Pierre Peiffer <pierre.peiffer@bull.net>
---
kernel/futex.c | 78 +++++++++++++++++++++++++++++++++++----------------------
1 file changed, 49 insertions(+), 29 deletions(-)
Index: b/kernel/futex.c
===================================================================
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -81,12 +81,12 @@ struct futex_pi_state {
* we can wake only the relevant ones (hashed queues may be shared).
*
* A futex_q has a woken state, just like tasks have TASK_RUNNING.
- * It is considered woken when list_empty(&q->list) || q->lock_ptr == 0.
+ * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0.
* The order of wakeup is always to make the first condition true, then
* wake up q->waiters, then make the second condition true.
*/
struct futex_q {
- struct list_head list;
+ struct plist_node list;
wait_queue_head_t waiters;
/* Which hash list lock to use: */
@@ -108,8 +108,8 @@ struct futex_q {
* Split the global futex_lock into every hash list lock.
*/
struct futex_hash_bucket {
- spinlock_t lock;
- struct list_head chain;
+ spinlock_t lock;
+ struct plist_head chain;
};
static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS];
@@ -443,13 +443,13 @@ lookup_pi_state(u32 uval, struct futex_h
{
struct futex_pi_state *pi_state = NULL;
struct futex_q *this, *next;
- struct list_head *head;
+ struct plist_head *head;
struct task_struct *p;
pid_t pid;
head = &hb->chain;
- list_for_each_entry_safe(this, next, head, list) {
+ plist_for_each_entry_safe(this, next, head, list) {
if (match_futex(&this->key, &me->key)) {
/*
* Another waiter already exists - bump up
@@ -513,12 +513,12 @@ lookup_pi_state(u32 uval, struct futex_h
*/
static void wake_futex(struct futex_q *q)
{
- list_del_init(&q->list);
+ plist_del(&q->list, &q->list.plist);
if (q->filp)
send_sigio(&q->filp->f_owner, q->fd, POLL_IN);
/*
* The lock in wake_up_all() is a crucial memory barrier after the
- * list_del_init() and also before assigning to q->lock_ptr.
+ * plist_del() and also before assigning to q->lock_ptr.
*/
wake_up_all(&q->waiters);
/*
@@ -633,7 +633,7 @@ static int futex_wake(u32 __user *uaddr,
{
struct futex_hash_bucket *hb;
struct futex_q *this, *next;
- struct list_head *head;
+ struct plist_head *head;
union futex_key key;
int ret;
@@ -647,7 +647,7 @@ static int futex_wake(u32 __user *uaddr,
spin_lock(&hb->lock);
head = &hb->chain;
- list_for_each_entry_safe(this, next, head, list) {
+ plist_for_each_entry_safe(this, next, head, list) {
if (match_futex (&this->key, &key)) {
if (this->pi_state) {
ret = -EINVAL;
@@ -675,7 +675,7 @@ futex_wake_op(u32 __user *uaddr1, u32 __
{
union futex_key key1, key2;
struct futex_hash_bucket *hb1, *hb2;
- struct list_head *head;
+ struct plist_head *head;
struct futex_q *this, *next;
int ret, op_ret, attempt = 0;
@@ -748,7 +748,7 @@ retry:
head = &hb1->chain;
- list_for_each_entry_safe(this, next, head, list) {
+ plist_for_each_entry_safe(this, next, head, list) {
if (match_futex (&this->key, &key1)) {
wake_futex(this);
if (++ret >= nr_wake)
@@ -760,7 +760,7 @@ retry:
head = &hb2->chain;
op_ret = 0;
- list_for_each_entry_safe(this, next, head, list) {
+ plist_for_each_entry_safe(this, next, head, list) {
if (match_futex (&this->key, &key2)) {
wake_futex(this);
if (++op_ret >= nr_wake2)
@@ -787,7 +787,7 @@ static int futex_requeue(u32 __user *uad
{
union futex_key key1, key2;
struct futex_hash_bucket *hb1, *hb2;
- struct list_head *head1;
+ struct plist_head *head1;
struct futex_q *this, *next;
int ret, drop_count = 0;
@@ -836,7 +836,7 @@ static int futex_requeue(u32 __user *uad
}
head1 = &hb1->chain;
- list_for_each_entry_safe(this, next, head1, list) {
+ plist_for_each_entry_safe(this, next, head1, list) {
if (!match_futex (&this->key, &key1))
continue;
if (++ret <= nr_wake) {
@@ -847,9 +847,13 @@ static int futex_requeue(u32 __user *uad
* requeue.
*/
if (likely(head1 != &hb2->chain)) {
- list_move_tail(&this->list, &hb2->chain);
+ plist_del(&this->list, &hb1->chain);
+ plist_add(&this->list, &hb2->chain);
this->lock_ptr = &hb2->lock;
- }
+#ifdef CONFIG_DEBUG_PI_LIST
+ this->list.plist.lock = &hb2->lock;
+#endif
+ }
this->key = key2;
get_futex_key_refs(&key2);
drop_count++;
@@ -894,7 +898,23 @@ queue_lock(struct futex_q *q, int fd, st
static inline void __queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
{
- list_add_tail(&q->list, &hb->chain);
+ int prio;
+
+ /*
+ * The priority used to register this element is
+ * - either the real thread-priority for the real-time threads
+ * (i.e. threads with a priority lower than MAX_RT_PRIO)
+ * - or MAX_RT_PRIO for non-RT threads.
+ * Thus, all RT-threads are woken first in priority order, and
+ * the others are woken last, in FIFO order.
+ */
+ prio = min(current->normal_prio, MAX_RT_PRIO);
+
+ plist_node_init(&q->list, prio);
+#ifdef CONFIG_DEBUG_PI_LIST
+ q->list.plist.lock = &hb->lock;
+#endif
+ plist_add(&q->list, &hb->chain);
q->task = current;
spin_unlock(&hb->lock);
}
@@ -949,8 +969,8 @@ static int unqueue_me(struct futex_q *q)
spin_unlock(lock_ptr);
goto retry;
}
- WARN_ON(list_empty(&q->list));
- list_del(&q->list);
+ WARN_ON(plist_node_empty(&q->list));
+ plist_del(&q->list, &q->list.plist);
BUG_ON(q->pi_state);
@@ -968,8 +988,8 @@ static int unqueue_me(struct futex_q *q)
*/
static void unqueue_me_pi(struct futex_q *q, struct futex_hash_bucket *hb)
{
- WARN_ON(list_empty(&q->list));
- list_del(&q->list);
+ WARN_ON(plist_node_empty(&q->list));
+ plist_del(&q->list, &q->list.plist);
BUG_ON(!q->pi_state);
free_pi_state(q->pi_state);
@@ -1065,11 +1085,11 @@ static int futex_wait_abstime(u32 __user
__set_current_state(TASK_INTERRUPTIBLE);
add_wait_queue(&q.waiters, &wait);
/*
- * !list_empty() is safe here without any lock.
+ * !plist_node_empty() is safe here without any lock.
* q.lock_ptr != 0 is not safe, because of ordering against wakeup.
*/
time_left = 0;
- if (likely(!list_empty(&q.list))) {
+ if (likely(!plist_node_empty(&q.list))) {
unsigned long rel_time;
if (timed) {
@@ -1384,7 +1404,7 @@ static int futex_unlock_pi(u32 __user *u
struct futex_hash_bucket *hb;
struct futex_q *this, *next;
u32 uval;
- struct list_head *head;
+ struct plist_head *head;
union futex_key key;
int ret, attempt = 0;
@@ -1435,7 +1455,7 @@ retry_locked:
*/
head = &hb->chain;
- list_for_each_entry_safe(this, next, head, list) {
+ plist_for_each_entry_safe(this, next, head, list) {
if (!match_futex (&this->key, &key))
continue;
ret = wake_futex_pi(uaddr, uval, this);
@@ -1509,10 +1529,10 @@ static unsigned int futex_poll(struct fi
poll_wait(filp, &q->waiters, wait);
/*
- * list_empty() is safe here without any lock.
+ * plist_node_empty() is safe here without any lock.
* q->lock_ptr != 0 is not safe, because of ordering against wakeup.
*/
- if (list_empty(&q->list))
+ if (plist_node_empty(&q->list))
ret = POLLIN | POLLRDNORM;
return ret;
@@ -1895,7 +1915,7 @@ static int __init init(void)
}
for (i = 0; i < ARRAY_SIZE(futex_queues); i++) {
- INIT_LIST_HEAD(&futex_queues[i].chain);
+ plist_head_init(&futex_queues[i].chain, &futex_queues[i].lock);
spin_lock_init(&futex_queues[i].lock);
}
return 0;
--
Pierre Peiffer
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH 2.6.21-rc4-mm1 2/4] Make futex_wait() use an hrtimer for timeout
2007-03-21 9:54 [PATCH 2.6.21-rc4-mm1 0/4] Futexes functionalities and improvements Pierre.Peiffer
2007-03-21 9:54 ` [PATCH 2.6.21-rc4-mm1 1/4] futex priority based wakeup Pierre.Peiffer
@ 2007-03-21 9:54 ` Pierre.Peiffer
2007-03-26 9:57 ` Andrew Morton
2007-03-21 9:54 ` [PATCH 2.6.21-rc4-mm1 3/4] futex_requeue_pi optimization Pierre.Peiffer
2007-03-21 9:54 ` [PATCH 2.6.21-rc4-mm1 4/4] sys_futex64 : allows 64bit futexes Pierre.Peiffer
3 siblings, 1 reply; 12+ messages in thread
From: Pierre.Peiffer @ 2007-03-21 9:54 UTC (permalink / raw)
To: akpm
Cc: mingo, drepper, linux-kernel, jean-pierre.dion, Sebastien Dugue,
Pierre Peiffer
[-- Attachment #1: futex_wait-use-hrtimer.diff --]
[-- Type: text/plain, Size: 8977 bytes --]
This patch modifies futex_wait() to use an hrtimer + schedule() in place of
schedule_timeout().
schedule_timeout() is tick based, therefore the timeout granularity is
the tick (1 ms, 4 ms or 10 ms depending on HZ). By using a high resolution
timer for timeout wakeup, we can attain a much finer timeout granularity
(in the microsecond range). This parallels what is already done for
futex_lock_pi().
The timeout passed to the syscall is no longer converted to jiffies
and is therefore passed to do_futex() and futex_wait() as an absolute
ktime_t therefore keeping nanosecond resolution.
Also this removes the need to pass the nanoseconds timeout part to
futex_lock_pi() in val2.
In futex_wait(), if there is no timeout then a regular schedule() is
performed. Otherwise, an hrtimer is fired before schedule() is called.
Signed-off-by: Sebastien Dugue <sebastien.dugue@bull.net>
Signed-off-by: Pierre Peiffer <pierre.peiffer@bull.net>
---
include/linux/futex.h | 3 +
kernel/futex.c | 85 ++++++++++++++++++++++++--------------------------
kernel/futex_compat.c | 17 ++++------
3 files changed, 51 insertions(+), 54 deletions(-)
Index: b/kernel/futex.c
===================================================================
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1001,16 +1001,16 @@ static void unqueue_me_pi(struct futex_q
}
static long futex_wait_restart(struct restart_block *restart);
-static int futex_wait_abstime(u32 __user *uaddr, u32 val,
- int timed, unsigned long abs_time)
+static int futex_wait(u32 __user *uaddr, u32 val, ktime_t *abs_time)
{
struct task_struct *curr = current;
DECLARE_WAITQUEUE(wait, curr);
struct futex_hash_bucket *hb;
struct futex_q q;
- unsigned long time_left = 0;
u32 uval;
int ret;
+ struct hrtimer_sleeper t;
+ int rem = 0;
q.pi_state = NULL;
retry:
@@ -1088,20 +1088,29 @@ static int futex_wait_abstime(u32 __user
* !plist_node_empty() is safe here without any lock.
* q.lock_ptr != 0 is not safe, because of ordering against wakeup.
*/
- time_left = 0;
if (likely(!plist_node_empty(&q.list))) {
- unsigned long rel_time;
+ if (!abs_time)
+ schedule();
+ else {
+ hrtimer_init(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+ hrtimer_init_sleeper(&t, current);
+ t.timer.expires = *abs_time;
+
+ hrtimer_start(&t.timer, t.timer.expires, HRTIMER_MODE_ABS);
+
+ /*
+ * the timer could have already expired, in which
+ * case current would be flagged for rescheduling.
+ * Don't bother calling schedule.
+ */
+ if (likely(t.task))
+ schedule();
- if (timed) {
- unsigned long now = jiffies;
- if (time_after(now, abs_time))
- rel_time = 0;
- else
- rel_time = abs_time - now;
- } else
- rel_time = MAX_SCHEDULE_TIMEOUT;
+ hrtimer_cancel(&t.timer);
- time_left = schedule_timeout(rel_time);
+ /* Flag if a timeout occurred */
+ rem = (t.task == NULL);
+ }
}
__set_current_state(TASK_RUNNING);
@@ -1113,14 +1122,14 @@ static int futex_wait_abstime(u32 __user
/* If we were woken (and unqueued), we succeeded, whatever. */
if (!unqueue_me(&q))
return 0;
- if (time_left == 0)
+ if (rem)
return -ETIMEDOUT;
/*
* We expect signal_pending(current), but another thread may
* have handled it for us already.
*/
- if (time_left == MAX_SCHEDULE_TIMEOUT)
+ if (!abs_time)
return -ERESTARTSYS;
else {
struct restart_block *restart;
@@ -1128,8 +1137,7 @@ static int futex_wait_abstime(u32 __user
restart->fn = futex_wait_restart;
restart->arg0 = (unsigned long)uaddr;
restart->arg1 = (unsigned long)val;
- restart->arg2 = (unsigned long)timed;
- restart->arg3 = abs_time;
+ restart->arg2 = (unsigned long)abs_time;
return -ERESTART_RESTARTBLOCK;
}
@@ -1141,21 +1149,15 @@ static int futex_wait_abstime(u32 __user
return ret;
}
-static int futex_wait(u32 __user *uaddr, u32 val, unsigned long rel_time)
-{
- int timed = (rel_time != MAX_SCHEDULE_TIMEOUT);
- return futex_wait_abstime(uaddr, val, timed, jiffies+rel_time);
-}
static long futex_wait_restart(struct restart_block *restart)
{
u32 __user *uaddr = (u32 __user *)restart->arg0;
u32 val = (u32)restart->arg1;
- int timed = (int)restart->arg2;
- unsigned long abs_time = restart->arg3;
+ ktime_t *abs_time = (ktime_t *)restart->arg2;
restart->fn = do_no_restart_syscall;
- return (long)futex_wait_abstime(uaddr, val, timed, abs_time);
+ return (long)futex_wait(uaddr, val, abs_time);
}
@@ -1165,8 +1167,8 @@ static long futex_wait_restart(struct re
* if there are waiters then it will block, it does PI, etc. (Due to
* races the kernel might see a 0 value of the futex too.)
*/
-static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec,
- long nsec, int trylock)
+static int futex_lock_pi(u32 __user *uaddr, int detect, ktime_t *time,
+ int trylock)
{
struct hrtimer_sleeper timeout, *to = NULL;
struct task_struct *curr = current;
@@ -1178,11 +1180,11 @@ static int futex_lock_pi(u32 __user *uad
if (refill_pi_state_cache())
return -ENOMEM;
- if (sec != MAX_SCHEDULE_TIMEOUT) {
+ if (time) {
to = &timeout;
hrtimer_init(&to->timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
hrtimer_init_sleeper(to, current);
- to->timer.expires = ktime_set(sec, nsec);
+ to->timer.expires = *time;
}
q.pi_state = NULL;
@@ -1818,7 +1820,7 @@ void exit_robust_list(struct task_struct
}
}
-long do_futex(u32 __user *uaddr, int op, u32 val, unsigned long timeout,
+long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
u32 __user *uaddr2, u32 val2, u32 val3)
{
int ret;
@@ -1844,13 +1846,13 @@ long do_futex(u32 __user *uaddr, int op,
ret = futex_wake_op(uaddr, uaddr2, val, val2, val3);
break;
case FUTEX_LOCK_PI:
- ret = futex_lock_pi(uaddr, val, timeout, val2, 0);
+ ret = futex_lock_pi(uaddr, val, timeout, 0);
break;
case FUTEX_UNLOCK_PI:
ret = futex_unlock_pi(uaddr);
break;
case FUTEX_TRYLOCK_PI:
- ret = futex_lock_pi(uaddr, 0, timeout, val2, 1);
+ ret = futex_lock_pi(uaddr, 0, timeout, 1);
break;
default:
ret = -ENOSYS;
@@ -1863,21 +1865,18 @@ asmlinkage long sys_futex(u32 __user *ua
struct timespec __user *utime, u32 __user *uaddr2,
u32 val3)
{
- struct timespec t;
- unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
+ struct timespec ts;
+ ktime_t t, *tp = NULL;
u32 val2 = 0;
if (utime && (op == FUTEX_WAIT || op == FUTEX_LOCK_PI)) {
- if (copy_from_user(&t, utime, sizeof(t)) != 0)
+ if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
return -EFAULT;
- if (!timespec_valid(&t))
+ if (!timespec_valid(&ts))
return -EINVAL;
if (op == FUTEX_WAIT)
- timeout = timespec_to_jiffies(&t) + 1;
- else {
- timeout = t.tv_sec;
- val2 = t.tv_nsec;
- }
+ t = ktime_add(ktime_get(), timespec_to_ktime(ts));
+ tp = &t;
}
/*
* requeue parameter in 'utime' if op == FUTEX_REQUEUE.
@@ -1885,7 +1884,7 @@ asmlinkage long sys_futex(u32 __user *ua
if (op == FUTEX_REQUEUE || op == FUTEX_CMP_REQUEUE)
val2 = (u32) (unsigned long) utime;
- return do_futex(uaddr, op, val, timeout, uaddr2, val2, val3);
+ return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
}
static int futexfs_get_sb(struct file_system_type *fs_type,
Index: b/include/linux/futex.h
===================================================================
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -1,6 +1,7 @@
#ifndef _LINUX_FUTEX_H
#define _LINUX_FUTEX_H
+#include <linux/ktime.h>
#include <linux/sched.h>
/* Second argument to futex syscall */
@@ -94,7 +95,7 @@ struct robust_list_head {
#define ROBUST_LIST_LIMIT 2048
#ifdef __KERNEL__
-long do_futex(u32 __user *uaddr, int op, u32 val, unsigned long timeout,
+long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
u32 __user *uaddr2, u32 val2, u32 val3);
extern int
Index: b/kernel/futex_compat.c
===================================================================
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -141,24 +141,21 @@ asmlinkage long compat_sys_futex(u32 __u
struct compat_timespec __user *utime, u32 __user *uaddr2,
u32 val3)
{
- struct timespec t;
- unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
+ struct timespec ts;
+ ktime_t t, *tp = NULL;
int val2 = 0;
if (utime && (op == FUTEX_WAIT || op == FUTEX_LOCK_PI)) {
- if (get_compat_timespec(&t, utime))
+ if (get_compat_timespec(&ts, utime))
return -EFAULT;
- if (!timespec_valid(&t))
+ if (!timespec_valid(&ts))
return -EINVAL;
if (op == FUTEX_WAIT)
- timeout = timespec_to_jiffies(&t) + 1;
- else {
- timeout = t.tv_sec;
- val2 = t.tv_nsec;
- }
+ t = ktime_add(ktime_get(), timespec_to_ktime(ts));
+ tp = &t;
}
if (op == FUTEX_REQUEUE || op == FUTEX_CMP_REQUEUE)
val2 = (int) (unsigned long) utime;
- return do_futex(uaddr, op, val, timeout, uaddr2, val2, val3);
+ return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
}
--
Pierre Peiffer
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH 2.6.21-rc4-mm1 3/4] futex_requeue_pi optimization
2007-03-21 9:54 [PATCH 2.6.21-rc4-mm1 0/4] Futexes functionalities and improvements Pierre.Peiffer
2007-03-21 9:54 ` [PATCH 2.6.21-rc4-mm1 1/4] futex priority based wakeup Pierre.Peiffer
2007-03-21 9:54 ` [PATCH 2.6.21-rc4-mm1 2/4] Make futex_wait() use an hrtimer for timeout Pierre.Peiffer
@ 2007-03-21 9:54 ` Pierre.Peiffer
2007-03-21 9:54 ` [PATCH 2.6.21-rc4-mm1 4/4] sys_futex64 : allows 64bit futexes Pierre.Peiffer
3 siblings, 0 replies; 12+ messages in thread
From: Pierre.Peiffer @ 2007-03-21 9:54 UTC (permalink / raw)
To: akpm; +Cc: mingo, drepper, linux-kernel, jean-pierre.dion, Pierre Peiffer
[-- Attachment #1: futex-requeue-pi.diff --]
[-- Type: text/plain, Size: 26758 bytes --]
This patch provides the futex_requeue_pi functionality, which allows some
threads waiting on a normal futex to be requeued on the wait-queue of
a PI-futex.
This provides an optimization, already used for (normal) futexes, to be used with
the PI-futexes.
This optimization is currently used by the glibc in pthread_broadcast, when
using "normal" mutexes. With futex_requeue_pi, it can be used with PRIO_INHERIT
mutexes too.
Signed-off-by: Pierre Peiffer <pierre.peiffer@bull.net>
---
include/linux/futex.h | 9
kernel/futex.c | 541 +++++++++++++++++++++++++++++++++++++++++++-----
kernel/futex_compat.c | 3
kernel/rtmutex.c | 41 ---
kernel/rtmutex_common.h | 34 +++
5 files changed, 540 insertions(+), 88 deletions(-)
Index: b/include/linux/futex.h
===================================================================
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -16,6 +16,7 @@
#define FUTEX_LOCK_PI 6
#define FUTEX_UNLOCK_PI 7
#define FUTEX_TRYLOCK_PI 8
+#define FUTEX_CMP_REQUEUE_PI 9
/*
* Support for robust futexes: the kernel cleans up held futexes at
@@ -84,9 +85,14 @@ struct robust_list_head {
#define FUTEX_OWNER_DIED 0x40000000
/*
+ * Some processes have been requeued on this PI-futex
+ */
+#define FUTEX_WAITER_REQUEUED 0x20000000
+
+/*
* The rest of the robust-futex field is for the TID:
*/
-#define FUTEX_TID_MASK 0x3fffffff
+#define FUTEX_TID_MASK 0x0fffffff
/*
* This limit protects against a deliberately circular list.
@@ -110,6 +116,7 @@ handle_futex_death(u32 __user *uaddr, st
* We set bit 0 to indicate if it's an inode-based key.
*/
union futex_key {
+ u32 __user *uaddr;
struct {
unsigned long pgoff;
struct inode *inode;
Index: b/kernel/futex.c
===================================================================
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -53,6 +53,12 @@
#include "rtmutex_common.h"
+#ifdef CONFIG_DEBUG_RT_MUTEXES
+# include "rtmutex-debug.h"
+#else
+# include "rtmutex.h"
+#endif
+
#define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)
/*
@@ -102,6 +108,12 @@ struct futex_q {
/* Optional priority inheritance state: */
struct futex_pi_state *pi_state;
struct task_struct *task;
+
+ /*
+ * This waiter is used in case of requeue from a
+ * normal futex to a PI-futex
+ */
+ struct rt_mutex_waiter waiter;
};
/*
@@ -180,6 +192,9 @@ int get_futex_key(u32 __user *uaddr, uni
if (unlikely((vma->vm_flags & (VM_IO|VM_READ)) != VM_READ))
return (vma->vm_flags & VM_IO) ? -EPERM : -EACCES;
+ /* Save the user address in the key */
+ key->uaddr = uaddr;
+
/*
* Private mappings are handled in a simple way.
*
@@ -439,7 +454,8 @@ void exit_pi_state_list(struct task_stru
}
static int
-lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
+lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
+ union futex_key *key, struct futex_pi_state **ps)
{
struct futex_pi_state *pi_state = NULL;
struct futex_q *this, *next;
@@ -450,7 +466,7 @@ lookup_pi_state(u32 uval, struct futex_h
head = &hb->chain;
plist_for_each_entry_safe(this, next, head, list) {
- if (match_futex(&this->key, &me->key)) {
+ if (match_futex(&this->key, key)) {
/*
* Another waiter already exists - bump up
* the refcount and return its pi_state:
@@ -465,7 +481,7 @@ lookup_pi_state(u32 uval, struct futex_h
WARN_ON(!atomic_read(&pi_state->refcount));
atomic_inc(&pi_state->refcount);
- me->pi_state = pi_state;
+ *ps = pi_state;
return 0;
}
@@ -492,7 +508,7 @@ lookup_pi_state(u32 uval, struct futex_h
rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p);
/* Store the key for possible exit cleanups: */
- pi_state->key = me->key;
+ pi_state->key = *key;
spin_lock_irq(&p->pi_lock);
WARN_ON(!list_empty(&pi_state->list));
@@ -502,7 +518,7 @@ lookup_pi_state(u32 uval, struct futex_h
put_task_struct(p);
- me->pi_state = pi_state;
+ *ps = pi_state;
return 0;
}
@@ -562,6 +578,8 @@ static int wake_futex_pi(u32 __user *uad
*/
if (!(uval & FUTEX_OWNER_DIED)) {
newval = FUTEX_WAITERS | new_owner->pid;
+ /* Keep the FUTEX_WAITER_REQUEUED flag if it was set */
+ newval |= (uval & FUTEX_WAITER_REQUEUED);
pagefault_disable();
curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
@@ -666,6 +684,254 @@ out:
}
/*
+ * Called from futex_requeue_pi.
+ * Set FUTEX_WAITERS and FUTEX_WAITER_REQUEUED flags on the
+ * PI-futex value; search its associated pi_state if an owner exist
+ * or create a new one without owner.
+ */
+static inline int
+lookup_pi_state_for_requeue(u32 __user *uaddr, struct futex_hash_bucket *hb,
+ union futex_key *key,
+ struct futex_pi_state **pi_state)
+{
+ u32 curval, uval, newval;
+
+retry:
+ /*
+ * We can't handle a fault cleanly because we can't
+ * release the locks here. Simply return the fault.
+ */
+ if (get_futex_value_locked(&curval, uaddr))
+ return -EFAULT;
+
+ /* set the flags FUTEX_WAITERS and FUTEX_WAITER_REQUEUED */
+ if ((curval & (FUTEX_WAITERS | FUTEX_WAITER_REQUEUED))
+ != (FUTEX_WAITERS | FUTEX_WAITER_REQUEUED)) {
+ /*
+ * No waiters yet, we prepare the futex to have some waiters.
+ */
+
+ uval = curval;
+ newval = uval | FUTEX_WAITERS | FUTEX_WAITER_REQUEUED;
+
+ pagefault_disable();
+ curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
+ pagefault_enable();
+
+ if (unlikely(curval == -EFAULT))
+ return -EFAULT;
+ if (unlikely(curval != uval))
+ goto retry;
+ }
+
+ if (!(curval & FUTEX_TID_MASK)
+ || lookup_pi_state(curval, hb, key, pi_state)) {
+ /* the futex has no owner (yet) or the lookup failed:
+ allocate one pi_state without owner */
+
+ *pi_state = alloc_pi_state();
+
+ /* Already stores the key: */
+ (*pi_state)->key = *key;
+
+ /* init the mutex without owner */
+ __rt_mutex_init(&(*pi_state)->pi_mutex, NULL);
+ }
+
+ return 0;
+}
+
+/*
+ * Keep the first nr_wake waiter from futex1, wake up one,
+ * and requeue the next nr_requeue waiters following hashed on
+ * one physical page to another physical page (PI-futex uaddr2)
+ */
+static int futex_requeue_pi(u32 __user *uaddr1, u32 __user *uaddr2,
+ int nr_wake, int nr_requeue, u32 *cmpval)
+{
+ union futex_key key1, key2;
+ struct futex_hash_bucket *hb1, *hb2;
+ struct plist_head *head1;
+ struct futex_q *this, *next;
+ struct futex_pi_state *pi_state2 = NULL;
+ struct rt_mutex_waiter *waiter, *top_waiter = NULL;
+ struct rt_mutex *lock2 = NULL;
+ int ret, drop_count = 0;
+
+ if (refill_pi_state_cache())
+ return -ENOMEM;
+
+retry:
+ /*
+ * First take all the futex related locks:
+ */
+ down_read(¤t->mm->mmap_sem);
+
+ ret = get_futex_key(uaddr1, &key1);
+ if (unlikely(ret != 0))
+ goto out;
+ ret = get_futex_key(uaddr2, &key2);
+ if (unlikely(ret != 0))
+ goto out;
+
+ hb1 = hash_futex(&key1);
+ hb2 = hash_futex(&key2);
+
+ double_lock_hb(hb1, hb2);
+
+ if (likely(cmpval != NULL)) {
+ u32 curval;
+
+ ret = get_futex_value_locked(&curval, uaddr1);
+
+ if (unlikely(ret)) {
+ spin_unlock(&hb1->lock);
+ if (hb1 != hb2)
+ spin_unlock(&hb2->lock);
+
+ /*
+ * If we would have faulted, release mmap_sem, fault
+ * it in and start all over again.
+ */
+ up_read(¤t->mm->mmap_sem);
+
+ ret = get_user(curval, uaddr1);
+
+ if (!ret)
+ goto retry;
+
+ return ret;
+ }
+ if (curval != *cmpval) {
+ ret = -EAGAIN;
+ goto out_unlock;
+ }
+ }
+
+ head1 = &hb1->chain;
+ plist_for_each_entry_safe(this, next, head1, list) {
+ if (!match_futex (&this->key, &key1))
+ continue;
+ if (++ret <= nr_wake) {
+ wake_futex(this);
+ } else {
+ /*
+ * FIRST: get and set the pi_state
+ */
+ if (!pi_state2) {
+ int s;
+ /* do this only the first time we requeue someone */
+ s = lookup_pi_state_for_requeue(uaddr2, hb2,
+ &key2, &pi_state2);
+ if (s) {
+ ret = s;
+ goto out_unlock;
+ }
+
+ lock2 = &pi_state2->pi_mutex;
+ spin_lock(&lock2->wait_lock);
+
+ /* Save the top waiter of the wait_list */
+ if (rt_mutex_has_waiters(lock2))
+ top_waiter = rt_mutex_top_waiter(lock2);
+ } else
+ atomic_inc(&pi_state2->refcount);
+
+
+ this->pi_state = pi_state2;
+
+ /*
+ * SECOND: requeue futex_q to the correct hashbucket
+ */
+
+ /*
+ * If key1 and key2 hash to the same bucket, no need to
+ * requeue.
+ */
+ if (likely(head1 != &hb2->chain)) {
+ plist_del(&this->list, &hb1->chain);
+ plist_add(&this->list, &hb2->chain);
+ this->lock_ptr = &hb2->lock;
+#ifdef CONFIG_DEBUG_PI_LIST
+ this->list.plist.lock = &hb2->lock;
+#endif
+ }
+ this->key = key2;
+ get_futex_key_refs(&key2);
+ drop_count++;
+
+
+ /*
+ * THIRD: queue it to lock2
+ */
+ spin_lock_irq(&this->task->pi_lock);
+ waiter = &this->waiter;
+ waiter->task = this->task;
+ waiter->lock = lock2;
+ plist_node_init(&waiter->list_entry, this->task->prio);
+ plist_node_init(&waiter->pi_list_entry, this->task->prio);
+ plist_add(&waiter->list_entry, &lock2->wait_list);
+ this->task->pi_blocked_on = waiter;
+ spin_unlock_irq(&this->task->pi_lock);
+
+ if (ret - nr_wake >= nr_requeue)
+ break;
+ }
+ }
+
+ /* If we've requeued some tasks and the top_waiter of the rt_mutex
+ has changed, we must adjust the priority of the owner, if any */
+ if (drop_count) {
+ struct task_struct *owner = rt_mutex_owner(lock2);
+ if (owner &&
+ (top_waiter != (waiter = rt_mutex_top_waiter(lock2)))) {
+ int chain_walk = 0;
+
+ spin_lock_irq(&owner->pi_lock);
+ if (top_waiter)
+ plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters);
+ else
+ /*
+ * There were no waiters before the requeue,
+ * the flag must be updated
+ */
+ mark_rt_mutex_waiters(lock2);
+
+ plist_add(&waiter->pi_list_entry, &owner->pi_waiters);
+ __rt_mutex_adjust_prio(owner);
+ if (owner->pi_blocked_on) {
+ chain_walk = 1;
+ get_task_struct(owner);
+ }
+
+ spin_unlock_irq(&owner->pi_lock);
+ spin_unlock(&lock2->wait_lock);
+
+ if (chain_walk)
+ rt_mutex_adjust_prio_chain(owner, 0, lock2, NULL,
+ current);
+ } else {
+ /* No owner or the top_waiter does not change */
+ mark_rt_mutex_waiters(lock2);
+ spin_unlock(&lock2->wait_lock);
+ }
+ }
+
+out_unlock:
+ spin_unlock(&hb1->lock);
+ if (hb1 != hb2)
+ spin_unlock(&hb2->lock);
+
+ /* drop_futex_key_refs() must be called outside the spinlocks. */
+ while (--drop_count >= 0)
+ drop_futex_key_refs(&key1);
+
+out:
+ up_read(¤t->mm->mmap_sem);
+ return ret;
+}
+
+/*
* Wake up all waiters hashed on the physical page that is mapped
* to this virtual address:
*/
@@ -984,9 +1250,10 @@ static int unqueue_me(struct futex_q *q)
/*
* PI futexes can not be requeued and must remove themself from the
- * hash bucket. The hash bucket lock is held on entry and dropped here.
+ * hash bucket. The hash bucket lock (i.e. lock_ptr) is held on entry
+ * and dropped here.
*/
-static void unqueue_me_pi(struct futex_q *q, struct futex_hash_bucket *hb)
+static void unqueue_me_pi(struct futex_q *q)
{
WARN_ON(plist_node_empty(&q->list));
plist_del(&q->list, &q->list.plist);
@@ -995,11 +1262,65 @@ static void unqueue_me_pi(struct futex_q
free_pi_state(q->pi_state);
q->pi_state = NULL;
- spin_unlock(&hb->lock);
+ spin_unlock(q->lock_ptr);
drop_futex_key_refs(&q->key);
}
+/*
+ * Fixup the pi_state owner with current.
+ *
+ * The cur->mm semaphore must be held, it is released at return of this
+ * function.
+ */
+static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
+ struct futex_hash_bucket *hb,
+ struct task_struct *curr)
+{
+ u32 newtid = curr->pid | FUTEX_WAITERS;
+ struct futex_pi_state *pi_state = q->pi_state;
+ u32 uval, curval, newval;
+ int ret;
+
+ /* Owner died? */
+ if (pi_state->owner != NULL) {
+ spin_lock_irq(&pi_state->owner->pi_lock);
+ WARN_ON(list_empty(&pi_state->list));
+ list_del_init(&pi_state->list);
+ spin_unlock_irq(&pi_state->owner->pi_lock);
+ } else
+ newtid |= FUTEX_OWNER_DIED;
+
+ pi_state->owner = curr;
+
+ spin_lock_irq(&curr->pi_lock);
+ WARN_ON(!list_empty(&pi_state->list));
+ list_add(&pi_state->list, &curr->pi_state_list);
+ spin_unlock_irq(&curr->pi_lock);
+
+ /* Unqueue and drop the lock */
+ unqueue_me_pi(q);
+ up_read(&curr->mm->mmap_sem);
+ /*
+ * We own it, so we have to replace the pending owner
+ * TID. This must be atomic as we have to preserve the
+ * owner died bit here.
+ */
+ ret = get_user(uval, uaddr);
+ while (!ret) {
+ newval = (uval & FUTEX_OWNER_DIED) | newtid;
+ newval |= (uval & FUTEX_WAITER_REQUEUED);
+ curval = futex_atomic_cmpxchg_inatomic(uaddr,
+ uval, newval);
+ if (curval == -EFAULT)
+ ret = -EFAULT;
+ if (curval == uval)
+ break;
+ uval = curval;
+ }
+ return ret;
+}
+
static long futex_wait_restart(struct restart_block *restart);
static int futex_wait(u32 __user *uaddr, u32 val, ktime_t *abs_time)
{
@@ -1009,7 +1330,7 @@ static int futex_wait(u32 __user *uaddr,
struct futex_q q;
u32 uval;
int ret;
- struct hrtimer_sleeper t;
+ struct hrtimer_sleeper t, *to = NULL;
int rem = 0;
q.pi_state = NULL;
@@ -1063,6 +1384,14 @@ static int futex_wait(u32 __user *uaddr,
if (uval != val)
goto out_unlock_release_sem;
+ /*
+ * This rt_mutex_waiter structure is prepared here and will
+ * be used only if this task is requeued from a normal futex to
+ * a PI-futex with futex_requeue_pi.
+ */
+ debug_rt_mutex_init_waiter(&q.waiter);
+ q.waiter.task = NULL;
+
/* Only actually queue if *uaddr contained val. */
__queue_me(&q, hb);
@@ -1092,6 +1421,7 @@ static int futex_wait(u32 __user *uaddr,
if (!abs_time)
schedule();
else {
+ to = &t;
hrtimer_init(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
hrtimer_init_sleeper(&t, current);
t.timer.expires = *abs_time;
@@ -1119,6 +1449,66 @@ static int futex_wait(u32 __user *uaddr,
* we are the only user of it.
*/
+ if (q.pi_state) {
+ /*
+ * We were woken but have been requeued on a PI-futex.
+ * We have to complete the lock acquisition by taking
+ * the rtmutex.
+ */
+
+ struct rt_mutex *lock = &q.pi_state->pi_mutex;
+
+ spin_lock(&lock->wait_lock);
+ if (unlikely(q.waiter.task)) {
+ remove_waiter(lock, &q.waiter);
+ }
+ spin_unlock(&lock->wait_lock);
+
+ if (rem)
+ ret = -ETIMEDOUT;
+ else
+ ret = rt_mutex_timed_lock(lock, to, 1);
+
+ down_read(&curr->mm->mmap_sem);
+ spin_lock(q.lock_ptr);
+
+ /*
+ * Got the lock. We might not be the anticipated owner if we
+ * did a lock-steal - fix up the PI-state in that case.
+ */
+ if (!ret && q.pi_state->owner != curr) {
+ /*
+ * We MUST play with the futex we were requeued on,
+ * NOT the current futex.
+ * We can retrieve it from the key of the pi_state
+ */
+ uaddr = q.pi_state->key.uaddr;
+
+ /* mmap_sem and hash_bucket lock are unlocked at
+ return of this function */
+ ret = fixup_pi_state_owner(uaddr, &q, hb, curr);
+ } else {
+ /*
+ * Catch the rare case, where the lock was released
+ * when we were on the way back before we locked
+ * the hash bucket.
+ */
+ if (ret && q.pi_state->owner == curr) {
+ if (rt_mutex_trylock(&q.pi_state->pi_mutex))
+ ret = 0;
+ }
+ /* Unqueue and drop the lock */
+ unqueue_me_pi(&q);
+ up_read(&curr->mm->mmap_sem);
+ }
+
+ debug_rt_mutex_free_waiter(&q.waiter);
+
+ return ret;
+ }
+
+ debug_rt_mutex_free_waiter(&q.waiter);
+
/* If we were woken (and unqueued), we succeeded, whatever. */
if (!unqueue_me(&q))
return 0;
@@ -1161,6 +1551,51 @@ static long futex_wait_restart(struct re
}
+static void set_pi_futex_owner(struct futex_hash_bucket *hb,
+ union futex_key *key, struct task_struct *p)
+{
+ struct plist_head *head;
+ struct futex_q *this, *next;
+ struct futex_pi_state *pi_state = NULL;
+ struct rt_mutex *lock;
+
+ /* Search a waiter that should already exist */
+
+ head = &hb->chain;
+
+ plist_for_each_entry_safe(this, next, head, list) {
+ if (match_futex (&this->key, key)) {
+ pi_state = this->pi_state;
+ break;
+ }
+ }
+
+ BUG_ON(!pi_state);
+
+ /* set p as pi_state's owner */
+ lock = &pi_state->pi_mutex;
+
+ spin_lock(&lock->wait_lock);
+ spin_lock_irq(&p->pi_lock);
+
+ list_add(&pi_state->list, &p->pi_state_list);
+ pi_state->owner = p;
+
+
+ /* set p as pi_mutex's owner */
+ debug_rt_mutex_proxy_lock(lock, p);
+ WARN_ON(rt_mutex_owner(lock));
+ rt_mutex_set_owner(lock, p, 0);
+ rt_mutex_deadlock_account_lock(lock, p);
+
+ plist_add(&rt_mutex_top_waiter(lock)->pi_list_entry,
+ &p->pi_waiters);
+ __rt_mutex_adjust_prio(p);
+
+ spin_unlock_irq(&p->pi_lock);
+ spin_unlock(&lock->wait_lock);
+}
+
/*
* Userspace tried a 0 -> TID atomic transition of the futex value
* and failed. The kernel side here does the whole locking operation:
@@ -1175,7 +1610,7 @@ static int futex_lock_pi(u32 __user *uad
struct futex_hash_bucket *hb;
u32 uval, newval, curval;
struct futex_q q;
- int ret, attempt = 0;
+ int ret, lock_held, attempt = 0;
if (refill_pi_state_cache())
return -ENOMEM;
@@ -1198,6 +1633,8 @@ static int futex_lock_pi(u32 __user *uad
hb = queue_lock(&q, -1, NULL);
retry_locked:
+ lock_held = 0;
+
/*
* To avoid races, we attempt to take the lock here again
* (by doing a 0 -> TID atomic cmpxchg), while holding all
@@ -1216,7 +1653,16 @@ static int futex_lock_pi(u32 __user *uad
if (unlikely((curval & FUTEX_TID_MASK) == current->pid)) {
if (!detect && 0)
force_sig(SIGKILL, current);
- ret = -EDEADLK;
+ /*
+ * Normally, this check is done in user space.
+ * In case of requeue, the owner may attempt to lock this futex,
+ * even if the ownership has already been given by the previous
+ * waker.
+ * In the usual case, this is a case of deadlock, but not in the case
+ * of REQUEUE_PI.
+ */
+ if (!(curval & FUTEX_WAITER_REQUEUED))
+ ret = -EDEADLK;
goto out_unlock_release_sem;
}
@@ -1228,7 +1674,18 @@ static int futex_lock_pi(u32 __user *uad
goto out_unlock_release_sem;
uval = curval;
- newval = uval | FUTEX_WAITERS;
+ /*
+ * In case of a requeue, check if there already is an owner
+ * If not, just take the futex.
+ */
+ if ((curval & FUTEX_WAITER_REQUEUED) && !(curval & FUTEX_TID_MASK)) {
+ /* set current as futex owner */
+ newval = curval | current->pid;
+ lock_held = 1;
+ } else
+ /* Set the WAITERS flag, so the owner will know it has someone
+ to wake at next unlock */
+ newval = curval | FUTEX_WAITERS;
pagefault_disable();
curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
@@ -1239,11 +1696,16 @@ static int futex_lock_pi(u32 __user *uad
if (unlikely(curval != uval))
goto retry_locked;
+ if (lock_held) {
+ set_pi_futex_owner(hb, &q.key, curr);
+ goto out_unlock_release_sem;
+ }
+
/*
* We dont have the lock. Look up the PI state (or create it if
* we are the first waiter):
*/
- ret = lookup_pi_state(uval, hb, &q);
+ ret = lookup_pi_state(uval, hb, &q.key, &q.pi_state);
if (unlikely(ret)) {
/*
@@ -1306,45 +1768,10 @@ static int futex_lock_pi(u32 __user *uad
* Got the lock. We might not be the anticipated owner if we
* did a lock-steal - fix up the PI-state in that case.
*/
- if (!ret && q.pi_state->owner != curr) {
- u32 newtid = current->pid | FUTEX_WAITERS;
-
- /* Owner died? */
- if (q.pi_state->owner != NULL) {
- spin_lock_irq(&q.pi_state->owner->pi_lock);
- WARN_ON(list_empty(&q.pi_state->list));
- list_del_init(&q.pi_state->list);
- spin_unlock_irq(&q.pi_state->owner->pi_lock);
- } else
- newtid |= FUTEX_OWNER_DIED;
-
- q.pi_state->owner = current;
-
- spin_lock_irq(&current->pi_lock);
- WARN_ON(!list_empty(&q.pi_state->list));
- list_add(&q.pi_state->list, &current->pi_state_list);
- spin_unlock_irq(&current->pi_lock);
-
- /* Unqueue and drop the lock */
- unqueue_me_pi(&q, hb);
- up_read(&curr->mm->mmap_sem);
- /*
- * We own it, so we have to replace the pending owner
- * TID. This must be atomic as we have preserve the
- * owner died bit here.
- */
- ret = get_user(uval, uaddr);
- while (!ret) {
- newval = (uval & FUTEX_OWNER_DIED) | newtid;
- curval = futex_atomic_cmpxchg_inatomic(uaddr,
- uval, newval);
- if (curval == -EFAULT)
- ret = -EFAULT;
- if (curval == uval)
- break;
- uval = curval;
- }
- } else {
+ if (!ret && q.pi_state->owner != curr)
+ /* mmap_sem is unlocked at return of this function */
+ ret = fixup_pi_state_owner(uaddr, &q, hb, curr);
+ else {
/*
* Catch the rare case, where the lock was released
* when we were on the way back before we locked
@@ -1355,7 +1782,7 @@ static int futex_lock_pi(u32 __user *uad
ret = 0;
}
/* Unqueue and drop the lock */
- unqueue_me_pi(&q, hb);
+ unqueue_me_pi(&q);
up_read(&curr->mm->mmap_sem);
}
@@ -1724,6 +2151,8 @@ retry:
* userspace.
*/
mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
+ /* Also keep the FUTEX_WAITER_REQUEUED flag if set */
+ mval |= (uval & FUTEX_WAITER_REQUEUED);
nval = futex_atomic_cmpxchg_inatomic(uaddr, uval, mval);
if (nval == -EFAULT)
@@ -1854,6 +2283,9 @@ long do_futex(u32 __user *uaddr, int op,
case FUTEX_TRYLOCK_PI:
ret = futex_lock_pi(uaddr, 0, timeout, 1);
break;
+ case FUTEX_CMP_REQUEUE_PI:
+ ret = futex_requeue_pi(uaddr, uaddr2, val, val2, &val3);
+ break;
default:
ret = -ENOSYS;
}
@@ -1881,7 +2313,8 @@ asmlinkage long sys_futex(u32 __user *ua
/*
* requeue parameter in 'utime' if op == FUTEX_REQUEUE.
*/
- if (op == FUTEX_REQUEUE || op == FUTEX_CMP_REQUEUE)
+ if (op == FUTEX_REQUEUE || op == FUTEX_CMP_REQUEUE
+ || op == FUTEX_CMP_REQUEUE_PI)
val2 = (u32) (unsigned long) utime;
return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
Index: b/kernel/rtmutex.c
===================================================================
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -56,7 +56,7 @@
* state.
*/
-static void
+void
rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner,
unsigned long mask)
{
@@ -81,29 +81,6 @@ static void fixup_rt_mutex_waiters(struc
}
/*
- * We can speed up the acquire/release, if the architecture
- * supports cmpxchg and if there's no debugging state to be set up
- */
-#if defined(__HAVE_ARCH_CMPXCHG) && !defined(CONFIG_DEBUG_RT_MUTEXES)
-# define rt_mutex_cmpxchg(l,c,n) (cmpxchg(&l->owner, c, n) == c)
-static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
-{
- unsigned long owner, *p = (unsigned long *) &lock->owner;
-
- do {
- owner = *p;
- } while (cmpxchg(p, owner, owner | RT_MUTEX_HAS_WAITERS) != owner);
-}
-#else
-# define rt_mutex_cmpxchg(l,c,n) (0)
-static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
-{
- lock->owner = (struct task_struct *)
- ((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS);
-}
-#endif
-
-/*
* Calculate task priority from the waiter list priority
*
* Return task->normal_prio when the waiter list is empty or when
@@ -123,7 +100,7 @@ int rt_mutex_getprio(struct task_struct
*
* This can be both boosting and unboosting. task->pi_lock must be held.
*/
-static void __rt_mutex_adjust_prio(struct task_struct *task)
+void __rt_mutex_adjust_prio(struct task_struct *task)
{
int prio = rt_mutex_getprio(task);
@@ -159,11 +136,11 @@ int max_lock_depth = 1024;
* Decreases task's usage by one - may thus free the task.
* Returns 0 or -EDEADLK.
*/
-static int rt_mutex_adjust_prio_chain(struct task_struct *task,
- int deadlock_detect,
- struct rt_mutex *orig_lock,
- struct rt_mutex_waiter *orig_waiter,
- struct task_struct *top_task)
+int rt_mutex_adjust_prio_chain(struct task_struct *task,
+ int deadlock_detect,
+ struct rt_mutex *orig_lock,
+ struct rt_mutex_waiter *orig_waiter,
+ struct task_struct *top_task)
{
struct rt_mutex *lock;
struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
@@ -524,8 +501,8 @@ static void wakeup_next_waiter(struct rt
*
* Must be called with lock->wait_lock held
*/
-static void remove_waiter(struct rt_mutex *lock,
- struct rt_mutex_waiter *waiter)
+void remove_waiter(struct rt_mutex *lock,
+ struct rt_mutex_waiter *waiter)
{
int first = (waiter == rt_mutex_top_waiter(lock));
struct task_struct *owner = rt_mutex_owner(lock);
Index: b/kernel/rtmutex_common.h
===================================================================
--- a/kernel/rtmutex_common.h
+++ b/kernel/rtmutex_common.h
@@ -113,6 +113,29 @@ static inline unsigned long rt_mutex_own
}
/*
+ * We can speed up the acquire/release, if the architecture
+ * supports cmpxchg and if there's no debugging state to be set up
+ */
+#if defined(__HAVE_ARCH_CMPXCHG) && !defined(CONFIG_DEBUG_RT_MUTEXES)
+# define rt_mutex_cmpxchg(l,c,n) (cmpxchg(&l->owner, c, n) == c)
+static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
+{
+ unsigned long owner, *p = (unsigned long *) &lock->owner;
+
+ do {
+ owner = *p;
+ } while (cmpxchg(p, owner, owner | RT_MUTEX_HAS_WAITERS) != owner);
+}
+#else
+# define rt_mutex_cmpxchg(l,c,n) (0)
+static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
+{
+ lock->owner = (struct task_struct *)
+ ((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS);
+}
+#endif
+
+/*
* PI-futex support (proxy locking functions, etc.):
*/
extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock);
@@ -120,4 +143,15 @@ extern void rt_mutex_init_proxy_locked(s
struct task_struct *proxy_owner);
extern void rt_mutex_proxy_unlock(struct rt_mutex *lock,
struct task_struct *proxy_owner);
+
+extern void rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner,
+ unsigned long mask);
+extern void __rt_mutex_adjust_prio(struct task_struct *task);
+extern int rt_mutex_adjust_prio_chain(struct task_struct *task,
+ int deadlock_detect,
+ struct rt_mutex *orig_lock,
+ struct rt_mutex_waiter *orig_waiter,
+ struct task_struct *top_task);
+extern void remove_waiter(struct rt_mutex *lock,
+ struct rt_mutex_waiter *waiter);
#endif
Index: b/kernel/futex_compat.c
===================================================================
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -154,7 +154,8 @@ asmlinkage long compat_sys_futex(u32 __u
t = ktime_add(ktime_get(), timespec_to_ktime(ts));
tp = &t;
}
- if (op == FUTEX_REQUEUE || op == FUTEX_CMP_REQUEUE)
+ if (op == FUTEX_REQUEUE || op == FUTEX_CMP_REQUEUE
+ || op == FUTEX_CMP_REQUEUE_PI)
val2 = (int) (unsigned long) utime;
return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
--
Pierre Peiffer
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH 2.6.21-rc4-mm1 4/4] sys_futex64 : allows 64bit futexes
2007-03-21 9:54 [PATCH 2.6.21-rc4-mm1 0/4] Futexes functionalities and improvements Pierre.Peiffer
` (2 preceding siblings ...)
2007-03-21 9:54 ` [PATCH 2.6.21-rc4-mm1 3/4] futex_requeue_pi optimization Pierre.Peiffer
@ 2007-03-21 9:54 ` Pierre.Peiffer
2007-03-26 11:20 ` Andrew Morton
2007-03-27 11:07 ` Jakub Jelinek
3 siblings, 2 replies; 12+ messages in thread
From: Pierre.Peiffer @ 2007-03-21 9:54 UTC (permalink / raw)
To: akpm; +Cc: mingo, drepper, linux-kernel, jean-pierre.dion, Pierre Peiffer
[-- Attachment #1: futex-64bit.patch --]
[-- Type: text/plain, Size: 27462 bytes --]
This last patch is an adaptation of the sys_futex64 syscall provided in -rt
patch (originally written by Ingo Molnar). It allows the use of 64-bit futex.
I have re-worked most of the code to avoid the duplication of the code.
It does not provide the functionality for all architectures (only for x64 for now).
Signed-off-by: Pierre Peiffer <pierre.peiffer@bull.net>
---
include/asm-x86_64/futex.h | 113 ++++++++++++++++++
include/asm-x86_64/unistd.h | 4
include/linux/futex.h | 9 -
include/linux/syscalls.h | 3
kernel/futex.c | 264 +++++++++++++++++++++++++++++++-------------
kernel/futex_compat.c | 3
kernel/sys_ni.c | 1
7 files changed, 313 insertions(+), 84 deletions(-)
Index: b/include/asm-x86_64/futex.h
===================================================================
--- a/include/asm-x86_64/futex.h
+++ b/include/asm-x86_64/futex.h
@@ -41,6 +41,39 @@
"=&r" (tem) \
: "r" (oparg), "i" (-EFAULT), "m" (*uaddr), "1" (0))
+#define __futex_atomic_op1_64(insn, ret, oldval, uaddr, oparg) \
+ __asm__ __volatile ( \
+"1: " insn "\n" \
+"2: .section .fixup,\"ax\"\n\
+3: movq %3, %1\n\
+ jmp 2b\n\
+ .previous\n\
+ .section __ex_table,\"a\"\n\
+ .align 8\n\
+ .quad 1b,3b\n\
+ .previous" \
+ : "=r" (oldval), "=r" (ret), "=m" (*uaddr) \
+ : "i" (-EFAULT), "m" (*uaddr), "0" (oparg), "1" (0))
+
+#define __futex_atomic_op2_64(insn, ret, oldval, uaddr, oparg) \
+ __asm__ __volatile ( \
+"1: movq %2, %0\n\
+ movq %0, %3\n" \
+ insn "\n" \
+"2: " LOCK_PREFIX "cmpxchgq %3, %2\n\
+ jnz 1b\n\
+3: .section .fixup,\"ax\"\n\
+4: movq %5, %1\n\
+ jmp 3b\n\
+ .previous\n\
+ .section __ex_table,\"a\"\n\
+ .align 8\n\
+ .quad 1b,4b,2b,4b\n\
+ .previous" \
+ : "=&a" (oldval), "=&r" (ret), "=m" (*uaddr), \
+ "=&r" (tem) \
+ : "r" (oparg), "i" (-EFAULT), "m" (*uaddr), "1" (0))
+
static inline int
futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
{
@@ -95,6 +128,60 @@ futex_atomic_op_inuser (int encoded_op,
}
static inline int
+futex_atomic_op_inuser64 (int encoded_op, u64 __user *uaddr)
+{
+ int op = (encoded_op >> 28) & 7;
+ int cmp = (encoded_op >> 24) & 15;
+ u64 oparg = (encoded_op << 8) >> 20;
+ u64 cmparg = (encoded_op << 20) >> 20;
+ u64 oldval = 0, ret, tem;
+
+ if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
+ oparg = 1 << oparg;
+
+ if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u64)))
+ return -EFAULT;
+
+ inc_preempt_count();
+
+ switch (op) {
+ case FUTEX_OP_SET:
+ __futex_atomic_op1_64("xchgq %0, %2", ret, oldval, uaddr, oparg);
+ break;
+ case FUTEX_OP_ADD:
+ __futex_atomic_op1_64(LOCK_PREFIX "xaddq %0, %2", ret, oldval,
+ uaddr, oparg);
+ break;
+ case FUTEX_OP_OR:
+ __futex_atomic_op2_64("orq %4, %3", ret, oldval, uaddr, oparg);
+ break;
+ case FUTEX_OP_ANDN:
+ __futex_atomic_op2_64("andq %4, %3", ret, oldval, uaddr, ~oparg);
+ break;
+ case FUTEX_OP_XOR:
+ __futex_atomic_op2_64("xorq %4, %3", ret, oldval, uaddr, oparg);
+ break;
+ default:
+ ret = -ENOSYS;
+ }
+
+ dec_preempt_count();
+
+ if (!ret) {
+ switch (cmp) {
+ case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
+ case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
+ case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
+ case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
+ case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
+ case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
+ default: ret = -ENOSYS;
+ }
+ }
+ return ret;
+}
+
+static inline int
futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
{
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
@@ -121,5 +208,31 @@ futex_atomic_cmpxchg_inatomic(int __user
return oldval;
}
+static inline u64
+futex_atomic_cmpxchg_inatomic64(u64 __user *uaddr, u64 oldval, u64 newval)
+{
+ if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u64)))
+ return -EFAULT;
+
+ __asm__ __volatile__(
+ "1: " LOCK_PREFIX "cmpxchgq %3, %1 \n"
+
+ "2: .section .fixup, \"ax\" \n"
+ "3: mov %2, %0 \n"
+ " jmp 2b \n"
+ " .previous \n"
+
+ " .section __ex_table, \"a\" \n"
+ " .align 8 \n"
+ " .quad 1b,3b \n"
+ " .previous \n"
+
+ : "=a" (oldval), "=m" (*uaddr)
+ : "i" (-EFAULT), "r" (newval), "0" (oldval)
+ : "memory"
+ );
+
+ return oldval;
+}
#endif
#endif
Index: b/include/asm-x86_64/unistd.h
===================================================================
--- a/include/asm-x86_64/unistd.h
+++ b/include/asm-x86_64/unistd.h
@@ -619,8 +619,10 @@ __SYSCALL(__NR_sync_file_range, sys_sync
__SYSCALL(__NR_vmsplice, sys_vmsplice)
#define __NR_move_pages 279
__SYSCALL(__NR_move_pages, sys_move_pages)
+#define __NR_futex64 280
+__SYSCALL(__NR_futex64, sys_futex64)
-#define __NR_syscall_max __NR_move_pages
+#define __NR_syscall_max __NR_futex64
#ifndef __NO_STUBS
#define __ARCH_WANT_OLD_READDIR
Index: b/include/linux/syscalls.h
===================================================================
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -178,6 +178,9 @@ asmlinkage long sys_set_tid_address(int
asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val,
struct timespec __user *utime, u32 __user *uaddr2,
u32 val3);
+asmlinkage long sys_futex64(u64 __user *uaddr, int op, u64 val,
+ struct timespec __user *utime, u64 __user *uaddr2,
+ u64 val3);
asmlinkage long sys_init_module(void __user *umod, unsigned long len,
const char __user *uargs);
Index: b/kernel/futex.c
===================================================================
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -61,6 +61,44 @@
#define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)
+#ifdef CONFIG_64BIT
+static inline unsigned long
+futex_cmpxchg_inatomic(unsigned long __user *uaddr, unsigned long oldval,
+ unsigned long newval, int futex64)
+{
+ if (futex64)
+ return futex_atomic_cmpxchg_inatomic64((u64 __user *)uaddr,
+ oldval, newval);
+ else {
+ u32 ov = oldval, nv = newval;
+ return futex_atomic_cmpxchg_inatomic((int __user *)uaddr, ov,
+ nv);
+ }
+}
+
+static inline int
+futex_get_user(unsigned long *val, unsigned long __user *uaddr, int futex64)
+{
+ int ret;
+
+ if (futex64)
+ ret = get_user(*val, uaddr);
+ else {
+ u32 __user *addr = (u32 __user *)uaddr;
+
+ ret = get_user(*val, addr);
+ }
+ return ret;
+}
+
+#else
+#define futex_cmpxchg_inatomic(uaddr, oldval, newval, futex64) \
+ futex_atomic_cmpxchg_inatomic((u32*)uaddr, oldval, newval)
+
+#define futex_get_user(val, uaddr, futex64) get_user(*val, uaddr)
+
+#endif
+
/*
* Priority Inheritance state:
*/
@@ -140,6 +178,7 @@ static struct futex_hash_bucket *hash_fu
return &futex_queues[hash & ((1 << FUTEX_HASHBITS)-1)];
}
+
/*
* Return 1 if two futex_keys are equal, 0 otherwise.
*/
@@ -162,7 +201,7 @@ static inline int match_futex(union fute
*
* Should be called with &current->mm->mmap_sem but NOT any spinlocks.
*/
-int get_futex_key(u32 __user *uaddr, union futex_key *key)
+int get_futex_key(void __user *uaddr, union futex_key *key)
{
unsigned long address = (unsigned long)uaddr;
struct mm_struct *mm = current->mm;
@@ -271,13 +310,30 @@ void drop_futex_key_refs(union futex_key
}
EXPORT_SYMBOL_GPL(drop_futex_key_refs);
-static inline int get_futex_value_locked(u32 *dest, u32 __user *from)
+static inline int
+get_futex_value_locked(unsigned long *dest, unsigned long __user *from,
+ int futex64)
{
int ret;
+#ifdef CONFIG_64BIT
+ if (futex64) {
+ pagefault_disable();
+ ret = __copy_from_user_inatomic(dest, from, sizeof(u64));
+ pagefault_enable();
+ } else {
+ u32 d;
+ pagefault_disable();
+ ret = __copy_from_user_inatomic(&d, from, sizeof(u32));
+ pagefault_enable();
+
+ *dest = d;
+ }
+#else
pagefault_disable();
ret = __copy_from_user_inatomic(dest, from, sizeof(u32));
pagefault_enable();
+#endif
return ret ? -EFAULT : 0;
}
@@ -550,11 +606,12 @@ static void wake_futex(struct futex_q *q
q->lock_ptr = NULL;
}
-static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
+static int wake_futex_pi(unsigned long __user *uaddr, unsigned long uval,
+ struct futex_q *this, int futex64)
{
struct task_struct *new_owner;
struct futex_pi_state *pi_state = this->pi_state;
- u32 curval, newval;
+ unsigned long curval, newval;
if (!pi_state)
return -EINVAL;
@@ -582,7 +639,7 @@ static int wake_futex_pi(u32 __user *uad
newval |= (uval & FUTEX_WAITER_REQUEUED);
pagefault_disable();
- curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
+ curval = futex_cmpxchg_inatomic(uaddr, uval, newval, futex64);
pagefault_enable();
if (curval == -EFAULT)
return -EFAULT;
@@ -607,16 +664,17 @@ static int wake_futex_pi(u32 __user *uad
return 0;
}
-static int unlock_futex_pi(u32 __user *uaddr, u32 uval)
+static int unlock_futex_pi(unsigned long __user *uaddr, unsigned long uval,
+ int futex64)
{
- u32 oldval;
+ unsigned long oldval;
/*
* There is no waiter, so we unlock the futex. The owner died
* bit has not to be preserved here. We are the owner:
*/
pagefault_disable();
- oldval = futex_atomic_cmpxchg_inatomic(uaddr, uval, 0);
+ oldval = futex_cmpxchg_inatomic(uaddr, uval, 0, futex64);
pagefault_enable();
if (oldval == -EFAULT)
@@ -647,7 +705,7 @@ double_lock_hb(struct futex_hash_bucket
* Wake up all waiters hashed on the physical page that is mapped
* to this virtual address:
*/
-static int futex_wake(u32 __user *uaddr, int nr_wake)
+static int futex_wake(unsigned long __user *uaddr, int nr_wake)
{
struct futex_hash_bucket *hb;
struct futex_q *this, *next;
@@ -690,18 +748,19 @@ out:
* or create a new one without owner.
*/
static inline int
-lookup_pi_state_for_requeue(u32 __user *uaddr, struct futex_hash_bucket *hb,
+lookup_pi_state_for_requeue(unsigned long __user *uaddr,
+ struct futex_hash_bucket *hb,
union futex_key *key,
- struct futex_pi_state **pi_state)
+ struct futex_pi_state **pi_state, int futex64)
{
- u32 curval, uval, newval;
+ unsigned long curval, uval, newval;
retry:
/*
* We can't handle a fault cleanly because we can't
* release the locks here. Simply return the fault.
*/
- if (get_futex_value_locked(&curval, uaddr))
+ if (get_futex_value_locked(&curval, uaddr, futex64))
return -EFAULT;
/* set the flags FUTEX_WAITERS and FUTEX_WAITER_REQUEUED */
@@ -715,7 +774,7 @@ retry:
newval = uval | FUTEX_WAITERS | FUTEX_WAITER_REQUEUED;
pagefault_disable();
- curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
+ curval = futex_cmpxchg_inatomic(uaddr, uval, newval, futex64);
pagefault_enable();
if (unlikely(curval == -EFAULT))
@@ -746,8 +805,9 @@ retry:
* and requeue the next nr_requeue waiters following hashed on
* one physical page to another physical page (PI-futex uaddr2)
*/
-static int futex_requeue_pi(u32 __user *uaddr1, u32 __user *uaddr2,
- int nr_wake, int nr_requeue, u32 *cmpval)
+static int
+futex_requeue_pi(unsigned long __user *uaddr1, unsigned long __user *uaddr2,
+ int nr_wake, int nr_requeue, unsigned long *cmpval, int futex64)
{
union futex_key key1, key2;
struct futex_hash_bucket *hb1, *hb2;
@@ -780,9 +840,9 @@ retry:
double_lock_hb(hb1, hb2);
if (likely(cmpval != NULL)) {
- u32 curval;
+ unsigned long curval;
- ret = get_futex_value_locked(&curval, uaddr1);
+ ret = get_futex_value_locked(&curval, uaddr1, futex64);
if (unlikely(ret)) {
spin_unlock(&hb1->lock);
@@ -795,7 +855,7 @@ retry:
*/
up_read(&current->mm->mmap_sem);
- ret = get_user(curval, uaddr1);
+ ret = futex_get_user(&curval, uaddr1, futex64);
if (!ret)
goto retry;
@@ -822,7 +882,8 @@ retry:
int s;
/* do this only the first time we requeue someone */
s = lookup_pi_state_for_requeue(uaddr2, hb2,
- &key2, &pi_state2);
+ &key2, &pi_state2,
+ futex64);
if (s) {
ret = s;
goto out_unlock;
@@ -936,8 +997,8 @@ out:
* to this virtual address:
*/
static int
-futex_wake_op(u32 __user *uaddr1, u32 __user *uaddr2,
- int nr_wake, int nr_wake2, int op)
+futex_wake_op(unsigned long __user *uaddr1, unsigned long __user *uaddr2,
+ int nr_wake, int nr_wake2, int op, int futex64)
{
union futex_key key1, key2;
struct futex_hash_bucket *hb1, *hb2;
@@ -961,9 +1022,16 @@ retryfull:
retry:
double_lock_hb(hb1, hb2);
- op_ret = futex_atomic_op_inuser(op, uaddr2);
+#ifdef CONFIG_64BIT
+ if (futex64)
+ op_ret = futex_atomic_op_inuser64(op, (u64 __user *)uaddr2);
+ else
+ op_ret = futex_atomic_op_inuser(op, (int __user *)uaddr2);
+#else
+ op_ret = futex_atomic_op_inuser(op, (int __user *)uaddr2);
+#endif
if (unlikely(op_ret < 0)) {
- u32 dummy;
+ unsigned long dummy;
spin_unlock(&hb1->lock);
if (hb1 != hb2)
@@ -1005,7 +1073,7 @@ retry:
*/
up_read(&current->mm->mmap_sem);
- ret = get_user(dummy, uaddr2);
+ ret = futex_get_user(&dummy, uaddr2, futex64);
if (ret)
return ret;
@@ -1048,8 +1116,9 @@ out:
* Requeue all waiters hashed on one physical page to another
* physical page.
*/
-static int futex_requeue(u32 __user *uaddr1, u32 __user *uaddr2,
- int nr_wake, int nr_requeue, u32 *cmpval)
+static int
+futex_requeue(unsigned long __user *uaddr1, unsigned long __user *uaddr2,
+ int nr_wake, int nr_requeue, unsigned long *cmpval, int futex64)
{
union futex_key key1, key2;
struct futex_hash_bucket *hb1, *hb2;
@@ -1073,9 +1142,9 @@ static int futex_requeue(u32 __user *uad
double_lock_hb(hb1, hb2);
if (likely(cmpval != NULL)) {
- u32 curval;
+ unsigned long curval;
- ret = get_futex_value_locked(&curval, uaddr1);
+ ret = get_futex_value_locked(&curval, uaddr1, futex64);
if (unlikely(ret)) {
spin_unlock(&hb1->lock);
@@ -1088,7 +1157,7 @@ static int futex_requeue(u32 __user *uad
*/
up_read(&current->mm->mmap_sem);
- ret = get_user(curval, uaddr1);
+ ret = futex_get_user(&curval, uaddr1, futex64);
if (!ret)
goto retry;
@@ -1273,13 +1342,13 @@ static void unqueue_me_pi(struct futex_q
* The curr->mm semaphore must be held, it is released at return of this
* function.
*/
-static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
+static int fixup_pi_state_owner(unsigned long __user *uaddr, struct futex_q *q,
struct futex_hash_bucket *hb,
- struct task_struct *curr)
+ struct task_struct *curr, int futex64)
{
- u32 newtid = curr->pid | FUTEX_WAITERS;
+ unsigned long newtid = curr->pid | FUTEX_WAITERS;
struct futex_pi_state *pi_state = q->pi_state;
- u32 uval, curval, newval;
+ unsigned long uval, curval, newval;
int ret;
/* Owner died? */
@@ -1306,12 +1375,12 @@ static int fixup_pi_state_owner(u32 __us
* TID. This must be atomic as we have preserve the
* owner died bit here.
*/
- ret = get_user(uval, uaddr);
+ ret = futex_get_user(&uval, uaddr, futex64);
while (!ret) {
newval = (uval & FUTEX_OWNER_DIED) | newtid;
newval |= (uval & FUTEX_WAITER_REQUEUED);
- curval = futex_atomic_cmpxchg_inatomic(uaddr,
- uval, newval);
+ curval = futex_cmpxchg_inatomic(uaddr,uval,
+ newval, futex64);
if (curval == -EFAULT)
ret = -EFAULT;
if (curval == uval)
@@ -1322,13 +1391,14 @@ static int fixup_pi_state_owner(u32 __us
}
static long futex_wait_restart(struct restart_block *restart);
-static int futex_wait(u32 __user *uaddr, u32 val, ktime_t *abs_time)
+static int futex_wait(unsigned long __user *uaddr, unsigned long val,
+ ktime_t *abs_time, int futex64)
{
struct task_struct *curr = current;
DECLARE_WAITQUEUE(wait, curr);
struct futex_hash_bucket *hb;
struct futex_q q;
- u32 uval;
+ unsigned long uval;
int ret;
struct hrtimer_sleeper t, *to = NULL;
int rem = 0;
@@ -1363,7 +1433,7 @@ static int futex_wait(u32 __user *uaddr,
* We hold the mmap semaphore, so the mapping cannot have changed
* since we looked it up in get_futex_key.
*/
- ret = get_futex_value_locked(&uval, uaddr);
+ ret = get_futex_value_locked(&uval, uaddr, futex64);
if (unlikely(ret)) {
queue_unlock(&q, hb);
@@ -1373,8 +1443,7 @@ static int futex_wait(u32 __user *uaddr,
* start all over again.
*/
up_read(&curr->mm->mmap_sem);
-
- ret = get_user(uval, uaddr);
+ ret = futex_get_user(&uval, uaddr, futex64);
if (!ret)
goto retry;
@@ -1486,7 +1555,7 @@ static int futex_wait(u32 __user *uaddr,
/* mmap_sem and hash_bucket lock are unlocked at
return of this function */
- ret = fixup_pi_state_owner(uaddr, &q, hb, curr);
+ ret = fixup_pi_state_owner(uaddr, &q, hb, curr, futex64);
} else {
/*
* Catch the rare case, where the lock was released
@@ -1526,8 +1595,9 @@ static int futex_wait(u32 __user *uaddr,
restart = &current_thread_info()->restart_block;
restart->fn = futex_wait_restart;
restart->arg0 = (unsigned long)uaddr;
- restart->arg1 = (unsigned long)val;
+ restart->arg1 = val;
restart->arg2 = (unsigned long)abs_time;
+ restart->arg3 = (unsigned long)futex64;
return -ERESTART_RESTARTBLOCK;
}
@@ -1542,12 +1612,13 @@ static int futex_wait(u32 __user *uaddr,
static long futex_wait_restart(struct restart_block *restart)
{
- u32 __user *uaddr = (u32 __user *)restart->arg0;
- u32 val = (u32)restart->arg1;
+ unsigned long __user *uaddr = (unsigned long __user *)restart->arg0;
+ unsigned long val = restart->arg1;
ktime_t *abs_time = (ktime_t *)restart->arg2;
+ int futex64 = (int)restart->arg3;
restart->fn = do_no_restart_syscall;
- return (long)futex_wait(uaddr, val, abs_time);
+ return (long)futex_wait(uaddr, val, abs_time, futex64);
}
@@ -1602,13 +1673,13 @@ static void set_pi_futex_owner(struct fu
* if there are waiters then it will block, it does PI, etc. (Due to
* races the kernel might see a 0 value of the futex too.)
*/
-static int futex_lock_pi(u32 __user *uaddr, int detect, ktime_t *time,
- int trylock)
+static int futex_lock_pi(unsigned long __user *uaddr, int detect, ktime_t *time,
+ int trylock, int futex64)
{
struct hrtimer_sleeper timeout, *to = NULL;
struct task_struct *curr = current;
struct futex_hash_bucket *hb;
- u32 uval, newval, curval;
+ unsigned long uval, newval, curval;
struct futex_q q;
int ret, lock_held, attempt = 0;
@@ -1643,7 +1714,7 @@ static int futex_lock_pi(u32 __user *uad
newval = current->pid;
pagefault_disable();
- curval = futex_atomic_cmpxchg_inatomic(uaddr, 0, newval);
+ curval = futex_cmpxchg_inatomic(uaddr, 0, newval, futex64);
pagefault_enable();
if (unlikely(curval == -EFAULT))
@@ -1688,7 +1759,7 @@ static int futex_lock_pi(u32 __user *uad
newval = curval | FUTEX_WAITERS;
pagefault_disable();
- curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
+ curval = futex_cmpxchg_inatomic(uaddr, uval, newval, futex64);
pagefault_enable();
if (unlikely(curval == -EFAULT))
@@ -1725,8 +1796,8 @@ static int futex_lock_pi(u32 __user *uad
FUTEX_OWNER_DIED | FUTEX_WAITERS;
pagefault_disable();
- curval = futex_atomic_cmpxchg_inatomic(uaddr,
- uval, newval);
+ curval = futex_cmpxchg_inatomic(uaddr, uval,
+ newval, futex64);
pagefault_enable();
if (unlikely(curval == -EFAULT))
@@ -1770,7 +1841,7 @@ static int futex_lock_pi(u32 __user *uad
*/
if (!ret && q.pi_state->owner != curr)
/* mmap_sem is unlocked at return of this function */
- ret = fixup_pi_state_owner(uaddr, &q, hb, curr);
+ ret = fixup_pi_state_owner(uaddr, &q, hb, curr, futex64);
else {
/*
* Catch the rare case, where the lock was released
@@ -1816,7 +1887,7 @@ static int futex_lock_pi(u32 __user *uad
queue_unlock(&q, hb);
up_read(&curr->mm->mmap_sem);
- ret = get_user(uval, uaddr);
+ ret = futex_get_user(&uval, uaddr, futex64);
if (!ret && (uval != -EFAULT))
goto retry;
@@ -1828,17 +1899,17 @@ static int futex_lock_pi(u32 __user *uad
* This is the in-kernel slowpath: we look up the PI state (if any),
* and do the rt-mutex unlock.
*/
-static int futex_unlock_pi(u32 __user *uaddr)
+static int futex_unlock_pi(unsigned long __user *uaddr, int futex64)
{
struct futex_hash_bucket *hb;
struct futex_q *this, *next;
- u32 uval;
+ unsigned long uval;
struct plist_head *head;
union futex_key key;
int ret, attempt = 0;
retry:
- if (get_user(uval, uaddr))
+ if (futex_get_user(&uval, uaddr, futex64))
return -EFAULT;
/*
* We release only a lock we actually own:
@@ -1865,7 +1936,7 @@ retry_locked:
*/
if (!(uval & FUTEX_OWNER_DIED)) {
pagefault_disable();
- uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0);
+ uval = futex_cmpxchg_inatomic(uaddr, current->pid, 0, futex64);
pagefault_enable();
}
@@ -1887,7 +1958,7 @@ retry_locked:
plist_for_each_entry_safe(this, next, head, list) {
if (!match_futex (&this->key, &key))
continue;
- ret = wake_futex_pi(uaddr, uval, this);
+ ret = wake_futex_pi(uaddr, uval, this, futex64);
/*
* The atomic access to the futex value
* generated a pagefault, so retry the
@@ -1901,7 +1972,7 @@ retry_locked:
* No waiters - kernel unlocks the futex:
*/
if (!(uval & FUTEX_OWNER_DIED)) {
- ret = unlock_futex_pi(uaddr, uval);
+ ret = unlock_futex_pi(uaddr, uval, futex64);
if (ret == -EFAULT)
goto pi_faulted;
}
@@ -1931,7 +2002,7 @@ pi_faulted:
spin_unlock(&hb->lock);
up_read(&current->mm->mmap_sem);
- ret = get_user(uval, uaddr);
+ ret = futex_get_user(&uval, uaddr, futex64);
if (!ret && (uval != -EFAULT))
goto retry;
@@ -2167,7 +2238,7 @@ retry:
*/
if (!pi) {
if (uval & FUTEX_WAITERS)
- futex_wake(uaddr, 1);
+ futex_wake((unsigned long __user *)uaddr, 1);
}
}
return 0;
@@ -2223,7 +2294,8 @@ void exit_robust_list(struct task_struct
return;
if (pending)
- handle_futex_death((void __user *)pending + futex_offset, curr, pip);
+ handle_futex_death((void __user *)pending + futex_offset,
+ curr, pip);
while (entry != &head->list) {
/*
@@ -2249,42 +2321,46 @@ void exit_robust_list(struct task_struct
}
}
-long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
- u32 __user *uaddr2, u32 val2, u32 val3)
+long do_futex(unsigned long __user *uaddr, int op, unsigned long val,
+ ktime_t *timeout, unsigned long __user *uaddr2,
+ unsigned long val2, unsigned long val3, int fut64)
{
int ret;
switch (op) {
case FUTEX_WAIT:
- ret = futex_wait(uaddr, val, timeout);
+ ret = futex_wait(uaddr, val, timeout, fut64);
break;
case FUTEX_WAKE:
ret = futex_wake(uaddr, val);
break;
case FUTEX_FD:
- /* non-zero val means F_SETOWN(getpid()) & F_SETSIG(val) */
- ret = futex_fd(uaddr, val);
+ if (fut64)
+ ret = -ENOSYS;
+ else
+ /* non-zero val means F_SETOWN(getpid())&F_SETSIG(val) */
+ ret = futex_fd((u32 __user *)uaddr, val);
break;
case FUTEX_REQUEUE:
- ret = futex_requeue(uaddr, uaddr2, val, val2, NULL);
+ ret = futex_requeue(uaddr, uaddr2, val, val2, NULL, fut64);
break;
case FUTEX_CMP_REQUEUE:
- ret = futex_requeue(uaddr, uaddr2, val, val2, &val3);
+ ret = futex_requeue(uaddr, uaddr2, val, val2, &val3, fut64);
break;
case FUTEX_WAKE_OP:
- ret = futex_wake_op(uaddr, uaddr2, val, val2, val3);
+ ret = futex_wake_op(uaddr, uaddr2, val, val2, val3, fut64);
break;
case FUTEX_LOCK_PI:
- ret = futex_lock_pi(uaddr, val, timeout, 0);
+ ret = futex_lock_pi(uaddr, val, timeout, 0, fut64);
break;
case FUTEX_UNLOCK_PI:
- ret = futex_unlock_pi(uaddr);
+ ret = futex_unlock_pi(uaddr, fut64);
break;
case FUTEX_TRYLOCK_PI:
- ret = futex_lock_pi(uaddr, 0, timeout, 1);
+ ret = futex_lock_pi(uaddr, 0, timeout, 1, fut64);
break;
case FUTEX_CMP_REQUEUE_PI:
- ret = futex_requeue_pi(uaddr, uaddr2, val, val2, &val3);
+ ret = futex_requeue_pi(uaddr, uaddr2, val, val2, &val3, fut64);
break;
default:
ret = -ENOSYS;
@@ -2292,6 +2368,37 @@ long do_futex(u32 __user *uaddr, int op,
return ret;
}
+#ifdef CONFIG_64BIT
+
+asmlinkage long
+sys_futex64(u64 __user *uaddr, int op, u64 val,
+ struct timespec __user *utime, u64 __user *uaddr2, u64 val3)
+{
+ struct timespec ts;
+ ktime_t t, *tp = NULL;
+ u64 val2 = 0;
+
+ if (utime && (op == FUTEX_WAIT || op == FUTEX_LOCK_PI)) {
+ if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
+ return -EFAULT;
+ if (!timespec_valid(&ts))
+ return -EINVAL;
+ if (op == FUTEX_WAIT)
+ t = ktime_add(ktime_get(), timespec_to_ktime(ts));
+ tp = &t;
+ }
+ /*
+ * requeue parameter in 'utime' if op == FUTEX_REQUEUE.
+ */
+ if (op == FUTEX_REQUEUE || op == FUTEX_CMP_REQUEUE
+ || op == FUTEX_CMP_REQUEUE_PI)
+ val2 = (unsigned long) utime;
+
+ return do_futex((unsigned long __user*)uaddr, op, val, tp,
+ (unsigned long __user*)uaddr2, val2, val3, 1);
+}
+
+#endif
asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val,
struct timespec __user *utime, u32 __user *uaddr2,
@@ -2317,7 +2424,8 @@ asmlinkage long sys_futex(u32 __user *ua
|| op == FUTEX_CMP_REQUEUE_PI)
val2 = (u32) (unsigned long) utime;
- return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
+ return do_futex((unsigned long __user*)uaddr, op, val, tp,
+ (unsigned long __user*)uaddr2, val2, val3, 0);
}
static int futexfs_get_sb(struct file_system_type *fs_type,
Index: b/kernel/sys_ni.c
===================================================================
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -41,6 +41,7 @@ cond_syscall(sys_sendmsg);
cond_syscall(sys_recvmsg);
cond_syscall(sys_socketcall);
cond_syscall(sys_futex);
+cond_syscall(sys_futex64);
cond_syscall(compat_sys_futex);
cond_syscall(sys_set_robust_list);
cond_syscall(compat_sys_set_robust_list);
Index: b/include/linux/futex.h
===================================================================
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -101,8 +101,9 @@ struct robust_list_head {
#define ROBUST_LIST_LIMIT 2048
#ifdef __KERNEL__
-long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
- u32 __user *uaddr2, u32 val2, u32 val3);
+long do_futex(unsigned long __user *uaddr, int op, unsigned long val,
+ ktime_t *timeout, unsigned long __user *uaddr2,
+ unsigned long val2, unsigned long val3, int futex64);
extern int
handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi);
@@ -116,7 +117,7 @@ handle_futex_death(u32 __user *uaddr, st
* We set bit 0 to indicate if it's an inode-based key.
*/
union futex_key {
- u32 __user *uaddr;
+ unsigned long __user *uaddr;
struct {
unsigned long pgoff;
struct inode *inode;
@@ -133,7 +134,7 @@ union futex_key {
int offset;
} both;
};
-int get_futex_key(u32 __user *uaddr, union futex_key *key);
+int get_futex_key(void __user *uaddr, union futex_key *key);
void get_futex_key_refs(union futex_key *key);
void drop_futex_key_refs(union futex_key *key);
Index: b/kernel/futex_compat.c
===================================================================
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -158,5 +158,6 @@ asmlinkage long compat_sys_futex(u32 __u
|| op == FUTEX_CMP_REQUEUE_PI)
val2 = (int) (unsigned long) utime;
- return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
+ return do_futex((unsigned long __user*)uaddr, op, val, tp,
+ (unsigned long __user*)uaddr2, val2, val3, 0);
}
--
Pierre Peiffer
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 2.6.21-rc4-mm1 2/4] Make futex_wait() use an hrtimer for timeout
2007-03-21 9:54 ` [PATCH 2.6.21-rc4-mm1 2/4] Make futex_wait() use an hrtimer for timeout Pierre.Peiffer
@ 2007-03-26 9:57 ` Andrew Morton
0 siblings, 0 replies; 12+ messages in thread
From: Andrew Morton @ 2007-03-26 9:57 UTC (permalink / raw)
To: Pierre.Peiffer
Cc: mingo, drepper, linux-kernel, jean-pierre.dion, Sebastien Dugue,
Pierre Peiffer
On Wed, 21 Mar 2007 10:54:34 +0100 Pierre.Peiffer@bull.net wrote:
> This patch modifies futex_wait() to use an hrtimer + schedule() in place of
> schedule_timeout().
>
> schedule_timeout() is tick based, therefore the timeout granularity is
> the tick (1 ms, 4 ms or 10 ms depending on HZ). By using a high resolution
> timer for timeout wakeup, we can attain a much finer timeout granularity
> (in the microsecond range). This parallels what is already done for
> futex_lock_pi().
>
> The timeout passed to the syscall is no longer converted to jiffies
> and is therefore passed to do_futex() and futex_wait() as an absolute
> ktime_t therefore keeping nanosecond resolution.
>
> Also this removes the need to pass the nanoseconds timeout part to
> futex_lock_pi() in val2.
>
> In futex_wait(), if there is no timeout then a regular schedule() is
> performed. Otherwise, an hrtimer is fired before schedule() is called.
>
Problem.
> --- a/include/linux/futex.h
> +++ b/include/linux/futex.h
> @@ -1,6 +1,7 @@
> #ifndef _LINUX_FUTEX_H
> #define _LINUX_FUTEX_H
>
> +#include <linux/ktime.h>
> #include <linux/sched.h>
>
For a start, please print out a copy of Documentation/SubmitChecklist and
tape it to your monitor. It's really good.
`make headers_check' fails with
/usr/src/devel/usr/include/linux/futex.h requires linux/ktime.h, which does not exist in exported headers
This fixes it:
diff -puN include/linux/Kbuild~make-futex_wait-use-an-hrtimer-for-timeout-fix include/linux/Kbuild
--- a/include/linux/Kbuild~make-futex_wait-use-an-hrtimer-for-timeout-fix
+++ a/include/linux/Kbuild
@@ -40,6 +40,7 @@ header-y += baycom.h
header-y += bfs_fs.h
header-y += blkpg.h
header-y += bpqether.h
+header-y += calc64.h
header-y += cdk.h
header-y += chio.h
header-y += coda_psdev.h
@@ -99,7 +100,9 @@ header-y += isdn_divertif.h
header-y += iso_fs.h
header-y += ixjuser.h
header-y += jffs2.h
+header-y += jiffies.h
header-y += keyctl.h
+header-y += ktime.h
header-y += kvm.h
header-y += limits.h
header-y += lock_dlm_plock.h
diff -puN include/asm-i386/Kbuild~make-futex_wait-use-an-hrtimer-for-timeout-fix include/asm-i386/Kbuild
--- a/include/asm-i386/Kbuild~make-futex_wait-use-an-hrtimer-for-timeout-fix
+++ a/include/asm-i386/Kbuild
@@ -2,6 +2,7 @@ include include/asm-generic/Kbuild.asm
header-y += boot.h
header-y += debugreg.h
+header-y += div64.h
header-y += ldt.h
header-y += ptrace-abi.h
header-y += ucontext.h
_
But only for i386, and no way do we want to export all those headers.
Now. What blithering idiot carefully went and made ktime_t a typedef so we
cannot forward declare it? Sigh. We tell 'em, but they don't listen.
This fixes ktime:
diff -puN include/linux/ktime.h~declare-struct-ktime include/linux/ktime.h
--- a/include/linux/ktime.h~declare-struct-ktime
+++ a/include/linux/ktime.h
@@ -43,7 +43,7 @@
* plain scalar nanosecond based representation can be selected by the
* config switch CONFIG_KTIME_SCALAR.
*/
-typedef union {
+union ktime {
s64 tv64;
#if BITS_PER_LONG != 64 && !defined(CONFIG_KTIME_SCALAR)
struct {
@@ -54,7 +54,9 @@ typedef union {
# endif
} tv;
#endif
-} ktime_t;
+};
+
+typedef union ktime ktime_t; /* Kill this */
#define KTIME_MAX ((s64)~((u64)1 << 63))
#if (BITS_PER_LONG == 64)
_
And this fixes your patch:
--- a/include/linux/futex.h~make-futex_wait-use-an-hrtimer-for-timeout-fix
+++ a/include/linux/futex.h
@@ -1,9 +1,10 @@
#ifndef _LINUX_FUTEX_H
#define _LINUX_FUTEX_H
-#include <linux/ktime.h>
#include <linux/sched.h>
+union ktime;
+
/* Second argument to futex syscall */
@@ -95,7 +96,7 @@ struct robust_list_head {
#define ROBUST_LIST_LIMIT 2048
#ifdef __KERNEL__
-long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
+long do_futex(u32 __user *uaddr, int op, u32 val, union ktime *timeout,
u32 __user *uaddr2, u32 val2, u32 val3);
extern int
_
And now someone needs to go all over the kernel and do a s/ktime_t/union ktime/g.
Again. How often must we do this?
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 2.6.21-rc4-mm1 4/4] sys_futex64 : allows 64bit futexes
2007-03-21 9:54 ` [PATCH 2.6.21-rc4-mm1 4/4] sys_futex64 : allows 64bit futexes Pierre.Peiffer
@ 2007-03-26 11:20 ` Andrew Morton
2007-03-27 11:07 ` Jakub Jelinek
1 sibling, 0 replies; 12+ messages in thread
From: Andrew Morton @ 2007-03-26 11:20 UTC (permalink / raw)
To: Pierre.Peiffer
Cc: mingo, drepper, linux-kernel, jean-pierre.dion, Pierre Peiffer
On Wed, 21 Mar 2007 10:54:36 +0100 Pierre.Peiffer@bull.net wrote:
> It does not provide the functionality for all architectures (only for x64 for now).
Well that scuppers our chances of getting -mm kernels tested on ia64, s390
and sparc64. Which is a problem - people do test s390 and ia64 and so these
patches impact the testing quality of everyone else's work.
Do we have a plan to fix this (promptly, please)?
kernel/built-in.o(.text+0x683a2): In function `futex_requeue_pi':
: undefined reference to `futex_atomic_cmpxchg_inatomic64'
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 2.6.21-rc4-mm1 4/4] sys_futex64 : allows 64bit futexes
2007-03-21 9:54 ` [PATCH 2.6.21-rc4-mm1 4/4] sys_futex64 : allows 64bit futexes Pierre.Peiffer
2007-03-26 11:20 ` Andrew Morton
@ 2007-03-27 11:07 ` Jakub Jelinek
2007-04-23 14:35 ` [PATCH -mm] 64bit-futex - provide new commands instead of new syscall Pierre Peiffer
1 sibling, 1 reply; 12+ messages in thread
From: Jakub Jelinek @ 2007-03-27 11:07 UTC (permalink / raw)
To: Pierre.Peiffer; +Cc: akpm, mingo, drepper, linux-kernel, jean-pierre.dion
On Wed, Mar 21, 2007 at 10:54:36AM +0100, Pierre.Peiffer@bull.net wrote:
> This last patch is an adaptation of the sys_futex64 syscall provided in -rt
> patch (originally written by Ingo Molnar). It allows the use of 64-bit futex.
>
> I have re-worked most of the code to avoid the duplication of the code.
>
> It does not provide the functionality for all architectures (only for x64 for now).
I don't think you should blindly add all operations to sys_futex64 without
thinking what they really do.
E.g. FUTEX_{{,UN,TRY}LOCK,CMP_REQUEUE}_PI doesn't really make any sense for 64-bit
futexes, the format of PI futexes is hardcoded in the kernel and is always
32-bit, see FUTEX_TID_MASK, FUTEX_WAITERS, FUTEX_OWNER_DIED definitions.
exit_robust_list/handle_futex_death will handle 32-bit PI futexes anyway.
Similarly, sys_futex64 shouldn't support the obsolete operations that
are there solely for compatibility (e.g. FUTEX_REQUEUE or FUTEX_FD).
When you just -ENOSYS on the PI ops, there is no need to implement
futex_atomic_cmpxchg_inatomic64.
FUTEX_WAKE_OP is questionable for 64-bit, IMHO it is better to just
-ENOSYS on it and only if anyone ever finds actual uses for it, add it.
For 64-bit futexes the only needed operations are actually
FUTEX_WAIT and perhaps FUTEX_CMP_REQUEUE, so I wonder if it isn't
better to just add FUTEX_WAIT64 and FUTEX_CMP_REQUEUE64 ops to sys_futex
instead of adding a new syscall.
But the FUTEX_{{,UN,TRY}LOCK,CMP_REQUEUE}_PI removal for 64-bit futexes
is IMHO the most important part of my complain.
Jakub
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH -mm] 64bit-futex - provide new commands instead of new syscall
2007-03-27 11:07 ` Jakub Jelinek
@ 2007-04-23 14:35 ` Pierre Peiffer
2007-04-23 15:30 ` Ulrich Drepper
0 siblings, 1 reply; 12+ messages in thread
From: Pierre Peiffer @ 2007-04-23 14:35 UTC (permalink / raw)
To: Jakub Jelinek; +Cc: akpm, mingo, drepper, linux-kernel, jean-pierre.dion
[-- Attachment #1: Type: text/plain, Size: 1717 bytes --]
Hi,
Jakub Jelinek a écrit :
>
> I don't think you should blindly add all operations to sys_futex64 without
> thinking what they really do.
> E.g. FUTEX_{{,UN,TRY}LOCK,CMP_REQUEUE}_PI doesn't really make any sense for 64-bit
> futexes, the format of PI futexes is hardcoded in the kernel and is always
> 32-bit, see FUTEX_TID_MASK, FUTEX_WAITERS, FUTEX_OWNER_DIED definitions.
> exit_robust_list/handle_futex_death will handle 32-bit PI futexes anyway.
> Similarly, sys_futex64 shouldn't support the obsolete operations that
> are there solely for compatibility (e.g. FUTEX_REQUEUE or FUTEX_FD).
>
> When you just -ENOSYS on the PI ops, there is no need to implement
> futex_atomic_cmpxchg_inatomic64.
>
> FUTEX_WAKE_OP is questionable for 64-bit, IMHO it is better to just
> -ENOSYS on it and only if anyone ever finds actual uses for it, add it.
>
> For 64-bit futexes the only needed operations are actually
> FUTEX_WAIT and perhaps FUTEX_CMP_REQUEUE, so I wonder if it isn't
> better to just add FUTEX_WAIT64 and FUTEX_CMP_REQUEUE64 ops to sys_futex
> instead of adding a new syscall.
>
> But the FUTEX_{{,UN,TRY}LOCK,CMP_REQUEUE}_PI removal for 64-bit futexes
> is IMHO the most important part of my complain.
>
Following this mail sent few weeks ago, here is a patch which should meet your
requirements.
I've quickly done it on top of the latest -mm (2.6.21-rc6-mm2) and a little bit
tested.
To be honest, as I'm not really aware of your exact needs and as I don't know
the exact usage which will be done with 64bit futexes, I can't really maintain
it. So I'll let you take/modify/adapt this patch following your needs.
Thanks,
Signed-off-by: Pierre Peiffer <pierre.peiffer@bull.net>
--
Pierre
[-- Attachment #2: futex-64bits-command.patch --]
[-- Type: text/x-patch, Size: 24427 bytes --]
---
include/asm-ia64/futex.h | 8 -
include/asm-powerpc/futex.h | 6 -
include/asm-s390/futex.h | 8 -
include/asm-sparc64/futex.h | 8 -
include/asm-um/futex.h | 9 -
include/asm-x86_64/futex.h | 86 -------------------
include/asm-x86_64/unistd.h | 2
include/linux/futex.h | 8 +
include/linux/syscalls.h | 3
kernel/futex.c | 199 +++++++++++++++++---------------------------
kernel/futex_compat.c | 2
kernel/sys_ni.c | 1
12 files changed, 93 insertions(+), 247 deletions(-)
Index: linux-2.6.21-rc6-mm2/include/asm-ia64/futex.h
===================================================================
--- linux-2.6.21-rc6-mm2.orig/include/asm-ia64/futex.h 2007-04-20 14:01:25.000000000 +0200
+++ linux-2.6.21-rc6-mm2/include/asm-ia64/futex.h 2007-04-20 13:50:00.000000000 +0200
@@ -124,13 +124,7 @@ futex_atomic_cmpxchg_inatomic(int __user
static inline u64
futex_atomic_cmpxchg_inatomic64(u64 __user *uaddr, u64 oldval, u64 newval)
{
- return 0;
-}
-
-static inline int
-futex_atomic_op_inuser64 (int encoded_op, u64 __user *uaddr)
-{
- return 0;
+ return -ENOSYS;
}
#endif /* _ASM_FUTEX_H */
Index: linux-2.6.21-rc6-mm2/include/asm-powerpc/futex.h
===================================================================
--- linux-2.6.21-rc6-mm2.orig/include/asm-powerpc/futex.h 2007-04-20 14:01:25.000000000 +0200
+++ linux-2.6.21-rc6-mm2/include/asm-powerpc/futex.h 2007-04-20 13:51:49.000000000 +0200
@@ -119,11 +119,5 @@ futex_atomic_cmpxchg_inatomic64(u64 __us
return 0;
}
-static inline int
-futex_atomic_op_inuser64 (int encoded_op, u64 __user *uaddr)
-{
- return 0;
-}
-
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_FUTEX_H */
Index: linux-2.6.21-rc6-mm2/include/asm-s390/futex.h
===================================================================
--- linux-2.6.21-rc6-mm2.orig/include/asm-s390/futex.h 2007-04-20 14:01:24.000000000 +0200
+++ linux-2.6.21-rc6-mm2/include/asm-s390/futex.h 2007-04-20 13:47:30.000000000 +0200
@@ -51,13 +51,7 @@ static inline int futex_atomic_cmpxchg_i
static inline u64
futex_atomic_cmpxchg_inatomic64(u64 __user *uaddr, u64 oldval, u64 newval)
{
- return 0;
-}
-
-static inline int
-futex_atomic_op_inuser64 (int encoded_op, u64 __user *uaddr)
-{
- return 0;
+ return -ENOSYS;
}
#endif /* __KERNEL__ */
Index: linux-2.6.21-rc6-mm2/include/asm-sparc64/futex.h
===================================================================
--- linux-2.6.21-rc6-mm2.orig/include/asm-sparc64/futex.h 2007-04-20 14:01:25.000000000 +0200
+++ linux-2.6.21-rc6-mm2/include/asm-sparc64/futex.h 2007-04-20 13:48:48.000000000 +0200
@@ -108,13 +108,7 @@ futex_atomic_cmpxchg_inatomic(int __user
static inline u64
futex_atomic_cmpxchg_inatomic64(u64 __user *uaddr, u64 oldval, u64 newval)
{
- return 0;
-}
-
-static inline int
-futex_atomic_op_inuser64 (int encoded_op, u64 __user *uaddr)
-{
- return 0;
+ return -ENOSYS;
}
#endif /* !(_SPARC64_FUTEX_H) */
Index: linux-2.6.21-rc6-mm2/include/asm-um/futex.h
===================================================================
--- linux-2.6.21-rc6-mm2.orig/include/asm-um/futex.h 2007-04-20 14:01:25.000000000 +0200
+++ linux-2.6.21-rc6-mm2/include/asm-um/futex.h 2007-04-20 13:51:42.000000000 +0200
@@ -6,14 +6,7 @@
static inline u64
futex_atomic_cmpxchg_inatomic64(u64 __user *uaddr, u64 oldval, u64 newval)
{
- return 0;
+ return -ENOSYS;
}
-static inline int
-futex_atomic_op_inuser64 (int encoded_op, u64 __user *uaddr)
-{
- return 0;
-}
-
-
#endif
Index: linux-2.6.21-rc6-mm2/include/asm-x86_64/futex.h
===================================================================
--- linux-2.6.21-rc6-mm2.orig/include/asm-x86_64/futex.h 2007-04-20 14:01:25.000000000 +0200
+++ linux-2.6.21-rc6-mm2/include/asm-x86_64/futex.h 2007-04-20 13:50:38.000000000 +0200
@@ -41,38 +41,6 @@
"=&r" (tem) \
: "r" (oparg), "i" (-EFAULT), "m" (*uaddr), "1" (0))
-#define __futex_atomic_op1_64(insn, ret, oldval, uaddr, oparg) \
- __asm__ __volatile ( \
-"1: " insn "\n" \
-"2: .section .fixup,\"ax\"\n\
-3: movq %3, %1\n\
- jmp 2b\n\
- .previous\n\
- .section __ex_table,\"a\"\n\
- .align 8\n\
- .quad 1b,3b\n\
- .previous" \
- : "=r" (oldval), "=r" (ret), "=m" (*uaddr) \
- : "i" (-EFAULT), "m" (*uaddr), "0" (oparg), "1" (0))
-
-#define __futex_atomic_op2_64(insn, ret, oldval, uaddr, oparg) \
- __asm__ __volatile ( \
-"1: movq %2, %0\n\
- movq %0, %3\n" \
- insn "\n" \
-"2: " LOCK_PREFIX "cmpxchgq %3, %2\n\
- jnz 1b\n\
-3: .section .fixup,\"ax\"\n\
-4: movq %5, %1\n\
- jmp 3b\n\
- .previous\n\
- .section __ex_table,\"a\"\n\
- .align 8\n\
- .quad 1b,4b,2b,4b\n\
- .previous" \
- : "=&a" (oldval), "=&r" (ret), "=m" (*uaddr), \
- "=&r" (tem) \
- : "r" (oparg), "i" (-EFAULT), "m" (*uaddr), "1" (0))
static inline int
futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
@@ -128,60 +96,6 @@ futex_atomic_op_inuser (int encoded_op,
}
static inline int
-futex_atomic_op_inuser64 (int encoded_op, u64 __user *uaddr)
-{
- int op = (encoded_op >> 28) & 7;
- int cmp = (encoded_op >> 24) & 15;
- u64 oparg = (encoded_op << 8) >> 20;
- u64 cmparg = (encoded_op << 20) >> 20;
- u64 oldval = 0, ret, tem;
-
- if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
- oparg = 1 << oparg;
-
- if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u64)))
- return -EFAULT;
-
- inc_preempt_count();
-
- switch (op) {
- case FUTEX_OP_SET:
- __futex_atomic_op1_64("xchgq %0, %2", ret, oldval, uaddr, oparg);
- break;
- case FUTEX_OP_ADD:
- __futex_atomic_op1_64(LOCK_PREFIX "xaddq %0, %2", ret, oldval,
- uaddr, oparg);
- break;
- case FUTEX_OP_OR:
- __futex_atomic_op2_64("orq %4, %3", ret, oldval, uaddr, oparg);
- break;
- case FUTEX_OP_ANDN:
- __futex_atomic_op2_64("andq %4, %3", ret, oldval, uaddr, ~oparg);
- break;
- case FUTEX_OP_XOR:
- __futex_atomic_op2_64("xorq %4, %3", ret, oldval, uaddr, oparg);
- break;
- default:
- ret = -ENOSYS;
- }
-
- dec_preempt_count();
-
- if (!ret) {
- switch (cmp) {
- case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
- case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
- case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
- case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
- case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
- case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
- default: ret = -ENOSYS;
- }
- }
- return ret;
-}
-
-static inline int
futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
{
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
Index: linux-2.6.21-rc6-mm2/include/asm-x86_64/unistd.h
===================================================================
--- linux-2.6.21-rc6-mm2.orig/include/asm-x86_64/unistd.h 2007-04-20 14:01:25.000000000 +0200
+++ linux-2.6.21-rc6-mm2/include/asm-x86_64/unistd.h 2007-04-20 13:50:42.000000000 +0200
@@ -619,8 +619,6 @@ __SYSCALL(__NR_sync_file_range, sys_sync
__SYSCALL(__NR_vmsplice, sys_vmsplice)
#define __NR_move_pages 279
__SYSCALL(__NR_move_pages, sys_move_pages)
-#define __NR_futex64 280
-__SYSCALL(__NR_futex64, sys_futex64)
#define __NR_signalfd 281
__SYSCALL(__NR_signalfd, sys_signalfd)
#define __NR_timerfd 282
Index: linux-2.6.21-rc6-mm2/include/linux/futex.h
===================================================================
--- linux-2.6.21-rc6-mm2.orig/include/linux/futex.h 2007-04-20 14:01:24.000000000 +0200
+++ linux-2.6.21-rc6-mm2/include/linux/futex.h 2007-04-20 13:47:36.000000000 +0200
@@ -19,6 +19,12 @@ union ktime;
#define FUTEX_TRYLOCK_PI 8
#define FUTEX_CMP_REQUEUE_PI 9
+#ifdef CONFIG_64BIT
+#define FUTEX_64BIT 0x80000000
+#define FUTEX_WAIT64 FUTEX_WAIT|FUTEX_64BIT
+#define FUTEX_CMP_REQUEUE64 FUTEX_CMP_REQUEUE|FUTEX_64BIT
+#endif
+
/*
* Support for robust futexes: the kernel cleans up held futexes at
* thread exit time.
@@ -104,7 +110,7 @@ struct robust_list_head {
#ifdef __KERNEL__
long do_futex(unsigned long __user *uaddr, int op, unsigned long val,
union ktime *timeout, unsigned long __user *uaddr2,
- unsigned long val2, unsigned long val3, int futex64);
+ unsigned long val2, unsigned long val3);
extern int
handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi);
Index: linux-2.6.21-rc6-mm2/include/linux/syscalls.h
===================================================================
--- linux-2.6.21-rc6-mm2.orig/include/linux/syscalls.h 2007-04-20 14:01:25.000000000 +0200
+++ linux-2.6.21-rc6-mm2/include/linux/syscalls.h 2007-04-20 13:48:12.000000000 +0200
@@ -178,9 +178,6 @@ asmlinkage long sys_set_tid_address(int
asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val,
struct timespec __user *utime, u32 __user *uaddr2,
u32 val3);
-asmlinkage long sys_futex64(u64 __user *uaddr, int op, u64 val,
- struct timespec __user *utime, u64 __user *uaddr2,
- u64 val3);
asmlinkage long sys_init_module(void __user *umod, unsigned long len,
const char __user *uargs);
Index: linux-2.6.21-rc6-mm2/kernel/futex.c
===================================================================
--- linux-2.6.21-rc6-mm2.orig/kernel/futex.c 2007-04-20 14:01:24.000000000 +0200
+++ linux-2.6.21-rc6-mm2/kernel/futex.c 2007-04-20 15:31:23.000000000 +0200
@@ -62,20 +62,6 @@
#define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)
#ifdef CONFIG_64BIT
-static inline unsigned long
-futex_cmpxchg_inatomic(unsigned long __user *uaddr, unsigned long oldval,
- unsigned long newval, int futex64)
-{
- if (futex64)
- return futex_atomic_cmpxchg_inatomic64((u64 __user *)uaddr,
- oldval, newval);
- else {
- u32 ov = oldval, nv = newval;
- return futex_atomic_cmpxchg_inatomic((int __user *)uaddr, ov,
- nv);
- }
-}
-
static inline int
futex_get_user(unsigned long *val, unsigned long __user *uaddr, int futex64)
{
@@ -92,11 +78,7 @@ futex_get_user(unsigned long *val, unsig
}
#else
-#define futex_cmpxchg_inatomic(uaddr, oldval, newval, futex64) \
- futex_atomic_cmpxchg_inatomic((u32*)uaddr, oldval, newval)
-
#define futex_get_user(val, uaddr, futex64) get_user(*val, uaddr)
-
#endif
/*
@@ -606,12 +588,12 @@ static void wake_futex(struct futex_q *q
q->lock_ptr = NULL;
}
-static int wake_futex_pi(unsigned long __user *uaddr, unsigned long uval,
- struct futex_q *this, int futex64)
+static int wake_futex_pi(u32 __user *uaddr, unsigned long uval,
+ struct futex_q *this)
{
struct task_struct *new_owner;
struct futex_pi_state *pi_state = this->pi_state;
- unsigned long curval, newval;
+ u32 curval, newval;
if (!pi_state)
return -EINVAL;
@@ -639,7 +621,7 @@ static int wake_futex_pi(unsigned long _
newval |= (uval & FUTEX_WAITER_REQUEUED);
pagefault_disable();
- curval = futex_cmpxchg_inatomic(uaddr, uval, newval, futex64);
+ curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
pagefault_enable();
if (curval == -EFAULT)
return -EFAULT;
@@ -664,17 +646,16 @@ static int wake_futex_pi(unsigned long _
return 0;
}
-static int unlock_futex_pi(unsigned long __user *uaddr, unsigned long uval,
- int futex64)
+static int unlock_futex_pi(u32 __user *uaddr, unsigned long uval)
{
- unsigned long oldval;
+ u32 oldval;
/*
* There is no waiter, so we unlock the futex. The owner died
* bit has not to be preserved here. We are the owner:
*/
pagefault_disable();
- oldval = futex_cmpxchg_inatomic(uaddr, uval, 0, futex64);
+ oldval = futex_atomic_cmpxchg_inatomic(uaddr, uval, 0);
pagefault_enable();
if (oldval == -EFAULT)
@@ -748,19 +729,20 @@ out:
* or create a new one without owner.
*/
static inline int
-lookup_pi_state_for_requeue(unsigned long __user *uaddr,
+lookup_pi_state_for_requeue(u32 __user *uaddr,
struct futex_hash_bucket *hb,
union futex_key *key,
- struct futex_pi_state **pi_state, int futex64)
+ struct futex_pi_state **pi_state)
{
- unsigned long curval, uval, newval;
+ u32 curval, uval, newval;
retry:
/*
* We can't handle a fault cleanly because we can't
* release the locks here. Simply return the fault.
*/
- if (get_futex_value_locked(&curval, uaddr, futex64))
+ if (get_futex_value_locked((unsigned long *)&curval,
+ (unsigned long __user *)uaddr, 0))
return -EFAULT;
/* set the flags FUTEX_WAITERS and FUTEX_WAITER_REQUEUED */
@@ -774,7 +756,7 @@ retry:
newval = uval | FUTEX_WAITERS | FUTEX_WAITER_REQUEUED;
pagefault_disable();
- curval = futex_cmpxchg_inatomic(uaddr, uval, newval, futex64);
+ curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
pagefault_enable();
if (unlikely(curval == -EFAULT))
@@ -806,8 +788,8 @@ retry:
* one physical page to another physical page (PI-futex uaddr2)
*/
static int
-futex_requeue_pi(unsigned long __user *uaddr1, unsigned long __user *uaddr2,
- int nr_wake, int nr_requeue, unsigned long *cmpval, int futex64)
+futex_requeue_pi(u32 __user *uaddr1, u32 __user *uaddr2,
+ int nr_wake, int nr_requeue, unsigned long *cmpval)
{
union futex_key key1, key2;
struct futex_hash_bucket *hb1, *hb2;
@@ -840,9 +822,10 @@ retry:
double_lock_hb(hb1, hb2);
if (likely(cmpval != NULL)) {
- unsigned long curval;
+ u32 curval;
- ret = get_futex_value_locked(&curval, uaddr1, futex64);
+ ret = get_futex_value_locked((unsigned long*)&curval,
+ (unsigned long __user *)uaddr1, 0);
if (unlikely(ret)) {
spin_unlock(&hb1->lock);
@@ -855,7 +838,7 @@ retry:
*/
up_read(&current->mm->mmap_sem);
- ret = futex_get_user(&curval, uaddr1, futex64);
+ ret = get_user(curval, uaddr1);
if (!ret)
goto retry;
@@ -882,8 +865,7 @@ retry:
int s;
/* do this only the first time we requeue someone */
s = lookup_pi_state_for_requeue(uaddr2, hb2,
- &key2, &pi_state2,
- futex64);
+ &key2, &pi_state2);
if (s) {
ret = s;
goto out_unlock;
@@ -998,7 +980,7 @@ out:
*/
static int
futex_wake_op(unsigned long __user *uaddr1, unsigned long __user *uaddr2,
- int nr_wake, int nr_wake2, int op, int futex64)
+ int nr_wake, int nr_wake2, int op)
{
union futex_key key1, key2;
struct futex_hash_bucket *hb1, *hb2;
@@ -1022,16 +1004,10 @@ retryfull:
retry:
double_lock_hb(hb1, hb2);
-#ifdef CONFIG_64BIT
- if (futex64)
- op_ret = futex_atomic_op_inuser64(op, (u64 __user *)uaddr2);
- else
- op_ret = futex_atomic_op_inuser(op, (int __user *)uaddr2);
-#else
op_ret = futex_atomic_op_inuser(op, (int __user *)uaddr2);
-#endif
+
if (unlikely(op_ret < 0)) {
- unsigned long dummy;
+ u32 dummy;
spin_unlock(&hb1->lock);
if (hb1 != hb2)
@@ -1073,7 +1049,7 @@ retry:
*/
up_read(&current->mm->mmap_sem);
- ret = futex_get_user(&dummy, uaddr2, futex64);
+ ret = get_user(dummy, uaddr2);
if (ret)
return ret;
@@ -1379,8 +1355,18 @@ static int fixup_pi_state_owner(unsigned
while (!ret) {
newval = (uval & FUTEX_OWNER_DIED) | newtid;
newval |= (uval & FUTEX_WAITER_REQUEUED);
- curval = futex_cmpxchg_inatomic(uaddr,uval,
- newval, futex64);
+#ifdef CONFIG_64BIT
+ if (futex64)
+ curval = futex_atomic_cmpxchg_inatomic64(
+ (u64 __user *)uaddr,
+ uval, newval);
+ else
+#endif
+ curval = futex_atomic_cmpxchg_inatomic(
+ (u32 __user *)uaddr,
+ (u32)uval,
+ (u32)newval);
+
if (curval == -EFAULT)
ret = -EFAULT;
if (curval == uval)
@@ -1673,13 +1659,13 @@ static void set_pi_futex_owner(struct fu
* if there are waiters then it will block, it does PI, etc. (Due to
* races the kernel might see a 0 value of the futex too.)
*/
-static int futex_lock_pi(unsigned long __user *uaddr, int detect, ktime_t *time,
- int trylock, int futex64)
+static int futex_lock_pi(u32 __user *uaddr, int detect, ktime_t *time,
+ int trylock)
{
struct hrtimer_sleeper timeout, *to = NULL;
struct task_struct *curr = current;
struct futex_hash_bucket *hb;
- unsigned long uval, newval, curval;
+ u32 uval, newval, curval;
struct futex_q q;
int ret, lock_held, attempt = 0;
@@ -1714,7 +1700,7 @@ static int futex_lock_pi(unsigned long _
newval = current->pid;
pagefault_disable();
- curval = futex_cmpxchg_inatomic(uaddr, 0, newval, futex64);
+ curval = futex_atomic_cmpxchg_inatomic(uaddr, 0, newval);
pagefault_enable();
if (unlikely(curval == -EFAULT))
@@ -1759,7 +1745,7 @@ static int futex_lock_pi(unsigned long _
newval = curval | FUTEX_WAITERS;
pagefault_disable();
- curval = futex_cmpxchg_inatomic(uaddr, uval, newval, futex64);
+ curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
pagefault_enable();
if (unlikely(curval == -EFAULT))
@@ -1796,8 +1782,8 @@ static int futex_lock_pi(unsigned long _
FUTEX_OWNER_DIED | FUTEX_WAITERS;
pagefault_disable();
- curval = futex_cmpxchg_inatomic(uaddr, uval,
- newval, futex64);
+ curval = futex_atomic_cmpxchg_inatomic(uaddr, uval,
+ newval);
pagefault_enable();
if (unlikely(curval == -EFAULT))
@@ -1841,7 +1827,8 @@ static int futex_lock_pi(unsigned long _
*/
if (!ret && q.pi_state->owner != curr)
/* mmap_sem is unlocked at return of this function */
- ret = fixup_pi_state_owner(uaddr, &q, hb, curr, futex64);
+ ret = fixup_pi_state_owner((unsigned long __user *)uaddr,
+ &q, hb, curr, 0);
else {
/*
* Catch the rare case, where the lock was released
@@ -1887,7 +1874,7 @@ static int futex_lock_pi(unsigned long _
queue_unlock(&q, hb);
up_read(&curr->mm->mmap_sem);
- ret = futex_get_user(&uval, uaddr, futex64);
+ ret = get_user(uval, uaddr);
if (!ret && (uval != -EFAULT))
goto retry;
@@ -1899,17 +1886,17 @@ static int futex_lock_pi(unsigned long _
* This is the in-kernel slowpath: we look up the PI state (if any),
* and do the rt-mutex unlock.
*/
-static int futex_unlock_pi(unsigned long __user *uaddr, int futex64)
+static int futex_unlock_pi(u32 __user *uaddr)
{
struct futex_hash_bucket *hb;
struct futex_q *this, *next;
- unsigned long uval;
+ u32 uval;
struct plist_head *head;
union futex_key key;
int ret, attempt = 0;
retry:
- if (futex_get_user(&uval, uaddr, futex64))
+ if (get_user(uval, uaddr))
return -EFAULT;
/*
* We release only a lock we actually own:
@@ -1936,7 +1923,7 @@ retry_locked:
*/
if (!(uval & FUTEX_OWNER_DIED)) {
pagefault_disable();
- uval = futex_cmpxchg_inatomic(uaddr, current->pid, 0, futex64);
+ uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0);
pagefault_enable();
}
@@ -1958,7 +1945,7 @@ retry_locked:
plist_for_each_entry_safe(this, next, head, list) {
if (!match_futex (&this->key, &key))
continue;
- ret = wake_futex_pi(uaddr, uval, this, futex64);
+ ret = wake_futex_pi(uaddr, uval, this);
/*
* The atomic access to the futex value
* generated a pagefault, so retry the
@@ -1972,7 +1959,7 @@ retry_locked:
* No waiters - kernel unlocks the futex:
*/
if (!(uval & FUTEX_OWNER_DIED)) {
- ret = unlock_futex_pi(uaddr, uval, futex64);
+ ret = unlock_futex_pi(uaddr, uval);
if (ret == -EFAULT)
goto pi_faulted;
}
@@ -2002,7 +1989,7 @@ pi_faulted:
spin_unlock(&hb->lock);
up_read(&current->mm->mmap_sem);
- ret = futex_get_user(&uval, uaddr, futex64);
+ ret = get_user(uval, uaddr);
if (!ret && (uval != -EFAULT))
goto retry;
@@ -2323,92 +2310,70 @@ void exit_robust_list(struct task_struct
long do_futex(unsigned long __user *uaddr, int op, unsigned long val,
ktime_t *timeout, unsigned long __user *uaddr2,
- unsigned long val2, unsigned long val3, int fut64)
+ unsigned long val2, unsigned long val3)
{
int ret;
switch (op) {
case FUTEX_WAIT:
- ret = futex_wait(uaddr, val, timeout, fut64);
+ ret = futex_wait(uaddr, val, timeout, 0);
break;
case FUTEX_WAKE:
ret = futex_wake(uaddr, val);
break;
case FUTEX_FD:
- if (fut64)
- ret = -ENOSYS;
- else
- /* non-zero val means F_SETOWN(getpid())&F_SETSIG(val) */
- ret = futex_fd((u32 __user *)uaddr, val);
+ /* non-zero val means F_SETOWN(getpid())&F_SETSIG(val) */
+ ret = futex_fd((u32 __user *)uaddr, val);
break;
case FUTEX_REQUEUE:
- ret = futex_requeue(uaddr, uaddr2, val, val2, NULL, fut64);
+ ret = futex_requeue(uaddr, uaddr2, val, val2, NULL, 0);
break;
case FUTEX_CMP_REQUEUE:
- ret = futex_requeue(uaddr, uaddr2, val, val2, &val3, fut64);
+ ret = futex_requeue(uaddr, uaddr2, val, val2, &val3, 0);
break;
case FUTEX_WAKE_OP:
- ret = futex_wake_op(uaddr, uaddr2, val, val2, val3, fut64);
+ ret = futex_wake_op(uaddr, uaddr2, val, val2, val3);
break;
case FUTEX_LOCK_PI:
- ret = futex_lock_pi(uaddr, val, timeout, 0, fut64);
+ ret = futex_lock_pi((u32 __user *)uaddr, val, timeout, 0);
break;
case FUTEX_UNLOCK_PI:
- ret = futex_unlock_pi(uaddr, fut64);
+ ret = futex_unlock_pi((u32 __user *)uaddr);
break;
case FUTEX_TRYLOCK_PI:
- ret = futex_lock_pi(uaddr, 0, timeout, 1, fut64);
+ ret = futex_lock_pi((u32 __user *)uaddr, 0, timeout, 1);
break;
case FUTEX_CMP_REQUEUE_PI:
- ret = futex_requeue_pi(uaddr, uaddr2, val, val2, &val3, fut64);
+ ret = futex_requeue_pi((u32 __user *)uaddr,
+ (u32 __user *)uaddr2,
+ val, val2, &val3);
+ break;
+#ifdef CONFIG_64BIT
+ case FUTEX_WAIT64:
+ ret = futex_wait(uaddr, val, timeout, 1);
+ break;
+ case FUTEX_CMP_REQUEUE64:
+ ret = futex_requeue(uaddr, uaddr2, val, val2, &val3, 1);
break;
+#endif
default:
ret = -ENOSYS;
}
return ret;
}
-#ifdef CONFIG_64BIT
-
-asmlinkage long
-sys_futex64(u64 __user *uaddr, int op, u64 val,
- struct timespec __user *utime, u64 __user *uaddr2, u64 val3)
-{
- struct timespec ts;
- ktime_t t, *tp = NULL;
- u64 val2 = 0;
-
- if (utime && (op == FUTEX_WAIT || op == FUTEX_LOCK_PI)) {
- if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
- return -EFAULT;
- if (!timespec_valid(&ts))
- return -EINVAL;
-
- t = timespec_to_ktime(ts);
- if (op == FUTEX_WAIT)
- t = ktime_add(ktime_get(), t);
- tp = &t;
- }
- /*
- * requeue parameter in 'utime' if op == FUTEX_REQUEUE.
- */
- if (op == FUTEX_REQUEUE || op == FUTEX_CMP_REQUEUE
- || op == FUTEX_CMP_REQUEUE_PI)
- val2 = (unsigned long) utime;
-
- return do_futex((unsigned long __user*)uaddr, op, val, tp,
- (unsigned long __user*)uaddr2, val2, val3, 1);
-}
-
-#endif
-
asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val,
struct timespec __user *utime, u32 __user *uaddr2,
u32 val3)
{
struct timespec ts;
ktime_t t, *tp = NULL;
- u32 val2 = 0;
+ unsigned long val2 = 0;
+ int op2 = op;
+
+#ifdef CONFIG_64BIT
+ op &= ~FUTEX_64BIT;
+#endif
if (utime && (op == FUTEX_WAIT || op == FUTEX_LOCK_PI)) {
if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
@@ -2426,10 +2391,10 @@ asmlinkage long sys_futex(u32 __user *ua
*/
if (op == FUTEX_REQUEUE || op == FUTEX_CMP_REQUEUE
|| op == FUTEX_CMP_REQUEUE_PI)
- val2 = (u32) (unsigned long) utime;
+ val2 = (unsigned long) utime;
- return do_futex((unsigned long __user*)uaddr, op, val, tp,
- (unsigned long __user*)uaddr2, val2, val3, 0);
+ return do_futex((unsigned long __user*)uaddr, op2, val, tp,
+ (unsigned long __user*)uaddr2, val2, val3);
}
static int futexfs_get_sb(struct file_system_type *fs_type,
Index: linux-2.6.21-rc6-mm2/kernel/futex_compat.c
===================================================================
--- linux-2.6.21-rc6-mm2.orig/kernel/futex_compat.c 2007-04-20 14:01:24.000000000 +0200
+++ linux-2.6.21-rc6-mm2/kernel/futex_compat.c 2007-04-20 13:41:57.000000000 +0200
@@ -161,5 +161,5 @@ asmlinkage long compat_sys_futex(u32 __u
val2 = (int) (unsigned long) utime;
return do_futex((unsigned long __user*)uaddr, op, val, tp,
- (unsigned long __user*)uaddr2, val2, val3, 0);
+ (unsigned long __user*)uaddr2, val2, val3);
}
Index: linux-2.6.21-rc6-mm2/kernel/sys_ni.c
===================================================================
--- linux-2.6.21-rc6-mm2.orig/kernel/sys_ni.c 2007-04-20 14:01:24.000000000 +0200
+++ linux-2.6.21-rc6-mm2/kernel/sys_ni.c 2007-04-20 13:41:54.000000000 +0200
@@ -41,7 +41,6 @@ cond_syscall(sys_sendmsg);
cond_syscall(sys_recvmsg);
cond_syscall(sys_socketcall);
cond_syscall(sys_futex);
-cond_syscall(sys_futex64);
cond_syscall(compat_sys_futex);
cond_syscall(sys_set_robust_list);
cond_syscall(compat_sys_set_robust_list);
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH -mm] 64bit-futex - provide new commands instead of new syscall
2007-04-23 14:35 ` [PATCH -mm] 64bit-futex - provide new commands instead of new syscall Pierre Peiffer
@ 2007-04-23 15:30 ` Ulrich Drepper
2007-04-24 8:07 ` [PATCH -mm take2] " Pierre Peiffer
0 siblings, 1 reply; 12+ messages in thread
From: Ulrich Drepper @ 2007-04-23 15:30 UTC (permalink / raw)
To: Pierre Peiffer
Cc: Jakub Jelinek, akpm, mingo, drepper, linux-kernel, jean-pierre.dion
On 4/23/07, Pierre Peiffer <pierre.peiffer@bull.net> wrote:
> Following this mail sent few weeks ago, here is a patch which should meet your
> requirements. [...]
It looks mostly good. I wouldn't use the high bit to differentiate
the 64-bit operations, though. Since we do not allow to apply it to
all operations the only effect will be that the compiler has a harder
time generating the code for the switch statement. If you use
continuous values a simple jump table can be used and no conditionals.
Smaller and faster.
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH -mm take2] 64bit-futex - provide new commands instead of new syscall
2007-04-23 15:30 ` Ulrich Drepper
@ 2007-04-24 8:07 ` Pierre Peiffer
2007-04-24 13:25 ` Ulrich Drepper
0 siblings, 1 reply; 12+ messages in thread
From: Pierre Peiffer @ 2007-04-24 8:07 UTC (permalink / raw)
To: Ulrich Drepper
Cc: Jakub Jelinek, akpm, mingo, drepper, linux-kernel, jean-pierre.dion
[-- Attachment #1: Type: text/plain, Size: 505 bytes --]
Ulrich Drepper a écrit :
>
> It looks mostly good. I wouldn't use the high bit to differentiate
> the 64-bit operations, though. Since we do not allow to apply it to
> all operations the only effect will be that the compiler has a harder
> time generating the code for the switch statement. If you use
> continuous values a simple jump table can be used and no conditionals.
> Smaller and faster.
>
Something like that may be...
Signed-off-by: Pierre Peiffer <pierre.peiffer@bull.net>
--
Pierre
[-- Attachment #2: futex-64bits-command.patch --]
[-- Type: text/x-patch, Size: 22911 bytes --]
---
include/asm-ia64/futex.h | 8 -
include/asm-powerpc/futex.h | 6 -
include/asm-s390/futex.h | 8 -
include/asm-sparc64/futex.h | 8 -
include/asm-um/futex.h | 9 -
include/asm-x86_64/futex.h | 86 ------------------
include/asm-x86_64/unistd.h | 2
include/linux/futex.h | 6 +
include/linux/syscalls.h | 3
kernel/futex.c | 203 ++++++++++++++++++--------------------------
kernel/futex_compat.c | 2
kernel/sys_ni.c | 1
12 files changed, 95 insertions(+), 247 deletions(-)
Index: b/include/asm-ia64/futex.h
===================================================================
--- a/include/asm-ia64/futex.h
+++ b/include/asm-ia64/futex.h
@@ -124,13 +124,7 @@ futex_atomic_cmpxchg_inatomic(int __user
static inline u64
futex_atomic_cmpxchg_inatomic64(u64 __user *uaddr, u64 oldval, u64 newval)
{
- return 0;
-}
-
-static inline int
-futex_atomic_op_inuser64 (int encoded_op, u64 __user *uaddr)
-{
- return 0;
+ return -ENOSYS;
}
#endif /* _ASM_FUTEX_H */
Index: b/include/asm-powerpc/futex.h
===================================================================
--- a/include/asm-powerpc/futex.h
+++ b/include/asm-powerpc/futex.h
@@ -119,11 +119,5 @@ futex_atomic_cmpxchg_inatomic64(u64 __us
return 0;
}
-static inline int
-futex_atomic_op_inuser64 (int encoded_op, u64 __user *uaddr)
-{
- return 0;
-}
-
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_FUTEX_H */
Index: b/include/asm-s390/futex.h
===================================================================
--- a/include/asm-s390/futex.h
+++ b/include/asm-s390/futex.h
@@ -51,13 +51,7 @@ static inline int futex_atomic_cmpxchg_i
static inline u64
futex_atomic_cmpxchg_inatomic64(u64 __user *uaddr, u64 oldval, u64 newval)
{
- return 0;
-}
-
-static inline int
-futex_atomic_op_inuser64 (int encoded_op, u64 __user *uaddr)
-{
- return 0;
+ return -ENOSYS;
}
#endif /* __KERNEL__ */
Index: b/include/asm-sparc64/futex.h
===================================================================
--- a/include/asm-sparc64/futex.h
+++ b/include/asm-sparc64/futex.h
@@ -108,13 +108,7 @@ futex_atomic_cmpxchg_inatomic(int __user
static inline u64
futex_atomic_cmpxchg_inatomic64(u64 __user *uaddr, u64 oldval, u64 newval)
{
- return 0;
-}
-
-static inline int
-futex_atomic_op_inuser64 (int encoded_op, u64 __user *uaddr)
-{
- return 0;
+ return -ENOSYS;
}
#endif /* !(_SPARC64_FUTEX_H) */
Index: b/include/asm-um/futex.h
===================================================================
--- a/include/asm-um/futex.h
+++ b/include/asm-um/futex.h
@@ -6,14 +6,7 @@
static inline u64
futex_atomic_cmpxchg_inatomic64(u64 __user *uaddr, u64 oldval, u64 newval)
{
- return 0;
+ return -ENOSYS;
}
-static inline int
-futex_atomic_op_inuser64 (int encoded_op, u64 __user *uaddr)
-{
- return 0;
-}
-
-
#endif
Index: b/include/asm-x86_64/futex.h
===================================================================
--- a/include/asm-x86_64/futex.h
+++ b/include/asm-x86_64/futex.h
@@ -41,38 +41,6 @@
"=&r" (tem) \
: "r" (oparg), "i" (-EFAULT), "m" (*uaddr), "1" (0))
-#define __futex_atomic_op1_64(insn, ret, oldval, uaddr, oparg) \
- __asm__ __volatile ( \
-"1: " insn "\n" \
-"2: .section .fixup,\"ax\"\n\
-3: movq %3, %1\n\
- jmp 2b\n\
- .previous\n\
- .section __ex_table,\"a\"\n\
- .align 8\n\
- .quad 1b,3b\n\
- .previous" \
- : "=r" (oldval), "=r" (ret), "=m" (*uaddr) \
- : "i" (-EFAULT), "m" (*uaddr), "0" (oparg), "1" (0))
-
-#define __futex_atomic_op2_64(insn, ret, oldval, uaddr, oparg) \
- __asm__ __volatile ( \
-"1: movq %2, %0\n\
- movq %0, %3\n" \
- insn "\n" \
-"2: " LOCK_PREFIX "cmpxchgq %3, %2\n\
- jnz 1b\n\
-3: .section .fixup,\"ax\"\n\
-4: movq %5, %1\n\
- jmp 3b\n\
- .previous\n\
- .section __ex_table,\"a\"\n\
- .align 8\n\
- .quad 1b,4b,2b,4b\n\
- .previous" \
- : "=&a" (oldval), "=&r" (ret), "=m" (*uaddr), \
- "=&r" (tem) \
- : "r" (oparg), "i" (-EFAULT), "m" (*uaddr), "1" (0))
static inline int
futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
@@ -128,60 +96,6 @@ futex_atomic_op_inuser (int encoded_op,
}
static inline int
-futex_atomic_op_inuser64 (int encoded_op, u64 __user *uaddr)
-{
- int op = (encoded_op >> 28) & 7;
- int cmp = (encoded_op >> 24) & 15;
- u64 oparg = (encoded_op << 8) >> 20;
- u64 cmparg = (encoded_op << 20) >> 20;
- u64 oldval = 0, ret, tem;
-
- if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
- oparg = 1 << oparg;
-
- if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u64)))
- return -EFAULT;
-
- inc_preempt_count();
-
- switch (op) {
- case FUTEX_OP_SET:
- __futex_atomic_op1_64("xchgq %0, %2", ret, oldval, uaddr, oparg);
- break;
- case FUTEX_OP_ADD:
- __futex_atomic_op1_64(LOCK_PREFIX "xaddq %0, %2", ret, oldval,
- uaddr, oparg);
- break;
- case FUTEX_OP_OR:
- __futex_atomic_op2_64("orq %4, %3", ret, oldval, uaddr, oparg);
- break;
- case FUTEX_OP_ANDN:
- __futex_atomic_op2_64("andq %4, %3", ret, oldval, uaddr, ~oparg);
- break;
- case FUTEX_OP_XOR:
- __futex_atomic_op2_64("xorq %4, %3", ret, oldval, uaddr, oparg);
- break;
- default:
- ret = -ENOSYS;
- }
-
- dec_preempt_count();
-
- if (!ret) {
- switch (cmp) {
- case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
- case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
- case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
- case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
- case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
- case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
- default: ret = -ENOSYS;
- }
- }
- return ret;
-}
-
-static inline int
futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
{
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
Index: b/include/asm-x86_64/unistd.h
===================================================================
--- a/include/asm-x86_64/unistd.h
+++ b/include/asm-x86_64/unistd.h
@@ -619,8 +619,6 @@ __SYSCALL(__NR_sync_file_range, sys_sync
__SYSCALL(__NR_vmsplice, sys_vmsplice)
#define __NR_move_pages 279
__SYSCALL(__NR_move_pages, sys_move_pages)
-#define __NR_futex64 280
-__SYSCALL(__NR_futex64, sys_futex64)
#define __NR_signalfd 281
__SYSCALL(__NR_signalfd, sys_signalfd)
#define __NR_timerfd 282
Index: b/include/linux/futex.h
===================================================================
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -18,6 +18,10 @@ union ktime;
#define FUTEX_UNLOCK_PI 7
#define FUTEX_TRYLOCK_PI 8
#define FUTEX_CMP_REQUEUE_PI 9
+#ifdef CONFIG_64BIT
+#define FUTEX_WAIT64 10
+#define FUTEX_CMP_REQUEUE64 11
+#endif
/*
* Support for robust futexes: the kernel cleans up held futexes at
@@ -104,7 +108,7 @@ struct robust_list_head {
#ifdef __KERNEL__
long do_futex(unsigned long __user *uaddr, int op, unsigned long val,
union ktime *timeout, unsigned long __user *uaddr2,
- unsigned long val2, unsigned long val3, int futex64);
+ unsigned long val2, unsigned long val3);
extern int
handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi);
Index: b/include/linux/syscalls.h
===================================================================
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -178,9 +178,6 @@ asmlinkage long sys_set_tid_address(int
asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val,
struct timespec __user *utime, u32 __user *uaddr2,
u32 val3);
-asmlinkage long sys_futex64(u64 __user *uaddr, int op, u64 val,
- struct timespec __user *utime, u64 __user *uaddr2,
- u64 val3);
asmlinkage long sys_init_module(void __user *umod, unsigned long len,
const char __user *uargs);
Index: b/kernel/futex.c
===================================================================
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -62,20 +62,6 @@
#define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)
#ifdef CONFIG_64BIT
-static inline unsigned long
-futex_cmpxchg_inatomic(unsigned long __user *uaddr, unsigned long oldval,
- unsigned long newval, int futex64)
-{
- if (futex64)
- return futex_atomic_cmpxchg_inatomic64((u64 __user *)uaddr,
- oldval, newval);
- else {
- u32 ov = oldval, nv = newval;
- return futex_atomic_cmpxchg_inatomic((int __user *)uaddr, ov,
- nv);
- }
-}
-
static inline int
futex_get_user(unsigned long *val, unsigned long __user *uaddr, int futex64)
{
@@ -92,11 +78,7 @@ futex_get_user(unsigned long *val, unsig
}
#else
-#define futex_cmpxchg_inatomic(uaddr, oldval, newval, futex64) \
- futex_atomic_cmpxchg_inatomic((u32*)uaddr, oldval, newval)
-
#define futex_get_user(val, uaddr, futex64) get_user(*val, uaddr)
-
#endif
/*
@@ -606,12 +588,12 @@ static void wake_futex(struct futex_q *q
q->lock_ptr = NULL;
}
-static int wake_futex_pi(unsigned long __user *uaddr, unsigned long uval,
- struct futex_q *this, int futex64)
+static int wake_futex_pi(u32 __user *uaddr, unsigned long uval,
+ struct futex_q *this)
{
struct task_struct *new_owner;
struct futex_pi_state *pi_state = this->pi_state;
- unsigned long curval, newval;
+ u32 curval, newval;
if (!pi_state)
return -EINVAL;
@@ -639,7 +621,7 @@ static int wake_futex_pi(unsigned long _
newval |= (uval & FUTEX_WAITER_REQUEUED);
pagefault_disable();
- curval = futex_cmpxchg_inatomic(uaddr, uval, newval, futex64);
+ curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
pagefault_enable();
if (curval == -EFAULT)
return -EFAULT;
@@ -664,17 +646,16 @@ static int wake_futex_pi(unsigned long _
return 0;
}
-static int unlock_futex_pi(unsigned long __user *uaddr, unsigned long uval,
- int futex64)
+static int unlock_futex_pi(u32 __user *uaddr, unsigned long uval)
{
- unsigned long oldval;
+ u32 oldval;
/*
* There is no waiter, so we unlock the futex. The owner died
* bit has not to be preserved here. We are the owner:
*/
pagefault_disable();
- oldval = futex_cmpxchg_inatomic(uaddr, uval, 0, futex64);
+ oldval = futex_atomic_cmpxchg_inatomic(uaddr, uval, 0);
pagefault_enable();
if (oldval == -EFAULT)
@@ -748,19 +729,20 @@ out:
* or create a new one without owner.
*/
static inline int
-lookup_pi_state_for_requeue(unsigned long __user *uaddr,
+lookup_pi_state_for_requeue(u32 __user *uaddr,
struct futex_hash_bucket *hb,
union futex_key *key,
- struct futex_pi_state **pi_state, int futex64)
+ struct futex_pi_state **pi_state)
{
- unsigned long curval, uval, newval;
+ u32 curval, uval, newval;
retry:
/*
* We can't handle a fault cleanly because we can't
* release the locks here. Simply return the fault.
*/
- if (get_futex_value_locked(&curval, uaddr, futex64))
+ if (get_futex_value_locked((unsigned long *)&curval,
+ (unsigned long __user *)uaddr, 0))
return -EFAULT;
/* set the flags FUTEX_WAITERS and FUTEX_WAITER_REQUEUED */
@@ -774,7 +756,7 @@ retry:
newval = uval | FUTEX_WAITERS | FUTEX_WAITER_REQUEUED;
pagefault_disable();
- curval = futex_cmpxchg_inatomic(uaddr, uval, newval, futex64);
+ curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
pagefault_enable();
if (unlikely(curval == -EFAULT))
@@ -806,8 +788,8 @@ retry:
* one physical page to another physical page (PI-futex uaddr2)
*/
static int
-futex_requeue_pi(unsigned long __user *uaddr1, unsigned long __user *uaddr2,
- int nr_wake, int nr_requeue, unsigned long *cmpval, int futex64)
+futex_requeue_pi(u32 __user *uaddr1, u32 __user *uaddr2,
+ int nr_wake, int nr_requeue, unsigned long *cmpval)
{
union futex_key key1, key2;
struct futex_hash_bucket *hb1, *hb2;
@@ -840,9 +822,10 @@ retry:
double_lock_hb(hb1, hb2);
if (likely(cmpval != NULL)) {
- unsigned long curval;
+ u32 curval;
- ret = get_futex_value_locked(&curval, uaddr1, futex64);
+ ret = get_futex_value_locked((unsigned long*)&curval,
+ (unsigned long __user *)uaddr1, 0);
if (unlikely(ret)) {
spin_unlock(&hb1->lock);
@@ -855,7 +838,7 @@ retry:
*/
up_read(&current->mm->mmap_sem);
- ret = futex_get_user(&curval, uaddr1, futex64);
+ ret = get_user(curval, uaddr1);
if (!ret)
goto retry;
@@ -882,8 +865,7 @@ retry:
int s;
/* do this only the first time we requeue someone */
s = lookup_pi_state_for_requeue(uaddr2, hb2,
- &key2, &pi_state2,
- futex64);
+ &key2, &pi_state2);
if (s) {
ret = s;
goto out_unlock;
@@ -998,7 +980,7 @@ out:
*/
static int
futex_wake_op(unsigned long __user *uaddr1, unsigned long __user *uaddr2,
- int nr_wake, int nr_wake2, int op, int futex64)
+ int nr_wake, int nr_wake2, int op)
{
union futex_key key1, key2;
struct futex_hash_bucket *hb1, *hb2;
@@ -1022,16 +1004,10 @@ retryfull:
retry:
double_lock_hb(hb1, hb2);
-#ifdef CONFIG_64BIT
- if (futex64)
- op_ret = futex_atomic_op_inuser64(op, (u64 __user *)uaddr2);
- else
- op_ret = futex_atomic_op_inuser(op, (int __user *)uaddr2);
-#else
op_ret = futex_atomic_op_inuser(op, (int __user *)uaddr2);
-#endif
+
if (unlikely(op_ret < 0)) {
- unsigned long dummy;
+ u32 dummy;
spin_unlock(&hb1->lock);
if (hb1 != hb2)
@@ -1073,7 +1049,7 @@ retry:
*/
up_read(&current->mm->mmap_sem);
- ret = futex_get_user(&dummy, uaddr2, futex64);
+ ret = get_user(dummy, uaddr2);
if (ret)
return ret;
@@ -1379,8 +1355,18 @@ static int fixup_pi_state_owner(unsigned
while (!ret) {
newval = (uval & FUTEX_OWNER_DIED) | newtid;
newval |= (uval & FUTEX_WAITER_REQUEUED);
- curval = futex_cmpxchg_inatomic(uaddr,uval,
- newval, futex64);
+#ifdef CONFIG_64BIT
+ if (futex64)
+ curval = futex_atomic_cmpxchg_inatomic64(
+ (u64 __user *)uaddr,
+ uval, newval);
+ else
+#endif
+ curval = futex_atomic_cmpxchg_inatomic(
+ (u32 __user *)uaddr,
+ (u32)uval,
+ (u32)newval);
+
if (curval == -EFAULT)
ret = -EFAULT;
if (curval == uval)
@@ -1673,13 +1659,13 @@ static void set_pi_futex_owner(struct fu
* if there are waiters then it will block, it does PI, etc. (Due to
* races the kernel might see a 0 value of the futex too.)
*/
-static int futex_lock_pi(unsigned long __user *uaddr, int detect, ktime_t *time,
- int trylock, int futex64)
+static int futex_lock_pi(u32 __user *uaddr, int detect, ktime_t *time,
+ int trylock)
{
struct hrtimer_sleeper timeout, *to = NULL;
struct task_struct *curr = current;
struct futex_hash_bucket *hb;
- unsigned long uval, newval, curval;
+ u32 uval, newval, curval;
struct futex_q q;
int ret, lock_held, attempt = 0;
@@ -1714,7 +1700,7 @@ static int futex_lock_pi(unsigned long _
newval = current->pid;
pagefault_disable();
- curval = futex_cmpxchg_inatomic(uaddr, 0, newval, futex64);
+ curval = futex_atomic_cmpxchg_inatomic(uaddr, 0, newval);
pagefault_enable();
if (unlikely(curval == -EFAULT))
@@ -1759,7 +1745,7 @@ static int futex_lock_pi(unsigned long _
newval = curval | FUTEX_WAITERS;
pagefault_disable();
- curval = futex_cmpxchg_inatomic(uaddr, uval, newval, futex64);
+ curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
pagefault_enable();
if (unlikely(curval == -EFAULT))
@@ -1796,8 +1782,8 @@ static int futex_lock_pi(unsigned long _
FUTEX_OWNER_DIED | FUTEX_WAITERS;
pagefault_disable();
- curval = futex_cmpxchg_inatomic(uaddr, uval,
- newval, futex64);
+ curval = futex_atomic_cmpxchg_inatomic(uaddr, uval,
+ newval);
pagefault_enable();
if (unlikely(curval == -EFAULT))
@@ -1841,7 +1827,8 @@ static int futex_lock_pi(unsigned long _
*/
if (!ret && q.pi_state->owner != curr)
/* mmap_sem is unlocked at return of this function */
- ret = fixup_pi_state_owner(uaddr, &q, hb, curr, futex64);
+ ret = fixup_pi_state_owner((unsigned long __user *)uaddr,
+ &q, hb, curr, 0);
else {
/*
* Catch the rare case, where the lock was released
@@ -1887,7 +1874,7 @@ static int futex_lock_pi(unsigned long _
queue_unlock(&q, hb);
up_read(&curr->mm->mmap_sem);
- ret = futex_get_user(&uval, uaddr, futex64);
+ ret = get_user(uval, uaddr);
if (!ret && (uval != -EFAULT))
goto retry;
@@ -1899,17 +1886,17 @@ static int futex_lock_pi(unsigned long _
* This is the in-kernel slowpath: we look up the PI state (if any),
* and do the rt-mutex unlock.
*/
-static int futex_unlock_pi(unsigned long __user *uaddr, int futex64)
+static int futex_unlock_pi(u32 __user *uaddr)
{
struct futex_hash_bucket *hb;
struct futex_q *this, *next;
- unsigned long uval;
+ u32 uval;
struct plist_head *head;
union futex_key key;
int ret, attempt = 0;
retry:
- if (futex_get_user(&uval, uaddr, futex64))
+ if (get_user(uval, uaddr))
return -EFAULT;
/*
* We release only a lock we actually own:
@@ -1936,7 +1923,7 @@ retry_locked:
*/
if (!(uval & FUTEX_OWNER_DIED)) {
pagefault_disable();
- uval = futex_cmpxchg_inatomic(uaddr, current->pid, 0, futex64);
+ uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0);
pagefault_enable();
}
@@ -1958,7 +1945,7 @@ retry_locked:
plist_for_each_entry_safe(this, next, head, list) {
if (!match_futex (&this->key, &key))
continue;
- ret = wake_futex_pi(uaddr, uval, this, futex64);
+ ret = wake_futex_pi(uaddr, uval, this);
/*
* The atomic access to the futex value
* generated a pagefault, so retry the
@@ -1972,7 +1959,7 @@ retry_locked:
* No waiters - kernel unlocks the futex:
*/
if (!(uval & FUTEX_OWNER_DIED)) {
- ret = unlock_futex_pi(uaddr, uval, futex64);
+ ret = unlock_futex_pi(uaddr, uval);
if (ret == -EFAULT)
goto pi_faulted;
}
@@ -2002,7 +1989,7 @@ pi_faulted:
spin_unlock(&hb->lock);
up_read(&current->mm->mmap_sem);
- ret = futex_get_user(&uval, uaddr, futex64);
+ ret = get_user(uval, uaddr);
if (!ret && (uval != -EFAULT))
goto retry;
@@ -2323,94 +2310,71 @@ void exit_robust_list(struct task_struct
long do_futex(unsigned long __user *uaddr, int op, unsigned long val,
ktime_t *timeout, unsigned long __user *uaddr2,
- unsigned long val2, unsigned long val3, int fut64)
+ unsigned long val2, unsigned long val3)
{
int ret;
switch (op) {
case FUTEX_WAIT:
- ret = futex_wait(uaddr, val, timeout, fut64);
+ ret = futex_wait(uaddr, val, timeout, 0);
break;
case FUTEX_WAKE:
ret = futex_wake(uaddr, val);
break;
case FUTEX_FD:
- if (fut64)
- ret = -ENOSYS;
- else
- /* non-zero val means F_SETOWN(getpid())&F_SETSIG(val) */
- ret = futex_fd((u32 __user *)uaddr, val);
+ /* non-zero val means F_SETOWN(getpid())&F_SETSIG(val) */
+ ret = futex_fd((u32 __user *)uaddr, val);
break;
case FUTEX_REQUEUE:
- ret = futex_requeue(uaddr, uaddr2, val, val2, NULL, fut64);
+ ret = futex_requeue(uaddr, uaddr2, val, val2, NULL, 0);
break;
case FUTEX_CMP_REQUEUE:
- ret = futex_requeue(uaddr, uaddr2, val, val2, &val3, fut64);
+ ret = futex_requeue(uaddr, uaddr2, val, val2, &val3, 0);
break;
case FUTEX_WAKE_OP:
- ret = futex_wake_op(uaddr, uaddr2, val, val2, val3, fut64);
+ ret = futex_wake_op(uaddr, uaddr2, val, val2, val3);
break;
case FUTEX_LOCK_PI:
- ret = futex_lock_pi(uaddr, val, timeout, 0, fut64);
+ ret = futex_lock_pi((u32 __user *)uaddr, val, timeout, 0);
break;
case FUTEX_UNLOCK_PI:
- ret = futex_unlock_pi(uaddr, fut64);
+ ret = futex_unlock_pi((u32 __user *)uaddr);
break;
case FUTEX_TRYLOCK_PI:
- ret = futex_lock_pi(uaddr, 0, timeout, 1, fut64);
+ ret = futex_lock_pi((u32 __user *)uaddr, 0, timeout, 1);
break;
case FUTEX_CMP_REQUEUE_PI:
- ret = futex_requeue_pi(uaddr, uaddr2, val, val2, &val3, fut64);
+ ret = futex_requeue_pi((u32 __user *)uaddr,
+ (u32 __user *)uaddr2,
+ val, val2, &val3);
+ break;
+#ifdef CONFIG_64BIT
+ case FUTEX_WAIT64:
+ ret = futex_wait(uaddr, val, timeout, 1);
+ break;
+ case FUTEX_CMP_REQUEUE64:
+ ret = futex_requeue(uaddr, uaddr2, val, val2, &val3, 1);
break;
+#endif
default:
ret = -ENOSYS;
}
return ret;
}
-#ifdef CONFIG_64BIT
-
-asmlinkage long
-sys_futex64(u64 __user *uaddr, int op, u64 val,
- struct timespec __user *utime, u64 __user *uaddr2, u64 val3)
-{
- struct timespec ts;
- ktime_t t, *tp = NULL;
- u64 val2 = 0;
-
- if (utime && (op == FUTEX_WAIT || op == FUTEX_LOCK_PI)) {
- if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
- return -EFAULT;
- if (!timespec_valid(&ts))
- return -EINVAL;
-
- t = timespec_to_ktime(ts);
- if (op == FUTEX_WAIT)
- t = ktime_add(ktime_get(), t);
- tp = &t;
- }
- /*
- * requeue parameter in 'utime' if op == FUTEX_REQUEUE.
- */
- if (op == FUTEX_REQUEUE || op == FUTEX_CMP_REQUEUE
- || op == FUTEX_CMP_REQUEUE_PI)
- val2 = (unsigned long) utime;
-
- return do_futex((unsigned long __user*)uaddr, op, val, tp,
- (unsigned long __user*)uaddr2, val2, val3, 1);
-}
-
-#endif
-
asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val,
struct timespec __user *utime, u32 __user *uaddr2,
u32 val3)
{
struct timespec ts;
ktime_t t, *tp = NULL;
- u32 val2 = 0;
+ unsigned long val2 = 0;
- if (utime && (op == FUTEX_WAIT || op == FUTEX_LOCK_PI)) {
+ if (utime && (op == FUTEX_WAIT
+#ifdef CONFIG_64BIT
+ || op == FUTEX_WAIT64
+#endif
+ || op == FUTEX_LOCK_PI)) {
if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
return -EFAULT;
if (!timespec_valid(&ts))
@@ -2425,11 +2389,14 @@ asmlinkage long sys_futex(u32 __user *ua
* requeue parameter in 'utime' if op == FUTEX_REQUEUE.
*/
if (op == FUTEX_REQUEUE || op == FUTEX_CMP_REQUEUE
+#ifdef CONFIG_64BIT
+ || op == FUTEX_CMP_REQUEUE64
+#endif
|| op == FUTEX_CMP_REQUEUE_PI)
- val2 = (u32) (unsigned long) utime;
+ val2 = (unsigned long) utime;
return do_futex((unsigned long __user*)uaddr, op, val, tp,
- (unsigned long __user*)uaddr2, val2, val3, 0);
+ (unsigned long __user*)uaddr2, val2, val3);
}
static int futexfs_get_sb(struct file_system_type *fs_type,
Index: b/kernel/futex_compat.c
===================================================================
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -161,5 +161,5 @@ asmlinkage long compat_sys_futex(u32 __u
val2 = (int) (unsigned long) utime;
return do_futex((unsigned long __user*)uaddr, op, val, tp,
- (unsigned long __user*)uaddr2, val2, val3, 0);
+ (unsigned long __user*)uaddr2, val2, val3);
}
Index: b/kernel/sys_ni.c
===================================================================
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -41,7 +41,6 @@ cond_syscall(sys_sendmsg);
cond_syscall(sys_recvmsg);
cond_syscall(sys_socketcall);
cond_syscall(sys_futex);
-cond_syscall(sys_futex64);
cond_syscall(compat_sys_futex);
cond_syscall(sys_set_robust_list);
cond_syscall(compat_sys_set_robust_list);
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH -mm take2] 64bit-futex - provide new commands instead of new syscall
2007-04-24 8:07 ` [PATCH -mm take2] " Pierre Peiffer
@ 2007-04-24 13:25 ` Ulrich Drepper
0 siblings, 0 replies; 12+ messages in thread
From: Ulrich Drepper @ 2007-04-24 13:25 UTC (permalink / raw)
To: Pierre Peiffer
Cc: Jakub Jelinek, akpm, mingo, drepper, linux-kernel, jean-pierre.dion
On 4/24/07, Pierre Peiffer <pierre.peiffer@bull.net> wrote:
> Something like that may be...
Yep, looks good to me.
^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2007-04-24 13:25 UTC | newest]
Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-03-21 9:54 [PATCH 2.6.21-rc4-mm1 0/4] Futexes functionalities and improvements Pierre.Peiffer
2007-03-21 9:54 ` [PATCH 2.6.21-rc4-mm1 1/4] futex priority based wakeup Pierre.Peiffer
2007-03-21 9:54 ` [PATCH 2.6.21-rc4-mm1 2/4] Make futex_wait() use an hrtimer for timeout Pierre.Peiffer
2007-03-26 9:57 ` Andrew Morton
2007-03-21 9:54 ` [PATCH 2.6.21-rc4-mm1 3/4] futex_requeue_pi optimization Pierre.Peiffer
2007-03-21 9:54 ` [PATCH 2.6.21-rc4-mm1 4/4] sys_futex64 : allows 64bit futexes Pierre.Peiffer
2007-03-26 11:20 ` Andrew Morton
2007-03-27 11:07 ` Jakub Jelinek
2007-04-23 14:35 ` [PATCH -mm] 64bit-futex - provide new commands instead of new syscall Pierre Peiffer
2007-04-23 15:30 ` Ulrich Drepper
2007-04-24 8:07 ` [PATCH -mm take2] " Pierre Peiffer
2007-04-24 13:25 ` Ulrich Drepper
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).