LKML Archive on lore.kernel.org
* [PATCH 2.6.21-rc4-mm1 0/4] Futexes functionalities and improvements
@ 2007-03-21  9:54 Pierre.Peiffer
  2007-03-21  9:54 ` [PATCH 2.6.21-rc4-mm1 1/4] futex priority based wakeup Pierre.Peiffer
                   ` (3 more replies)
  0 siblings, 4 replies; 12+ messages in thread
From: Pierre.Peiffer @ 2007-03-21  9:54 UTC (permalink / raw)
  To: akpm; +Cc: mingo, drepper, linux-kernel, jean-pierre.dion

Hi Andrew,

	This is a re-send of a series of patches concerning futexes (a
short description follows).
	I have reworked the patches to take into account the latest futex
changes, so this series should apply cleanly on the -mm tree (the changes
mostly affect patch 2, "futex_wait uses hrtimer").
	I also took into account Peter Zijlstra's remark on patch 3
concerning futex_requeue_pi.

	Could you consider them (again) for inclusion in the -mm tree?

	All of them were already discussed in January and have been
included in -rt for a while. I think we agreed to potentially include them
in the -mm tree.

	And, again, Ulrich is especially interested in sys_futex64.

They are:
    * futex uses prio list: allows RT-threads to be woken in priority order
instead of FIFO order.
    * futex_wait uses hrtimer: allows the use of finer timer resolution.
    * futex_requeue_pi functionality: allows use of the requeue optimization
for PI-mutexes/PI-futexes.
    * futex64 syscall: allows use of 64-bit futexes instead of 32-bit.
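
For reference, all four patches build on the existing futex syscall, whose
prototype (as declared in include/linux/syscalls.h) is:

    asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val,
                              struct timespec __user *utime, u32 __user *uaddr2,
                              u32 val3);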


Thanks,


-- 
Pierre P.


* [PATCH 2.6.21-rc4-mm1 1/4] futex priority based wakeup
  2007-03-21  9:54 [PATCH 2.6.21-rc4-mm1 0/4] Futexes functionalities and improvements Pierre.Peiffer
@ 2007-03-21  9:54 ` Pierre.Peiffer
  2007-03-21  9:54 ` [PATCH 2.6.21-rc4-mm1 2/4] Make futex_wait() use an hrtimer for timeout Pierre.Peiffer
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 12+ messages in thread
From: Pierre.Peiffer @ 2007-03-21  9:54 UTC (permalink / raw)
  To: akpm
  Cc: mingo, drepper, linux-kernel, jean-pierre.dion, Sebastien Dugue,
	Pierre Peiffer

[-- Attachment #1: futex-use-prio-list.diff --]
[-- Type: text/plain, Size: 7985 bytes --]

Today, all threads waiting for a given futex are woken in FIFO order (first
waiter woken first) instead of priority order.

This patch makes use of a plist (priority-ordered list) instead of a simple
list in futex_hash_bucket.

All non-RT threads are stored with priority MAX_RT_PRIO, causing them to be
woken last, in FIFO order (RT-threads are woken first, in priority order).
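
As an illustrative sketch (not part of the patch; the names here are
placeholders), this is how the plist API orders waiters. The head/lock
pairing matches what the patch does in init():

    #include <linux/plist.h>
    #include <linux/sched.h>	/* MAX_RT_PRIO */
    #include <linux/spinlock.h>

    static DEFINE_SPINLOCK(chain_lock);
    static struct plist_head chain;
    static struct plist_node rt_waiter, other_waiter;

    static void plist_order_demo(void)
    {
    	plist_head_init(&chain, &chain_lock);	/* head tied to its lock */
    	plist_node_init(&rt_waiter, 10);	/* RT: prio < MAX_RT_PRIO */
    	plist_node_init(&other_waiter, MAX_RT_PRIO);	/* non-RT thread */
    	plist_add(&other_waiter, &chain);
    	plist_add(&rt_waiter, &chain);	/* sorts ahead of other_waiter */
    }

Nodes of equal priority keep their insertion order, which is what preserves
the current FIFO behaviour for non-RT waiters.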

Signed-off-by: Sebastien Dugue <sebastien.dugue@bull.net>
Signed-off-by: Pierre Peiffer <pierre.peiffer@bull.net>

---
 kernel/futex.c |   78 +++++++++++++++++++++++++++++++++++----------------------
 1 file changed, 49 insertions(+), 29 deletions(-)

Index: b/kernel/futex.c
===================================================================
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -81,12 +81,12 @@ struct futex_pi_state {
  * we can wake only the relevant ones (hashed queues may be shared).
  *
  * A futex_q has a woken state, just like tasks have TASK_RUNNING.
- * It is considered woken when list_empty(&q->list) || q->lock_ptr == 0.
+ * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0.
  * The order of wakup is always to make the first condition true, then
  * wake up q->waiters, then make the second condition true.
  */
 struct futex_q {
-	struct list_head list;
+	struct plist_node list;
 	wait_queue_head_t waiters;
 
 	/* Which hash list lock to use: */
@@ -108,8 +108,8 @@ struct futex_q {
  * Split the global futex_lock into every hash list lock.
  */
 struct futex_hash_bucket {
-       spinlock_t              lock;
-       struct list_head       chain;
+	spinlock_t lock;
+	struct plist_head chain;
 };
 
 static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS];
@@ -443,13 +443,13 @@ lookup_pi_state(u32 uval, struct futex_h
 {
 	struct futex_pi_state *pi_state = NULL;
 	struct futex_q *this, *next;
-	struct list_head *head;
+	struct plist_head *head;
 	struct task_struct *p;
 	pid_t pid;
 
 	head = &hb->chain;
 
-	list_for_each_entry_safe(this, next, head, list) {
+	plist_for_each_entry_safe(this, next, head, list) {
 		if (match_futex(&this->key, &me->key)) {
 			/*
 			 * Another waiter already exists - bump up
@@ -513,12 +513,12 @@ lookup_pi_state(u32 uval, struct futex_h
  */
 static void wake_futex(struct futex_q *q)
 {
-	list_del_init(&q->list);
+	plist_del(&q->list, &q->list.plist);
 	if (q->filp)
 		send_sigio(&q->filp->f_owner, q->fd, POLL_IN);
 	/*
 	 * The lock in wake_up_all() is a crucial memory barrier after the
-	 * list_del_init() and also before assigning to q->lock_ptr.
+	 * plist_del() and also before assigning to q->lock_ptr.
 	 */
 	wake_up_all(&q->waiters);
 	/*
@@ -633,7 +633,7 @@ static int futex_wake(u32 __user *uaddr,
 {
 	struct futex_hash_bucket *hb;
 	struct futex_q *this, *next;
-	struct list_head *head;
+	struct plist_head *head;
 	union futex_key key;
 	int ret;
 
@@ -647,7 +647,7 @@ static int futex_wake(u32 __user *uaddr,
 	spin_lock(&hb->lock);
 	head = &hb->chain;
 
-	list_for_each_entry_safe(this, next, head, list) {
+	plist_for_each_entry_safe(this, next, head, list) {
 		if (match_futex (&this->key, &key)) {
 			if (this->pi_state) {
 				ret = -EINVAL;
@@ -675,7 +675,7 @@ futex_wake_op(u32 __user *uaddr1, u32 __
 {
 	union futex_key key1, key2;
 	struct futex_hash_bucket *hb1, *hb2;
-	struct list_head *head;
+	struct plist_head *head;
 	struct futex_q *this, *next;
 	int ret, op_ret, attempt = 0;
 
@@ -748,7 +748,7 @@ retry:
 
 	head = &hb1->chain;
 
-	list_for_each_entry_safe(this, next, head, list) {
+	plist_for_each_entry_safe(this, next, head, list) {
 		if (match_futex (&this->key, &key1)) {
 			wake_futex(this);
 			if (++ret >= nr_wake)
@@ -760,7 +760,7 @@ retry:
 		head = &hb2->chain;
 
 		op_ret = 0;
-		list_for_each_entry_safe(this, next, head, list) {
+		plist_for_each_entry_safe(this, next, head, list) {
 			if (match_futex (&this->key, &key2)) {
 				wake_futex(this);
 				if (++op_ret >= nr_wake2)
@@ -787,7 +787,7 @@ static int futex_requeue(u32 __user *uad
 {
 	union futex_key key1, key2;
 	struct futex_hash_bucket *hb1, *hb2;
-	struct list_head *head1;
+	struct plist_head *head1;
 	struct futex_q *this, *next;
 	int ret, drop_count = 0;
 
@@ -836,7 +836,7 @@ static int futex_requeue(u32 __user *uad
 	}
 
 	head1 = &hb1->chain;
-	list_for_each_entry_safe(this, next, head1, list) {
+	plist_for_each_entry_safe(this, next, head1, list) {
 		if (!match_futex (&this->key, &key1))
 			continue;
 		if (++ret <= nr_wake) {
@@ -847,9 +847,13 @@ static int futex_requeue(u32 __user *uad
 			 * requeue.
 			 */
 			if (likely(head1 != &hb2->chain)) {
-				list_move_tail(&this->list, &hb2->chain);
+				plist_del(&this->list, &hb1->chain);
+				plist_add(&this->list, &hb2->chain);
 				this->lock_ptr = &hb2->lock;
-			}
+#ifdef CONFIG_DEBUG_PI_LIST
+				this->list.plist.lock = &hb2->lock;
+#endif
+ 			}
 			this->key = key2;
 			get_futex_key_refs(&key2);
 			drop_count++;
@@ -894,7 +898,23 @@ queue_lock(struct futex_q *q, int fd, st
 
 static inline void __queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
 {
-	list_add_tail(&q->list, &hb->chain);
+	int prio;
+
+	/*
+	 * The priority used to register this element is
+	 * - either the real thread-priority for the real-time threads
+	 * (i.e. threads with a priority lower than MAX_RT_PRIO)
+	 * - or MAX_RT_PRIO for non-RT threads.
+	 * Thus, all RT-threads are woken first in priority order, and
+	 * the others are woken last, in FIFO order.
+	 */
+	prio = min(current->normal_prio, MAX_RT_PRIO);
+
+	plist_node_init(&q->list, prio);
+#ifdef CONFIG_DEBUG_PI_LIST
+	q->list.plist.lock = &hb->lock;
+#endif
+	plist_add(&q->list, &hb->chain);
 	q->task = current;
 	spin_unlock(&hb->lock);
 }
@@ -949,8 +969,8 @@ static int unqueue_me(struct futex_q *q)
 			spin_unlock(lock_ptr);
 			goto retry;
 		}
-		WARN_ON(list_empty(&q->list));
-		list_del(&q->list);
+		WARN_ON(plist_node_empty(&q->list));
+		plist_del(&q->list, &q->list.plist);
 
 		BUG_ON(q->pi_state);
 
@@ -968,8 +988,8 @@ static int unqueue_me(struct futex_q *q)
  */
 static void unqueue_me_pi(struct futex_q *q, struct futex_hash_bucket *hb)
 {
-	WARN_ON(list_empty(&q->list));
-	list_del(&q->list);
+	WARN_ON(plist_node_empty(&q->list));
+	plist_del(&q->list, &q->list.plist);
 
 	BUG_ON(!q->pi_state);
 	free_pi_state(q->pi_state);
@@ -1065,11 +1085,11 @@ static int futex_wait_abstime(u32 __user
 	__set_current_state(TASK_INTERRUPTIBLE);
 	add_wait_queue(&q.waiters, &wait);
 	/*
-	 * !list_empty() is safe here without any lock.
+	 * !plist_node_empty() is safe here without any lock.
 	 * q.lock_ptr != 0 is not safe, because of ordering against wakeup.
 	 */
 	time_left = 0;
-	if (likely(!list_empty(&q.list))) {
+	if (likely(!plist_node_empty(&q.list))) {
 		unsigned long rel_time;
 
 		if (timed) {
@@ -1384,7 +1404,7 @@ static int futex_unlock_pi(u32 __user *u
 	struct futex_hash_bucket *hb;
 	struct futex_q *this, *next;
 	u32 uval;
-	struct list_head *head;
+	struct plist_head *head;
 	union futex_key key;
 	int ret, attempt = 0;
 
@@ -1435,7 +1455,7 @@ retry_locked:
 	 */
 	head = &hb->chain;
 
-	list_for_each_entry_safe(this, next, head, list) {
+	plist_for_each_entry_safe(this, next, head, list) {
 		if (!match_futex (&this->key, &key))
 			continue;
 		ret = wake_futex_pi(uaddr, uval, this);
@@ -1509,10 +1529,10 @@ static unsigned int futex_poll(struct fi
 	poll_wait(filp, &q->waiters, wait);
 
 	/*
-	 * list_empty() is safe here without any lock.
+	 * plist_node_empty() is safe here without any lock.
 	 * q->lock_ptr != 0 is not safe, because of ordering against wakeup.
 	 */
-	if (list_empty(&q->list))
+	if (plist_node_empty(&q->list))
 		ret = POLLIN | POLLRDNORM;
 
 	return ret;
@@ -1895,7 +1915,7 @@ static int __init init(void)
 	}
 
 	for (i = 0; i < ARRAY_SIZE(futex_queues); i++) {
-		INIT_LIST_HEAD(&futex_queues[i].chain);
+		plist_head_init(&futex_queues[i].chain, &futex_queues[i].lock);
 		spin_lock_init(&futex_queues[i].lock);
 	}
 	return 0;

-- 
Pierre Peiffer


* [PATCH 2.6.21-rc4-mm1 2/4] Make futex_wait() use an hrtimer for timeout
  2007-03-21  9:54 [PATCH 2.6.21-rc4-mm1 0/4] Futexes functionalities and improvements Pierre.Peiffer
  2007-03-21  9:54 ` [PATCH 2.6.21-rc4-mm1 1/4] futex priority based wakeup Pierre.Peiffer
@ 2007-03-21  9:54 ` Pierre.Peiffer
  2007-03-26  9:57   ` Andrew Morton
  2007-03-21  9:54 ` [PATCH 2.6.21-rc4-mm1 3/4] futex_requeue_pi optimization Pierre.Peiffer
  2007-03-21  9:54 ` [PATCH 2.6.21-rc4-mm1 4/4] sys_futex64 : allows 64bit futexes Pierre.Peiffer
  3 siblings, 1 reply; 12+ messages in thread
From: Pierre.Peiffer @ 2007-03-21  9:54 UTC (permalink / raw)
  To: akpm
  Cc: mingo, drepper, linux-kernel, jean-pierre.dion, Sebastien Dugue,
	Pierre Peiffer

[-- Attachment #1: futex_wait-use-hrtimer.diff --]
[-- Type: text/plain, Size: 8977 bytes --]

This patch modifies futex_wait() to use an hrtimer + schedule() in place of
schedule_timeout().

  schedule_timeout() is tick based, therefore the timeout granularity is
the tick (1 ms, 4 ms or 10 ms depending on HZ). By using a high resolution
timer for timeout wakeup, we can attain a much finer timeout granularity
(in the microsecond range). This parallels what is already done for
futex_lock_pi().

  The timeout passed to the syscall is no longer converted to jiffies;
it is passed to do_futex() and futex_wait() as an absolute ktime_t,
keeping nanosecond resolution.

  This also removes the need to pass the nanosecond part of the timeout to
futex_lock_pi() in val2.

  In futex_wait(), if there is no timeout then a regular schedule() is
performed. Otherwise, an hrtimer is armed before schedule() is called.
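
As a hypothetical userspace illustration (futex_word and expected_val are
placeholders, not from this patch), a sub-millisecond timeout is now
honoured with hrtimer accuracy instead of being rounded up to jiffies:

    #include <linux/futex.h>
    #include <sys/syscall.h>
    #include <time.h>
    #include <unistd.h>

    /* Sleep on futex_word while it still holds expected_val, for at most
       500 us. The timeout stays relative at the syscall boundary;
       sys_futex() turns it into an absolute ktime_t internally. */
    struct timespec ts = { .tv_sec = 0, .tv_nsec = 500 * 1000 };
    syscall(__NR_futex, &futex_word, FUTEX_WAIT, expected_val, &ts, NULL, 0);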

Signed-off-by: Sebastien Dugue <sebastien.dugue@bull.net>
Signed-off-by: Pierre Peiffer <pierre.peiffer@bull.net>

---
 include/linux/futex.h |    3 +
 kernel/futex.c        |   85 ++++++++++++++++++++++++--------------------------
 kernel/futex_compat.c |   17 ++++------
 3 files changed, 51 insertions(+), 54 deletions(-)

Index: b/kernel/futex.c
===================================================================
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1001,16 +1001,16 @@ static void unqueue_me_pi(struct futex_q
 }
 
 static long futex_wait_restart(struct restart_block *restart);
-static int futex_wait_abstime(u32 __user *uaddr, u32 val,
-			int timed, unsigned long abs_time)
+static int futex_wait(u32 __user *uaddr, u32 val, ktime_t *abs_time)
 {
 	struct task_struct *curr = current;
 	DECLARE_WAITQUEUE(wait, curr);
 	struct futex_hash_bucket *hb;
 	struct futex_q q;
-	unsigned long time_left = 0;
 	u32 uval;
 	int ret;
+	struct hrtimer_sleeper t;
+	int rem = 0;
 
 	q.pi_state = NULL;
  retry:
@@ -1088,20 +1088,29 @@ static int futex_wait_abstime(u32 __user
 	 * !plist_node_empty() is safe here without any lock.
 	 * q.lock_ptr != 0 is not safe, because of ordering against wakeup.
 	 */
-	time_left = 0;
 	if (likely(!plist_node_empty(&q.list))) {
-		unsigned long rel_time;
+		if (!abs_time)
+			schedule();
+		else {
+			hrtimer_init(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+			hrtimer_init_sleeper(&t, current);
+			t.timer.expires = *abs_time;
+
+			hrtimer_start(&t.timer, t.timer.expires, HRTIMER_MODE_ABS);
+
+			/*
+			 * the timer could have already expired, in which
+			 * case current would be flagged for rescheduling.
+			 * Don't bother calling schedule.
+			 */
+			if (likely(t.task))
+				schedule();
 
-		if (timed) {
-			unsigned long now = jiffies;
-			if (time_after(now, abs_time))
-				rel_time = 0;
-			else
-				rel_time = abs_time - now;
-		} else
-			rel_time = MAX_SCHEDULE_TIMEOUT;
+			hrtimer_cancel(&t.timer);
 
-		time_left = schedule_timeout(rel_time);
+			/* Flag if a timeout occured */
+			rem = (t.task == NULL);
+		}
 	}
 	__set_current_state(TASK_RUNNING);
 
@@ -1113,14 +1122,14 @@ static int futex_wait_abstime(u32 __user
 	/* If we were woken (and unqueued), we succeeded, whatever. */
 	if (!unqueue_me(&q))
 		return 0;
-	if (time_left == 0)
+	if (rem)
 		return -ETIMEDOUT;
 
 	/*
 	 * We expect signal_pending(current), but another thread may
 	 * have handled it for us already.
 	 */
-	if (time_left == MAX_SCHEDULE_TIMEOUT)
+	if (!abs_time)
 		return -ERESTARTSYS;
 	else {
 		struct restart_block *restart;
@@ -1128,8 +1137,7 @@ static int futex_wait_abstime(u32 __user
 		restart->fn = futex_wait_restart;
 		restart->arg0 = (unsigned long)uaddr;
 		restart->arg1 = (unsigned long)val;
-		restart->arg2 = (unsigned long)timed;
-		restart->arg3 = abs_time;
+		restart->arg2 = (unsigned long)abs_time;
 		return -ERESTART_RESTARTBLOCK;
 	}
 
@@ -1141,21 +1149,15 @@ static int futex_wait_abstime(u32 __user
 	return ret;
 }
 
-static int futex_wait(u32 __user *uaddr, u32 val, unsigned long rel_time)
-{
-	int timed = (rel_time != MAX_SCHEDULE_TIMEOUT);
-	return futex_wait_abstime(uaddr, val, timed, jiffies+rel_time);
-}
 
 static long futex_wait_restart(struct restart_block *restart)
 {
 	u32 __user *uaddr = (u32 __user *)restart->arg0;
 	u32 val = (u32)restart->arg1;
-	int timed = (int)restart->arg2;
-	unsigned long abs_time = restart->arg3;
+	ktime_t *abs_time = (ktime_t *)restart->arg2;
 
 	restart->fn = do_no_restart_syscall;
-	return (long)futex_wait_abstime(uaddr, val, timed, abs_time);
+	return (long)futex_wait(uaddr, val, abs_time);
 }
 
 
@@ -1165,8 +1167,8 @@ static long futex_wait_restart(struct re
  * if there are waiters then it will block, it does PI, etc. (Due to
  * races the kernel might see a 0 value of the futex too.)
  */
-static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec,
-			 long nsec, int trylock)
+static int futex_lock_pi(u32 __user *uaddr, int detect, ktime_t *time,
+			 int trylock)
 {
 	struct hrtimer_sleeper timeout, *to = NULL;
 	struct task_struct *curr = current;
@@ -1178,11 +1180,11 @@ static int futex_lock_pi(u32 __user *uad
 	if (refill_pi_state_cache())
 		return -ENOMEM;
 
-	if (sec != MAX_SCHEDULE_TIMEOUT) {
+	if (time) {
 		to = &timeout;
 		hrtimer_init(&to->timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
 		hrtimer_init_sleeper(to, current);
-		to->timer.expires = ktime_set(sec, nsec);
+		to->timer.expires = *time;
 	}
 
 	q.pi_state = NULL;
@@ -1818,7 +1820,7 @@ void exit_robust_list(struct task_struct
 	}
 }
 
-long do_futex(u32 __user *uaddr, int op, u32 val, unsigned long timeout,
+long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
 		u32 __user *uaddr2, u32 val2, u32 val3)
 {
 	int ret;
@@ -1844,13 +1846,13 @@ long do_futex(u32 __user *uaddr, int op,
 		ret = futex_wake_op(uaddr, uaddr2, val, val2, val3);
 		break;
 	case FUTEX_LOCK_PI:
-		ret = futex_lock_pi(uaddr, val, timeout, val2, 0);
+		ret = futex_lock_pi(uaddr, val, timeout, 0);
 		break;
 	case FUTEX_UNLOCK_PI:
 		ret = futex_unlock_pi(uaddr);
 		break;
 	case FUTEX_TRYLOCK_PI:
-		ret = futex_lock_pi(uaddr, 0, timeout, val2, 1);
+		ret = futex_lock_pi(uaddr, 0, timeout, 1);
 		break;
 	default:
 		ret = -ENOSYS;
@@ -1863,21 +1865,18 @@ asmlinkage long sys_futex(u32 __user *ua
 			  struct timespec __user *utime, u32 __user *uaddr2,
 			  u32 val3)
 {
-	struct timespec t;
-	unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
+	struct timespec ts;
+	ktime_t t, *tp = NULL;
 	u32 val2 = 0;
 
 	if (utime && (op == FUTEX_WAIT || op == FUTEX_LOCK_PI)) {
-		if (copy_from_user(&t, utime, sizeof(t)) != 0)
+		if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
 			return -EFAULT;
-		if (!timespec_valid(&t))
+		if (!timespec_valid(&ts))
 			return -EINVAL;
 		if (op == FUTEX_WAIT)
-			timeout = timespec_to_jiffies(&t) + 1;
-		else {
-			timeout = t.tv_sec;
-			val2 = t.tv_nsec;
-		}
+			t = ktime_add(ktime_get(), timespec_to_ktime(ts));
+		tp = &t;
 	}
 	/*
 	 * requeue parameter in 'utime' if op == FUTEX_REQUEUE.
@@ -1885,7 +1884,7 @@ asmlinkage long sys_futex(u32 __user *ua
 	if (op == FUTEX_REQUEUE || op == FUTEX_CMP_REQUEUE)
 		val2 = (u32) (unsigned long) utime;
 
-	return do_futex(uaddr, op, val, timeout, uaddr2, val2, val3);
+	return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
 }
 
 static int futexfs_get_sb(struct file_system_type *fs_type,
Index: b/include/linux/futex.h
===================================================================
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -1,6 +1,7 @@
 #ifndef _LINUX_FUTEX_H
 #define _LINUX_FUTEX_H
 
+#include <linux/ktime.h>
 #include <linux/sched.h>
 
 /* Second argument to futex syscall */
@@ -94,7 +95,7 @@ struct robust_list_head {
 #define ROBUST_LIST_LIMIT	2048
 
 #ifdef __KERNEL__
-long do_futex(u32 __user *uaddr, int op, u32 val, unsigned long timeout,
+long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
 	      u32 __user *uaddr2, u32 val2, u32 val3);
 
 extern int
Index: b/kernel/futex_compat.c
===================================================================
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -141,24 +141,21 @@ asmlinkage long compat_sys_futex(u32 __u
 		struct compat_timespec __user *utime, u32 __user *uaddr2,
 		u32 val3)
 {
-	struct timespec t;
-	unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
+	struct timespec ts;
+	ktime_t t, *tp = NULL;
 	int val2 = 0;
 
 	if (utime && (op == FUTEX_WAIT || op == FUTEX_LOCK_PI)) {
-		if (get_compat_timespec(&t, utime))
+		if (get_compat_timespec(&ts, utime))
 			return -EFAULT;
-		if (!timespec_valid(&t))
+		if (!timespec_valid(&ts))
 			return -EINVAL;
 		if (op == FUTEX_WAIT)
-			timeout = timespec_to_jiffies(&t) + 1;
-		else {
-			timeout = t.tv_sec;
-			val2 = t.tv_nsec;
-		}
+			t = ktime_add(ktime_get(), timespec_to_ktime(ts));
+		tp = &t;
 	}
 	if (op == FUTEX_REQUEUE || op == FUTEX_CMP_REQUEUE)
 		val2 = (int) (unsigned long) utime;
 
-	return do_futex(uaddr, op, val, timeout, uaddr2, val2, val3);
+	return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
 }

-- 
Pierre Peiffer


* [PATCH 2.6.21-rc4-mm1 3/4] futex_requeue_pi optimization
  2007-03-21  9:54 [PATCH 2.6.21-rc4-mm1 0/4] Futexes functionalities and improvements Pierre.Peiffer
  2007-03-21  9:54 ` [PATCH 2.6.21-rc4-mm1 1/4] futex priority based wakeup Pierre.Peiffer
  2007-03-21  9:54 ` [PATCH 2.6.21-rc4-mm1 2/4] Make futex_wait() use an hrtimer for timeout Pierre.Peiffer
@ 2007-03-21  9:54 ` Pierre.Peiffer
  2007-03-21  9:54 ` [PATCH 2.6.21-rc4-mm1 4/4] sys_futex64 : allows 64bit futexes Pierre.Peiffer
  3 siblings, 0 replies; 12+ messages in thread
From: Pierre.Peiffer @ 2007-03-21  9:54 UTC (permalink / raw)
  To: akpm; +Cc: mingo, drepper, linux-kernel, jean-pierre.dion, Pierre Peiffer

[-- Attachment #1: futex-requeue-pi.diff --]
[-- Type: text/plain, Size: 26758 bytes --]

This patch provides the futex_requeue_pi functionality, which allows some
threads waiting on a normal futex to be requeued on the wait-queue of
a PI-futex.

This provides, for PI-futexes, an optimization already used for (normal)
futexes.

This optimization is currently used by glibc in pthread_cond_broadcast when
using "normal" mutexes. With futex_requeue_pi, it can be used with
PRIO_INHERIT mutexes too.
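
As a hypothetical sketch of the intended glibc usage (cond_futex,
pi_mutex_futex and cond_val are placeholder names, not glibc internals),
a broadcast would wake one waiter and requeue the rest onto the PI-futex:

    /* Wake one waiter on the condvar futex and requeue all the others
       onto the PI-mutex futex. val3 is checked against *uaddr1 first, as
       with FUTEX_CMP_REQUEUE (-EAGAIN on mismatch); nr_requeue travels
       in the utime argument slot. */
    syscall(__NR_futex, &cond_futex, FUTEX_CMP_REQUEUE_PI,
            1,					/* nr_wake */
            (void *)(unsigned long) INT_MAX,	/* nr_requeue */
            &pi_mutex_futex,			/* uaddr2: the PI-futex */
            cond_val);				/* val3: expected value */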

Signed-off-by: Pierre Peiffer <pierre.peiffer@bull.net>

---
 include/linux/futex.h   |    9 
 kernel/futex.c          |  541 +++++++++++++++++++++++++++++++++++++++++++-----
 kernel/futex_compat.c   |    3 
 kernel/rtmutex.c        |   41 ---
 kernel/rtmutex_common.h |   34 +++
 5 files changed, 540 insertions(+), 88 deletions(-)

Index: b/include/linux/futex.h
===================================================================
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -16,6 +16,7 @@
 #define FUTEX_LOCK_PI		6
 #define FUTEX_UNLOCK_PI		7
 #define FUTEX_TRYLOCK_PI	8
+#define FUTEX_CMP_REQUEUE_PI	9
 
 /*
  * Support for robust futexes: the kernel cleans up held futexes at
@@ -84,9 +85,14 @@ struct robust_list_head {
 #define FUTEX_OWNER_DIED	0x40000000
 
 /*
+ * Some processes have been requeued on this PI-futex
+ */
+#define FUTEX_WAITER_REQUEUED	0x20000000
+
+/*
  * The rest of the robust-futex field is for the TID:
  */
-#define FUTEX_TID_MASK		0x3fffffff
+#define FUTEX_TID_MASK		0x0fffffff
 
 /*
  * This limit protects against a deliberately circular list.
@@ -110,6 +116,7 @@ handle_futex_death(u32 __user *uaddr, st
  * We set bit 0 to indicate if it's an inode-based key.
  */
 union futex_key {
+	u32 __user *uaddr;
 	struct {
 		unsigned long pgoff;
 		struct inode *inode;
Index: b/kernel/futex.c
===================================================================
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -53,6 +53,12 @@
 
 #include "rtmutex_common.h"
 
+#ifdef CONFIG_DEBUG_RT_MUTEXES
+# include "rtmutex-debug.h"
+#else
+# include "rtmutex.h"
+#endif
+
 #define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)
 
 /*
@@ -102,6 +108,12 @@ struct futex_q {
 	/* Optional priority inheritance state: */
 	struct futex_pi_state *pi_state;
 	struct task_struct *task;
+
+	/*
+	 * This waiter is used in case of requeue from a
+	 * normal futex to a PI-futex
+	 */
+	struct rt_mutex_waiter waiter;
 };
 
 /*
@@ -180,6 +192,9 @@ int get_futex_key(u32 __user *uaddr, uni
 	if (unlikely((vma->vm_flags & (VM_IO|VM_READ)) != VM_READ))
 		return (vma->vm_flags & VM_IO) ? -EPERM : -EACCES;
 
+	/* Save the user address in the key */
+	key->uaddr = uaddr;
+
 	/*
 	 * Private mappings are handled in a simple way.
 	 *
@@ -439,7 +454,8 @@ void exit_pi_state_list(struct task_stru
 }
 
 static int
-lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
+lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
+		union futex_key *key, struct futex_pi_state **ps)
 {
 	struct futex_pi_state *pi_state = NULL;
 	struct futex_q *this, *next;
@@ -450,7 +466,7 @@ lookup_pi_state(u32 uval, struct futex_h
 	head = &hb->chain;
 
 	plist_for_each_entry_safe(this, next, head, list) {
-		if (match_futex(&this->key, &me->key)) {
+		if (match_futex(&this->key, key)) {
 			/*
 			 * Another waiter already exists - bump up
 			 * the refcount and return its pi_state:
@@ -465,7 +481,7 @@ lookup_pi_state(u32 uval, struct futex_h
 			WARN_ON(!atomic_read(&pi_state->refcount));
 
 			atomic_inc(&pi_state->refcount);
-			me->pi_state = pi_state;
+			*ps = pi_state;
 
 			return 0;
 		}
@@ -492,7 +508,7 @@ lookup_pi_state(u32 uval, struct futex_h
 	rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p);
 
 	/* Store the key for possible exit cleanups: */
-	pi_state->key = me->key;
+	pi_state->key = *key;
 
 	spin_lock_irq(&p->pi_lock);
 	WARN_ON(!list_empty(&pi_state->list));
@@ -502,7 +518,7 @@ lookup_pi_state(u32 uval, struct futex_h
 
 	put_task_struct(p);
 
-	me->pi_state = pi_state;
+	*ps = pi_state;
 
 	return 0;
 }
@@ -562,6 +578,8 @@ static int wake_futex_pi(u32 __user *uad
 	 */
 	if (!(uval & FUTEX_OWNER_DIED)) {
 		newval = FUTEX_WAITERS | new_owner->pid;
+		/* Keep the FUTEX_WAITER_REQUEUED flag if it was set */
+		newval |= (uval & FUTEX_WAITER_REQUEUED);
 
 		pagefault_disable();
 		curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
@@ -666,6 +684,254 @@ out:
 }
 
 /*
+ * Called from futex_requeue_pi.
+ * Set FUTEX_WAITERS and FUTEX_WAITER_REQUEUED flags on the
+ * PI-futex value; search its associated pi_state if an owner exist
+ * or create a new one without owner.
+ */
+static inline int
+lookup_pi_state_for_requeue(u32 __user *uaddr, struct futex_hash_bucket *hb,
+			    union futex_key *key,
+			    struct futex_pi_state **pi_state)
+{
+	u32 curval, uval, newval;
+
+retry:
+	/*
+	 * We can't handle a fault cleanly because we can't
+	 * release the locks here. Simply return the fault.
+	 */
+	if (get_futex_value_locked(&curval, uaddr))
+		return -EFAULT;
+
+	/* set the flags FUTEX_WAITERS and FUTEX_WAITER_REQUEUED */
+	if ((curval & (FUTEX_WAITERS | FUTEX_WAITER_REQUEUED))
+	    != (FUTEX_WAITERS | FUTEX_WAITER_REQUEUED)) {
+		/*
+		 * No waiters yet, we prepare the futex to have some waiters.
+		 */
+
+		uval = curval;
+		newval = uval | FUTEX_WAITERS | FUTEX_WAITER_REQUEUED;
+
+		pagefault_disable();
+		curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
+		pagefault_enable();
+
+		if (unlikely(curval == -EFAULT))
+			return -EFAULT;
+		if (unlikely(curval != uval))
+			goto retry;
+	}
+
+	if (!(curval & FUTEX_TID_MASK)
+	    || lookup_pi_state(curval, hb, key, pi_state)) {
+		/* the futex has no owner (yet) or the lookup failed:
+		   allocate one pi_state without owner */
+
+		*pi_state = alloc_pi_state();
+
+		/* Already stores the key: */
+		(*pi_state)->key = *key;
+
+		/* init the mutex without owner */
+		__rt_mutex_init(&(*pi_state)->pi_mutex, NULL);
+	}
+
+	return 0;
+}
+
+/*
+ * Wake up the first nr_wake waiters hashed on one physical page
+ * (futex1), and requeue the next nr_requeue waiters following them
+ * onto another physical page (the PI-futex uaddr2).
+ */
+static int futex_requeue_pi(u32 __user *uaddr1, u32 __user *uaddr2,
+			    int nr_wake, int nr_requeue, u32 *cmpval)
+{
+	union futex_key key1, key2;
+	struct futex_hash_bucket *hb1, *hb2;
+	struct plist_head *head1;
+	struct futex_q *this, *next;
+	struct futex_pi_state *pi_state2 = NULL;
+	struct rt_mutex_waiter *waiter, *top_waiter = NULL;
+	struct rt_mutex *lock2 = NULL;
+	int ret, drop_count = 0;
+
+	if (refill_pi_state_cache())
+		return -ENOMEM;
+
+retry:
+	/*
+	 * First take all the futex related locks:
+	 */
+	down_read(&current->mm->mmap_sem);
+
+	ret = get_futex_key(uaddr1, &key1);
+	if (unlikely(ret != 0))
+		goto out;
+	ret = get_futex_key(uaddr2, &key2);
+	if (unlikely(ret != 0))
+		goto out;
+
+	hb1 = hash_futex(&key1);
+	hb2 = hash_futex(&key2);
+
+	double_lock_hb(hb1, hb2);
+
+	if (likely(cmpval != NULL)) {
+		u32 curval;
+
+		ret = get_futex_value_locked(&curval, uaddr1);
+
+		if (unlikely(ret)) {
+			spin_unlock(&hb1->lock);
+			if (hb1 != hb2)
+				spin_unlock(&hb2->lock);
+
+			/*
+			 * If we would have faulted, release mmap_sem, fault
+			 * it in and start all over again.
+			 */
+			up_read(&current->mm->mmap_sem);
+
+			ret = get_user(curval, uaddr1);
+
+			if (!ret)
+				goto retry;
+
+			return ret;
+		}
+		if (curval != *cmpval) {
+			ret = -EAGAIN;
+			goto out_unlock;
+		}
+	}
+
+	head1 = &hb1->chain;
+	plist_for_each_entry_safe(this, next, head1, list) {
+		if (!match_futex (&this->key, &key1))
+			continue;
+		if (++ret <= nr_wake) {
+			wake_futex(this);
+		} else {
+			/*
+			 * FIRST: get and set the pi_state
+			 */
+			if (!pi_state2) {
+				int s;
+				/* do this only the first time we requeue someone */
+				s = lookup_pi_state_for_requeue(uaddr2, hb2,
+								&key2, &pi_state2);
+				if (s) {
+					ret = s;
+					goto out_unlock;
+				}
+
+				lock2 = &pi_state2->pi_mutex;
+				spin_lock(&lock2->wait_lock);
+
+				/* Save the top waiter of the wait_list */
+				if (rt_mutex_has_waiters(lock2))
+					top_waiter = rt_mutex_top_waiter(lock2);
+			} else
+				atomic_inc(&pi_state2->refcount);
+
+
+			this->pi_state = pi_state2;
+
+			/*
+			 * SECOND: requeue futex_q to the correct hashbucket
+			 */
+
+			/*
+			 * If key1 and key2 hash to the same bucket, no need to
+			 * requeue.
+			 */
+			if (likely(head1 != &hb2->chain)) {
+				plist_del(&this->list, &hb1->chain);
+				plist_add(&this->list, &hb2->chain);
+				this->lock_ptr = &hb2->lock;
+#ifdef CONFIG_DEBUG_PI_LIST
+				this->list.plist.lock = &hb2->lock;
+#endif
+			}
+			this->key = key2;
+			get_futex_key_refs(&key2);
+			drop_count++;
+
+
+			/*
+			 * THIRD: queue it to lock2
+			 */
+			spin_lock_irq(&this->task->pi_lock);
+			waiter = &this->waiter;
+			waiter->task = this->task;
+			waiter->lock = lock2;
+			plist_node_init(&waiter->list_entry, this->task->prio);
+			plist_node_init(&waiter->pi_list_entry, this->task->prio);
+			plist_add(&waiter->list_entry, &lock2->wait_list);
+			this->task->pi_blocked_on = waiter;
+			spin_unlock_irq(&this->task->pi_lock);
+
+			if (ret - nr_wake >= nr_requeue)
+				break;
+		}
+	}
+
+	/* If we've requeued some tasks and the top_waiter of the rt_mutex
+	   has changed, we must adjust the priority of the owner, if any */
+	if (drop_count) {
+		struct task_struct *owner = rt_mutex_owner(lock2);
+		if (owner &&
+		    (top_waiter != (waiter = rt_mutex_top_waiter(lock2)))) {
+			int chain_walk = 0;
+
+			spin_lock_irq(&owner->pi_lock);
+			if (top_waiter)
+				plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters);
+			else
+				/*
+				 * There were no waiters before the requeue,
+				 * so the flag must be updated
+				 */
+				mark_rt_mutex_waiters(lock2);
+
+			plist_add(&waiter->pi_list_entry, &owner->pi_waiters);
+			__rt_mutex_adjust_prio(owner);
+			if (owner->pi_blocked_on) {
+				chain_walk = 1;
+				get_task_struct(owner);
+			}
+
+			spin_unlock_irq(&owner->pi_lock);
+			spin_unlock(&lock2->wait_lock);
+
+			if (chain_walk)
+				rt_mutex_adjust_prio_chain(owner, 0, lock2, NULL,
+							   current);
+		} else {
+			/* No owner or the top_waiter does not change */
+			mark_rt_mutex_waiters(lock2);
+			spin_unlock(&lock2->wait_lock);
+		}
+	}
+
+out_unlock:
+	spin_unlock(&hb1->lock);
+	if (hb1 != hb2)
+		spin_unlock(&hb2->lock);
+
+	/* drop_futex_key_refs() must be called outside the spinlocks. */
+	while (--drop_count >= 0)
+		drop_futex_key_refs(&key1);
+
+out:
+	up_read(&current->mm->mmap_sem);
+	return ret;
+}
+
+/*
  * Wake up all waiters hashed on the physical page that is mapped
  * to this virtual address:
  */
@@ -984,9 +1250,10 @@ static int unqueue_me(struct futex_q *q)
 
 /*
  * PI futexes can not be requeued and must remove themself from the
- * hash bucket. The hash bucket lock is held on entry and dropped here.
+ * hash bucket. The hash bucket lock (i.e. lock_ptr) is held on entry
+ * and dropped here.
  */
-static void unqueue_me_pi(struct futex_q *q, struct futex_hash_bucket *hb)
+static void unqueue_me_pi(struct futex_q *q)
 {
 	WARN_ON(plist_node_empty(&q->list));
 	plist_del(&q->list, &q->list.plist);
@@ -995,11 +1262,65 @@ static void unqueue_me_pi(struct futex_q
 	free_pi_state(q->pi_state);
 	q->pi_state = NULL;
 
-	spin_unlock(&hb->lock);
+	spin_unlock(q->lock_ptr);
 
 	drop_futex_key_refs(&q->key);
 }
 
+/*
+ * Fixup the pi_state owner with current.
+ *
+ * The curr->mm semaphore must be held; it is released on return from
+ * this function.
+ */
+static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
+				struct futex_hash_bucket *hb,
+				struct task_struct *curr)
+{
+	u32 newtid = curr->pid | FUTEX_WAITERS;
+	struct futex_pi_state *pi_state = q->pi_state;
+	u32 uval, curval, newval;
+	int ret;
+
+	/* Owner died? */
+	if (pi_state->owner != NULL) {
+		spin_lock_irq(&pi_state->owner->pi_lock);
+		WARN_ON(list_empty(&pi_state->list));
+		list_del_init(&pi_state->list);
+		spin_unlock_irq(&pi_state->owner->pi_lock);
+	} else
+		newtid |= FUTEX_OWNER_DIED;
+
+	pi_state->owner = curr;
+
+	spin_lock_irq(&curr->pi_lock);
+	WARN_ON(!list_empty(&pi_state->list));
+	list_add(&pi_state->list, &curr->pi_state_list);
+	spin_unlock_irq(&curr->pi_lock);
+
+	/* Unqueue and drop the lock */
+	unqueue_me_pi(q);
+	up_read(&curr->mm->mmap_sem);
+	/*
+	 * We own it, so we have to replace the pending owner
+	 * TID. This must be atomic as we have to preserve the
+	 * owner died bit here.
+	 */
+	ret = get_user(uval, uaddr);
+	while (!ret) {
+		newval = (uval & FUTEX_OWNER_DIED) | newtid;
+		newval |= (uval & FUTEX_WAITER_REQUEUED);
+		curval = futex_atomic_cmpxchg_inatomic(uaddr,
+						       uval, newval);
+		if (curval == -EFAULT)
+ 			ret = -EFAULT;
+		if (curval == uval)
+			break;
+		uval = curval;
+	}
+	return ret;
+}
+
 static long futex_wait_restart(struct restart_block *restart);
 static int futex_wait(u32 __user *uaddr, u32 val, ktime_t *abs_time)
 {
@@ -1009,7 +1330,7 @@ static int futex_wait(u32 __user *uaddr,
 	struct futex_q q;
 	u32 uval;
 	int ret;
-	struct hrtimer_sleeper t;
+	struct hrtimer_sleeper t, *to = NULL;
 	int rem = 0;
 
 	q.pi_state = NULL;
@@ -1063,6 +1384,14 @@ static int futex_wait(u32 __user *uaddr,
 	if (uval != val)
 		goto out_unlock_release_sem;
 
+	/*
+	 * This rt_mutex_waiter structure is prepared here and will
+	 * be used only if this task is requeued from a normal futex to
+	 * a PI-futex with futex_requeue_pi.
+	 */
+	debug_rt_mutex_init_waiter(&q.waiter);
+	q.waiter.task = NULL;
+
 	/* Only actually queue if *uaddr contained val.  */
 	__queue_me(&q, hb);
 
@@ -1092,6 +1421,7 @@ static int futex_wait(u32 __user *uaddr,
 		if (!abs_time)
 			schedule();
 		else {
+			to = &t;
 			hrtimer_init(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
 			hrtimer_init_sleeper(&t, current);
 			t.timer.expires = *abs_time;
@@ -1119,6 +1449,66 @@ static int futex_wait(u32 __user *uaddr,
 	 * we are the only user of it.
 	 */
 
+	if (q.pi_state) {
+		/*
+		 * We were woken but have been requeued on a PI-futex.
+		 * We have to complete the lock acquisition by taking
+		 * the rtmutex.
+		 */
+
+		struct rt_mutex *lock = &q.pi_state->pi_mutex;
+
+		spin_lock(&lock->wait_lock);
+		if (unlikely(q.waiter.task)) {
+			remove_waiter(lock, &q.waiter);
+		}
+		spin_unlock(&lock->wait_lock);
+
+		if (rem)
+			ret = -ETIMEDOUT;
+		else
+			ret = rt_mutex_timed_lock(lock, to, 1);
+
+		down_read(&curr->mm->mmap_sem);
+		spin_lock(q.lock_ptr);
+
+		/*
+		 * Got the lock. We might not be the anticipated owner if we
+		 * did a lock-steal - fix up the PI-state in that case.
+		 */
+		if (!ret && q.pi_state->owner != curr) {
+			/*
+			 * We MUST play with the futex we were requeued on,
+			 * NOT the current futex.
+			 * We can retrieve it from the key of the pi_state
+			 */
+			uaddr = q.pi_state->key.uaddr;
+
+			/* mmap_sem and hash_bucket lock are unlocked at
+			   return of this function */
+			ret = fixup_pi_state_owner(uaddr, &q, hb, curr);
+		} else {
+			/*
+			 * Catch the rare case, where the lock was released
+			 * when we were on the way back before we locked
+			 * the hash bucket.
+			 */
+			if (ret && q.pi_state->owner == curr) {
+				if (rt_mutex_trylock(&q.pi_state->pi_mutex))
+					ret = 0;
+			}
+			/* Unqueue and drop the lock */
+			unqueue_me_pi(&q);
+			up_read(&curr->mm->mmap_sem);
+		}
+
+		debug_rt_mutex_free_waiter(&q.waiter);
+
+		return ret;
+	}
+
+	debug_rt_mutex_free_waiter(&q.waiter);
+
 	/* If we were woken (and unqueued), we succeeded, whatever. */
 	if (!unqueue_me(&q))
 		return 0;
@@ -1161,6 +1551,51 @@ static long futex_wait_restart(struct re
 }
 
 
+static void set_pi_futex_owner(struct futex_hash_bucket *hb,
+			       union futex_key *key, struct task_struct *p)
+{
+	struct plist_head *head;
+	struct futex_q *this, *next;
+	struct futex_pi_state *pi_state = NULL;
+	struct rt_mutex *lock;
+
+	/* Search for a waiter that should already exist */
+
+	head = &hb->chain;
+
+	plist_for_each_entry_safe(this, next, head, list) {
+		if (match_futex (&this->key, key)) {
+			pi_state = this->pi_state;
+			break;
+		}
+	}
+
+	BUG_ON(!pi_state);
+
+	/* set p as pi_state's owner */
+	lock = &pi_state->pi_mutex;
+
+	spin_lock(&lock->wait_lock);
+	spin_lock_irq(&p->pi_lock);
+
+	list_add(&pi_state->list, &p->pi_state_list);
+	pi_state->owner = p;
+
+
+	/* set p as pi_mutex's owner */
+	debug_rt_mutex_proxy_lock(lock, p);
+	WARN_ON(rt_mutex_owner(lock));
+	rt_mutex_set_owner(lock, p, 0);
+	rt_mutex_deadlock_account_lock(lock, p);
+
+	plist_add(&rt_mutex_top_waiter(lock)->pi_list_entry,
+		  &p->pi_waiters);
+	__rt_mutex_adjust_prio(p);
+
+	spin_unlock_irq(&p->pi_lock);
+	spin_unlock(&lock->wait_lock);
+}
+
 /*
  * Userspace tried a 0 -> TID atomic transition of the futex value
  * and failed. The kernel side here does the whole locking operation:
@@ -1175,7 +1610,7 @@ static int futex_lock_pi(u32 __user *uad
 	struct futex_hash_bucket *hb;
 	u32 uval, newval, curval;
 	struct futex_q q;
-	int ret, attempt = 0;
+	int ret, lock_held, attempt = 0;
 
 	if (refill_pi_state_cache())
 		return -ENOMEM;
@@ -1198,6 +1633,8 @@ static int futex_lock_pi(u32 __user *uad
 	hb = queue_lock(&q, -1, NULL);
 
  retry_locked:
+	lock_held = 0;
+
 	/*
 	 * To avoid races, we attempt to take the lock here again
 	 * (by doing a 0 -> TID atomic cmpxchg), while holding all
@@ -1216,7 +1653,16 @@ static int futex_lock_pi(u32 __user *uad
 	if (unlikely((curval & FUTEX_TID_MASK) == current->pid)) {
 		if (!detect && 0)
 			force_sig(SIGKILL, current);
-		ret = -EDEADLK;
+		/*
+		 * Normally, this check is done in user space.
+		 * In case of requeue, the owner may attempt to lock this futex,
+		 * even if the ownership has already been given by the previous
+		 * waker.
+		 * In the usual case this is a deadlock, but not in the
+		 * case of REQUEUE_PI.
+		 */
+		if (!(curval & FUTEX_WAITER_REQUEUED))
+			ret = -EDEADLK;
 		goto out_unlock_release_sem;
 	}
 
@@ -1228,7 +1674,18 @@ static int futex_lock_pi(u32 __user *uad
 		goto out_unlock_release_sem;
 
 	uval = curval;
-	newval = uval | FUTEX_WAITERS;
+	/*
+	 * In case of a requeue, check if there already is an owner
+	 * If not, just take the futex.
+	 */
+	if ((curval & FUTEX_WAITER_REQUEUED) && !(curval & FUTEX_TID_MASK)) {
+		/* set current as futex owner */
+		newval = curval | current->pid;
+		lock_held = 1;
+	} else
+		/* Set the WAITERS flag, so the owner will know it has someone
+		   to wake at next unlock */
+		newval = curval | FUTEX_WAITERS;
 
 	pagefault_disable();
 	curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
@@ -1239,11 +1696,16 @@ static int futex_lock_pi(u32 __user *uad
 	if (unlikely(curval != uval))
 		goto retry_locked;
 
+	if (lock_held) {
+		set_pi_futex_owner(hb, &q.key, curr);
+		goto out_unlock_release_sem;
+	}
+
 	/*
 	 * We dont have the lock. Look up the PI state (or create it if
 	 * we are the first waiter):
 	 */
-	ret = lookup_pi_state(uval, hb, &q);
+	ret = lookup_pi_state(uval, hb, &q.key, &q.pi_state);
 
 	if (unlikely(ret)) {
 		/*
@@ -1306,45 +1768,10 @@ static int futex_lock_pi(u32 __user *uad
 	 * Got the lock. We might not be the anticipated owner if we
 	 * did a lock-steal - fix up the PI-state in that case.
 	 */
-	if (!ret && q.pi_state->owner != curr) {
-		u32 newtid = current->pid | FUTEX_WAITERS;
-
-		/* Owner died? */
-		if (q.pi_state->owner != NULL) {
-			spin_lock_irq(&q.pi_state->owner->pi_lock);
-			WARN_ON(list_empty(&q.pi_state->list));
-			list_del_init(&q.pi_state->list);
-			spin_unlock_irq(&q.pi_state->owner->pi_lock);
-		} else
-			newtid |= FUTEX_OWNER_DIED;
-
-		q.pi_state->owner = current;
-
-		spin_lock_irq(&current->pi_lock);
-		WARN_ON(!list_empty(&q.pi_state->list));
-		list_add(&q.pi_state->list, &current->pi_state_list);
-		spin_unlock_irq(&current->pi_lock);
-
-		/* Unqueue and drop the lock */
-		unqueue_me_pi(&q, hb);
-		up_read(&curr->mm->mmap_sem);
-		/*
-		 * We own it, so we have to replace the pending owner
-		 * TID. This must be atomic as we have preserve the
-		 * owner died bit here.
-		 */
-		ret = get_user(uval, uaddr);
-		while (!ret) {
-			newval = (uval & FUTEX_OWNER_DIED) | newtid;
-			curval = futex_atomic_cmpxchg_inatomic(uaddr,
-							       uval, newval);
-			if (curval == -EFAULT)
-				ret = -EFAULT;
-			if (curval == uval)
-				break;
-			uval = curval;
-		}
-	} else {
+	if (!ret && q.pi_state->owner != curr)
+		/* mmap_sem is unlocked at return of this function */
+		ret = fixup_pi_state_owner(uaddr, &q, hb, curr);
+	else {
 		/*
 		 * Catch the rare case, where the lock was released
 		 * when we were on the way back before we locked
@@ -1355,7 +1782,7 @@ static int futex_lock_pi(u32 __user *uad
 				ret = 0;
 		}
 		/* Unqueue and drop the lock */
-		unqueue_me_pi(&q, hb);
+		unqueue_me_pi(&q);
 		up_read(&curr->mm->mmap_sem);
 	}
 
@@ -1724,6 +2151,8 @@ retry:
 		 * userspace.
 		 */
 		mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
+		/* Also keep the FUTEX_WAITER_REQUEUED flag if set */
+		mval |= (uval & FUTEX_WAITER_REQUEUED);
 		nval = futex_atomic_cmpxchg_inatomic(uaddr, uval, mval);
 
 		if (nval == -EFAULT)
@@ -1854,6 +2283,9 @@ long do_futex(u32 __user *uaddr, int op,
 	case FUTEX_TRYLOCK_PI:
 		ret = futex_lock_pi(uaddr, 0, timeout, 1);
 		break;
+	case FUTEX_CMP_REQUEUE_PI:
+		ret = futex_requeue_pi(uaddr, uaddr2, val, val2, &val3);
+		break;
 	default:
 		ret = -ENOSYS;
 	}
@@ -1881,7 +2313,8 @@ asmlinkage long sys_futex(u32 __user *ua
 	/*
 	 * requeue parameter in 'utime' if op == FUTEX_REQUEUE.
 	 */
-	if (op == FUTEX_REQUEUE || op == FUTEX_CMP_REQUEUE)
+	if (op == FUTEX_REQUEUE || op == FUTEX_CMP_REQUEUE
+	    || op == FUTEX_CMP_REQUEUE_PI)
 		val2 = (u32) (unsigned long) utime;
 
 	return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
Index: b/kernel/rtmutex.c
===================================================================
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -56,7 +56,7 @@
  * state.
  */
 
-static void
+void
 rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner,
 		   unsigned long mask)
 {
@@ -81,29 +81,6 @@ static void fixup_rt_mutex_waiters(struc
 }
 
 /*
- * We can speed up the acquire/release, if the architecture
- * supports cmpxchg and if there's no debugging state to be set up
- */
-#if defined(__HAVE_ARCH_CMPXCHG) && !defined(CONFIG_DEBUG_RT_MUTEXES)
-# define rt_mutex_cmpxchg(l,c,n)	(cmpxchg(&l->owner, c, n) == c)
-static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
-{
-	unsigned long owner, *p = (unsigned long *) &lock->owner;
-
-	do {
-		owner = *p;
-	} while (cmpxchg(p, owner, owner | RT_MUTEX_HAS_WAITERS) != owner);
-}
-#else
-# define rt_mutex_cmpxchg(l,c,n)	(0)
-static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
-{
-	lock->owner = (struct task_struct *)
-			((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS);
-}
-#endif
-
-/*
  * Calculate task priority from the waiter list priority
  *
  * Return task->normal_prio when the waiter list is empty or when
@@ -123,7 +100,7 @@ int rt_mutex_getprio(struct task_struct 
  *
  * This can be both boosting and unboosting. task->pi_lock must be held.
  */
-static void __rt_mutex_adjust_prio(struct task_struct *task)
+void __rt_mutex_adjust_prio(struct task_struct *task)
 {
 	int prio = rt_mutex_getprio(task);
 
@@ -159,11 +136,11 @@ int max_lock_depth = 1024;
  * Decreases task's usage by one - may thus free the task.
  * Returns 0 or -EDEADLK.
  */
-static int rt_mutex_adjust_prio_chain(struct task_struct *task,
-				      int deadlock_detect,
-				      struct rt_mutex *orig_lock,
-				      struct rt_mutex_waiter *orig_waiter,
-				      struct task_struct *top_task)
+int rt_mutex_adjust_prio_chain(struct task_struct *task,
+			       int deadlock_detect,
+			       struct rt_mutex *orig_lock,
+			       struct rt_mutex_waiter *orig_waiter,
+			       struct task_struct *top_task)
 {
 	struct rt_mutex *lock;
 	struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
@@ -524,8 +501,8 @@ static void wakeup_next_waiter(struct rt
  *
  * Must be called with lock->wait_lock held
  */
-static void remove_waiter(struct rt_mutex *lock,
-			  struct rt_mutex_waiter *waiter)
+void remove_waiter(struct rt_mutex *lock,
+		   struct rt_mutex_waiter *waiter)
 {
 	int first = (waiter == rt_mutex_top_waiter(lock));
 	struct task_struct *owner = rt_mutex_owner(lock);
Index: b/kernel/rtmutex_common.h
===================================================================
--- a/kernel/rtmutex_common.h
+++ b/kernel/rtmutex_common.h
@@ -113,6 +113,29 @@ static inline unsigned long rt_mutex_own
 }
 
 /*
+ * We can speed up the acquire/release, if the architecture
+ * supports cmpxchg and if there's no debugging state to be set up
+ */
+#if defined(__HAVE_ARCH_CMPXCHG) && !defined(CONFIG_DEBUG_RT_MUTEXES)
+# define rt_mutex_cmpxchg(l,c,n)	(cmpxchg(&l->owner, c, n) == c)
+static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
+{
+	unsigned long owner, *p = (unsigned long *) &lock->owner;
+
+	do {
+		owner = *p;
+	} while (cmpxchg(p, owner, owner | RT_MUTEX_HAS_WAITERS) != owner);
+}
+#else
+# define rt_mutex_cmpxchg(l,c,n)	(0)
+static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
+{
+	lock->owner = (struct task_struct *)
+			((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS);
+}
+#endif
+
+/*
  * PI-futex support (proxy locking functions, etc.):
  */
 extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock);
@@ -120,4 +143,15 @@ extern void rt_mutex_init_proxy_locked(s
 				       struct task_struct *proxy_owner);
 extern void rt_mutex_proxy_unlock(struct rt_mutex *lock,
 				  struct task_struct *proxy_owner);
+
+extern void rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner,
+			       unsigned long mask);
+extern void __rt_mutex_adjust_prio(struct task_struct *task);
+extern int rt_mutex_adjust_prio_chain(struct task_struct *task,
+				      int deadlock_detect,
+				      struct rt_mutex *orig_lock,
+				      struct rt_mutex_waiter *orig_waiter,
+				      struct task_struct *top_task);
+extern void remove_waiter(struct rt_mutex *lock,
+			  struct rt_mutex_waiter *waiter);
 #endif
Index: b/kernel/futex_compat.c
===================================================================
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -154,7 +154,8 @@ asmlinkage long compat_sys_futex(u32 __u
 			t = ktime_add(ktime_get(), timespec_to_ktime(ts));
 		tp = &t;
 	}
-	if (op == FUTEX_REQUEUE || op == FUTEX_CMP_REQUEUE)
+	if (op == FUTEX_REQUEUE || op == FUTEX_CMP_REQUEUE
+	    || op == FUTEX_CMP_REQUEUE_PI)
 		val2 = (int) (unsigned long) utime;
 
 	return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);

-- 
Pierre Peiffer


* [PATCH 2.6.21-rc4-mm1 4/4] sys_futex64 : allows 64bit futexes
  2007-03-21  9:54 [PATCH 2.6.21-rc4-mm1 0/4] Futexes functionalities and improvements Pierre.Peiffer
                   ` (2 preceding siblings ...)
  2007-03-21  9:54 ` [PATCH 2.6.21-rc4-mm1 3/4] futex_requeue_pi optimization Pierre.Peiffer
@ 2007-03-21  9:54 ` Pierre.Peiffer
  2007-03-26 11:20   ` Andrew Morton
  2007-03-27 11:07   ` Jakub Jelinek
  3 siblings, 2 replies; 12+ messages in thread
From: Pierre.Peiffer @ 2007-03-21  9:54 UTC (permalink / raw)
  To: akpm; +Cc: mingo, drepper, linux-kernel, jean-pierre.dion, Pierre Peiffer

[-- Attachment #1: futex-64bit.patch --]
[-- Type: text/plain, Size: 27462 bytes --]

This last patch is an adaptation of the sys_futex64 syscall provided in the
-rt patch (originally written by Ingo Molnar). It allows the use of 64-bit
futexes.

I have reworked most of the code to avoid code duplication.

It does not provide the functionality for all architectures (only for
x86_64 for now).
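
A hypothetical invocation (futex_word64 is a placeholder, and FUTEX_WAIT is
assumed to be accepted by the 64-bit path):

    #include <stdint.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    /* Wait as long as the full 64-bit value still equals the expected one;
       __NR_futex64 is 280 on x86_64 with this patch applied. */
    uint64_t futex_word64 = 0;
    syscall(__NR_futex64, &futex_word64, FUTEX_WAIT,
            (uint64_t) 0,	/* expected 64-bit value */
            NULL, NULL, (uint64_t) 0);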

Signed-off-by: Pierre Peiffer <pierre.peiffer@bull.net>

---
 include/asm-x86_64/futex.h  |  113 ++++++++++++++++++
 include/asm-x86_64/unistd.h |    4 
 include/linux/futex.h       |    9 -
 include/linux/syscalls.h    |    3 
 kernel/futex.c              |  264 +++++++++++++++++++++++++++++++-------------
 kernel/futex_compat.c       |    3 
 kernel/sys_ni.c             |    1 
 7 files changed, 313 insertions(+), 84 deletions(-)

Index: b/include/asm-x86_64/futex.h
===================================================================
--- a/include/asm-x86_64/futex.h
+++ b/include/asm-x86_64/futex.h
@@ -41,6 +41,39 @@
 	  "=&r" (tem)						\
 	: "r" (oparg), "i" (-EFAULT), "m" (*uaddr), "1" (0))
 
+#define __futex_atomic_op1_64(insn, ret, oldval, uaddr, oparg) \
+  __asm__ __volatile (						\
+"1:	" insn "\n"						\
+"2:	.section .fixup,\"ax\"\n\
+3:	movq	%3, %1\n\
+	jmp	2b\n\
+	.previous\n\
+	.section __ex_table,\"a\"\n\
+	.align	8\n\
+	.quad	1b,3b\n\
+	.previous"						\
+	: "=r" (oldval), "=r" (ret), "=m" (*uaddr)		\
+	: "i" (-EFAULT), "m" (*uaddr), "0" (oparg), "1" (0))
+
+#define __futex_atomic_op2_64(insn, ret, oldval, uaddr, oparg) \
+  __asm__ __volatile (						\
+"1:	movq	%2, %0\n\
+	movq	%0, %3\n"					\
+	insn "\n"						\
+"2:	" LOCK_PREFIX "cmpxchgq %3, %2\n\
+	jnz	1b\n\
+3:	.section .fixup,\"ax\"\n\
+4:	movq	%5, %1\n\
+	jmp	3b\n\
+	.previous\n\
+	.section __ex_table,\"a\"\n\
+	.align	8\n\
+	.quad	1b,4b,2b,4b\n\
+	.previous"						\
+	: "=&a" (oldval), "=&r" (ret), "=m" (*uaddr),		\
+	  "=&r" (tem)						\
+	: "r" (oparg), "i" (-EFAULT), "m" (*uaddr), "1" (0))
+
 static inline int
 futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 {
@@ -95,6 +128,60 @@ futex_atomic_op_inuser (int encoded_op, 
 }
 
 static inline int
+futex_atomic_op_inuser64 (int encoded_op, u64 __user *uaddr)
+{
+	int op = (encoded_op >> 28) & 7;
+	int cmp = (encoded_op >> 24) & 15;
+	u64 oparg = (encoded_op << 8) >> 20;
+	u64 cmparg = (encoded_op << 20) >> 20;
+	u64 oldval = 0, ret, tem;
+
+	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
+		oparg = 1 << oparg;
+
+	if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u64)))
+		return -EFAULT;
+
+	inc_preempt_count();
+
+	switch (op) {
+	case FUTEX_OP_SET:
+		__futex_atomic_op1_64("xchgq %0, %2", ret, oldval, uaddr, oparg);
+		break;
+	case FUTEX_OP_ADD:
+		__futex_atomic_op1_64(LOCK_PREFIX "xaddq %0, %2", ret, oldval,
+				   uaddr, oparg);
+		break;
+	case FUTEX_OP_OR:
+		__futex_atomic_op2_64("orq %4, %3", ret, oldval, uaddr, oparg);
+		break;
+	case FUTEX_OP_ANDN:
+		__futex_atomic_op2_64("andq %4, %3", ret, oldval, uaddr, ~oparg);
+		break;
+	case FUTEX_OP_XOR:
+		__futex_atomic_op2_64("xorq %4, %3", ret, oldval, uaddr, oparg);
+		break;
+	default:
+		ret = -ENOSYS;
+	}
+
+	dec_preempt_count();
+
+	if (!ret) {
+		switch (cmp) {
+		case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
+		case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
+		case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
+		case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
+		case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
+		case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
+		default: ret = -ENOSYS;
+		}
+	}
+	return ret;
+}
+
+static inline int
 futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
 {
 	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
@@ -121,5 +208,31 @@ futex_atomic_cmpxchg_inatomic(int __user
 	return oldval;
 }
 
+static inline u64
+futex_atomic_cmpxchg_inatomic64(u64 __user *uaddr, u64 oldval, u64 newval)
+{
+	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u64)))
+		return -EFAULT;
+
+	__asm__ __volatile__(
+		"1:	" LOCK_PREFIX "cmpxchgq %3, %1		\n"
+
+		"2:	.section .fixup, \"ax\"			\n"
+		"3:	mov     %2, %0				\n"
+		"	jmp     2b				\n"
+		"	.previous				\n"
+
+		"	.section __ex_table, \"a\"		\n"
+		"	.align  8				\n"
+		"	.quad   1b,3b				\n"
+		"	.previous				\n"
+
+		: "=a" (oldval), "=m" (*uaddr)
+		: "i" (-EFAULT), "r" (newval), "0" (oldval)
+		: "memory"
+	);
+
+	return oldval;
+}
 #endif
 #endif
Index: b/include/asm-x86_64/unistd.h
===================================================================
--- a/include/asm-x86_64/unistd.h
+++ b/include/asm-x86_64/unistd.h
@@ -619,8 +619,10 @@ __SYSCALL(__NR_sync_file_range, sys_sync
 __SYSCALL(__NR_vmsplice, sys_vmsplice)
 #define __NR_move_pages		279
 __SYSCALL(__NR_move_pages, sys_move_pages)
+#define __NR_futex64		280
+__SYSCALL(__NR_futex64, sys_futex64)
 
-#define __NR_syscall_max __NR_move_pages
+#define __NR_syscall_max __NR_futex64
 
 #ifndef __NO_STUBS
 #define __ARCH_WANT_OLD_READDIR
Index: b/include/linux/syscalls.h
===================================================================
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -178,6 +178,9 @@ asmlinkage long sys_set_tid_address(int 
 asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val,
 			struct timespec __user *utime, u32 __user *uaddr2,
 			u32 val3);
+asmlinkage long sys_futex64(u64 __user *uaddr, int op, u64 val,
+			struct timespec __user *utime, u64 __user *uaddr2,
+			u64 val3);
 
 asmlinkage long sys_init_module(void __user *umod, unsigned long len,
 				const char __user *uargs);
Index: b/kernel/futex.c
===================================================================
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -61,6 +61,44 @@
 
 #define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)
 
+#ifdef CONFIG_64BIT
+static inline unsigned long
+futex_cmpxchg_inatomic(unsigned long __user *uaddr, unsigned long oldval,
+		       unsigned long newval, int futex64)
+{
+	if (futex64)
+		return futex_atomic_cmpxchg_inatomic64((u64 __user *)uaddr,
+						       oldval, newval);
+	else {
+		u32 ov = oldval, nv = newval;
+		return futex_atomic_cmpxchg_inatomic((int __user *)uaddr, ov,
+						     nv);
+	}
+}
+
+static inline int
+futex_get_user(unsigned long *val, unsigned long __user *uaddr, int futex64)
+{
+	int ret;
+
+	if (futex64)
+		ret = get_user(*val, uaddr);
+	else {
+		u32 __user *addr = (u32 __user *)uaddr;
+
+		ret = get_user(*val, addr);
+	}
+	return ret;
+}
+
+#else
+#define futex_cmpxchg_inatomic(uaddr, oldval, newval, futex64)	\
+	futex_atomic_cmpxchg_inatomic((u32*)uaddr, oldval, newval)
+
+#define futex_get_user(val, uaddr, futex64) get_user(*val, uaddr)
+
+#endif
+
 /*
  * Priority Inheritance state:
  */
@@ -140,6 +178,7 @@ static struct futex_hash_bucket *hash_fu
 	return &futex_queues[hash & ((1 << FUTEX_HASHBITS)-1)];
 }
 
+
 /*
  * Return 1 if two futex_keys are equal, 0 otherwise.
  */
@@ -162,7 +201,7 @@ static inline int match_futex(union fute
  *
  * Should be called with &current->mm->mmap_sem but NOT any spinlocks.
  */
-int get_futex_key(u32 __user *uaddr, union futex_key *key)
+int get_futex_key(void __user *uaddr, union futex_key *key)
 {
 	unsigned long address = (unsigned long)uaddr;
 	struct mm_struct *mm = current->mm;
@@ -271,13 +310,30 @@ void drop_futex_key_refs(union futex_key
 }
 EXPORT_SYMBOL_GPL(drop_futex_key_refs);
 
-static inline int get_futex_value_locked(u32 *dest, u32 __user *from)
+static inline int
+get_futex_value_locked(unsigned long *dest, unsigned long __user *from,
+		       int futex64)
 {
 	int ret;
 
+#ifdef CONFIG_64BIT
+	if (futex64) {
+		pagefault_disable();
+		ret = __copy_from_user_inatomic(dest, from, sizeof(u64));
+		pagefault_enable();
+	} else {
+		u32 d;
+		pagefault_disable();
+		ret = __copy_from_user_inatomic(&d, from, sizeof(u32));
+		pagefault_enable();
+
+		*dest = d;
+	}
+#else
 	pagefault_disable();
 	ret = __copy_from_user_inatomic(dest, from, sizeof(u32));
 	pagefault_enable();
+#endif
 
 	return ret ? -EFAULT : 0;
 }
@@ -550,11 +606,12 @@ static void wake_futex(struct futex_q *q
 	q->lock_ptr = NULL;
 }
 
-static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
+static int wake_futex_pi(unsigned long __user *uaddr, unsigned long uval,
+			 struct futex_q *this, int futex64)
 {
 	struct task_struct *new_owner;
 	struct futex_pi_state *pi_state = this->pi_state;
-	u32 curval, newval;
+	unsigned long curval, newval;
 
 	if (!pi_state)
 		return -EINVAL;
@@ -582,7 +639,7 @@ static int wake_futex_pi(u32 __user *uad
 		newval |= (uval & FUTEX_WAITER_REQUEUED);
 
 		pagefault_disable();
-		curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
+		curval = futex_cmpxchg_inatomic(uaddr, uval, newval, futex64);
 		pagefault_enable();
 		if (curval == -EFAULT)
 			return -EFAULT;
@@ -607,16 +664,17 @@ static int wake_futex_pi(u32 __user *uad
 	return 0;
 }
 
-static int unlock_futex_pi(u32 __user *uaddr, u32 uval)
+static int unlock_futex_pi(unsigned long __user *uaddr, unsigned long uval,
+			   int futex64)
 {
-	u32 oldval;
+	unsigned long oldval;
 
 	/*
 	 * There is no waiter, so we unlock the futex. The owner died
 	 * bit has not to be preserved here. We are the owner:
 	 */
 	pagefault_disable();
-	oldval = futex_atomic_cmpxchg_inatomic(uaddr, uval, 0);
+	oldval = futex_cmpxchg_inatomic(uaddr, uval, 0, futex64);
 	pagefault_enable();
 
 	if (oldval == -EFAULT)
@@ -647,7 +705,7 @@ double_lock_hb(struct futex_hash_bucket 
  * Wake up all waiters hashed on the physical page that is mapped
  * to this virtual address:
  */
-static int futex_wake(u32 __user *uaddr, int nr_wake)
+static int futex_wake(unsigned long __user *uaddr, int nr_wake)
 {
 	struct futex_hash_bucket *hb;
 	struct futex_q *this, *next;
@@ -690,18 +748,19 @@ out:
  * or create a new one without owner.
  */
 static inline int
-lookup_pi_state_for_requeue(u32 __user *uaddr, struct futex_hash_bucket *hb,
+lookup_pi_state_for_requeue(unsigned long __user *uaddr,
+			    struct futex_hash_bucket *hb,
 			    union futex_key *key,
-			    struct futex_pi_state **pi_state)
+			    struct futex_pi_state **pi_state, int futex64)
 {
-	u32 curval, uval, newval;
+	unsigned long curval, uval, newval;
 
 retry:
 	/*
 	 * We can't handle a fault cleanly because we can't
 	 * release the locks here. Simply return the fault.
 	 */
-	if (get_futex_value_locked(&curval, uaddr))
+	if (get_futex_value_locked(&curval, uaddr, futex64))
 		return -EFAULT;
 
 	/* set the flags FUTEX_WAITERS and FUTEX_WAITER_REQUEUED */
@@ -715,7 +774,7 @@ retry:
 		newval = uval | FUTEX_WAITERS | FUTEX_WAITER_REQUEUED;
 
 		pagefault_disable();
-		curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
+		curval = futex_cmpxchg_inatomic(uaddr, uval, newval, futex64);
 		pagefault_enable();
 
 		if (unlikely(curval == -EFAULT))
@@ -746,8 +805,9 @@ retry:
  * and requeue the next nr_requeue waiters following hashed on
  * one physical page to another physical page (PI-futex uaddr2)
  */
-static int futex_requeue_pi(u32 __user *uaddr1, u32 __user *uaddr2,
-			    int nr_wake, int nr_requeue, u32 *cmpval)
+static int
+futex_requeue_pi(unsigned long __user *uaddr1, unsigned long __user *uaddr2,
+		 int nr_wake, int nr_requeue, unsigned long *cmpval, int futex64)
 {
 	union futex_key key1, key2;
 	struct futex_hash_bucket *hb1, *hb2;
@@ -780,9 +840,9 @@ retry:
 	double_lock_hb(hb1, hb2);
 
 	if (likely(cmpval != NULL)) {
-		u32 curval;
+		unsigned long curval;
 
-		ret = get_futex_value_locked(&curval, uaddr1);
+		ret = get_futex_value_locked(&curval, uaddr1, futex64);
 
 		if (unlikely(ret)) {
 			spin_unlock(&hb1->lock);
@@ -795,7 +855,7 @@ retry:
 			 */
 			up_read(&current->mm->mmap_sem);
 
-			ret = get_user(curval, uaddr1);
+			ret = futex_get_user(&curval, uaddr1, futex64);
 
 			if (!ret)
 				goto retry;
@@ -822,7 +882,8 @@ retry:
 				int s;
 				/* do this only the first time we requeue someone */
 				s = lookup_pi_state_for_requeue(uaddr2, hb2,
-								&key2, &pi_state2);
+								&key2, &pi_state2,
+								futex64);
 				if (s) {
 					ret = s;
 					goto out_unlock;
@@ -936,8 +997,8 @@ out:
  * to this virtual address:
  */
 static int
-futex_wake_op(u32 __user *uaddr1, u32 __user *uaddr2,
-	      int nr_wake, int nr_wake2, int op)
+futex_wake_op(unsigned long __user *uaddr1, unsigned long __user *uaddr2,
+	      int nr_wake, int nr_wake2, int op, int futex64)
 {
 	union futex_key key1, key2;
 	struct futex_hash_bucket *hb1, *hb2;
@@ -961,9 +1022,16 @@ retryfull:
 retry:
 	double_lock_hb(hb1, hb2);
 
-	op_ret = futex_atomic_op_inuser(op, uaddr2);
+#ifdef CONFIG_64BIT
+	if (futex64)
+		op_ret = futex_atomic_op_inuser64(op, (u64 __user *)uaddr2);
+	else
+		op_ret = futex_atomic_op_inuser(op, (int __user *)uaddr2);
+#else
+	op_ret = futex_atomic_op_inuser(op, (int __user *)uaddr2);
+#endif
 	if (unlikely(op_ret < 0)) {
-		u32 dummy;
+		unsigned long dummy;
 
 		spin_unlock(&hb1->lock);
 		if (hb1 != hb2)
@@ -1005,7 +1073,7 @@ retry:
 		 */
 		up_read(&current->mm->mmap_sem);
 
-		ret = get_user(dummy, uaddr2);
+		ret = futex_get_user(&dummy, uaddr2, futex64);
 		if (ret)
 			return ret;
 
@@ -1048,8 +1116,9 @@ out:
  * Requeue all waiters hashed on one physical page to another
  * physical page.
  */
-static int futex_requeue(u32 __user *uaddr1, u32 __user *uaddr2,
-			 int nr_wake, int nr_requeue, u32 *cmpval)
+static int
+futex_requeue(unsigned long __user *uaddr1, unsigned long __user *uaddr2,
+	      int nr_wake, int nr_requeue, unsigned long *cmpval, int futex64)
 {
 	union futex_key key1, key2;
 	struct futex_hash_bucket *hb1, *hb2;
@@ -1073,9 +1142,9 @@ static int futex_requeue(u32 __user *uad
 	double_lock_hb(hb1, hb2);
 
 	if (likely(cmpval != NULL)) {
-		u32 curval;
+		unsigned long curval;
 
-		ret = get_futex_value_locked(&curval, uaddr1);
+		ret = get_futex_value_locked(&curval, uaddr1, futex64);
 
 		if (unlikely(ret)) {
 			spin_unlock(&hb1->lock);
@@ -1088,7 +1157,7 @@ static int futex_requeue(u32 __user *uad
 			 */
 			up_read(&current->mm->mmap_sem);
 
-			ret = get_user(curval, uaddr1);
+			ret = futex_get_user(&curval, uaddr1, futex64);
 
 			if (!ret)
 				goto retry;
@@ -1273,13 +1342,13 @@ static void unqueue_me_pi(struct futex_q
  * The cur->mm semaphore must be  held, it is released at return of this
  * function.
  */
-static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
+static int fixup_pi_state_owner(unsigned long  __user *uaddr, struct futex_q *q,
 				struct futex_hash_bucket *hb,
-				struct task_struct *curr)
+				struct task_struct *curr, int futex64)
 {
-	u32 newtid = curr->pid | FUTEX_WAITERS;
+	unsigned long newtid = curr->pid | FUTEX_WAITERS;
 	struct futex_pi_state *pi_state = q->pi_state;
-	u32 uval, curval, newval;
+	unsigned long uval, curval, newval;
 	int ret;
 
 	/* Owner died? */
@@ -1306,12 +1375,12 @@ static int fixup_pi_state_owner(u32 __us
 	 * TID. This must be atomic as we have preserve the
 	 * owner died bit here.
 	 */
-	ret = get_user(uval, uaddr);
+	ret = futex_get_user(&uval, uaddr, futex64);
 	while (!ret) {
 		newval = (uval & FUTEX_OWNER_DIED) | newtid;
 		newval |= (uval & FUTEX_WAITER_REQUEUED);
-		curval = futex_atomic_cmpxchg_inatomic(uaddr,
-						       uval, newval);
+		curval = futex_cmpxchg_inatomic(uaddr,uval,
+						newval, futex64);
 		if (curval == -EFAULT)
  			ret = -EFAULT;
 		if (curval == uval)
@@ -1322,13 +1391,14 @@ static int fixup_pi_state_owner(u32 __us
 }
 
 static long futex_wait_restart(struct restart_block *restart);
-static int futex_wait(u32 __user *uaddr, u32 val, ktime_t *abs_time)
+static int futex_wait(unsigned long __user *uaddr, unsigned long val,
+		      ktime_t *abs_time, int futex64)
 {
 	struct task_struct *curr = current;
 	DECLARE_WAITQUEUE(wait, curr);
 	struct futex_hash_bucket *hb;
 	struct futex_q q;
-	u32 uval;
+	unsigned long uval;
 	int ret;
 	struct hrtimer_sleeper t, *to = NULL;
 	int rem = 0;
@@ -1363,7 +1433,7 @@ static int futex_wait(u32 __user *uaddr,
 	 * We hold the mmap semaphore, so the mapping cannot have changed
 	 * since we looked it up in get_futex_key.
 	 */
-	ret = get_futex_value_locked(&uval, uaddr);
+	ret = get_futex_value_locked(&uval, uaddr, futex64);
 
 	if (unlikely(ret)) {
 		queue_unlock(&q, hb);
@@ -1373,8 +1443,7 @@ static int futex_wait(u32 __user *uaddr,
 		 * start all over again.
 		 */
 		up_read(&curr->mm->mmap_sem);
-
-		ret = get_user(uval, uaddr);
+		ret = futex_get_user(&uval, uaddr, futex64);
 
 		if (!ret)
 			goto retry;
@@ -1486,7 +1555,7 @@ static int futex_wait(u32 __user *uaddr,
 
 			/* mmap_sem and hash_bucket lock are unlocked at
 			   return of this function */
-			ret = fixup_pi_state_owner(uaddr, &q, hb, curr);
+			ret = fixup_pi_state_owner(uaddr, &q, hb, curr, futex64);
 		} else {
 			/*
 			 * Catch the rare case, where the lock was released
@@ -1526,8 +1595,9 @@ static int futex_wait(u32 __user *uaddr,
 		restart = &current_thread_info()->restart_block;
 		restart->fn = futex_wait_restart;
 		restart->arg0 = (unsigned long)uaddr;
-		restart->arg1 = (unsigned long)val;
+		restart->arg1 = val;
 		restart->arg2 = (unsigned long)abs_time;
+		restart->arg3 = (unsigned long)futex64;
 		return -ERESTART_RESTARTBLOCK;
 	}
 
@@ -1542,12 +1612,13 @@ static int futex_wait(u32 __user *uaddr,
 
 static long futex_wait_restart(struct restart_block *restart)
 {
-	u32 __user *uaddr = (u32 __user *)restart->arg0;
-	u32 val = (u32)restart->arg1;
+	unsigned long __user *uaddr = (unsigned long __user *)restart->arg0;
+	unsigned long val = restart->arg1;
 	ktime_t *abs_time = (ktime_t *)restart->arg2;
+	int futex64 = (int)restart->arg3;
 
 	restart->fn = do_no_restart_syscall;
-	return (long)futex_wait(uaddr, val, abs_time);
+	return (long)futex_wait(uaddr, val, abs_time, futex64);
 }
 
 
@@ -1602,13 +1673,13 @@ static void set_pi_futex_owner(struct fu
  * if there are waiters then it will block, it does PI, etc. (Due to
  * races the kernel might see a 0 value of the futex too.)
  */
-static int futex_lock_pi(u32 __user *uaddr, int detect, ktime_t *time,
-			 int trylock)
+static int futex_lock_pi(unsigned long __user *uaddr, int detect, ktime_t *time,
+			 int trylock, int futex64)
 {
 	struct hrtimer_sleeper timeout, *to = NULL;
 	struct task_struct *curr = current;
 	struct futex_hash_bucket *hb;
-	u32 uval, newval, curval;
+	unsigned long uval, newval, curval;
 	struct futex_q q;
 	int ret, lock_held, attempt = 0;
 
@@ -1643,7 +1714,7 @@ static int futex_lock_pi(u32 __user *uad
 	newval = current->pid;
 
 	pagefault_disable();
-	curval = futex_atomic_cmpxchg_inatomic(uaddr, 0, newval);
+	curval = futex_cmpxchg_inatomic(uaddr, 0, newval, futex64);
 	pagefault_enable();
 
 	if (unlikely(curval == -EFAULT))
@@ -1688,7 +1759,7 @@ static int futex_lock_pi(u32 __user *uad
 		newval = curval | FUTEX_WAITERS;
 
 	pagefault_disable();
-	curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
+	curval = futex_cmpxchg_inatomic(uaddr, uval, newval, futex64);
 	pagefault_enable();
 
 	if (unlikely(curval == -EFAULT))
@@ -1725,8 +1796,8 @@ static int futex_lock_pi(u32 __user *uad
 				FUTEX_OWNER_DIED | FUTEX_WAITERS;
 
 			pagefault_disable();
-			curval = futex_atomic_cmpxchg_inatomic(uaddr,
-							       uval, newval);
+			curval = futex_cmpxchg_inatomic(uaddr, uval,
+							newval, futex64);
 			pagefault_enable();
 
 			if (unlikely(curval == -EFAULT))
@@ -1770,7 +1841,7 @@ static int futex_lock_pi(u32 __user *uad
 	 */
 	if (!ret && q.pi_state->owner != curr)
 		/* mmap_sem is unlocked at return of this function */
-		ret = fixup_pi_state_owner(uaddr, &q, hb, curr);
+		ret = fixup_pi_state_owner(uaddr, &q, hb, curr, futex64);
 	else {
 		/*
 		 * Catch the rare case, where the lock was released
@@ -1816,7 +1887,7 @@ static int futex_lock_pi(u32 __user *uad
 	queue_unlock(&q, hb);
 	up_read(&curr->mm->mmap_sem);
 
-	ret = get_user(uval, uaddr);
+	ret = futex_get_user(&uval, uaddr, futex64);
 	if (!ret && (uval != -EFAULT))
 		goto retry;
 
@@ -1828,17 +1899,17 @@ static int futex_lock_pi(u32 __user *uad
  * This is the in-kernel slowpath: we look up the PI state (if any),
  * and do the rt-mutex unlock.
  */
-static int futex_unlock_pi(u32 __user *uaddr)
+static int futex_unlock_pi(unsigned long __user *uaddr, int futex64)
 {
 	struct futex_hash_bucket *hb;
 	struct futex_q *this, *next;
-	u32 uval;
+	unsigned long uval;
 	struct plist_head *head;
 	union futex_key key;
 	int ret, attempt = 0;
 
 retry:
-	if (get_user(uval, uaddr))
+	if (futex_get_user(&uval, uaddr, futex64))
 		return -EFAULT;
 	/*
 	 * We release only a lock we actually own:
@@ -1865,7 +1936,7 @@ retry_locked:
 	 */
 	if (!(uval & FUTEX_OWNER_DIED)) {
 		pagefault_disable();
-		uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0);
+		uval = futex_cmpxchg_inatomic(uaddr, current->pid, 0, futex64);
 		pagefault_enable();
 	}
 
@@ -1887,7 +1958,7 @@ retry_locked:
 	plist_for_each_entry_safe(this, next, head, list) {
 		if (!match_futex (&this->key, &key))
 			continue;
-		ret = wake_futex_pi(uaddr, uval, this);
+		ret = wake_futex_pi(uaddr, uval, this, futex64);
 		/*
 		 * The atomic access to the futex value
 		 * generated a pagefault, so retry the
@@ -1901,7 +1972,7 @@ retry_locked:
 	 * No waiters - kernel unlocks the futex:
 	 */
 	if (!(uval & FUTEX_OWNER_DIED)) {
-		ret = unlock_futex_pi(uaddr, uval);
+		ret = unlock_futex_pi(uaddr, uval, futex64);
 		if (ret == -EFAULT)
 			goto pi_faulted;
 	}
@@ -1931,7 +2002,7 @@ pi_faulted:
 	spin_unlock(&hb->lock);
 	up_read(&current->mm->mmap_sem);
 
-	ret = get_user(uval, uaddr);
+	ret = futex_get_user(&uval, uaddr, futex64);
 	if (!ret && (uval != -EFAULT))
 		goto retry;
 
@@ -2167,7 +2238,7 @@ retry:
 		 */
 		if (!pi) {
 			if (uval & FUTEX_WAITERS)
-				futex_wake(uaddr, 1);
+				futex_wake((unsigned long __user *)uaddr, 1);
 		}
 	}
 	return 0;
@@ -2223,7 +2294,8 @@ void exit_robust_list(struct task_struct
 		return;
 
 	if (pending)
-		handle_futex_death((void __user *)pending + futex_offset, curr, pip);
+		handle_futex_death((void __user *)pending + futex_offset,
+				   curr, pip);
 
 	while (entry != &head->list) {
 		/*
@@ -2249,42 +2321,46 @@ void exit_robust_list(struct task_struct
 	}
 }
 
-long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
-		u32 __user *uaddr2, u32 val2, u32 val3)
+long do_futex(unsigned long __user *uaddr, int op, unsigned long val,
+	      ktime_t *timeout, unsigned long __user *uaddr2,
+	      unsigned long val2, unsigned long val3, int fut64)
 {
 	int ret;
 
 	switch (op) {
 	case FUTEX_WAIT:
-		ret = futex_wait(uaddr, val, timeout);
+		ret = futex_wait(uaddr, val, timeout, fut64);
 		break;
 	case FUTEX_WAKE:
 		ret = futex_wake(uaddr, val);
 		break;
 	case FUTEX_FD:
-		/* non-zero val means F_SETOWN(getpid()) & F_SETSIG(val) */
-		ret = futex_fd(uaddr, val);
+		if (fut64)
+			ret = -ENOSYS;
+		else
+			/* non-zero val means F_SETOWN(getpid())&F_SETSIG(val) */
+			ret = futex_fd((u32 __user *)uaddr, val);
 		break;
 	case FUTEX_REQUEUE:
-		ret = futex_requeue(uaddr, uaddr2, val, val2, NULL);
+		ret = futex_requeue(uaddr, uaddr2, val, val2, NULL, fut64);
 		break;
 	case FUTEX_CMP_REQUEUE:
-		ret = futex_requeue(uaddr, uaddr2, val, val2, &val3);
+		ret = futex_requeue(uaddr, uaddr2, val, val2, &val3, fut64);
 		break;
 	case FUTEX_WAKE_OP:
-		ret = futex_wake_op(uaddr, uaddr2, val, val2, val3);
+		ret = futex_wake_op(uaddr, uaddr2, val, val2, val3, fut64);
 		break;
 	case FUTEX_LOCK_PI:
-		ret = futex_lock_pi(uaddr, val, timeout, 0);
+		ret = futex_lock_pi(uaddr, val, timeout, 0, fut64);
 		break;
 	case FUTEX_UNLOCK_PI:
-		ret = futex_unlock_pi(uaddr);
+		ret = futex_unlock_pi(uaddr, fut64);
 		break;
 	case FUTEX_TRYLOCK_PI:
-		ret = futex_lock_pi(uaddr, 0, timeout, 1);
+		ret = futex_lock_pi(uaddr, 0, timeout, 1, fut64);
 		break;
 	case FUTEX_CMP_REQUEUE_PI:
-		ret = futex_requeue_pi(uaddr, uaddr2, val, val2, &val3);
+		ret = futex_requeue_pi(uaddr, uaddr2, val, val2, &val3, fut64);
 		break;
 	default:
 		ret = -ENOSYS;
@@ -2292,6 +2368,37 @@ long do_futex(u32 __user *uaddr, int op,
 	return ret;
 }
 
+#ifdef CONFIG_64BIT
+
+asmlinkage long
+sys_futex64(u64 __user *uaddr, int op, u64 val,
+	    struct timespec __user *utime, u64 __user *uaddr2, u64 val3)
+{
+	struct timespec ts;
+	ktime_t t, *tp = NULL;
+	u64 val2 = 0;
+
+	if (utime && (op == FUTEX_WAIT || op == FUTEX_LOCK_PI)) {
+		if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
+			return -EFAULT;
+		if (!timespec_valid(&ts))
+			return -EINVAL;
+		if (op == FUTEX_WAIT)
+			t = ktime_add(ktime_get(), timespec_to_ktime(ts));
+		tp = &t;
+	}
+	/*
+	 * requeue parameter in 'utime' if op == FUTEX_REQUEUE.
+	 */
+	if (op == FUTEX_REQUEUE || op == FUTEX_CMP_REQUEUE
+	    || op == FUTEX_CMP_REQUEUE_PI)
+		val2 = (unsigned long) utime;
+
+	return do_futex((unsigned long __user*)uaddr, op, val, tp,
+			(unsigned long __user*)uaddr2, val2, val3, 1);
+}
+
+#endif
 
 asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val,
 			  struct timespec __user *utime, u32 __user *uaddr2,
@@ -2317,7 +2424,8 @@ asmlinkage long sys_futex(u32 __user *ua
 	    || op == FUTEX_CMP_REQUEUE_PI)
 		val2 = (u32) (unsigned long) utime;
 
-	return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
+	return do_futex((unsigned long __user*)uaddr, op, val, tp,
+			(unsigned long __user*)uaddr2, val2, val3, 0);
 }
 
 static int futexfs_get_sb(struct file_system_type *fs_type,
Index: b/kernel/sys_ni.c
===================================================================
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -41,6 +41,7 @@ cond_syscall(sys_sendmsg);
 cond_syscall(sys_recvmsg);
 cond_syscall(sys_socketcall);
 cond_syscall(sys_futex);
+cond_syscall(sys_futex64);
 cond_syscall(compat_sys_futex);
 cond_syscall(sys_set_robust_list);
 cond_syscall(compat_sys_set_robust_list);
Index: b/include/linux/futex.h
===================================================================
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -101,8 +101,9 @@ struct robust_list_head {
 #define ROBUST_LIST_LIMIT	2048
 
 #ifdef __KERNEL__
-long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
-	      u32 __user *uaddr2, u32 val2, u32 val3);
+long do_futex(unsigned long __user *uaddr, int op, unsigned long val,
+	      ktime_t *timeout, unsigned long __user *uaddr2,
+	      unsigned long val2, unsigned long val3, int futex64);
 
 extern int
 handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi);
@@ -116,7 +117,7 @@ handle_futex_death(u32 __user *uaddr, st
  * We set bit 0 to indicate if it's an inode-based key.
  */
 union futex_key {
-	u32 __user *uaddr;
+	unsigned long __user *uaddr;
 	struct {
 		unsigned long pgoff;
 		struct inode *inode;
@@ -133,7 +134,7 @@ union futex_key {
 		int offset;
 	} both;
 };
-int get_futex_key(u32 __user *uaddr, union futex_key *key);
+int get_futex_key(void __user *uaddr, union futex_key *key);
 void get_futex_key_refs(union futex_key *key);
 void drop_futex_key_refs(union futex_key *key);
 
Index: b/kernel/futex_compat.c
===================================================================
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -158,5 +158,6 @@ asmlinkage long compat_sys_futex(u32 __u
 	    || op == FUTEX_CMP_REQUEUE_PI)
 		val2 = (int) (unsigned long) utime;
 
-	return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
+	return do_futex((unsigned long __user*)uaddr, op, val, tp,
+			(unsigned long __user*)uaddr2, val2, val3, 0);
 }

-- 
Pierre Peiffer

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2.6.21-rc4-mm1 2/4] Make futex_wait() use an hrtimer for timeout
  2007-03-21  9:54 ` [PATCH 2.6.21-rc4-mm1 2/4] Make futex_wait() use an hrtimer for timeout Pierre.Peiffer
@ 2007-03-26  9:57   ` Andrew Morton
  0 siblings, 0 replies; 12+ messages in thread
From: Andrew Morton @ 2007-03-26  9:57 UTC (permalink / raw)
  To: Pierre.Peiffer
  Cc: mingo, drepper, linux-kernel, jean-pierre.dion, Sebastien Dugue,
	Pierre Peiffer

On Wed, 21 Mar 2007 10:54:34 +0100 Pierre.Peiffer@bull.net wrote:

> This patch modifies futex_wait() to use an hrtimer + schedule() in place of
> schedule_timeout().
> 
>   schedule_timeout() is tick-based, so the timeout granularity is the
> tick (1 ms, 4 ms or 10 ms, depending on HZ). By using a high-resolution
> timer for the timeout wakeup, we can attain a much finer timeout
> granularity (in the microsecond range). This parallels what is already
> done for futex_lock_pi().
> 
>   The timeout passed to the syscall is no longer converted to jiffies;
> it is instead passed to do_futex() and futex_wait() as an absolute
> ktime_t, thus keeping nanosecond resolution.
> 
>   This also removes the need to pass the nanosecond part of the timeout
> to futex_lock_pi() in val2.
> 
>   In futex_wait(), if there is no timeout then a regular schedule() is
> performed. Otherwise, an hrtimer is fired before schedule() is called.
> 
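
(A condensed sketch of the wait path described above, using the
2.6.21-era hrtimer API and omitting the signal/restart handling:)

	struct hrtimer_sleeper t;

	if (!abs_time)
		schedule();
	else {
		hrtimer_init(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
		hrtimer_init_sleeper(&t, current);
		t.timer.expires = *abs_time;	/* absolute ktime_t */

		hrtimer_start(&t.timer, t.timer.expires, HRTIMER_MODE_ABS);

		/* the timer may already have expired and woken us up */
		if (t.task)
			schedule();

		hrtimer_cancel(&t.timer);
	}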

Problem.

> --- a/include/linux/futex.h
> +++ b/include/linux/futex.h
> @@ -1,6 +1,7 @@
>  #ifndef _LINUX_FUTEX_H
>  #define _LINUX_FUTEX_H
>  
> +#include <linux/ktime.h>
>  #include <linux/sched.h>
>  

For a start, please print out a copy of Documentation/SubmitChecklist and
tape it to your monitor.  It's really good.

`make headers_check' fails with

/usr/src/devel/usr/include/linux/futex.h requires linux/ktime.h, which does not exist in exported headers



This fixes it:


diff -puN include/linux/Kbuild~make-futex_wait-use-an-hrtimer-for-timeout-fix include/linux/Kbuild
--- a/include/linux/Kbuild~make-futex_wait-use-an-hrtimer-for-timeout-fix
+++ a/include/linux/Kbuild
@@ -40,6 +40,7 @@ header-y += baycom.h
 header-y += bfs_fs.h
 header-y += blkpg.h
 header-y += bpqether.h
+header-y += calc64.h
 header-y += cdk.h
 header-y += chio.h
 header-y += coda_psdev.h
@@ -99,7 +100,9 @@ header-y += isdn_divertif.h
 header-y += iso_fs.h
 header-y += ixjuser.h
 header-y += jffs2.h
+header-y += jiffies.h
 header-y += keyctl.h
+header-y += ktime.h
 header-y += kvm.h
 header-y += limits.h
 header-y += lock_dlm_plock.h
diff -puN include/asm-i386/Kbuild~make-futex_wait-use-an-hrtimer-for-timeout-fix include/asm-i386/Kbuild
--- a/include/asm-i386/Kbuild~make-futex_wait-use-an-hrtimer-for-timeout-fix
+++ a/include/asm-i386/Kbuild
@@ -2,6 +2,7 @@ include include/asm-generic/Kbuild.asm
 
 header-y += boot.h
 header-y += debugreg.h
+header-y += div64.h
 header-y += ldt.h
 header-y += ptrace-abi.h
 header-y += ucontext.h
_

But only for i386, and no way do we want to export all those headers.


Now.  What blithering idiot carefully went and made ktime_t a typedef so we
cannot forward declare it?  Sigh.  We tell 'em, but they don't listen.


This fixes ktime:


diff -puN include/linux/ktime.h~declare-struct-ktime include/linux/ktime.h
--- a/include/linux/ktime.h~declare-struct-ktime
+++ a/include/linux/ktime.h
@@ -43,7 +43,7 @@
  * plain scalar nanosecond based representation can be selected by the
  * config switch CONFIG_KTIME_SCALAR.
  */
-typedef union {
+union ktime {
 	s64	tv64;
 #if BITS_PER_LONG != 64 && !defined(CONFIG_KTIME_SCALAR)
 	struct {
@@ -54,7 +54,9 @@ typedef union {
 # endif
 	} tv;
 #endif
-} ktime_t;
+};
+
+typedef union ktime ktime_t;		/* Kill this */
 
 #define KTIME_MAX			((s64)~((u64)1 << 63))
 #if (BITS_PER_LONG == 64)
_
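
(For illustration, why the tag matters: an anonymous union hidden behind
a typedef cannot be forward-declared, whereas a tagged union can. A
minimal sketch, not part of the fix:)

	/* No tag: a header that only passes ktime_t around still has
	 * nothing to forward-declare, so it must include the full
	 * definition. */
	typedef union { s64 tv64; } ktime_t;

	/* Tagged: an incomplete type is enough for prototypes that
	 * take only pointers. */
	union ktime;
	long do_futex_sketch(union ktime *timeout);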

And this fixes your patch:

--- a/include/linux/futex.h~make-futex_wait-use-an-hrtimer-for-timeout-fix
+++ a/include/linux/futex.h
@@ -1,9 +1,10 @@
 #ifndef _LINUX_FUTEX_H
 #define _LINUX_FUTEX_H
 
-#include <linux/ktime.h>
 #include <linux/sched.h>
 
+union ktime;
+
 /* Second argument to futex syscall */
 
 
@@ -95,7 +96,7 @@ struct robust_list_head {
 #define ROBUST_LIST_LIMIT	2048
 
 #ifdef __KERNEL__
-long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
+long do_futex(u32 __user *uaddr, int op, u32 val, union ktime *timeout,
 	      u32 __user *uaddr2, u32 val2, u32 val3);
 
 extern int
_


And now someone needs to go all over the kernel and do a s/ktime_t/union ktime/g.
Again.  How often must we do this?


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2.6.21-rc4-mm1 4/4] sys_futex64 : allows 64bit futexes
  2007-03-21  9:54 ` [PATCH 2.6.21-rc4-mm1 4/4] sys_futex64 : allows 64bit futexes Pierre.Peiffer
@ 2007-03-26 11:20   ` Andrew Morton
  2007-03-27 11:07   ` Jakub Jelinek
  1 sibling, 0 replies; 12+ messages in thread
From: Andrew Morton @ 2007-03-26 11:20 UTC (permalink / raw)
  To: Pierre.Peiffer
  Cc: mingo, drepper, linux-kernel, jean-pierre.dion, Pierre Peiffer

On Wed, 21 Mar 2007 10:54:36 +0100 Pierre.Peiffer@bull.net wrote:

> It does not provide the functionality for all architectures (only for x64 for now).

Well that scuppers our chances of getting -mm kernels tested on ia64, s390
and sparc64.  Which is a problem - people do test s390 and ia64 and so these
patches impact the testing quality of everyone else's work.


Do we have a plan to fix this (promptly, please)?

kernel/built-in.o(.text+0x683a2): In function `futex_requeue_pi':
: undefined reference to `futex_atomic_cmpxchg_inatomic64'

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2.6.21-rc4-mm1 4/4] sys_futex64 : allows 64bit futexes
  2007-03-21  9:54 ` [PATCH 2.6.21-rc4-mm1 4/4] sys_futex64 : allows 64bit futexes Pierre.Peiffer
  2007-03-26 11:20   ` Andrew Morton
@ 2007-03-27 11:07   ` Jakub Jelinek
  2007-04-23 14:35     ` [PATCH -mm] 64bit-futex - provide new commands instead of new syscall Pierre Peiffer
  1 sibling, 1 reply; 12+ messages in thread
From: Jakub Jelinek @ 2007-03-27 11:07 UTC (permalink / raw)
  To: Pierre.Peiffer; +Cc: akpm, mingo, drepper, linux-kernel, jean-pierre.dion

On Wed, Mar 21, 2007 at 10:54:36AM +0100, Pierre.Peiffer@bull.net wrote:
> This last patch is an adaptation of the sys_futex64 syscall provided in -rt
> patch (originally written by Ingo Molnar). It allows the use of 64-bit futex.
> 
> I have re-worked most of the code to avoid the duplication of the code.
> 
> It does not provide the functionality for all architectures (only for x64 for now).

I don't think you should blindly add all operations to sys_futex64 without
thinking what they really do.
E.g. FUTEX_{{,UN,TRY}LOCK,CMP_REQUEUE}_PI doesn't really make any sense for 64-bit
futexes, the format of PI futexes is hardcoded in the kernel and is always
32-bit, see FUTEX_TID_MASK, FUTEX_WAITERS, FUTEX_OWNER_DIED definitions.
exit_robust_list/handle_futex_death will handle 32-bit PI futexes anyway.
Similarly, sys_futex64 shouldn't support the obsolete operations that
are there solely for compatibility (e.g. FUTEX_REQUEUE or FUTEX_FD).
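
For reference, this is the 32-bit PI futex word format in question (the
values below are as defined in include/linux/futex.h):

	#define FUTEX_WAITERS		0x80000000 /* waiter(s) blocked in kernel */
	#define FUTEX_OWNER_DIED	0x40000000 /* owner exited without unlocking */
	#define FUTEX_TID_MASK		0x3fffffff /* TID of the current owner */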

When you just -ENOSYS on the PI ops, there is no need to implement
futex_atomic_cmpxchg_inatomic64.

FUTEX_WAKE_OP is questionable for 64-bit; IMHO it is better to just
-ENOSYS on it, and add it only if anyone ever finds actual uses for it.

For 64-bit futexes the only needed operations are actually
FUTEX_WAIT and perhaps FUTEX_CMP_REQUEUE, so I wonder if it isn't
better to just add FUTEX_WAIT64 and FUTEX_CMP_REQUEUE64 ops to sys_futex
instead of adding a new syscall.

But the FUTEX_{{,UN,TRY}LOCK,CMP_REQUEUE}_PI removal for 64-bit futexes
is IMHO the most important part of my complaint.

	Jakub

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH -mm] 64bit-futex - provide new commands instead of new syscall
  2007-03-27 11:07   ` Jakub Jelinek
@ 2007-04-23 14:35     ` Pierre Peiffer
  2007-04-23 15:30       ` Ulrich Drepper
  0 siblings, 1 reply; 12+ messages in thread
From: Pierre Peiffer @ 2007-04-23 14:35 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: akpm, mingo, drepper, linux-kernel, jean-pierre.dion

[-- Attachment #1: Type: text/plain, Size: 1717 bytes --]

Hi,

Jakub Jelinek wrote:
> 
> I don't think you should blindly add all operations to sys_futex64 without
> thinking what they really do.
> E.g. FUTEX_{{,UN,TRY}LOCK,CMP_REQUEUE}_PI doesn't really make any sense for 64-bit
> futexes, the format of PI futexes is hardcoded in the kernel and is always
> 32-bit, see FUTEX_TID_MASK, FUTEX_WAITERS, FUTEX_OWNER_DIED definitions.
> exit_robust_list/handle_futex_death will handle 32-bit PI futexes anyway.
> Similarly, sys_futex64 shouldn't support the obsolete operations that
> are there solely for compatibility (e.g. FUTEX_REQUEUE or FUTEX_FD).
> 
> When you just -ENOSYS on the PI ops, there is no need to implement
> futex_atomic_cmpxchg_inatomic64.
> 
> FUTEX_WAKE_OP is questionable for 64-bit; IMHO it is better to just
> -ENOSYS on it, and add it only if anyone ever finds actual uses for it.
> 
> For 64-bit futexes the only needed operations are actually
> FUTEX_WAIT and perhaps FUTEX_CMP_REQUEUE, so I wonder if it isn't
> better to just add FUTEX_WAIT64 and FUTEX_CMP_REQUEUE64 ops to sys_futex
> instead of adding a new syscall.
> 
> But the FUTEX_{{,UN,TRY}LOCK,CMP_REQUEUE}_PI removal for 64-bit futexes
> is IMHO the most important part of my complaint.
> 

Following this mail, sent a few weeks ago, here is a patch which should
meet your requirements.
I did it quickly on top of the latest -mm (2.6.21-rc6-mm2) and tested it
a little.
To be honest, as I'm not really aware of your exact needs and don't know
exactly how 64-bit futexes will be used, I can't really maintain it. So
I'll let you take/modify/adapt this patch to fit your needs.

Thanks,

Signed-off-by: Pierre Peiffer <pierre.peiffer@bull.net>



-- 
Pierre

[-- Attachment #2: futex-64bits-command.patch --]
[-- Type: text/x-patch, Size: 24427 bytes --]

---
 include/asm-ia64/futex.h    |    8 -
 include/asm-powerpc/futex.h |    6 -
 include/asm-s390/futex.h    |    8 -
 include/asm-sparc64/futex.h |    8 -
 include/asm-um/futex.h      |    9 -
 include/asm-x86_64/futex.h  |   86 -------------------
 include/asm-x86_64/unistd.h |    2 
 include/linux/futex.h       |    8 +
 include/linux/syscalls.h    |    3 
 kernel/futex.c              |  199 +++++++++++++++++---------------------------
 kernel/futex_compat.c       |    2 
 kernel/sys_ni.c             |    1 
 12 files changed, 93 insertions(+), 247 deletions(-)

Index: linux-2.6.21-rc6-mm2/include/asm-ia64/futex.h
===================================================================
--- linux-2.6.21-rc6-mm2.orig/include/asm-ia64/futex.h	2007-04-20 14:01:25.000000000 +0200
+++ linux-2.6.21-rc6-mm2/include/asm-ia64/futex.h	2007-04-20 13:50:00.000000000 +0200
@@ -124,13 +124,7 @@ futex_atomic_cmpxchg_inatomic(int __user
 static inline u64
 futex_atomic_cmpxchg_inatomic64(u64 __user *uaddr, u64 oldval, u64 newval)
 {
-	return 0;
-}
-
-static inline int
-futex_atomic_op_inuser64 (int encoded_op, u64 __user *uaddr)
-{
-	return 0;
+	return -ENOSYS;
 }
 
 #endif /* _ASM_FUTEX_H */
Index: linux-2.6.21-rc6-mm2/include/asm-powerpc/futex.h
===================================================================
--- linux-2.6.21-rc6-mm2.orig/include/asm-powerpc/futex.h	2007-04-20 14:01:25.000000000 +0200
+++ linux-2.6.21-rc6-mm2/include/asm-powerpc/futex.h	2007-04-20 13:51:49.000000000 +0200
@@ -119,11 +119,5 @@ futex_atomic_cmpxchg_inatomic64(u64 __us
 	return 0;
 }
 
-static inline int
-futex_atomic_op_inuser64 (int encoded_op, u64 __user *uaddr)
-{
-	return 0;
-}
-
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_FUTEX_H */
Index: linux-2.6.21-rc6-mm2/include/asm-s390/futex.h
===================================================================
--- linux-2.6.21-rc6-mm2.orig/include/asm-s390/futex.h	2007-04-20 14:01:24.000000000 +0200
+++ linux-2.6.21-rc6-mm2/include/asm-s390/futex.h	2007-04-20 13:47:30.000000000 +0200
@@ -51,13 +51,7 @@ static inline int futex_atomic_cmpxchg_i
 static inline u64
 futex_atomic_cmpxchg_inatomic64(u64 __user *uaddr, u64 oldval, u64 newval)
 {
-	return 0;
-}
-
-static inline int
-futex_atomic_op_inuser64 (int encoded_op, u64 __user *uaddr)
-{
-	return 0;
+	return -ENOSYS;
 }
 
 #endif /* __KERNEL__ */
Index: linux-2.6.21-rc6-mm2/include/asm-sparc64/futex.h
===================================================================
--- linux-2.6.21-rc6-mm2.orig/include/asm-sparc64/futex.h	2007-04-20 14:01:25.000000000 +0200
+++ linux-2.6.21-rc6-mm2/include/asm-sparc64/futex.h	2007-04-20 13:48:48.000000000 +0200
@@ -108,13 +108,7 @@ futex_atomic_cmpxchg_inatomic(int __user
 static inline u64
 futex_atomic_cmpxchg_inatomic64(u64 __user *uaddr, u64 oldval, u64 newval)
 {
-	return 0;
-}
-
-static inline int
-futex_atomic_op_inuser64 (int encoded_op, u64 __user *uaddr)
-{
-	return 0;
+	return -ENOSYS;
 }
 
 #endif /* !(_SPARC64_FUTEX_H) */
Index: linux-2.6.21-rc6-mm2/include/asm-um/futex.h
===================================================================
--- linux-2.6.21-rc6-mm2.orig/include/asm-um/futex.h	2007-04-20 14:01:25.000000000 +0200
+++ linux-2.6.21-rc6-mm2/include/asm-um/futex.h	2007-04-20 13:51:42.000000000 +0200
@@ -6,14 +6,7 @@
 static inline u64
 futex_atomic_cmpxchg_inatomic64(u64 __user *uaddr, u64 oldval, u64 newval)
 {
-	return 0;
+	return -ENOSYS;
 }
 
-static inline int
-futex_atomic_op_inuser64 (int encoded_op, u64 __user *uaddr)
-{
-	return 0;
-}
-
-
 #endif
Index: linux-2.6.21-rc6-mm2/include/asm-x86_64/futex.h
===================================================================
--- linux-2.6.21-rc6-mm2.orig/include/asm-x86_64/futex.h	2007-04-20 14:01:25.000000000 +0200
+++ linux-2.6.21-rc6-mm2/include/asm-x86_64/futex.h	2007-04-20 13:50:38.000000000 +0200
@@ -41,38 +41,6 @@
 	  "=&r" (tem)						\
 	: "r" (oparg), "i" (-EFAULT), "m" (*uaddr), "1" (0))
 
-#define __futex_atomic_op1_64(insn, ret, oldval, uaddr, oparg) \
-  __asm__ __volatile (						\
-"1:	" insn "\n"						\
-"2:	.section .fixup,\"ax\"\n\
-3:	movq	%3, %1\n\
-	jmp	2b\n\
-	.previous\n\
-	.section __ex_table,\"a\"\n\
-	.align	8\n\
-	.quad	1b,3b\n\
-	.previous"						\
-	: "=r" (oldval), "=r" (ret), "=m" (*uaddr)		\
-	: "i" (-EFAULT), "m" (*uaddr), "0" (oparg), "1" (0))
-
-#define __futex_atomic_op2_64(insn, ret, oldval, uaddr, oparg) \
-  __asm__ __volatile (						\
-"1:	movq	%2, %0\n\
-	movq	%0, %3\n"					\
-	insn "\n"						\
-"2:	" LOCK_PREFIX "cmpxchgq %3, %2\n\
-	jnz	1b\n\
-3:	.section .fixup,\"ax\"\n\
-4:	movq	%5, %1\n\
-	jmp	3b\n\
-	.previous\n\
-	.section __ex_table,\"a\"\n\
-	.align	8\n\
-	.quad	1b,4b,2b,4b\n\
-	.previous"						\
-	: "=&a" (oldval), "=&r" (ret), "=m" (*uaddr),		\
-	  "=&r" (tem)						\
-	: "r" (oparg), "i" (-EFAULT), "m" (*uaddr), "1" (0))
 
 static inline int
 futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
@@ -128,60 +96,6 @@ futex_atomic_op_inuser (int encoded_op, 
 }
 
 static inline int
-futex_atomic_op_inuser64 (int encoded_op, u64 __user *uaddr)
-{
-	int op = (encoded_op >> 28) & 7;
-	int cmp = (encoded_op >> 24) & 15;
-	u64 oparg = (encoded_op << 8) >> 20;
-	u64 cmparg = (encoded_op << 20) >> 20;
-	u64 oldval = 0, ret, tem;
-
-	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-		oparg = 1 << oparg;
-
-	if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u64)))
-		return -EFAULT;
-
-	inc_preempt_count();
-
-	switch (op) {
-	case FUTEX_OP_SET:
-		__futex_atomic_op1_64("xchgq %0, %2", ret, oldval, uaddr, oparg);
-		break;
-	case FUTEX_OP_ADD:
-		__futex_atomic_op1_64(LOCK_PREFIX "xaddq %0, %2", ret, oldval,
-				   uaddr, oparg);
-		break;
-	case FUTEX_OP_OR:
-		__futex_atomic_op2_64("orq %4, %3", ret, oldval, uaddr, oparg);
-		break;
-	case FUTEX_OP_ANDN:
-		__futex_atomic_op2_64("andq %4, %3", ret, oldval, uaddr, ~oparg);
-		break;
-	case FUTEX_OP_XOR:
-		__futex_atomic_op2_64("xorq %4, %3", ret, oldval, uaddr, oparg);
-		break;
-	default:
-		ret = -ENOSYS;
-	}
-
-	dec_preempt_count();
-
-	if (!ret) {
-		switch (cmp) {
-		case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
-		case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
-		case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
-		case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
-		case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
-		case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
-		default: ret = -ENOSYS;
-		}
-	}
-	return ret;
-}
-
-static inline int
 futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
 {
 	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
Index: linux-2.6.21-rc6-mm2/include/asm-x86_64/unistd.h
===================================================================
--- linux-2.6.21-rc6-mm2.orig/include/asm-x86_64/unistd.h	2007-04-20 14:01:25.000000000 +0200
+++ linux-2.6.21-rc6-mm2/include/asm-x86_64/unistd.h	2007-04-20 13:50:42.000000000 +0200
@@ -619,8 +619,6 @@ __SYSCALL(__NR_sync_file_range, sys_sync
 __SYSCALL(__NR_vmsplice, sys_vmsplice)
 #define __NR_move_pages		279
 __SYSCALL(__NR_move_pages, sys_move_pages)
-#define __NR_futex64		280
-__SYSCALL(__NR_futex64, sys_futex64)
 #define __NR_signalfd		281
 __SYSCALL(__NR_signalfd, sys_signalfd)
 #define __NR_timerfd		282
Index: linux-2.6.21-rc6-mm2/include/linux/futex.h
===================================================================
--- linux-2.6.21-rc6-mm2.orig/include/linux/futex.h	2007-04-20 14:01:24.000000000 +0200
+++ linux-2.6.21-rc6-mm2/include/linux/futex.h	2007-04-20 13:47:36.000000000 +0200
@@ -19,6 +19,12 @@ union ktime;
 #define FUTEX_TRYLOCK_PI	8
 #define FUTEX_CMP_REQUEUE_PI	9
 
+#ifdef CONFIG_64BIT
+#define FUTEX_64BIT		0x80000000
+#define FUTEX_WAIT64		FUTEX_WAIT|FUTEX_64BIT
+#define FUTEX_CMP_REQUEUE64	FUTEX_CMP_REQUEUE|FUTEX_64BIT
+#endif
+
 /*
  * Support for robust futexes: the kernel cleans up held futexes at
  * thread exit time.
@@ -104,7 +110,7 @@ struct robust_list_head {
 #ifdef __KERNEL__
 long do_futex(unsigned long __user *uaddr, int op, unsigned long val,
 	      union ktime *timeout, unsigned long __user *uaddr2,
-	      unsigned long val2, unsigned long val3, int futex64);
+	      unsigned long val2, unsigned long val3);
 
 extern int
 handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi);
Index: linux-2.6.21-rc6-mm2/include/linux/syscalls.h
===================================================================
--- linux-2.6.21-rc6-mm2.orig/include/linux/syscalls.h	2007-04-20 14:01:25.000000000 +0200
+++ linux-2.6.21-rc6-mm2/include/linux/syscalls.h	2007-04-20 13:48:12.000000000 +0200
@@ -178,9 +178,6 @@ asmlinkage long sys_set_tid_address(int 
 asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val,
 			struct timespec __user *utime, u32 __user *uaddr2,
 			u32 val3);
-asmlinkage long sys_futex64(u64 __user *uaddr, int op, u64 val,
-			struct timespec __user *utime, u64 __user *uaddr2,
-			u64 val3);
 
 asmlinkage long sys_init_module(void __user *umod, unsigned long len,
 				const char __user *uargs);
Index: linux-2.6.21-rc6-mm2/kernel/futex.c
===================================================================
--- linux-2.6.21-rc6-mm2.orig/kernel/futex.c	2007-04-20 14:01:24.000000000 +0200
+++ linux-2.6.21-rc6-mm2/kernel/futex.c	2007-04-20 15:31:23.000000000 +0200
@@ -62,20 +62,6 @@
 #define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)
 
 #ifdef CONFIG_64BIT
-static inline unsigned long
-futex_cmpxchg_inatomic(unsigned long __user *uaddr, unsigned long oldval,
-		       unsigned long newval, int futex64)
-{
-	if (futex64)
-		return futex_atomic_cmpxchg_inatomic64((u64 __user *)uaddr,
-						       oldval, newval);
-	else {
-		u32 ov = oldval, nv = newval;
-		return futex_atomic_cmpxchg_inatomic((int __user *)uaddr, ov,
-						     nv);
-	}
-}
-
 static inline int
 futex_get_user(unsigned long *val, unsigned long __user *uaddr, int futex64)
 {
@@ -92,11 +78,7 @@ futex_get_user(unsigned long *val, unsig
 }
 
 #else
-#define futex_cmpxchg_inatomic(uaddr, oldval, newval, futex64)	\
-	futex_atomic_cmpxchg_inatomic((u32*)uaddr, oldval, newval)
-
 #define futex_get_user(val, uaddr, futex64) get_user(*val, uaddr)
-
 #endif
 
 /*
@@ -606,12 +588,12 @@ static void wake_futex(struct futex_q *q
 	q->lock_ptr = NULL;
 }
 
-static int wake_futex_pi(unsigned long __user *uaddr, unsigned long uval,
-			 struct futex_q *this, int futex64)
+static int wake_futex_pi(u32 __user *uaddr, unsigned long uval,
+			 struct futex_q *this)
 {
 	struct task_struct *new_owner;
 	struct futex_pi_state *pi_state = this->pi_state;
-	unsigned long curval, newval;
+	u32 curval, newval;
 
 	if (!pi_state)
 		return -EINVAL;
@@ -639,7 +621,7 @@ static int wake_futex_pi(unsigned long _
 		newval |= (uval & FUTEX_WAITER_REQUEUED);
 
 		pagefault_disable();
-		curval = futex_cmpxchg_inatomic(uaddr, uval, newval, futex64);
+		curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
 		pagefault_enable();
 		if (curval == -EFAULT)
 			return -EFAULT;
@@ -664,17 +646,16 @@ static int wake_futex_pi(unsigned long _
 	return 0;
 }
 
-static int unlock_futex_pi(unsigned long __user *uaddr, unsigned long uval,
-			   int futex64)
+static int unlock_futex_pi(u32 __user *uaddr, unsigned long uval)
 {
-	unsigned long oldval;
+	u32 oldval;
 
 	/*
 	 * There is no waiter, so we unlock the futex. The owner died
 	 * bit has not to be preserved here. We are the owner:
 	 */
 	pagefault_disable();
-	oldval = futex_cmpxchg_inatomic(uaddr, uval, 0, futex64);
+	oldval = futex_atomic_cmpxchg_inatomic(uaddr, uval, 0);
 	pagefault_enable();
 
 	if (oldval == -EFAULT)
@@ -748,19 +729,20 @@ out:
  * or create a new one without owner.
  */
 static inline int
-lookup_pi_state_for_requeue(unsigned long __user *uaddr,
+lookup_pi_state_for_requeue(u32 __user *uaddr,
 			    struct futex_hash_bucket *hb,
 			    union futex_key *key,
-			    struct futex_pi_state **pi_state, int futex64)
+			    struct futex_pi_state **pi_state)
 {
-	unsigned long curval, uval, newval;
+	u32 curval, uval, newval;
 
 retry:
 	/*
 	 * We can't handle a fault cleanly because we can't
 	 * release the locks here. Simply return the fault.
 	 */
-	if (get_futex_value_locked(&curval, uaddr, futex64))
+	if (get_futex_value_locked((unsigned long *)&curval,
+				   (unsigned long __user *)uaddr, 0))
 		return -EFAULT;
 
 	/* set the flags FUTEX_WAITERS and FUTEX_WAITER_REQUEUED */
@@ -774,7 +756,7 @@ retry:
 		newval = uval | FUTEX_WAITERS | FUTEX_WAITER_REQUEUED;
 
 		pagefault_disable();
-		curval = futex_cmpxchg_inatomic(uaddr, uval, newval, futex64);
+		curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
 		pagefault_enable();
 
 		if (unlikely(curval == -EFAULT))
@@ -806,8 +788,8 @@ retry:
  * one physical page to another physical page (PI-futex uaddr2)
  */
 static int
-futex_requeue_pi(unsigned long __user *uaddr1, unsigned long __user *uaddr2,
-		 int nr_wake, int nr_requeue, unsigned long *cmpval, int futex64)
+futex_requeue_pi(u32 __user *uaddr1, u32 __user *uaddr2,
+		 int nr_wake, int nr_requeue, unsigned long *cmpval)
 {
 	union futex_key key1, key2;
 	struct futex_hash_bucket *hb1, *hb2;
@@ -840,9 +822,10 @@ retry:
 	double_lock_hb(hb1, hb2);
 
 	if (likely(cmpval != NULL)) {
-		unsigned long curval;
+		u32 curval;
 
-		ret = get_futex_value_locked(&curval, uaddr1, futex64);
+		ret = get_futex_value_locked((unsigned long*)&curval,
+					     (unsigned long __user *)uaddr1, 0);
 
 		if (unlikely(ret)) {
 			spin_unlock(&hb1->lock);
@@ -855,7 +838,7 @@ retry:
 			 */
 			up_read(&current->mm->mmap_sem);
 
-			ret = futex_get_user(&curval, uaddr1, futex64);
+			ret = get_user(curval, uaddr1);
 
 			if (!ret)
 				goto retry;
@@ -882,8 +865,7 @@ retry:
 				int s;
 				/* do this only the first time we requeue someone */
 				s = lookup_pi_state_for_requeue(uaddr2, hb2,
-								&key2, &pi_state2,
-								futex64);
+								&key2, &pi_state2);
 				if (s) {
 					ret = s;
 					goto out_unlock;
@@ -998,7 +980,7 @@ out:
  */
 static int
 futex_wake_op(unsigned long __user *uaddr1, unsigned long __user *uaddr2,
-	      int nr_wake, int nr_wake2, int op, int futex64)
+	      int nr_wake, int nr_wake2, int op)
 {
 	union futex_key key1, key2;
 	struct futex_hash_bucket *hb1, *hb2;
@@ -1022,16 +1004,10 @@ retryfull:
 retry:
 	double_lock_hb(hb1, hb2);
 
-#ifdef CONFIG_64BIT
-	if (futex64)
-		op_ret = futex_atomic_op_inuser64(op, (u64 __user *)uaddr2);
-	else
-		op_ret = futex_atomic_op_inuser(op, (int __user *)uaddr2);
-#else
 	op_ret = futex_atomic_op_inuser(op, (int __user *)uaddr2);
-#endif
+
 	if (unlikely(op_ret < 0)) {
-		unsigned long dummy;
+		u32 dummy;
 
 		spin_unlock(&hb1->lock);
 		if (hb1 != hb2)
@@ -1073,7 +1049,7 @@ retry:
 		 */
 		up_read(&current->mm->mmap_sem);
 
-		ret = futex_get_user(&dummy, uaddr2, futex64);
+		ret = get_user(dummy, uaddr2);
 		if (ret)
 			return ret;
 
@@ -1379,8 +1355,18 @@ static int fixup_pi_state_owner(unsigned
 	while (!ret) {
 		newval = (uval & FUTEX_OWNER_DIED) | newtid;
 		newval |= (uval & FUTEX_WAITER_REQUEUED);
-		curval = futex_cmpxchg_inatomic(uaddr,uval,
-						newval, futex64);
+#ifdef CONFIG_64BIT
+		if (futex64)
+			curval = futex_atomic_cmpxchg_inatomic64(
+				                       (u64 __user *)uaddr,
+						       uval, newval);
+		else
+#endif
+			curval = futex_atomic_cmpxchg_inatomic(
+				                       (u32 __user *)uaddr,
+						       (u32)uval,
+						       (u32)newval);
+
 		if (curval == -EFAULT)
  			ret = -EFAULT;
 		if (curval == uval)
@@ -1673,13 +1659,13 @@ static void set_pi_futex_owner(struct fu
  * if there are waiters then it will block, it does PI, etc. (Due to
  * races the kernel might see a 0 value of the futex too.)
  */
-static int futex_lock_pi(unsigned long __user *uaddr, int detect, ktime_t *time,
-			 int trylock, int futex64)
+static int futex_lock_pi(u32 __user *uaddr, int detect, ktime_t *time,
+			 int trylock)
 {
 	struct hrtimer_sleeper timeout, *to = NULL;
 	struct task_struct *curr = current;
 	struct futex_hash_bucket *hb;
-	unsigned long uval, newval, curval;
+	u32 uval, newval, curval;
 	struct futex_q q;
 	int ret, lock_held, attempt = 0;
 
@@ -1714,7 +1700,7 @@ static int futex_lock_pi(unsigned long _
 	newval = current->pid;
 
 	pagefault_disable();
-	curval = futex_cmpxchg_inatomic(uaddr, 0, newval, futex64);
+	curval = futex_atomic_cmpxchg_inatomic(uaddr, 0, newval);
 	pagefault_enable();
 
 	if (unlikely(curval == -EFAULT))
@@ -1759,7 +1745,7 @@ static int futex_lock_pi(unsigned long _
 		newval = curval | FUTEX_WAITERS;
 
 	pagefault_disable();
-	curval = futex_cmpxchg_inatomic(uaddr, uval, newval, futex64);
+	curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
 	pagefault_enable();
 
 	if (unlikely(curval == -EFAULT))
@@ -1796,8 +1782,8 @@ static int futex_lock_pi(unsigned long _
 				FUTEX_OWNER_DIED | FUTEX_WAITERS;
 
 			pagefault_disable();
-			curval = futex_cmpxchg_inatomic(uaddr, uval,
-							newval, futex64);
+			curval = futex_atomic_cmpxchg_inatomic(uaddr, uval,
+							       newval);
 			pagefault_enable();
 
 			if (unlikely(curval == -EFAULT))
@@ -1841,7 +1827,8 @@ static int futex_lock_pi(unsigned long _
 	 */
 	if (!ret && q.pi_state->owner != curr)
 		/* mmap_sem is unlocked at return of this function */
-		ret = fixup_pi_state_owner(uaddr, &q, hb, curr, futex64);
+		ret = fixup_pi_state_owner((unsigned long  __user *)uaddr,
+					   &q, hb, curr, 0);
 	else {
 		/*
 		 * Catch the rare case, where the lock was released
@@ -1887,7 +1874,7 @@ static int futex_lock_pi(unsigned long _
 	queue_unlock(&q, hb);
 	up_read(&curr->mm->mmap_sem);
 
-	ret = futex_get_user(&uval, uaddr, futex64);
+	ret = get_user(uval, uaddr);
 	if (!ret && (uval != -EFAULT))
 		goto retry;
 
@@ -1899,17 +1886,17 @@ static int futex_lock_pi(unsigned long _
  * This is the in-kernel slowpath: we look up the PI state (if any),
  * and do the rt-mutex unlock.
  */
-static int futex_unlock_pi(unsigned long __user *uaddr, int futex64)
+static int futex_unlock_pi(u32 __user *uaddr)
 {
 	struct futex_hash_bucket *hb;
 	struct futex_q *this, *next;
-	unsigned long uval;
+	u32 uval;
 	struct plist_head *head;
 	union futex_key key;
 	int ret, attempt = 0;
 
 retry:
-	if (futex_get_user(&uval, uaddr, futex64))
+	if (get_user(uval, uaddr))
 		return -EFAULT;
 	/*
 	 * We release only a lock we actually own:
@@ -1936,7 +1923,7 @@ retry_locked:
 	 */
 	if (!(uval & FUTEX_OWNER_DIED)) {
 		pagefault_disable();
-		uval = futex_cmpxchg_inatomic(uaddr, current->pid, 0, futex64);
+		uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0);
 		pagefault_enable();
 	}
 
@@ -1958,7 +1945,7 @@ retry_locked:
 	plist_for_each_entry_safe(this, next, head, list) {
 		if (!match_futex (&this->key, &key))
 			continue;
-		ret = wake_futex_pi(uaddr, uval, this, futex64);
+		ret = wake_futex_pi(uaddr, uval, this);
 		/*
 		 * The atomic access to the futex value
 		 * generated a pagefault, so retry the
@@ -1972,7 +1959,7 @@ retry_locked:
 	 * No waiters - kernel unlocks the futex:
 	 */
 	if (!(uval & FUTEX_OWNER_DIED)) {
-		ret = unlock_futex_pi(uaddr, uval, futex64);
+		ret = unlock_futex_pi(uaddr, uval);
 		if (ret == -EFAULT)
 			goto pi_faulted;
 	}
@@ -2002,7 +1989,7 @@ pi_faulted:
 	spin_unlock(&hb->lock);
 	up_read(&current->mm->mmap_sem);
 
-	ret = futex_get_user(&uval, uaddr, futex64);
+	ret = get_user(uval, uaddr);
 	if (!ret && (uval != -EFAULT))
 		goto retry;
 
@@ -2323,92 +2310,70 @@ void exit_robust_list(struct task_struct
 
 long do_futex(unsigned long __user *uaddr, int op, unsigned long val,
 	      ktime_t *timeout, unsigned long __user *uaddr2,
-	      unsigned long val2, unsigned long val3, int fut64)
+	      unsigned long val2, unsigned long val3)
 {
 	int ret;
 
 	switch (op) {
 	case FUTEX_WAIT:
-		ret = futex_wait(uaddr, val, timeout, fut64);
+		ret = futex_wait(uaddr, val, timeout, 0);
 		break;
 	case FUTEX_WAKE:
 		ret = futex_wake(uaddr, val);
 		break;
 	case FUTEX_FD:
-		if (fut64)
-			ret = -ENOSYS;
-		else
-			/* non-zero val means F_SETOWN(getpid())&F_SETSIG(val) */
-			ret = futex_fd((u32 __user *)uaddr, val);
+		/* non-zero val means F_SETOWN(getpid())&F_SETSIG(val) */
+		ret = futex_fd((u32 __user *)uaddr, val);
 		break;
 	case FUTEX_REQUEUE:
-		ret = futex_requeue(uaddr, uaddr2, val, val2, NULL, fut64);
+		ret = futex_requeue(uaddr, uaddr2, val, val2, NULL, 0);
 		break;
 	case FUTEX_CMP_REQUEUE:
-		ret = futex_requeue(uaddr, uaddr2, val, val2, &val3, fut64);
+		ret = futex_requeue(uaddr, uaddr2, val, val2, &val3, 0);
 		break;
 	case FUTEX_WAKE_OP:
-		ret = futex_wake_op(uaddr, uaddr2, val, val2, val3, fut64);
+		ret = futex_wake_op(uaddr, uaddr2, val, val2, val3);
 		break;
 	case FUTEX_LOCK_PI:
-		ret = futex_lock_pi(uaddr, val, timeout, 0, fut64);
+		ret = futex_lock_pi((u32 __user *)uaddr, val, timeout, 0);
 		break;
 	case FUTEX_UNLOCK_PI:
-		ret = futex_unlock_pi(uaddr, fut64);
+		ret = futex_unlock_pi((u32 __user *)uaddr);
 		break;
 	case FUTEX_TRYLOCK_PI:
-		ret = futex_lock_pi(uaddr, 0, timeout, 1, fut64);
+		ret = futex_lock_pi((u32 __user *)uaddr, 0, timeout, 1);
 		break;
 	case FUTEX_CMP_REQUEUE_PI:
-		ret = futex_requeue_pi(uaddr, uaddr2, val, val2, &val3, fut64);
+		ret = futex_requeue_pi((u32 __user *)uaddr,
+				       (u32 __user *)uaddr2,
+				       val, val2, &val3);
+		break;
+#ifdef CONFIG_64BIT
+	case FUTEX_WAIT64:
+		ret = futex_wait(uaddr, val, timeout, 1);
+		break;
+	case FUTEX_CMP_REQUEUE64:
+		ret = futex_requeue(uaddr, uaddr2, val, val2, &val3, 1);
 		break;
+#endif
 	default:
 		ret = -ENOSYS;
 	}
 	return ret;
 }
 
-#ifdef CONFIG_64BIT
-
-asmlinkage long
-sys_futex64(u64 __user *uaddr, int op, u64 val,
-	    struct timespec __user *utime, u64 __user *uaddr2, u64 val3)
-{
-	struct timespec ts;
-	ktime_t t, *tp = NULL;
-	u64 val2 = 0;
-
-	if (utime && (op == FUTEX_WAIT || op == FUTEX_LOCK_PI)) {
-		if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
-			return -EFAULT;
-		if (!timespec_valid(&ts))
-			return -EINVAL;
-
-		t = timespec_to_ktime(ts);
-		if (op == FUTEX_WAIT)
-			t = ktime_add(ktime_get(), t);
-		tp = &t;
-	}
-	/*
-	 * requeue parameter in 'utime' if op == FUTEX_REQUEUE.
-	 */
-	if (op == FUTEX_REQUEUE || op == FUTEX_CMP_REQUEUE
-	    || op == FUTEX_CMP_REQUEUE_PI)
-		val2 = (unsigned long) utime;
-
-	return do_futex((unsigned long __user*)uaddr, op, val, tp,
-			(unsigned long __user*)uaddr2, val2, val3, 1);
-}
-
-#endif
-
 asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val,
 			  struct timespec __user *utime, u32 __user *uaddr2,
 			  u32 val3)
 {
 	struct timespec ts;
 	ktime_t t, *tp = NULL;
-	u32 val2 = 0;
+	unsigned long val2 = 0;
+	int op2 = op;
+
+#ifdef CONFIG_64BIT
+	op &= ~FUTEX_64BIT;
+#endif
 
 	if (utime && (op == FUTEX_WAIT || op == FUTEX_LOCK_PI)) {
 		if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
@@ -2426,10 +2391,10 @@ asmlinkage long sys_futex(u32 __user *ua
 	 */
 	if (op == FUTEX_REQUEUE || op == FUTEX_CMP_REQUEUE
 	    || op == FUTEX_CMP_REQUEUE_PI)
-		val2 = (u32) (unsigned long) utime;
+		val2 = (unsigned long) utime;
 
-	return do_futex((unsigned long __user*)uaddr, op, val, tp,
-			(unsigned long __user*)uaddr2, val2, val3, 0);
+	return do_futex((unsigned long __user*)uaddr, op2, val, tp,
+			(unsigned long __user*)uaddr2, val2, val3);
 }
 
 static int futexfs_get_sb(struct file_system_type *fs_type,
Index: linux-2.6.21-rc6-mm2/kernel/futex_compat.c
===================================================================
--- linux-2.6.21-rc6-mm2.orig/kernel/futex_compat.c	2007-04-20 14:01:24.000000000 +0200
+++ linux-2.6.21-rc6-mm2/kernel/futex_compat.c	2007-04-20 13:41:57.000000000 +0200
@@ -161,5 +161,5 @@ asmlinkage long compat_sys_futex(u32 __u
 		val2 = (int) (unsigned long) utime;
 
 	return do_futex((unsigned long __user*)uaddr, op, val, tp,
-			(unsigned long __user*)uaddr2, val2, val3, 0);
+			(unsigned long __user*)uaddr2, val2, val3);
 }
Index: linux-2.6.21-rc6-mm2/kernel/sys_ni.c
===================================================================
--- linux-2.6.21-rc6-mm2.orig/kernel/sys_ni.c	2007-04-20 14:01:24.000000000 +0200
+++ linux-2.6.21-rc6-mm2/kernel/sys_ni.c	2007-04-20 13:41:54.000000000 +0200
@@ -41,7 +41,6 @@ cond_syscall(sys_sendmsg);
 cond_syscall(sys_recvmsg);
 cond_syscall(sys_socketcall);
 cond_syscall(sys_futex);
-cond_syscall(sys_futex64);
 cond_syscall(compat_sys_futex);
 cond_syscall(sys_set_robust_list);
 cond_syscall(compat_sys_set_robust_list);
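
For reference, a hypothetical userspace invocation of the new command as
defined by this patch (64-bit kernels only, no glibc wrapper, error
handling elided):

	#include <stdint.h>
	#include <time.h>
	#include <unistd.h>
	#include <sys/syscall.h>

	#define FUTEX_WAIT	0
	#define FUTEX_64BIT	0x80000000
	#define FUTEX_WAIT64	(FUTEX_WAIT | FUTEX_64BIT)

	/* Block while *uaddr still contains val; a NULL timeout waits
	 * forever. */
	static long futex_wait64(uint64_t *uaddr, uint64_t val,
				 const struct timespec *timeout)
	{
		return syscall(SYS_futex, uaddr, FUTEX_WAIT64, val,
			       timeout, NULL, 0);
	}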

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH -mm] 64bit-futex - provide new commands instead of new syscall
  2007-04-23 14:35     ` [PATCH -mm] 64bit-futex - provide new commands instead of new syscall Pierre Peiffer
@ 2007-04-23 15:30       ` Ulrich Drepper
  2007-04-24  8:07         ` [PATCH -mm take2] " Pierre Peiffer
  0 siblings, 1 reply; 12+ messages in thread
From: Ulrich Drepper @ 2007-04-23 15:30 UTC (permalink / raw)
  To: Pierre Peiffer
  Cc: Jakub Jelinek, akpm, mingo, drepper, linux-kernel, jean-pierre.dion

On 4/23/07, Pierre Peiffer <pierre.peiffer@bull.net> wrote:
> Following this mail, sent a few weeks ago, here is a patch which should
> meet your requirements. [...]

It looks mostly good.  I wouldn't use the high bit to differentiate
the 64-bit operations, though.  Since we do not allow it to be applied
to all operations, the only effect will be that the compiler has a
harder time generating the code for the switch statement.  If you use
contiguous values, a simple jump table can be used, with no
conditionals.  Smaller and faster.
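
A hypothetical sketch of the difference:

	/* With the commands packed into a small contiguous range ... */
	enum futex_cmd { CMD_WAIT, CMD_WAKE, CMD_WAIT64, CMD_CMP_REQUEUE64 };

	long dispatch(enum futex_cmd op)
	{
		/* ... this switch can compile down to a bounds check
		 * plus one indirect jump through a 4-entry table.  Had
		 * CMD_WAIT64 been CMD_WAIT | 0x80000000, the case
		 * values would span the whole integer range and the
		 * compiler would emit a chain of compares instead. */
		switch (op) {
		case CMD_WAIT:		return 1;
		case CMD_WAKE:		return 2;
		case CMD_WAIT64:	return 3;
		case CMD_CMP_REQUEUE64:	return 4;
		default:		return -1;
		}
	}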

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH -mm take2] 64bit-futex - provide new commands instead of new syscall
  2007-04-23 15:30       ` Ulrich Drepper
@ 2007-04-24  8:07         ` Pierre Peiffer
  2007-04-24 13:25           ` Ulrich Drepper
  0 siblings, 1 reply; 12+ messages in thread
From: Pierre Peiffer @ 2007-04-24  8:07 UTC (permalink / raw)
  To: Ulrich Drepper
  Cc: Jakub Jelinek, akpm, mingo, drepper, linux-kernel, jean-pierre.dion

[-- Attachment #1: Type: text/plain, Size: 505 bytes --]

Ulrich Drepper wrote:
> 
> It looks mostly good.  I wouldn't use the high bit to differentiate
> the 64-bit operations, though.  Since we do not allow it to be applied
> to all operations, the only effect will be that the compiler has a
> harder time generating the code for the switch statement.  If you use
> contiguous values, a simple jump table can be used, with no
> conditionals.  Smaller and faster.
> 

Something like that, maybe...

Signed-off-by: Pierre Peiffer <pierre.peiffer@bull.net>


-- 
Pierre

[-- Attachment #2: futex-64bits-command.patch --]
[-- Type: text/x-patch, Size: 22911 bytes --]

---
 include/asm-ia64/futex.h    |    8 -
 include/asm-powerpc/futex.h |    6 -
 include/asm-s390/futex.h    |    8 -
 include/asm-sparc64/futex.h |    8 -
 include/asm-um/futex.h      |    9 -
 include/asm-x86_64/futex.h  |   86 ------------------
 include/asm-x86_64/unistd.h |    2 
 include/linux/futex.h       |    6 +
 include/linux/syscalls.h    |    3 
 kernel/futex.c              |  203 ++++++++++++++++++--------------------------
 kernel/futex_compat.c       |    2 
 kernel/sys_ni.c             |    1 
 12 files changed, 95 insertions(+), 247 deletions(-)

Index: b/include/asm-ia64/futex.h
===================================================================
--- a/include/asm-ia64/futex.h
+++ b/include/asm-ia64/futex.h
@@ -124,13 +124,7 @@ futex_atomic_cmpxchg_inatomic(int __user
 static inline u64
 futex_atomic_cmpxchg_inatomic64(u64 __user *uaddr, u64 oldval, u64 newval)
 {
-	return 0;
-}
-
-static inline int
-futex_atomic_op_inuser64 (int encoded_op, u64 __user *uaddr)
-{
-	return 0;
+	return -ENOSYS;
 }
 
 #endif /* _ASM_FUTEX_H */
Index: b/include/asm-powerpc/futex.h
===================================================================
--- a/include/asm-powerpc/futex.h
+++ b/include/asm-powerpc/futex.h
@@ -119,11 +119,5 @@ futex_atomic_cmpxchg_inatomic64(u64 __us
 	return 0;
 }
 
-static inline int
-futex_atomic_op_inuser64 (int encoded_op, u64 __user *uaddr)
-{
-	return 0;
-}
-
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_FUTEX_H */
Index: b/include/asm-s390/futex.h
===================================================================
--- a/include/asm-s390/futex.h
+++ b/include/asm-s390/futex.h
@@ -51,13 +51,7 @@ static inline int futex_atomic_cmpxchg_i
 static inline u64
 futex_atomic_cmpxchg_inatomic64(u64 __user *uaddr, u64 oldval, u64 newval)
 {
-	return 0;
-}
-
-static inline int
-futex_atomic_op_inuser64 (int encoded_op, u64 __user *uaddr)
-{
-	return 0;
+	return -ENOSYS;
 }
 
 #endif /* __KERNEL__ */
Index: b/include/asm-sparc64/futex.h
===================================================================
--- a/include/asm-sparc64/futex.h
+++ b/include/asm-sparc64/futex.h
@@ -108,13 +108,7 @@ futex_atomic_cmpxchg_inatomic(int __user
 static inline u64
 futex_atomic_cmpxchg_inatomic64(u64 __user *uaddr, u64 oldval, u64 newval)
 {
-	return 0;
-}
-
-static inline int
-futex_atomic_op_inuser64 (int encoded_op, u64 __user *uaddr)
-{
-	return 0;
+	return -ENOSYS;
 }
 
 #endif /* !(_SPARC64_FUTEX_H) */
Index: b/include/asm-um/futex.h
===================================================================
--- a/include/asm-um/futex.h
+++ b/include/asm-um/futex.h
@@ -6,14 +6,7 @@
 static inline u64
 futex_atomic_cmpxchg_inatomic64(u64 __user *uaddr, u64 oldval, u64 newval)
 {
-	return 0;
+	return -ENOSYS;
 }
 
-static inline int
-futex_atomic_op_inuser64 (int encoded_op, u64 __user *uaddr)
-{
-	return 0;
-}
-
-
 #endif
Index: b/include/asm-x86_64/futex.h
===================================================================
--- a/include/asm-x86_64/futex.h
+++ b/include/asm-x86_64/futex.h
@@ -41,38 +41,6 @@
 	  "=&r" (tem)						\
 	: "r" (oparg), "i" (-EFAULT), "m" (*uaddr), "1" (0))
 
-#define __futex_atomic_op1_64(insn, ret, oldval, uaddr, oparg) \
-  __asm__ __volatile (						\
-"1:	" insn "\n"						\
-"2:	.section .fixup,\"ax\"\n\
-3:	movq	%3, %1\n\
-	jmp	2b\n\
-	.previous\n\
-	.section __ex_table,\"a\"\n\
-	.align	8\n\
-	.quad	1b,3b\n\
-	.previous"						\
-	: "=r" (oldval), "=r" (ret), "=m" (*uaddr)		\
-	: "i" (-EFAULT), "m" (*uaddr), "0" (oparg), "1" (0))
-
-#define __futex_atomic_op2_64(insn, ret, oldval, uaddr, oparg) \
-  __asm__ __volatile (						\
-"1:	movq	%2, %0\n\
-	movq	%0, %3\n"					\
-	insn "\n"						\
-"2:	" LOCK_PREFIX "cmpxchgq %3, %2\n\
-	jnz	1b\n\
-3:	.section .fixup,\"ax\"\n\
-4:	movq	%5, %1\n\
-	jmp	3b\n\
-	.previous\n\
-	.section __ex_table,\"a\"\n\
-	.align	8\n\
-	.quad	1b,4b,2b,4b\n\
-	.previous"						\
-	: "=&a" (oldval), "=&r" (ret), "=m" (*uaddr),		\
-	  "=&r" (tem)						\
-	: "r" (oparg), "i" (-EFAULT), "m" (*uaddr), "1" (0))
 
 static inline int
 futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
@@ -128,60 +96,6 @@ futex_atomic_op_inuser (int encoded_op, 
 }
 
 static inline int
-futex_atomic_op_inuser64 (int encoded_op, u64 __user *uaddr)
-{
-	int op = (encoded_op >> 28) & 7;
-	int cmp = (encoded_op >> 24) & 15;
-	u64 oparg = (encoded_op << 8) >> 20;
-	u64 cmparg = (encoded_op << 20) >> 20;
-	u64 oldval = 0, ret, tem;
-
-	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-		oparg = 1 << oparg;
-
-	if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u64)))
-		return -EFAULT;
-
-	inc_preempt_count();
-
-	switch (op) {
-	case FUTEX_OP_SET:
-		__futex_atomic_op1_64("xchgq %0, %2", ret, oldval, uaddr, oparg);
-		break;
-	case FUTEX_OP_ADD:
-		__futex_atomic_op1_64(LOCK_PREFIX "xaddq %0, %2", ret, oldval,
-				   uaddr, oparg);
-		break;
-	case FUTEX_OP_OR:
-		__futex_atomic_op2_64("orq %4, %3", ret, oldval, uaddr, oparg);
-		break;
-	case FUTEX_OP_ANDN:
-		__futex_atomic_op2_64("andq %4, %3", ret, oldval, uaddr, ~oparg);
-		break;
-	case FUTEX_OP_XOR:
-		__futex_atomic_op2_64("xorq %4, %3", ret, oldval, uaddr, oparg);
-		break;
-	default:
-		ret = -ENOSYS;
-	}
-
-	dec_preempt_count();
-
-	if (!ret) {
-		switch (cmp) {
-		case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
-		case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
-		case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
-		case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
-		case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
-		case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
-		default: ret = -ENOSYS;
-		}
-	}
-	return ret;
-}
-
-static inline int
 futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
 {
 	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
Index: b/include/asm-x86_64/unistd.h
===================================================================
--- a/include/asm-x86_64/unistd.h
+++ b/include/asm-x86_64/unistd.h
@@ -619,8 +619,6 @@ __SYSCALL(__NR_sync_file_range, sys_sync
 __SYSCALL(__NR_vmsplice, sys_vmsplice)
 #define __NR_move_pages		279
 __SYSCALL(__NR_move_pages, sys_move_pages)
-#define __NR_futex64		280
-__SYSCALL(__NR_futex64, sys_futex64)
 #define __NR_signalfd		281
 __SYSCALL(__NR_signalfd, sys_signalfd)
 #define __NR_timerfd		282
Index: b/include/linux/futex.h
===================================================================
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -18,6 +18,10 @@ union ktime;
 #define FUTEX_UNLOCK_PI		7
 #define FUTEX_TRYLOCK_PI	8
 #define FUTEX_CMP_REQUEUE_PI	9
+#ifdef CONFIG_64BIT
+#define FUTEX_WAIT64		10
+#define FUTEX_CMP_REQUEUE64	11
+#endif
 
 /*
  * Support for robust futexes: the kernel cleans up held futexes at
@@ -104,7 +108,7 @@ struct robust_list_head {
 #ifdef __KERNEL__
 long do_futex(unsigned long __user *uaddr, int op, unsigned long val,
 	      union ktime *timeout, unsigned long __user *uaddr2,
-	      unsigned long val2, unsigned long val3, int futex64);
+	      unsigned long val2, unsigned long val3);
 
 extern int
 handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi);
Index: b/include/linux/syscalls.h
===================================================================
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -178,9 +178,6 @@ asmlinkage long sys_set_tid_address(int 
 asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val,
 			struct timespec __user *utime, u32 __user *uaddr2,
 			u32 val3);
-asmlinkage long sys_futex64(u64 __user *uaddr, int op, u64 val,
-			struct timespec __user *utime, u64 __user *uaddr2,
-			u64 val3);
 
 asmlinkage long sys_init_module(void __user *umod, unsigned long len,
 				const char __user *uargs);
Index: b/kernel/futex.c
===================================================================
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -62,20 +62,6 @@
 #define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)
 
 #ifdef CONFIG_64BIT
-static inline unsigned long
-futex_cmpxchg_inatomic(unsigned long __user *uaddr, unsigned long oldval,
-		       unsigned long newval, int futex64)
-{
-	if (futex64)
-		return futex_atomic_cmpxchg_inatomic64((u64 __user *)uaddr,
-						       oldval, newval);
-	else {
-		u32 ov = oldval, nv = newval;
-		return futex_atomic_cmpxchg_inatomic((int __user *)uaddr, ov,
-						     nv);
-	}
-}
-
 static inline int
 futex_get_user(unsigned long *val, unsigned long __user *uaddr, int futex64)
 {
@@ -92,11 +78,7 @@ futex_get_user(unsigned long *val, unsig
 }
 
 #else
-#define futex_cmpxchg_inatomic(uaddr, oldval, newval, futex64)	\
-	futex_atomic_cmpxchg_inatomic((u32*)uaddr, oldval, newval)
-
 #define futex_get_user(val, uaddr, futex64) get_user(*val, uaddr)
-
 #endif
 
 /*
@@ -606,12 +588,12 @@ static void wake_futex(struct futex_q *q
 	q->lock_ptr = NULL;
 }
 
-static int wake_futex_pi(unsigned long __user *uaddr, unsigned long uval,
-			 struct futex_q *this, int futex64)
+static int wake_futex_pi(u32 __user *uaddr, unsigned long uval,
+			 struct futex_q *this)
 {
 	struct task_struct *new_owner;
 	struct futex_pi_state *pi_state = this->pi_state;
-	unsigned long curval, newval;
+	u32 curval, newval;
 
 	if (!pi_state)
 		return -EINVAL;
@@ -639,7 +621,7 @@ static int wake_futex_pi(unsigned long _
 		newval |= (uval & FUTEX_WAITER_REQUEUED);
 
 		pagefault_disable();
-		curval = futex_cmpxchg_inatomic(uaddr, uval, newval, futex64);
+		curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
 		pagefault_enable();
 		if (curval == -EFAULT)
 			return -EFAULT;
@@ -664,17 +646,16 @@ static int wake_futex_pi(unsigned long _
 	return 0;
 }
 
-static int unlock_futex_pi(unsigned long __user *uaddr, unsigned long uval,
-			   int futex64)
+static int unlock_futex_pi(u32 __user *uaddr, unsigned long uval)
 {
-	unsigned long oldval;
+	u32 oldval;
 
 	/*
 	 * There is no waiter, so we unlock the futex. The owner died
 	 * bit has not to be preserved here. We are the owner:
 	 */
 	pagefault_disable();
-	oldval = futex_cmpxchg_inatomic(uaddr, uval, 0, futex64);
+	oldval = futex_atomic_cmpxchg_inatomic(uaddr, uval, 0);
 	pagefault_enable();
 
 	if (oldval == -EFAULT)
@@ -748,19 +729,20 @@ out:
  * or create a new one without owner.
  */
 static inline int
-lookup_pi_state_for_requeue(unsigned long __user *uaddr,
+lookup_pi_state_for_requeue(u32 __user *uaddr,
 			    struct futex_hash_bucket *hb,
 			    union futex_key *key,
-			    struct futex_pi_state **pi_state, int futex64)
+			    struct futex_pi_state **pi_state)
 {
-	unsigned long curval, uval, newval;
+	u32 curval, uval, newval;
 
 retry:
 	/*
 	 * We can't handle a fault cleanly because we can't
 	 * release the locks here. Simply return the fault.
 	 */
-	if (get_futex_value_locked(&curval, uaddr, futex64))
+	if (get_futex_value_locked((unsigned long *)&curval,
+				   (unsigned long __user *)uaddr, 0))
 		return -EFAULT;
 
 	/* set the flags FUTEX_WAITERS and FUTEX_WAITER_REQUEUED */
@@ -774,7 +756,7 @@ retry:
 		newval = uval | FUTEX_WAITERS | FUTEX_WAITER_REQUEUED;
 
 		pagefault_disable();
-		curval = futex_cmpxchg_inatomic(uaddr, uval, newval, futex64);
+		curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
 		pagefault_enable();
 
 		if (unlikely(curval == -EFAULT))
@@ -806,8 +788,8 @@ retry:
  * one physical page to another physical page (PI-futex uaddr2)
  */
 static int
-futex_requeue_pi(unsigned long __user *uaddr1, unsigned long __user *uaddr2,
-		 int nr_wake, int nr_requeue, unsigned long *cmpval, int futex64)
+futex_requeue_pi(u32 __user *uaddr1, u32 __user *uaddr2,
+		 int nr_wake, int nr_requeue, unsigned long *cmpval)
 {
 	union futex_key key1, key2;
 	struct futex_hash_bucket *hb1, *hb2;
@@ -840,9 +822,10 @@ retry:
 	double_lock_hb(hb1, hb2);
 
 	if (likely(cmpval != NULL)) {
-		unsigned long curval;
+		u32 curval;
 
-		ret = get_futex_value_locked(&curval, uaddr1, futex64);
+		ret = get_futex_value_locked((unsigned long*)&curval,
+					     (unsigned long __user *)uaddr1, 0);
 
 		if (unlikely(ret)) {
 			spin_unlock(&hb1->lock);
@@ -855,7 +838,7 @@ retry:
 			 */
 			up_read(&current->mm->mmap_sem);
 
-			ret = futex_get_user(&curval, uaddr1, futex64);
+			ret = get_user(curval, uaddr1);
 
 			if (!ret)
 				goto retry;
@@ -882,8 +865,7 @@ retry:
 				int s;
 				/* do this only the first time we requeue someone */
 				s = lookup_pi_state_for_requeue(uaddr2, hb2,
-								&key2, &pi_state2,
-								futex64);
+								&key2, &pi_state2);
 				if (s) {
 					ret = s;
 					goto out_unlock;
@@ -998,7 +980,7 @@ out:
  */
 static int
 futex_wake_op(unsigned long __user *uaddr1, unsigned long __user *uaddr2,
-	      int nr_wake, int nr_wake2, int op, int futex64)
+	      int nr_wake, int nr_wake2, int op)
 {
 	union futex_key key1, key2;
 	struct futex_hash_bucket *hb1, *hb2;
@@ -1022,16 +1004,10 @@ retryfull:
 retry:
 	double_lock_hb(hb1, hb2);
 
-#ifdef CONFIG_64BIT
-	if (futex64)
-		op_ret = futex_atomic_op_inuser64(op, (u64 __user *)uaddr2);
-	else
-		op_ret = futex_atomic_op_inuser(op, (int __user *)uaddr2);
-#else
 	op_ret = futex_atomic_op_inuser(op, (int __user *)uaddr2);
-#endif
+
 	if (unlikely(op_ret < 0)) {
-		unsigned long dummy;
+		u32 dummy;
 
 		spin_unlock(&hb1->lock);
 		if (hb1 != hb2)
@@ -1073,7 +1049,7 @@ retry:
 		 */
 		up_read(&current->mm->mmap_sem);
 
-		ret = futex_get_user(&dummy, uaddr2, futex64);
+		ret = get_user(dummy, uaddr2);
 		if (ret)
 			return ret;
 
@@ -1379,8 +1355,18 @@ static int fixup_pi_state_owner(unsigned
 	while (!ret) {
 		newval = (uval & FUTEX_OWNER_DIED) | newtid;
 		newval |= (uval & FUTEX_WAITER_REQUEUED);
-		curval = futex_cmpxchg_inatomic(uaddr,uval,
-						newval, futex64);
+#ifdef CONFIG_64BIT
+		if (futex64)
+			curval = futex_atomic_cmpxchg_inatomic64(
+				                       (u64 __user *)uaddr,
+						       uval, newval);
+		else
+#endif
+			curval = futex_atomic_cmpxchg_inatomic(
+				                       (u32 __user *)uaddr,
+						       (u32)uval,
+						       (u32)newval);
+
 		if (curval == -EFAULT)
  			ret = -EFAULT;
 		if (curval == uval)
@@ -1673,13 +1659,13 @@ static void set_pi_futex_owner(struct fu
  * if there are waiters then it will block, it does PI, etc. (Due to
  * races the kernel might see a 0 value of the futex too.)
  */
-static int futex_lock_pi(unsigned long __user *uaddr, int detect, ktime_t *time,
-			 int trylock, int futex64)
+static int futex_lock_pi(u32 __user *uaddr, int detect, ktime_t *time,
+			 int trylock)
 {
 	struct hrtimer_sleeper timeout, *to = NULL;
 	struct task_struct *curr = current;
 	struct futex_hash_bucket *hb;
-	unsigned long uval, newval, curval;
+	u32 uval, newval, curval;
 	struct futex_q q;
 	int ret, lock_held, attempt = 0;
 
@@ -1714,7 +1700,7 @@ static int futex_lock_pi(unsigned long _
 	newval = current->pid;
 
 	pagefault_disable();
-	curval = futex_cmpxchg_inatomic(uaddr, 0, newval, futex64);
+	curval = futex_atomic_cmpxchg_inatomic(uaddr, 0, newval);
 	pagefault_enable();
 
 	if (unlikely(curval == -EFAULT))
@@ -1759,7 +1745,7 @@ static int futex_lock_pi(unsigned long _
 		newval = curval | FUTEX_WAITERS;
 
 	pagefault_disable();
-	curval = futex_cmpxchg_inatomic(uaddr, uval, newval, futex64);
+	curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
 	pagefault_enable();
 
 	if (unlikely(curval == -EFAULT))
@@ -1796,8 +1782,8 @@ static int futex_lock_pi(unsigned long _
 				FUTEX_OWNER_DIED | FUTEX_WAITERS;
 
 			pagefault_disable();
-			curval = futex_cmpxchg_inatomic(uaddr, uval,
-							newval, futex64);
+			curval = futex_atomic_cmpxchg_inatomic(uaddr, uval,
+							       newval);
 			pagefault_enable();
 
 			if (unlikely(curval == -EFAULT))
@@ -1841,7 +1827,8 @@ static int futex_lock_pi(unsigned long _
 	 */
 	if (!ret && q.pi_state->owner != curr)
 		/* mmap_sem is unlocked at return of this function */
-		ret = fixup_pi_state_owner(uaddr, &q, hb, curr, futex64);
+		ret = fixup_pi_state_owner((unsigned long  __user *)uaddr,
+					   &q, hb, curr, 0);
 	else {
 		/*
 		 * Catch the rare case, where the lock was released
@@ -1887,7 +1874,7 @@ static int futex_lock_pi(unsigned long _
 	queue_unlock(&q, hb);
 	up_read(&curr->mm->mmap_sem);
 
-	ret = futex_get_user(&uval, uaddr, futex64);
+	ret = get_user(uval, uaddr);
 	if (!ret && (uval != -EFAULT))
 		goto retry;
 
@@ -1899,17 +1886,17 @@ static int futex_lock_pi(unsigned long _
  * This is the in-kernel slowpath: we look up the PI state (if any),
  * and do the rt-mutex unlock.
  */
-static int futex_unlock_pi(unsigned long __user *uaddr, int futex64)
+static int futex_unlock_pi(u32 __user *uaddr)
 {
 	struct futex_hash_bucket *hb;
 	struct futex_q *this, *next;
-	unsigned long uval;
+	u32 uval;
 	struct plist_head *head;
 	union futex_key key;
 	int ret, attempt = 0;
 
 retry:
-	if (futex_get_user(&uval, uaddr, futex64))
+	if (get_user(uval, uaddr))
 		return -EFAULT;
 	/*
 	 * We release only a lock we actually own:
@@ -1936,7 +1923,7 @@ retry_locked:
 	 */
 	if (!(uval & FUTEX_OWNER_DIED)) {
 		pagefault_disable();
-		uval = futex_cmpxchg_inatomic(uaddr, current->pid, 0, futex64);
+		uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0);
 		pagefault_enable();
 	}
 
@@ -1958,7 +1945,7 @@ retry_locked:
 	plist_for_each_entry_safe(this, next, head, list) {
 		if (!match_futex (&this->key, &key))
 			continue;
-		ret = wake_futex_pi(uaddr, uval, this, futex64);
+		ret = wake_futex_pi(uaddr, uval, this);
 		/*
 		 * The atomic access to the futex value
 		 * generated a pagefault, so retry the
@@ -1972,7 +1959,7 @@ retry_locked:
 	 * No waiters - kernel unlocks the futex:
 	 */
 	if (!(uval & FUTEX_OWNER_DIED)) {
-		ret = unlock_futex_pi(uaddr, uval, futex64);
+		ret = unlock_futex_pi(uaddr, uval);
 		if (ret == -EFAULT)
 			goto pi_faulted;
 	}
@@ -2002,7 +1989,7 @@ pi_faulted:
 	spin_unlock(&hb->lock);
 	up_read(&current->mm->mmap_sem);
 
-	ret = futex_get_user(&uval, uaddr, futex64);
+	ret = get_user(uval, uaddr);
 	if (!ret && (uval != -EFAULT))
 		goto retry;
 
@@ -2323,94 +2310,71 @@ void exit_robust_list(struct task_struct
 
 long do_futex(unsigned long __user *uaddr, int op, unsigned long val,
 	      ktime_t *timeout, unsigned long __user *uaddr2,
-	      unsigned long val2, unsigned long val3, int fut64)
+	      unsigned long val2, unsigned long val3)
 {
 	int ret;
 
 	switch (op) {
 	case FUTEX_WAIT:
-		ret = futex_wait(uaddr, val, timeout, fut64);
+		ret = futex_wait(uaddr, val, timeout, 0);
 		break;
 	case FUTEX_WAKE:
 		ret = futex_wake(uaddr, val);
 		break;
 	case FUTEX_FD:
-		if (fut64)
-			ret = -ENOSYS;
-		else
-			/* non-zero val means F_SETOWN(getpid())&F_SETSIG(val) */
-			ret = futex_fd((u32 __user *)uaddr, val);
+		/* non-zero val means F_SETOWN(getpid())&F_SETSIG(val) */
+		ret = futex_fd((u32 __user *)uaddr, val);
 		break;
 	case FUTEX_REQUEUE:
-		ret = futex_requeue(uaddr, uaddr2, val, val2, NULL, fut64);
+		ret = futex_requeue(uaddr, uaddr2, val, val2, NULL, 0);
 		break;
 	case FUTEX_CMP_REQUEUE:
-		ret = futex_requeue(uaddr, uaddr2, val, val2, &val3, fut64);
+		ret = futex_requeue(uaddr, uaddr2, val, val2, &val3, 0);
 		break;
 	case FUTEX_WAKE_OP:
-		ret = futex_wake_op(uaddr, uaddr2, val, val2, val3, fut64);
+		ret = futex_wake_op(uaddr, uaddr2, val, val2, val3);
 		break;
 	case FUTEX_LOCK_PI:
-		ret = futex_lock_pi(uaddr, val, timeout, 0, fut64);
+		ret = futex_lock_pi((u32 __user *)uaddr, val, timeout, 0);
 		break;
 	case FUTEX_UNLOCK_PI:
-		ret = futex_unlock_pi(uaddr, fut64);
+		ret = futex_unlock_pi((u32 __user *)uaddr);
 		break;
 	case FUTEX_TRYLOCK_PI:
-		ret = futex_lock_pi(uaddr, 0, timeout, 1, fut64);
+		ret = futex_lock_pi((u32 __user *)uaddr, 0, timeout, 1);
 		break;
 	case FUTEX_CMP_REQUEUE_PI:
-		ret = futex_requeue_pi(uaddr, uaddr2, val, val2, &val3, fut64);
+		ret = futex_requeue_pi((u32 __user *)uaddr,
+				       (u32 __user *)uaddr2,
+				       val, val2, &val3);
+		break;
+#ifdef CONFIG_64BIT
+	case FUTEX_WAIT64:
+		ret = futex_wait(uaddr, val, timeout, 1);
+		break;
+	case FUTEX_CMP_REQUEUE64:
+		ret = futex_requeue(uaddr, uaddr2, val, val2, &val3, 1);
 		break;
+#endif
 	default:
 		ret = -ENOSYS;
 	}
 	return ret;
 }
 
-#ifdef CONFIG_64BIT
-
-asmlinkage long
-sys_futex64(u64 __user *uaddr, int op, u64 val,
-	    struct timespec __user *utime, u64 __user *uaddr2, u64 val3)
-{
-	struct timespec ts;
-	ktime_t t, *tp = NULL;
-	u64 val2 = 0;
-
-	if (utime && (op == FUTEX_WAIT || op == FUTEX_LOCK_PI)) {
-		if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
-			return -EFAULT;
-		if (!timespec_valid(&ts))
-			return -EINVAL;
-
-		t = timespec_to_ktime(ts);
-		if (op == FUTEX_WAIT)
-			t = ktime_add(ktime_get(), t);
-		tp = &t;
-	}
-	/*
-	 * requeue parameter in 'utime' if op == FUTEX_REQUEUE.
-	 */
-	if (op == FUTEX_REQUEUE || op == FUTEX_CMP_REQUEUE
-	    || op == FUTEX_CMP_REQUEUE_PI)
-		val2 = (unsigned long) utime;
-
-	return do_futex((unsigned long __user*)uaddr, op, val, tp,
-			(unsigned long __user*)uaddr2, val2, val3, 1);
-}
-
-#endif
-
 asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val,
 			  struct timespec __user *utime, u32 __user *uaddr2,
 			  u32 val3)
 {
 	struct timespec ts;
 	ktime_t t, *tp = NULL;
-	u32 val2 = 0;
+	unsigned long val2 = 0;
 
-	if (utime && (op == FUTEX_WAIT || op == FUTEX_LOCK_PI)) {
+	if (utime && (op == FUTEX_WAIT
+#ifdef CONFIG_64BIT
+		      || op == FUTEX_WAIT64
+#endif
+		      || op == FUTEX_LOCK_PI)) {
 		if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
 			return -EFAULT;
 		if (!timespec_valid(&ts))
@@ -2425,11 +2389,14 @@ asmlinkage long sys_futex(u32 __user *ua
 	 * requeue parameter in 'utime' if op == FUTEX_REQUEUE.
 	 */
 	if (op == FUTEX_REQUEUE || op == FUTEX_CMP_REQUEUE
+#ifdef CONFIG_64BIT
+	    || op == FUTEX_CMP_REQUEUE64
+#endif
 	    || op == FUTEX_CMP_REQUEUE_PI)
-		val2 = (u32) (unsigned long) utime;
+		val2 = (unsigned long) utime;
 
 	return do_futex((unsigned long __user*)uaddr, op, val, tp,
-			(unsigned long __user*)uaddr2, val2, val3, 0);
+			(unsigned long __user*)uaddr2, val2, val3);
 }
 
 static int futexfs_get_sb(struct file_system_type *fs_type,
Index: b/kernel/futex_compat.c
===================================================================
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -161,5 +161,5 @@ asmlinkage long compat_sys_futex(u32 __u
 		val2 = (int) (unsigned long) utime;
 
 	return do_futex((unsigned long __user*)uaddr, op, val, tp,
-			(unsigned long __user*)uaddr2, val2, val3, 0);
+			(unsigned long __user*)uaddr2, val2, val3);
 }
Index: b/kernel/sys_ni.c
===================================================================
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -41,7 +41,6 @@ cond_syscall(sys_sendmsg);
 cond_syscall(sys_recvmsg);
 cond_syscall(sys_socketcall);
 cond_syscall(sys_futex);
-cond_syscall(sys_futex64);
 cond_syscall(compat_sys_futex);
 cond_syscall(sys_set_robust_list);
 cond_syscall(compat_sys_set_robust_list);
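
(For completeness, a minimal user-space sketch of reaching the new
command — this assumes a 64-bit kernel with the patch above applied
and uses FUTEX_WAIT64 == 10 as defined in the patch; it is not in any
mainline header, and an unpatched kernel simply returns ENOSYS:)

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

#define FUTEX_WAIT64	10	/* from the patch; not in mainline headers */

static uint64_t fword = 1;

int main(void)
{
	/* val (2) differs from *fword (1), so on a patched 64-bit
	 * kernel this returns immediately with EWOULDBLOCK instead
	 * of sleeping; an unpatched kernel gives ENOSYS. */
	long ret = syscall(SYS_futex, &fword, FUTEX_WAIT64,
			   (uint64_t)2, NULL, NULL, 0);
	if (ret < 0)
		printf("FUTEX_WAIT64: %s\n", strerror(errno));
	return 0;
}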

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH -mm take2] 64bit-futex - provide new commands instead of new syscall
  2007-04-24  8:07         ` [PATCH -mm take2] " Pierre Peiffer
@ 2007-04-24 13:25           ` Ulrich Drepper
  0 siblings, 0 replies; 12+ messages in thread
From: Ulrich Drepper @ 2007-04-24 13:25 UTC (permalink / raw)
  To: Pierre Peiffer
  Cc: Jakub Jelinek, akpm, mingo, drepper, linux-kernel, jean-pierre.dion

On 4/24/07, Pierre Peiffer <pierre.peiffer@bull.net> wrote:
> Something like this, maybe...

Yep, looks good to me.

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2007-04-24 13:25 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-03-21  9:54 [PATCH 2.6.21-rc4-mm1 0/4] Futexes functionalities and improvements Pierre.Peiffer
2007-03-21  9:54 ` [PATCH 2.6.21-rc4-mm1 1/4] futex priority based wakeup Pierre.Peiffer
2007-03-21  9:54 ` [PATCH 2.6.21-rc4-mm1 2/4] Make futex_wait() use an hrtimer for timeout Pierre.Peiffer
2007-03-26  9:57   ` Andrew Morton
2007-03-21  9:54 ` [PATCH 2.6.21-rc4-mm1 3/4] futex_requeue_pi optimization Pierre.Peiffer
2007-03-21  9:54 ` [PATCH 2.6.21-rc4-mm1 4/4] sys_futex64 : allows 64bit futexes Pierre.Peiffer
2007-03-26 11:20   ` Andrew Morton
2007-03-27 11:07   ` Jakub Jelinek
2007-04-23 14:35     ` [PATCH -mm] 64bit-futex - provide new commands instead of new syscall Pierre Peiffer
2007-04-23 15:30       ` Ulrich Drepper
2007-04-24  8:07         ` [PATCH -mm take2] " Pierre Peiffer
2007-04-24 13:25           ` Ulrich Drepper
