LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
From: Pierre Peiffer <pierre.peiffer@bull.net>
To: LKML <linux-kernel@vger.kernel.org>
Cc: Ingo Molnar <mingo@elte.hu>, Ulrich Drepper <drepper@redhat.com>,
	Jakub Jelinek <jakub@redhat.com>,
	Jean-Pierre Dion <jean-pierre.dion@bull.net>
Subject: [PATCH 2.6.20-rc5 2/4] Make futex_wait() use an hrtimer for timeout
Date: Wed, 17 Jan 2007 10:01:04 +0100	[thread overview]
Message-ID: <45ADE5D0.5080509@bull.net> (raw)
In-Reply-To: <45ADDF60.5080704@bull.net>

   Hi,

   This patch modifies futex_wait() to use an hrtimer + schedule() in place of
schedule_timeout() in an RT kernel.

   More details in the patch header.


--------------------------------------------------------------------------------

   This patch modifies futex_wait() to use an hrtimer + schedule() in place of
schedule_timeout().

   schedule_timeout() is tick based, therefore the timeout granularity is
the tick (1 ms, 4 ms or 10 ms depending on HZ). By using a high resolution
timer for timeout wakeup, we can attain a much finer timeout granularity
(in the microsecond range). This parallels what is already done for
futex_lock_pi().

   The timeout passed to the syscall is no longer converted to jiffies
and is therefore passed to do_futex() and futex_wait() as a timespec
therefore keeping nanosecond resolution.

   Also this removes the need to pass the nanoseconds timeout part to
futex_lock_pi() in val2.

   In futex_wait(), if the timeout is zero then a regular schedule() is
performed. Otherwise, an hrtimer is fired before schedule() is called.

---
  include/linux/futex.h |    2 -
  kernel/futex.c        |   58 ++++++++++++++++++++++++++++++++------------------
  kernel/futex_compat.c |   11 +--------
  3 files changed, 41 insertions(+), 30 deletions(-)

---

Signed-off-by: Sébastien Dugué <sebastien.dugue@bull.net>
Signed-off-by: Pierre Peiffer <pierre.peiffer@bull.net>

---
Index: linux-2.6/kernel/futex.c
===================================================================
--- linux-2.6.orig/kernel/futex.c	2007-01-17 09:44:25.000000000 +0100
+++ linux-2.6/kernel/futex.c	2007-01-17 09:44:42.000000000 +0100
@@ -1020,7 +1020,7 @@ static void unqueue_me_pi(struct futex_q
  	drop_key_refs(&q->key);
  }

-static int futex_wait(u32 __user *uaddr, u32 val, unsigned long time)
+static int futex_wait(u32 __user *uaddr, u32 val, struct timespec *time)
  {
  	struct task_struct *curr = current;
  	DECLARE_WAITQUEUE(wait, curr);
@@ -1028,6 +1028,8 @@ static int futex_wait(u32 __user *uaddr,
  	struct futex_q q;
  	u32 uval;
  	int ret;
+	struct hrtimer_sleeper t;
+	int rem = 0;

  	q.pi_state = NULL;
   retry:
@@ -1105,8 +1107,31 @@ static int futex_wait(u32 __user *uaddr,
  	 * !plist_node_empty() is safe here without any lock.
  	 * q.lock_ptr != 0 is not safe, because of ordering against wakeup.
  	 */
-	if (likely(!plist_node_empty(&q.list)))
-		time = schedule_timeout(time);
+	if (likely(!plist_node_empty(&q.list))) {
+		if (time->tv_sec == 0 && time->tv_nsec == 0)
+			schedule();
+		else {
+			hrtimer_init(&t.timer, CLOCK_MONOTONIC, HRTIMER_REL);
+			hrtimer_init_sleeper(&t, current);
+			t.timer.expires = timespec_to_ktime(*time);
+
+			hrtimer_start(&t.timer, t.timer.expires, HRTIMER_REL);
+
+			/*
+			 * the timer could have already expired, in which
+			 * case current would be flagged for rescheduling.
+			 * Don't bother calling schedule.
+			 */
+			if (likely(t.task))
+				schedule();
+
+			hrtimer_cancel(&t.timer);
+
+			/* Flag if a timeout occured */
+			rem = (t.task == NULL);
+		}
+	}
+
  	__set_current_state(TASK_RUNNING);

  	/*
@@ -1117,7 +1142,7 @@ static int futex_wait(u32 __user *uaddr,
  	/* If we were woken (and unqueued), we succeeded, whatever. */
  	if (!unqueue_me(&q))
  		return 0;
-	if (time == 0)
+	if (rem)
  		return -ETIMEDOUT;
  	/*
  	 * We expect signal_pending(current), but another thread may
@@ -1139,8 +1164,8 @@ static int futex_wait(u32 __user *uaddr,
   * if there are waiters then it will block, it does PI, etc. (Due to
   * races the kernel might see a 0 value of the futex too.)
   */
-static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec,
-			 long nsec, int trylock)
+static int futex_lock_pi(u32 __user *uaddr, int detect, struct timespec *time,
+			 int trylock)
  {
  	struct hrtimer_sleeper timeout, *to = NULL;
  	struct task_struct *curr = current;
@@ -1152,11 +1177,11 @@ static int futex_lock_pi(u32 __user *uad
  	if (refill_pi_state_cache())
  		return -ENOMEM;

-	if (sec != MAX_SCHEDULE_TIMEOUT) {
+	if (time->tv_sec || time->tv_nsec) {
  		to = &timeout;
  		hrtimer_init(&to->timer, CLOCK_REALTIME, HRTIMER_ABS);
  		hrtimer_init_sleeper(to, current);
-		to->timer.expires = ktime_set(sec, nsec);
+		to->timer.expires = timespec_to_ktime(*time);
  	}

  	q.pi_state = NULL;
@@ -1792,7 +1817,7 @@ void exit_robust_list(struct task_struct
  	}
  }

-long do_futex(u32 __user *uaddr, int op, u32 val, unsigned long timeout,
+long do_futex(u32 __user *uaddr, int op, u32 val, struct timespec *timeout,
  		u32 __user *uaddr2, u32 val2, u32 val3)
  {
  	int ret;
@@ -1818,13 +1843,13 @@ long do_futex(u32 __user *uaddr, int op,
  		ret = futex_wake_op(uaddr, uaddr2, val, val2, val3);
  		break;
  	case FUTEX_LOCK_PI:
-		ret = futex_lock_pi(uaddr, val, timeout, val2, 0);
+		ret = futex_lock_pi(uaddr, val, timeout, 0);
  		break;
  	case FUTEX_UNLOCK_PI:
  		ret = futex_unlock_pi(uaddr);
  		break;
  	case FUTEX_TRYLOCK_PI:
-		ret = futex_lock_pi(uaddr, 0, timeout, val2, 1);
+		ret = futex_lock_pi(uaddr, 0, timeout, 1);
  		break;
  	default:
  		ret = -ENOSYS;
@@ -1837,8 +1862,7 @@ asmlinkage long sys_futex(u32 __user *ua
  			  struct timespec __user *utime, u32 __user *uaddr2,
  			  u32 val3)
  {
-	struct timespec t;
-	unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
+	struct timespec t = {.tv_sec = 0, .tv_nsec = 0};
  	u32 val2 = 0;

  	if (utime && (op == FUTEX_WAIT || op == FUTEX_LOCK_PI)) {
@@ -1846,12 +1870,6 @@ asmlinkage long sys_futex(u32 __user *ua
  			return -EFAULT;
  		if (!timespec_valid(&t))
  			return -EINVAL;
-		if (op == FUTEX_WAIT)
-			timeout = timespec_to_jiffies(&t) + 1;
-		else {
-			timeout = t.tv_sec;
-			val2 = t.tv_nsec;
-		}
  	}
  	/*
  	 * requeue parameter in 'utime' if op == FUTEX_REQUEUE.
@@ -1859,7 +1877,7 @@ asmlinkage long sys_futex(u32 __user *ua
  	if (op == FUTEX_REQUEUE || op == FUTEX_CMP_REQUEUE)
  		val2 = (u32) (unsigned long) utime;

-	return do_futex(uaddr, op, val, timeout, uaddr2, val2, val3);
+	return do_futex(uaddr, op, val, &t, uaddr2, val2, val3);
  }

  static int futexfs_get_sb(struct file_system_type *fs_type,
Index: linux-2.6/kernel/futex_compat.c
===================================================================
--- linux-2.6.orig/kernel/futex_compat.c	2007-01-17 09:39:57.000000000 +0100
+++ linux-2.6/kernel/futex_compat.c	2007-01-17 09:44:42.000000000 +0100
@@ -141,8 +141,7 @@ asmlinkage long compat_sys_futex(u32 __u
  		struct compat_timespec __user *utime, u32 __user *uaddr2,
  		u32 val3)
  {
-	struct timespec t;
-	unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
+	struct timespec t = {.tv_sec = 0, .tv_nsec = 0};
  	int val2 = 0;

  	if (utime && (op == FUTEX_WAIT || op == FUTEX_LOCK_PI)) {
@@ -150,15 +149,9 @@ asmlinkage long compat_sys_futex(u32 __u
  			return -EFAULT;
  		if (!timespec_valid(&t))
  			return -EINVAL;
-		if (op == FUTEX_WAIT)
-			timeout = timespec_to_jiffies(&t) + 1;
-		else {
-			timeout = t.tv_sec;
-			val2 = t.tv_nsec;
-		}
  	}
  	if (op == FUTEX_REQUEUE || op == FUTEX_CMP_REQUEUE)
  		val2 = (int) (unsigned long) utime;

-	return do_futex(uaddr, op, val, timeout, uaddr2, val2, val3);
+	return do_futex(uaddr, op, val, &t, uaddr2, val2, val3);
  }
Index: linux-2.6/include/linux/futex.h
===================================================================
--- linux-2.6.orig/include/linux/futex.h	2007-01-17 09:39:57.000000000 +0100
+++ linux-2.6/include/linux/futex.h	2007-01-17 09:44:42.000000000 +0100
@@ -94,7 +94,7 @@ struct robust_list_head {
  #define ROBUST_LIST_LIMIT	2048

  #ifdef __KERNEL__
-long do_futex(u32 __user *uaddr, int op, u32 val, unsigned long timeout,
+long do_futex(u32 __user *uaddr, int op, u32 val, struct timespec *timeout,
  	      u32 __user *uaddr2, u32 val2, u32 val3);

  extern int

-- 
Pierre


  parent reply	other threads:[~2007-01-17  9:02 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <45ADDF60.5080704@bull.net>
2007-01-17  9:00 ` [PATCH 2.6.20-rc5 1/4] futex priority based wakeup Pierre Peiffer
2007-01-17  9:01 ` Pierre Peiffer [this message]
2007-01-17  9:03 ` [PATCH 2.6.20-rc5 3/4] futex_requeue_pi optimization Pierre Peiffer
2007-01-17  9:04 ` [PATCH 2.6.20-rc5 4/4] sys_futex64 : allows 64bit futexes Pierre Peiffer
2007-01-18  0:17   ` Christoph Hellwig
2007-01-18  7:45     ` Ingo Molnar
2007-01-18 22:34       ` Christoph Hellwig
2007-01-18 22:35       ` Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=45ADE5D0.5080509@bull.net \
    --to=pierre.peiffer@bull.net \
    --cc=drepper@redhat.com \
    --cc=jakub@redhat.com \
    --cc=jean-pierre.dion@bull.net \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --subject='Re: [PATCH 2.6.20-rc5 2/4] Make futex_wait() use an hrtimer for timeout' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).