LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [RFC PATCH 0/3] RFC: using hrtimers for in-kernel timeouts
@ 2007-03-05  0:20 Arnd Bergmann
  2007-03-05  0:20 ` [RFC PATCH 1/3] introduce schedule_timeout_hr Arnd Bergmann
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Arnd Bergmann @ 2007-03-05  0:20 UTC (permalink / raw)
  To: linux-kernel; +Cc: Thomas Gleixner

I've played around with the new timer statistics to see which timers might
benefit from being moved from traditional timers to hrtimers.

Since my understanding is that timer_list timers are not really meant to
expire, this seems to include a lot of what comes in through
schedule_timeout, in particular select() and futex wait.

I have no idea if what I was attempting is even the right approach to
start with, but I want to share the patches in case it is ;-).

Maybe someone is interested in running some low-level benchmarks on this
or pointing out any bugs in the code.

	Arnd <><

--


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [RFC PATCH 1/3] introduce schedule_timeout_hr
  2007-03-05  0:20 [RFC PATCH 0/3] RFC: using hrtimers for in-kernel timeouts Arnd Bergmann
@ 2007-03-05  0:20 ` Arnd Bergmann
  2007-03-05  0:20 ` [RFC PATCH 2/3] use hrtimer in select and pselect Arnd Bergmann
  2007-03-05  0:20 ` [RFC PATCH 3/3] change schedule_timeout to use hrtimers Arnd Bergmann
  2 siblings, 0 replies; 4+ messages in thread
From: Arnd Bergmann @ 2007-03-05  0:20 UTC (permalink / raw)
  To: linux-kernel; +Cc: Thomas Gleixner

[-- Attachment #1: add-schedule-timeout-hr.diff --]
[-- Type: text/plain, Size: 2687 bytes --]

The new schedule_timeout_hr function is a variant of schedule_timeout
that uses hrtimers internally. Consequently, its argument and
return value are ktime_t.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>

Index: linux-cg/include/linux/sched.h
===================================================================
--- linux-cg.orig/include/linux/sched.h
+++ linux-cg/include/linux/sched.h
@@ -246,6 +246,8 @@ extern int in_sched_functions(unsigned l
 
 #define	MAX_SCHEDULE_TIMEOUT	LONG_MAX
 extern signed long FASTCALL(schedule_timeout(signed long timeout));
+extern ktime_t FASTCALL(schedule_timeout_hr(ktime_t timeout));
+
 extern signed long schedule_timeout_interruptible(signed long timeout);
 extern signed long schedule_timeout_uninterruptible(signed long timeout);
 asmlinkage void schedule(void);
Index: linux-cg/kernel/hrtimer.c
===================================================================
--- linux-cg.orig/kernel/hrtimer.c
+++ linux-cg/kernel/hrtimer.c
@@ -1206,6 +1206,54 @@ void hrtimer_init_sleeper(struct hrtimer
 #endif
 }
 
+/**
+ * schedule_timeout_hr - sleep until timeout
+ * @timeout: timeout value
+ *
+ * Make the current task sleep until @timeout has elapsed.
+ * The routine will return immediately unless the current task
+ * state has been set (see set_current_state()).
+ *
+ * You can set the task state as follows -
+ *
+ * %TASK_UNINTERRUPTIBLE - at least @timeout is guaranteed to
+ * pass before the routine returns. The routine will return 0
+ *
+ * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
+ * delivered to the current task. In this case the remaining time
+ * will be returned as a ktime_t, or 0 if the timer expired in time
+ *
+ * The current task state is guaranteed to be TASK_RUNNING when this
+ * routine returns.
+ *
+ * In all cases the return value is guaranteed to be a non-negative
+ * time value.
+ */
+static ktime_t __sched __schedule_timeout_hr(ktime_t time, void *addr)
+{
+	struct hrtimer_sleeper t;
+	ktime_t remain;
+
+	hrtimer_init(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer_init_sleeper(&t, current);
+	__timer_stats_hrtimer_set_start_info(&t.timer, addr);
+	hrtimer_start(&t.timer, time, HRTIMER_MODE_REL);
+	schedule();
+	hrtimer_cancel(&t.timer);
+	remain = hrtimer_get_remaining(&t.timer);
+
+	if (ktime_to_ns(remain) < 0)
+		return ktime_set(0, 0);
+	else
+		return remain;
+}
+
+fastcall ktime_t __sched schedule_timeout_hr(ktime_t time)
+{
+	return __schedule_timeout_hr(time, __builtin_return_address(0));
+}
+EXPORT_SYMBOL_GPL(schedule_timeout_hr);
+
 static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode)
 {
 	hrtimer_init_sleeper(t, current);

--


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [RFC PATCH 2/3] use hrtimer in select and pselect
  2007-03-05  0:20 [RFC PATCH 0/3] RFC: using hrtimers for in-kernel timeouts Arnd Bergmann
  2007-03-05  0:20 ` [RFC PATCH 1/3] introduce schedule_timeout_hr Arnd Bergmann
@ 2007-03-05  0:20 ` Arnd Bergmann
  2007-03-05  0:20 ` [RFC PATCH 3/3] change schedule_timeout to use hrtimers Arnd Bergmann
  2 siblings, 0 replies; 4+ messages in thread
From: Arnd Bergmann @ 2007-03-05  0:20 UTC (permalink / raw)
  To: linux-kernel; +Cc: Thomas Gleixner

[-- Attachment #1: implement-select-using-hrtimer.patch --]
[-- Type: text/plain, Size: 5440 bytes --]

This changes the select and pselect system calls to use the
new schedule_timeout_hr function. Since many applications
use the select function instead of nanosleep, this provides
a higher resolution sleep to them.

BUG: the same needs to be done for the compat syscalls, the
current patch breaks building on 64 bit machines.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>

Index: linux-cg/fs/select.c
===================================================================
--- linux-cg.orig/fs/select.c
+++ linux-cg/fs/select.c
@@ -189,7 +189,7 @@ get_max:
 #define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR)
 #define POLLEX_SET (POLLPRI)
 
-int do_select(int n, fd_set_bits *fds, s64 *timeout)
+int do_select(int n, fd_set_bits *fds, ktime_t *timeout)
 {
 	struct poll_wqueues table;
 	poll_table *wait;
@@ -205,12 +205,11 @@ int do_select(int n, fd_set_bits *fds, s
 
 	poll_initwait(&table);
 	wait = &table.pt;
-	if (!*timeout)
+	if (timeout && !timeout->tv64)
 		wait = NULL;
 	retval = 0;
 	for (;;) {
 		unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp;
-		long __timeout;
 
 		set_current_state(TASK_INTERRUPTIBLE);
 
@@ -266,27 +265,19 @@ int do_select(int n, fd_set_bits *fds, s
 				*rexp = res_ex;
 		}
 		wait = NULL;
-		if (retval || !*timeout || signal_pending(current))
+		if (retval || (timeout && !timeout->tv64)
+		    || signal_pending(current))
 			break;
 		if(table.error) {
 			retval = table.error;
 			break;
 		}
 
-		if (*timeout < 0) {
+		if (!timeout || timeout->tv64 < 0)
 			/* Wait indefinitely */
-			__timeout = MAX_SCHEDULE_TIMEOUT;
-		} else if (unlikely(*timeout >= (s64)MAX_SCHEDULE_TIMEOUT - 1)) {
-			/* Wait for longer than MAX_SCHEDULE_TIMEOUT. Do it in a loop */
-			__timeout = MAX_SCHEDULE_TIMEOUT - 1;
-			*timeout -= __timeout;
-		} else {
-			__timeout = *timeout;
-			*timeout = 0;
-		}
-		__timeout = schedule_timeout(__timeout);
-		if (*timeout >= 0)
-			*timeout += __timeout;
+			schedule();
+		else
+			*timeout = schedule_timeout_hr(*timeout);
 	}
 	__set_current_state(TASK_RUNNING);
 
@@ -307,7 +298,7 @@ int do_select(int n, fd_set_bits *fds, s
 	((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
 
 static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
-			   fd_set __user *exp, s64 *timeout)
+			   fd_set __user *exp, ktime_t *timeout)
 {
 	fd_set_bits fds;
 	void *bits;
@@ -384,7 +375,7 @@ out_nofds:
 asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp,
 			fd_set __user *exp, struct timeval __user *tvp)
 {
-	s64 timeout = -1;
+	ktime_t timeout, *timeoutp = NULL;
 	struct timeval tv;
 	int ret;
 
@@ -395,24 +386,20 @@ asmlinkage long sys_select(int n, fd_set
 		if (tv.tv_sec < 0 || tv.tv_usec < 0)
 			return -EINVAL;
 
+		timeout = timeval_to_ktime(tv);
 		/* Cast to u64 to make GCC stop complaining */
-		if ((u64)tv.tv_sec >= (u64)MAX_INT64_SECONDS)
-			timeout = -1;	/* infinite */
-		else {
-			timeout = ROUND_UP(tv.tv_usec, USEC_PER_SEC/HZ);
-			timeout += tv.tv_sec * HZ;
-		}
+		if ((u64)tv.tv_sec < (u64)MAX_INT64_SECONDS)
+			timeoutp = &timeout;
 	}
 
-	ret = core_sys_select(n, inp, outp, exp, &timeout);
+	ret = core_sys_select(n, inp, outp, exp, timeoutp);
 
 	if (tvp) {
 		struct timeval rtv;
 
 		if (current->personality & STICKY_TIMEOUTS)
 			goto sticky;
-		rtv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ));
-		rtv.tv_sec = timeout;
+		rtv = ktime_to_timeval(timeout);
 		if (timeval_compare(&rtv, &tv) >= 0)
 			rtv = tv;
 		if (copy_to_user(tvp, &rtv, sizeof(rtv))) {
@@ -438,7 +425,7 @@ asmlinkage long sys_pselect7(int n, fd_s
 		fd_set __user *exp, struct timespec __user *tsp,
 		const sigset_t __user *sigmask, size_t sigsetsize)
 {
-	s64 timeout = MAX_SCHEDULE_TIMEOUT;
+	ktime_t timeout, *timeoutp = NULL;
 	sigset_t ksigmask, sigsaved;
 	struct timespec ts;
 	int ret;
@@ -450,13 +437,11 @@ asmlinkage long sys_pselect7(int n, fd_s
 		if (ts.tv_sec < 0 || ts.tv_nsec < 0)
 			return -EINVAL;
 
+		timeout = timespec_to_ktime(ts);
+
 		/* Cast to u64 to make GCC stop complaining */
-		if ((u64)ts.tv_sec >= (u64)MAX_INT64_SECONDS)
-			timeout = -1;	/* infinite */
-		else {
-			timeout = ROUND_UP(ts.tv_nsec, NSEC_PER_SEC/HZ);
-			timeout += ts.tv_sec * HZ;
-		}
+		if ((u64)ts.tv_sec < (u64)MAX_INT64_SECONDS)
+			timeoutp = &timeout;
 	}
 
 	if (sigmask) {
@@ -470,16 +455,15 @@ asmlinkage long sys_pselect7(int n, fd_s
 		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
 	}
 
-	ret = core_sys_select(n, inp, outp, exp, &timeout);
+	ret = core_sys_select(n, inp, outp, exp, timeoutp);
 
 	if (tsp) {
 		struct timespec rts;
 
 		if (current->personality & STICKY_TIMEOUTS)
 			goto sticky;
-		rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) *
-						1000;
-		rts.tv_sec = timeout;
+
+		rts = ktime_to_timespec(timeout);
 		if (timespec_compare(&rts, &ts) >= 0)
 			rts = ts;
 		if (copy_to_user(tsp, &rts, sizeof(rts))) {
Index: linux-cg/include/linux/poll.h
===================================================================
--- linux-cg.orig/include/linux/poll.h
+++ linux-cg/include/linux/poll.h
@@ -112,7 +112,7 @@ void zero_fd_set(unsigned long nr, unsig
 
 #define MAX_INT64_SECONDS (((s64)(~((u64)0)>>1)/HZ)-1)
 
-extern int do_select(int n, fd_set_bits *fds, s64 *timeout);
+extern int do_select(int n, fd_set_bits *fds, ktime_t *timeout);
 extern int do_sys_poll(struct pollfd __user * ufds, unsigned int nfds,
 		       s64 *timeout);
 

--


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [RFC PATCH 3/3] change schedule_timeout to use hrtimers
  2007-03-05  0:20 [RFC PATCH 0/3] RFC: using hrtimers for in-kernel timeouts Arnd Bergmann
  2007-03-05  0:20 ` [RFC PATCH 1/3] introduce schedule_timeout_hr Arnd Bergmann
  2007-03-05  0:20 ` [RFC PATCH 2/3] use hrtimer in select and pselect Arnd Bergmann
@ 2007-03-05  0:20 ` Arnd Bergmann
  2 siblings, 0 replies; 4+ messages in thread
From: Arnd Bergmann @ 2007-03-05  0:20 UTC (permalink / raw)
  To: linux-kernel; +Cc: Thomas Gleixner

[-- Attachment #1: implement-schedule-timeout-using-hr.patch --]
[-- Type: text/plain, Size: 7126 bytes --]

According to the new timer statistics, many of the
timers that expire come from schedule_timeout.
Since the regular timer infrastructure is optimized
for timers that don't expire, this might be a useful
optimization.

This also changes the timer stats to show the caller
of schedule_timeout in the statistics rather than
schedule_timeout itself.

BUG: converting between jiffies and ktime is rather
     inefficient here.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>

Index: linux-cg/kernel/hrtimer.c
===================================================================
--- linux-cg.orig/kernel/hrtimer.c
+++ linux-cg/kernel/hrtimer.c
@@ -1254,6 +1254,96 @@ fastcall ktime_t __sched schedule_timeou
 }
 EXPORT_SYMBOL_GPL(schedule_timeout_hr);
 
+/**
+ * schedule_timeout - sleep until timeout
+ * @timeout: timeout value in jiffies
+ *
+ * Make the current task sleep until @timeout jiffies have
+ * elapsed. The routine will return immediately unless
+ * the current task state has been set (see set_current_state()).
+ *
+ * You can set the task state as follows -
+ *
+ * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to
+ * pass before the routine returns. The routine will return 0
+ *
+ * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
+ * delivered to the current task. In this case the remaining time
+ * in jiffies will be returned, or 0 if the timer expired in time
+ *
+ * The current task state is guaranteed to be TASK_RUNNING when this
+ * routine returns.
+ *
+ * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule
+ * the CPU away without a bound on the timeout. In this case the return
+ * value will be %MAX_SCHEDULE_TIMEOUT.
+ *
+ * In all cases the return value is guaranteed to be non-negative.
+ */
+fastcall signed long __sched schedule_timeout(signed long timeout)
+{
+	ktime_t time;
+	struct timespec ts;
+
+	switch (timeout)
+	{
+	case MAX_SCHEDULE_TIMEOUT:
+		/*
+		 * These two special cases are useful to be comfortable
+		 * in the caller. Nothing more. We could take
+		 * MAX_SCHEDULE_TIMEOUT from one of the negative value
+		 * but I' d like to return a valid offset (>=0) to allow
+		 * the caller to do everything it want with the retval.
+		 */
+		schedule();
+		goto out;
+	default:
+		/*
+		 * Another bit of PARANOID. Note that the retval will be
+		 * 0 since no piece of kernel is supposed to do a check
+		 * for a negative retval of schedule_timeout() (since it
+		 * should never happens anyway). You just have the printk()
+		 * that will tell you if something is gone wrong and where.
+		 */
+		if (timeout < 0) {
+			printk(KERN_ERR "schedule_timeout: wrong timeout "
+				"value %lx\n", timeout);
+			dump_stack();
+			current->state = TASK_RUNNING;
+			goto out;
+		}
+	}
+
+	/* FIXME: there ought to be an efficient jiffies_to_ktime
+	 *        and ktime_to_jiffies */
+	jiffies_to_timespec(timeout, &ts);
+	time = timespec_to_ktime(ts);
+	time = __schedule_timeout_hr(time, __builtin_return_address(0));
+	ts = ktime_to_timespec(time);
+	timeout = timespec_to_jiffies(&ts);
+ out:
+	return timeout < 0 ? 0 : timeout;
+}
+EXPORT_SYMBOL(schedule_timeout);
+
+/*
+ * We can use __set_current_state() here because schedule_timeout() calls
+ * schedule() unconditionally.
+ */
+signed long __sched schedule_timeout_interruptible(signed long timeout)
+{
+	__set_current_state(TASK_INTERRUPTIBLE);
+	return schedule_timeout(timeout);
+}
+EXPORT_SYMBOL(schedule_timeout_interruptible);
+
+signed long __sched schedule_timeout_uninterruptible(signed long timeout)
+{
+	__set_current_state(TASK_UNINTERRUPTIBLE);
+	return schedule_timeout(timeout);
+}
+EXPORT_SYMBOL(schedule_timeout_uninterruptible);
+
 static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode)
 {
 	hrtimer_init_sleeper(t, current);
Index: linux-cg/kernel/timer.c
===================================================================
--- linux-cg.orig/kernel/timer.c
+++ linux-cg/kernel/timer.c
@@ -1369,103 +1369,6 @@ asmlinkage long sys_getegid(void)
 
 #endif
 
-static void process_timeout(unsigned long __data)
-{
-	wake_up_process((struct task_struct *)__data);
-}
-
-/**
- * schedule_timeout - sleep until timeout
- * @timeout: timeout value in jiffies
- *
- * Make the current task sleep until @timeout jiffies have
- * elapsed. The routine will return immediately unless
- * the current task state has been set (see set_current_state()).
- *
- * You can set the task state as follows -
- *
- * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to
- * pass before the routine returns. The routine will return 0
- *
- * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
- * delivered to the current task. In this case the remaining time
- * in jiffies will be returned, or 0 if the timer expired in time
- *
- * The current task state is guaranteed to be TASK_RUNNING when this
- * routine returns.
- *
- * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule
- * the CPU away without a bound on the timeout. In this case the return
- * value will be %MAX_SCHEDULE_TIMEOUT.
- *
- * In all cases the return value is guaranteed to be non-negative.
- */
-fastcall signed long __sched schedule_timeout(signed long timeout)
-{
-	struct timer_list timer;
-	unsigned long expire;
-
-	switch (timeout)
-	{
-	case MAX_SCHEDULE_TIMEOUT:
-		/*
-		 * These two special cases are useful to be comfortable
-		 * in the caller. Nothing more. We could take
-		 * MAX_SCHEDULE_TIMEOUT from one of the negative value
-		 * but I' d like to return a valid offset (>=0) to allow
-		 * the caller to do everything it want with the retval.
-		 */
-		schedule();
-		goto out;
-	default:
-		/*
-		 * Another bit of PARANOID. Note that the retval will be
-		 * 0 since no piece of kernel is supposed to do a check
-		 * for a negative retval of schedule_timeout() (since it
-		 * should never happens anyway). You just have the printk()
-		 * that will tell you if something is gone wrong and where.
-		 */
-		if (timeout < 0) {
-			printk(KERN_ERR "schedule_timeout: wrong timeout "
-				"value %lx\n", timeout);
-			dump_stack();
-			current->state = TASK_RUNNING;
-			goto out;
-		}
-	}
-
-	expire = timeout + jiffies;
-
-	setup_timer(&timer, process_timeout, (unsigned long)current);
-	__mod_timer(&timer, expire);
-	schedule();
-	del_singleshot_timer_sync(&timer);
-
-	timeout = expire - jiffies;
-
- out:
-	return timeout < 0 ? 0 : timeout;
-}
-EXPORT_SYMBOL(schedule_timeout);
-
-/*
- * We can use __set_current_state() here because schedule_timeout() calls
- * schedule() unconditionally.
- */
-signed long __sched schedule_timeout_interruptible(signed long timeout)
-{
-	__set_current_state(TASK_INTERRUPTIBLE);
-	return schedule_timeout(timeout);
-}
-EXPORT_SYMBOL(schedule_timeout_interruptible);
-
-signed long __sched schedule_timeout_uninterruptible(signed long timeout)
-{
-	__set_current_state(TASK_UNINTERRUPTIBLE);
-	return schedule_timeout(timeout);
-}
-EXPORT_SYMBOL(schedule_timeout_uninterruptible);
-
 /* Thread ID - the internal kernel "pid" */
 asmlinkage long sys_gettid(void)
 {

--


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2007-03-05  0:29 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-03-05  0:20 [RFC PATCH 0/3] RFC: using hrtimers for in-kernel timeouts Arnd Bergmann
2007-03-05  0:20 ` [RFC PATCH 1/3] introduce schedule_timeout_hr Arnd Bergmann
2007-03-05  0:20 ` [RFC PATCH 2/3] use hrtimer in select and pselect Arnd Bergmann
2007-03-05  0:20 ` [RFC PATCH 3/3] change schedule_timeout to use hrtimers Arnd Bergmann

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).