LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [PATCH] Replace completions with semaphores
@ 2008-04-11 21:00 Matthew Wilcox
  2008-04-12  6:43 ` Daniel Walker
  2008-04-12 12:24 ` Peter Zijlstra
  0 siblings, 2 replies; 64+ messages in thread
From: Matthew Wilcox @ 2008-04-11 21:00 UTC (permalink / raw)
  To: linux-kernel; +Cc: Ingo Molnar, Linus Torvalds


A long long time ago when dinosaurs roamed the earth and tech
company stock valuations were high (2001), the primitive
of the completion was introduced to save us from a problem
with semaphores.  Those interested in the details can examine
http://www.ussg.iu.edu/hypermail/linux/kernel/0107.3/0674.html and
related messages, but everyone else can take my word for it that, with
the generic semaphores I hope will be merged in 2.6.26, semaphores no
longer have the problem that caused us to switch to completions.

So does it make sense to retain the completion as a primitive
in Linux?  On the one hand, it clearly denotes how one uses it --
that it's initially 'locked' and becomes 'unlocked' later.  On the
other hand, it is Yet Another Thing to be maintained; I had to
add wait_for_completion_killable(), probably there needs to be a
wait_for_completion_killable_timeout() too.

I have a cheesy hack to remove them without causing too much disturbance.
Obviously, I would never suggest this for inclusion -- #define completion
semaphore should be enough to raise anyone's eyebrows -- but it's a
starting point to decide whether or not to get rid of completions or
retain them in some way while basing their implementation on semaphores,
or leave them the hell alone.

I'm a little puzzled at the numbers I get for replacing completions with
semaphores.  The individual files change very little in size:

   text    data     bss     dec     hex filename
   1280       0       0    1280     500 kernel/semaphore-before.o
   1656       0       0    1656     678 kernel/semaphore.o
  36816    8270     376   45462    b196 kernel/sched-before.o
  35938    8270     392   44600    ae38 kernel/sched.o
 369958  678538 4287224 5335720  516aa8 kernel/built-in-before.o
 371154  678538 4287288 5336980  516f94 kernel/built-in.o
7388295 1401040 4712200 13501535         ce045f vmlinux-before
7380903 1401040 4712000 13493943         cde6b7 vmlinux

So despite a net gain of 500 bytes when comparing semaphore.o and sched.o,
kernel/built-in.o actually increases in size and vmlinux is even bigger.
The only thing I can think of is that this is a lockdep thing.

Anyway, here's the cheesy patch, just for entertainment value.  I think
the discussion needs to be more around semantics and maintainability
than code size or just how silly this patch is.

diff --git a/include/linux/completion.h b/include/linux/completion.h
index d2961b6..9e7e718 100644
--- a/include/linux/completion.h
+++ b/include/linux/completion.h
@@ -1,58 +1,30 @@
 #ifndef __LINUX_COMPLETION_H
 #define __LINUX_COMPLETION_H
 
-/*
- * (C) Copyright 2001 Linus Torvalds
- *
- * Atomic wait-for-completion handler data structures.
- * See kernel/sched.c for details.
- */
-
-#include <linux/wait.h>
-
-struct completion {
-	unsigned int done;
-	wait_queue_head_t wait;
-};
-
-#define COMPLETION_INITIALIZER(work) \
-	{ 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) }
-
-#define COMPLETION_INITIALIZER_ONSTACK(work) \
-	({ init_completion(&work); work; })
-
-#define DECLARE_COMPLETION(work) \
-	struct completion work = COMPLETION_INITIALIZER(work)
-
-/*
- * Lockdep needs to run a non-constant initializer for on-stack
- * completions - so we use the _ONSTACK() variant for those that
- * are on the kernel stack:
- */
-#ifdef CONFIG_LOCKDEP
-# define DECLARE_COMPLETION_ONSTACK(work) \
-	struct completion work = COMPLETION_INITIALIZER_ONSTACK(work)
-#else
-# define DECLARE_COMPLETION_ONSTACK(work) DECLARE_COMPLETION(work)
-#endif
-
-static inline void init_completion(struct completion *x)
+#include <linux/semaphore.h>
+
+#define completion semaphore
+#define COMPLETION_INITIALIZER(work)		__SEMAPHORE_INITIALIZER(work, 0)
+#define COMPLETION_INITIALIZER_ONSTACK(work)	__SEMAPHORE_INITIALIZER(work, 0)
+#define DECLARE_COMPLETION(work)	__DECLARE_SEMAPHORE_GENERIC(work, 0) 
+#define DECLARE_COMPLETION_ONSTACK(work) DECLARE_COMPLETION(work)
+#define init_completion(work)	init_MUTEX_LOCKED(work)
+
+#define wait_for_completion(work)		down(work)
+#define wait_for_completion_interruptible(work)	down_interruptible(work)
+#define wait_for_completion_killable(work)	down_killable(work)
+#define wait_for_completion_timeout(work, timeout) \
+			down_timeout(work, timeout)
+#define wait_for_completion_interruptible_timeout(work, timeout) \
+			down_interruptible_timeout(work, timeout)
+
+static inline void complete(struct semaphore *work)
 {
-	x->done = 0;
-	init_waitqueue_head(&x->wait);
+	up(work);
 }
 
-extern void wait_for_completion(struct completion *);
-extern int wait_for_completion_interruptible(struct completion *x);
-extern int wait_for_completion_killable(struct completion *x);
-extern unsigned long wait_for_completion_timeout(struct completion *x,
-						   unsigned long timeout);
-extern unsigned long wait_for_completion_interruptible_timeout(
-			struct completion *x, unsigned long timeout);
-
-extern void complete(struct completion *);
-extern void complete_all(struct completion *);
+#define complete_all(work)			up_all(work)
 
-#define INIT_COMPLETION(x)	((x).done = 0)
+#define INIT_COMPLETION(x)			init_MUTEX_LOCKED(x)
 
 #endif
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 2df44e7..e8634e5 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -90,7 +90,7 @@ extern int console_printk[];
 #define minimum_console_loglevel (console_printk[2])
 #define default_console_loglevel (console_printk[3])
 
-struct completion;
+struct semaphore;
 struct pt_regs;
 struct user;
 
@@ -135,7 +135,7 @@ extern void oops_exit(void);
 extern int oops_may_print(void);
 NORET_TYPE void do_exit(long error_code)
 	ATTRIB_NORET;
-NORET_TYPE void complete_and_exit(struct completion *, long)
+NORET_TYPE void complete_and_exit(struct semaphore *, long)
 	ATTRIB_NORET;
 extern unsigned long simple_strtoul(const char *,char **,unsigned int);
 extern long simple_strtol(const char *,char **,unsigned int);
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 9b6c935..111c540 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -8,7 +8,7 @@
 #include <asm/atomic.h>
 
 struct net;
-struct completion;
+struct semaphore;
 
 /*
  * The proc filesystem constants/structures
@@ -79,7 +79,7 @@ struct proc_dir_entry {
 	atomic_t count;		/* use count */
 	int pde_users;	/* number of callers into module in progress */
 	spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */
-	struct completion *pde_unload_completion;
+	struct semaphore *pde_unload_completion;
 };
 
 struct kcore_list {
diff --git a/include/linux/semaphore.h b/include/linux/semaphore.h
index 9cae64b..470a4bc 100644
--- a/include/linux/semaphore.h
+++ b/include/linux/semaphore.h
@@ -46,6 +46,9 @@ extern int __must_check down_interruptible(struct semaphore *sem);
 extern int __must_check down_killable(struct semaphore *sem);
 extern int __must_check down_trylock(struct semaphore *sem);
 extern int __must_check down_timeout(struct semaphore *sem, long jiffies);
+extern int __must_check down_interruptible_timeout(struct semaphore *sem,
+						   long jiffies);
 extern void up(struct semaphore *sem);
+extern void up_all(struct semaphore *sem);
 
 #endif /* __LINUX_SEMAPHORE_H */
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 571f01d..15d0ace 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -30,7 +30,7 @@
 #include <linux/compiler.h>
 
 struct file;
-struct completion;
+struct semaphore;
 
 #define CTL_MAXNAME 10		/* how many path components do we allow in a
 				   call to sysctl?   In other words, what is
@@ -1063,7 +1063,7 @@ struct ctl_table_header
 	struct ctl_table *ctl_table;
 	struct list_head ctl_entry;
 	int used;
-	struct completion *unregistering;
+	struct semaphore *unregistering;
 	struct ctl_table *ctl_table_arg;
 	struct ctl_table_root *root;
 };
diff --git a/kernel/sched.c b/kernel/sched.c
index 8dcdec6..08752d4 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4142,109 +4142,6 @@ __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
 }
 EXPORT_SYMBOL_GPL(__wake_up_sync);	/* For internal use only */
 
-void complete(struct completion *x)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&x->wait.lock, flags);
-	x->done++;
-	__wake_up_common(&x->wait, TASK_NORMAL, 1, 0, NULL);
-	spin_unlock_irqrestore(&x->wait.lock, flags);
-}
-EXPORT_SYMBOL(complete);
-
-void complete_all(struct completion *x)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&x->wait.lock, flags);
-	x->done += UINT_MAX/2;
-	__wake_up_common(&x->wait, TASK_NORMAL, 0, 0, NULL);
-	spin_unlock_irqrestore(&x->wait.lock, flags);
-}
-EXPORT_SYMBOL(complete_all);
-
-static inline long __sched
-do_wait_for_common(struct completion *x, long timeout, int state)
-{
-	if (!x->done) {
-		DECLARE_WAITQUEUE(wait, current);
-
-		wait.flags |= WQ_FLAG_EXCLUSIVE;
-		__add_wait_queue_tail(&x->wait, &wait);
-		do {
-			if ((state == TASK_INTERRUPTIBLE &&
-			     signal_pending(current)) ||
-			    (state == TASK_KILLABLE &&
-			     fatal_signal_pending(current))) {
-				__remove_wait_queue(&x->wait, &wait);
-				return -ERESTARTSYS;
-			}
-			__set_current_state(state);
-			spin_unlock_irq(&x->wait.lock);
-			timeout = schedule_timeout(timeout);
-			spin_lock_irq(&x->wait.lock);
-			if (!timeout) {
-				__remove_wait_queue(&x->wait, &wait);
-				return timeout;
-			}
-		} while (!x->done);
-		__remove_wait_queue(&x->wait, &wait);
-	}
-	x->done--;
-	return timeout;
-}
-
-static long __sched
-wait_for_common(struct completion *x, long timeout, int state)
-{
-	might_sleep();
-
-	spin_lock_irq(&x->wait.lock);
-	timeout = do_wait_for_common(x, timeout, state);
-	spin_unlock_irq(&x->wait.lock);
-	return timeout;
-}
-
-void __sched wait_for_completion(struct completion *x)
-{
-	wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE);
-}
-EXPORT_SYMBOL(wait_for_completion);
-
-unsigned long __sched
-wait_for_completion_timeout(struct completion *x, unsigned long timeout)
-{
-	return wait_for_common(x, timeout, TASK_UNINTERRUPTIBLE);
-}
-EXPORT_SYMBOL(wait_for_completion_timeout);
-
-int __sched wait_for_completion_interruptible(struct completion *x)
-{
-	long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_INTERRUPTIBLE);
-	if (t == -ERESTARTSYS)
-		return t;
-	return 0;
-}
-EXPORT_SYMBOL(wait_for_completion_interruptible);
-
-unsigned long __sched
-wait_for_completion_interruptible_timeout(struct completion *x,
-					  unsigned long timeout)
-{
-	return wait_for_common(x, timeout, TASK_INTERRUPTIBLE);
-}
-EXPORT_SYMBOL(wait_for_completion_interruptible_timeout);
-
-int __sched wait_for_completion_killable(struct completion *x)
-{
-	long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_KILLABLE);
-	if (t == -ERESTARTSYS)
-		return t;
-	return 0;
-}
-EXPORT_SYMBOL(wait_for_completion_killable);
-
 static long __sched
 sleep_on_common(wait_queue_head_t *q, int state, long timeout)
 {
diff --git a/kernel/semaphore.c b/kernel/semaphore.c
index 5c2942e..4e536c5 100644
--- a/kernel/semaphore.c
+++ b/kernel/semaphore.c
@@ -36,6 +36,8 @@ static noinline void __down(struct semaphore *sem);
 static noinline int __down_interruptible(struct semaphore *sem);
 static noinline int __down_killable(struct semaphore *sem);
 static noinline int __down_timeout(struct semaphore *sem, long jiffies);
+static noinline int __down_interruptible_timeout(struct semaphore *sem,
+							long jiffies);
 static noinline void __up(struct semaphore *sem);
 
 /**
@@ -167,6 +169,22 @@ int down_timeout(struct semaphore *sem, long jiffies)
 }
 EXPORT_SYMBOL(down_timeout);
 
+int down_interruptible_timeout(struct semaphore *sem, long jiffies)
+{
+	unsigned long flags;
+	int result = 0;
+
+	spin_lock_irqsave(&sem->lock, flags);
+	if (likely(sem->count > 0))
+		sem->count--;
+	else
+		result = __down_interruptible_timeout(sem, jiffies);
+	spin_unlock_irqrestore(&sem->lock, flags);
+
+	return result;
+}
+EXPORT_SYMBOL(down_interruptible_timeout);
+
 /**
  * up - release the semaphore
  * @sem: the semaphore to release
@@ -187,6 +205,18 @@ void up(struct semaphore *sem)
 }
 EXPORT_SYMBOL(up);
 
+void up_all(struct semaphore *sem)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&sem->lock, flags);
+	while (!list_empty(&sem->wait_list))
+		__up(sem);
+	sem->count = 1 << 30;
+	spin_unlock_irqrestore(&sem->lock, flags);
+}
+EXPORT_SYMBOL(up_all);
+
 /* Functions for the contended case */
 
 struct semaphore_waiter {
@@ -254,6 +284,12 @@ static noinline int __sched __down_timeout(struct semaphore *sem, long jiffies)
 	return __down_common(sem, TASK_UNINTERRUPTIBLE, jiffies);
 }
 
+static noinline int __sched __down_interruptible_timeout(struct semaphore *sem,
+								long jiffies)
+{
+	return __down_common(sem, TASK_INTERRUPTIBLE, jiffies);
+}
+
 static noinline void __sched __up(struct semaphore *sem)
 {
 	struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list,

-- 
Intel are signing my paycheques ... these opinions are still mine
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours.  We can't possibly take such
a retrograde step."

^ permalink raw reply related	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-11 21:00 [PATCH] Replace completions with semaphores Matthew Wilcox
@ 2008-04-12  6:43 ` Daniel Walker
  2008-04-12 10:31   ` Ingo Oeser
  2008-04-12 12:24 ` Peter Zijlstra
  1 sibling, 1 reply; 64+ messages in thread
From: Daniel Walker @ 2008-04-12  6:43 UTC (permalink / raw)
  To: Matthew Wilcox; +Cc: linux-kernel, Ingo Molnar, Linus Torvalds


On Fri, 2008-04-11 at 15:00 -0600, Matthew Wilcox wrote:

> So does it make sense to retain the completion as a primitive
> in Linux?  On the one hand, it clearly denotes how one uses it --
> that it's initially 'locked' and becomes 'unlocked' later.  On the
> other hand, it is Yet Another Thing to be maintained; I had to
> add wait_for_completion_killable(), probably there needs to be a
> wait_for_completion_killable_timeout() too.

>From my perspective it should be keep completions , and remove
semaphores.. The problem with semaphores is the lack of a strict API,
and loose usage. I've seen a lot of "creative" locking in the kernel,
and if we allow that we're just asking for continued maintainability
problems in the code that uses semaphores. At times I've spent hours
trying to figure out what a semaphore is doing, or suppose to be doing.
If we enforce strict usage of semaphores, then we'll basically reproduce
mutex usage, and we have a generic mutex already..

Daniel



^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-12  6:43 ` Daniel Walker
@ 2008-04-12 10:31   ` Ingo Oeser
  0 siblings, 0 replies; 64+ messages in thread
From: Ingo Oeser @ 2008-04-12 10:31 UTC (permalink / raw)
  To: Daniel Walker; +Cc: Matthew Wilcox, linux-kernel, Ingo Molnar, Linus Torvalds

Hi Daniel,

On Saturday 12 April 2008, Daniel Walker wrote:
> From my perspective it should be keep completions , and remove
> semaphores.. The problem with semaphores is the lack of a strict API, 
> and loose usage. 

Seconded!

Ask any software engineer, to describe what a semaphore is and
how it behaves. You'll get the full spectrum of possible semaphore
implementation options. Ask any advanced software engineer and he'll
ask "What kind of semaphore?".

So let them die ASAP!

> At times I've spent hours trying to figure out what a semaphore 
> is doing, or suppose to be doing. 

Same here. Or figuring out what kind of semphore behavior the developer
expected.

> If we enforce strict usage of semaphores, then we'll basically reproduce
> mutex usage, and we have a generic mutex already..

Or have different API for using the same semaphores in different 
creative ways. It might be hard to implement lockdep for that :-)

No, I'm happy that Linux has so many more advanced concurrency concepts,
that the pre-dinosaur locking mechanism (semaphore) is usually 
the last on your list.


Best Regards

Ingo Oeser

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-11 21:00 [PATCH] Replace completions with semaphores Matthew Wilcox
  2008-04-12  6:43 ` Daniel Walker
@ 2008-04-12 12:24 ` Peter Zijlstra
  2008-04-12 17:26   ` Matthew Wilcox
  2008-04-13  7:05   ` Ingo Molnar
  1 sibling, 2 replies; 64+ messages in thread
From: Peter Zijlstra @ 2008-04-12 12:24 UTC (permalink / raw)
  To: Matthew Wilcox; +Cc: linux-kernel, Ingo Molnar, Linus Torvalds

On Fri, 2008-04-11 at 15:00 -0600, Matthew Wilcox wrote:

> So does it make sense to retain the completion as a primitive
> in Linux? 

My take on it is the opposite: kill off semaphores and leave the
completions around.


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-12 12:24 ` Peter Zijlstra
@ 2008-04-12 17:26   ` Matthew Wilcox
  2008-04-12 18:01     ` Daniel Walker
                       ` (2 more replies)
  2008-04-13  7:05   ` Ingo Molnar
  1 sibling, 3 replies; 64+ messages in thread
From: Matthew Wilcox @ 2008-04-12 17:26 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Oeser, Daniel Walker
  Cc: linux-kernel, Ingo Molnar, Linus Torvalds

On Sat, Apr 12, 2008 at 02:24:41PM +0200, Peter Zijlstra wrote:
> On Fri, 2008-04-11 at 15:00 -0600, Matthew Wilcox wrote:
> > So does it make sense to retain the completion as a primitive
> > in Linux? 
> 
> My take on it is the opposite: kill off semaphores and leave the
> completions around.

OK, so all three of you have got the wrong end of the stick.

My point is that the struct semaphore and the struct completion are
essentially the same data structure, operated on in essentially the same
way and having essentially the same purpose.

It would look bloody odd to write (code taken from megasas_mgmt_ioctl_fw() in
drivers/scsi/megaraid/megaraid_sas.c):

        if (wait_for_completion_interruptible(&instance->ioctl_completion)) {
                error = -ERESTARTSYS;
                goto out_kfree_ioc;
        }
        error = megasas_mgmt_fw_ioctl(instance, user_ioc, ioc);
        complete(&instance->ioctl_sem);

What I'm trying to get a feeling for is whether people find it similarly
odd to use semaphores where we currently use completions.  We *used*
to, but I don't find that a compelling reason.


Arnd contacted me off-list and made the very sensible suggestion of:

struct completion {
	struct semaphore sem;
}

That lets us eliminate the duplicate code since all the completion
functions become very thin wrappers around semaphore operations.

I'll note that the semaphore code I hae queued for 2.6.26 is slightly
more efficient than the current implementation of completions because
completions use the generic waitqueue code and thus do an indirect
function call per wakeup.  Of course, there's no reason completions
couldn't use the same technique as my semaphore code ... but then they
would be identical to semaphores ;-)

So I'm going to redo my patch using Arnd's idea because it allows us
to eliminate code without changing the API at all.  We can continue the
discussion about eliminating one or the other API, but my opinion is that
eliminating either is a mistake.  The choice of API indicates how the
author thinks about the code, which is crucial for those reading the code.

-- 
Intel are signing my paycheques ... these opinions are still mine
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours.  We can't possibly take such
a retrograde step."

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-12 17:26   ` Matthew Wilcox
@ 2008-04-12 18:01     ` Daniel Walker
  2008-04-12 18:05     ` Peter Zijlstra
  2008-04-12 19:53     ` Roland Dreier
  2 siblings, 0 replies; 64+ messages in thread
From: Daniel Walker @ 2008-04-12 18:01 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Peter Zijlstra, Ingo Oeser, linux-kernel, Ingo Molnar, Linus Torvalds


On Sat, 2008-04-12 at 11:26 -0600, Matthew Wilcox wrote:

> It would look bloody odd to write (code taken from megasas_mgmt_ioctl_fw() in
> drivers/scsi/megaraid/megaraid_sas.c):
> 
>         if (wait_for_completion_interruptible(&instance->ioctl_completion)) {
>                 error = -ERESTARTSYS;
>                 goto out_kfree_ioc;
>         }
>         error = megasas_mgmt_fw_ioctl(instance, user_ioc, ioc);
>         complete(&instance->ioctl_sem);
> 
> What I'm trying to get a feeling for is whether people find it similarly
> odd to use semaphores where we currently use completions.  We *used*
> to, but I don't find that a compelling reason.

The above doesn't look all that odd to me. It may be that you've seen
semaphores in that position in the past and just expect to see them. 

> Arnd contacted me off-list and made the very sensible suggestion of:
> 
> struct completion {
> 	struct semaphore sem;
> }
> 
> That lets us eliminate the duplicate code since all the completion
> functions become very thin wrappers around semaphore operations.
> 
> I'll note that the semaphore code I hae queued for 2.6.26 is slightly
> more efficient than the current implementation of completions because
> completions use the generic waitqueue code and thus do an indirect
> function call per wakeup.  Of course, there's no reason completions
> couldn't use the same technique as my semaphore code ... but then they
> would be identical to semaphores ;-)

I would just re-write completions keeping the name and API in tact, make
them better and just leave semaphores alone..

Daniel


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-12 17:26   ` Matthew Wilcox
  2008-04-12 18:01     ` Daniel Walker
@ 2008-04-12 18:05     ` Peter Zijlstra
  2008-04-12 19:04       ` Matthew Wilcox
  2008-04-12 19:53     ` Roland Dreier
  2 siblings, 1 reply; 64+ messages in thread
From: Peter Zijlstra @ 2008-04-12 18:05 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Ingo Oeser, Daniel Walker, linux-kernel, Ingo Molnar, Linus Torvalds

On Sat, 2008-04-12 at 11:26 -0600, Matthew Wilcox wrote:

>  We can continue the
> discussion about eliminating one or the other API, but my opinion is that
> eliminating either is a mistake.  The choice of API indicates how the
> author thinks about the code, which is crucial for those reading the code.

Which is exactly the point, semaphores are rarely the right tool for the
job (I'm currently only aware of XFS that actually uses them as actual
semaphores, and the driver model that uses it to get around lockdep -
still need to come up with a good solution for that).

Even worse, semaphores actively harm the development of linux-rt and
arguably linux kernel development as a whole by not being part of
lockdep - so making a mistake will not be noticed until its too late.

Completions OTOH are not the typical exclusion primitive, they are more
a synchronisation primitive - not unlike wait4(). Typically not things
concerned about priority inversion.



^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-12 18:05     ` Peter Zijlstra
@ 2008-04-12 19:04       ` Matthew Wilcox
  2008-04-12 19:16         ` Peter Zijlstra
  0 siblings, 1 reply; 64+ messages in thread
From: Matthew Wilcox @ 2008-04-12 19:04 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Ingo Oeser, Daniel Walker, linux-kernel, Ingo Molnar, Linus Torvalds

On Sat, Apr 12, 2008 at 08:05:49PM +0200, Peter Zijlstra wrote:
> On Sat, 2008-04-12 at 11:26 -0600, Matthew Wilcox wrote:
> >  We can continue the
> > discussion about eliminating one or the other API, but my opinion is that
> > eliminating either is a mistake.  The choice of API indicates how the
> > author thinks about the code, which is crucial for those reading the code.
> 
> Which is exactly the point, semaphores are rarely the right tool for the
> job (I'm currently only aware of XFS that actually uses them as actual
> semaphores, and the driver model that uses it to get around lockdep -
> still need to come up with a good solution for that).

I guess you haven't grepped the tree recently then.  We have 11 places
which call sema_init with a number other than 0 or 1 as the second
argument:

./fs/xfs/linux-2.6/sema.h:#define initnsema(sp, val, name)      sema_init(sp, val)
./drivers/net/mlx4/cmd.c:       sema_init(&priv->cmd.event_sem, priv->cmd.max_cmds);
./drivers/char/viotape.c:       sema_init(&reqSem, VIOTAPE_MAXREQ);
./drivers/infiniband/hw/mthca/mthca_cmd.c:      sema_init(&dev->cmd.event_sem, dev->cmd.max_cmds);
./drivers/acpi/osl.c:   sema_init(sem, initial_units);
./drivers/scsi/megaraid/megaraid_sas.c: sema_init(&instance->ioctl_sem, MEGASAS_INT_CMDS);
./drivers/scsi/megaraid/megaraid_mm.c:  sema_init(&adapter->kioc_semaphore, lld_adp->max_kioc);
./drivers/video/omap/hwa742.c:  sema_init(&hwa742.req_sema, i - IRQ_REQ_POOL_SIZE);
./drivers/video/omap/blizzard.c:        sema_init(&blizzard.req_sema, i - IRQ_REQ_POOL_SIZE);
./drivers/usb/misc/usblcd.c:    sema_init(&dev->limit_sem, USB_LCD_CONCURRENT_WRITES);
./drivers/usb/usb-skeleton.c:   sema_init(&dev->limit_sem, WRITES_IN_FLIGHT);

It's nice to know that nobody is using __DECLARE_SEMAPHORE_GENERIC or
__SEMAPHORE_INITIALIZER at this time.  I'll delete those from my semaphore
tree soon.

-- 
Intel are signing my paycheques ... these opinions are still mine
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours.  We can't possibly take such
a retrograde step."

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-12 19:04       ` Matthew Wilcox
@ 2008-04-12 19:16         ` Peter Zijlstra
  0 siblings, 0 replies; 64+ messages in thread
From: Peter Zijlstra @ 2008-04-12 19:16 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Ingo Oeser, Daniel Walker, linux-kernel, Ingo Molnar, Linus Torvalds

On Sat, 2008-04-12 at 13:04 -0600, Matthew Wilcox wrote:
> On Sat, Apr 12, 2008 at 08:05:49PM +0200, Peter Zijlstra wrote:
> > On Sat, 2008-04-12 at 11:26 -0600, Matthew Wilcox wrote:
> > >  We can continue the
> > > discussion about eliminating one or the other API, but my opinion is that
> > > eliminating either is a mistake.  The choice of API indicates how the
> > > author thinks about the code, which is crucial for those reading the code.
> > 
> > Which is exactly the point, semaphores are rarely the right tool for the
> > job (I'm currently only aware of XFS that actually uses them as actual
> > semaphores, and the driver model that uses it to get around lockdep -
> > still need to come up with a good solution for that).
> 
> I guess you haven't grepped the tree recently then.  We have 11 places
> which call sema_init with a number other than 0 or 1 as the second
> argument:
> 
> ../fs/xfs/linux-2.6/sema.h:#define initnsema(sp, val, name)      sema_init(sp, val)
> ../drivers/net/mlx4/cmd.c:       sema_init(&priv->cmd.event_sem, priv->cmd.max_cmds);
> ../drivers/char/viotape.c:       sema_init(&reqSem, VIOTAPE_MAXREQ);
> ../drivers/infiniband/hw/mthca/mthca_cmd.c:      sema_init(&dev->cmd.event_sem, dev->cmd.max_cmds);
> ../drivers/acpi/osl.c:   sema_init(sem, initial_units);
> ../drivers/scsi/megaraid/megaraid_sas.c: sema_init(&instance->ioctl_sem, MEGASAS_INT_CMDS);
> ../drivers/scsi/megaraid/megaraid_mm.c:  sema_init(&adapter->kioc_semaphore, lld_adp->max_kioc);
> ../drivers/video/omap/hwa742.c:  sema_init(&hwa742.req_sema, i - IRQ_REQ_POOL_SIZE);
> ../drivers/video/omap/blizzard.c:        sema_init(&blizzard.req_sema, i - IRQ_REQ_POOL_SIZE);
> ../drivers/usb/misc/usblcd.c:    sema_init(&dev->limit_sem, USB_LCD_CONCURRENT_WRITES);
> ../drivers/usb/usb-skeleton.c:   sema_init(&dev->limit_sem, WRITES_IN_FLIGHT);
> 
> It's nice to know that nobody is using __DECLARE_SEMAPHORE_GENERIC or
> __SEMAPHORE_INITIALIZER at this time.  I'll delete those from my semaphore
> tree soon.

Luckily those are mostly drivers, but yeah - I haven't actually grepped
the tree I just know the few sem users I've ran into myself.

Still doesn't make it a good idea to have semaphores. 


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-12 17:26   ` Matthew Wilcox
  2008-04-12 18:01     ` Daniel Walker
  2008-04-12 18:05     ` Peter Zijlstra
@ 2008-04-12 19:53     ` Roland Dreier
  2008-04-12 20:47       ` Matthew Wilcox
  2008-04-13 13:55       ` [PATCH] Replace completions with semaphores Bart Van Assche
  2 siblings, 2 replies; 64+ messages in thread
From: Roland Dreier @ 2008-04-12 19:53 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Peter Zijlstra, Ingo Oeser, Daniel Walker, linux-kernel,
	Ingo Molnar, Linus Torvalds

 > Arnd contacted me off-list and made the very sensible suggestion of:
 > 
 > struct completion {
 > 	struct semaphore sem;
 > }
 > 
 > That lets us eliminate the duplicate code since all the completion
 > functions become very thin wrappers around semaphore operations.

Just make sure you don't forget the history of completions...  As
Linus said long ago (http://lwn.net/2001/0802/a/lt-completions.php3):

  In case anybody cares, the race was that Linux semaphores only protect the
  accesses _inside_ the semaphore, while the accesses by the semaphores
  themselves can "race" in the internal implementation. That helps make an
  efficient implementation, but it means that the race was:
  
            cpu #1                       cpu #2
  
            DECLARE_MUTEX_LOCKED(sem);
            ..
            down(&sem);                   up(&sem);
            return;
                                          wake_up(&sem.wait) /*BOOM*/

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-12 19:53     ` Roland Dreier
@ 2008-04-12 20:47       ` Matthew Wilcox
  2008-04-13  7:08         ` Ingo Molnar
  2008-04-13 13:55       ` [PATCH] Replace completions with semaphores Bart Van Assche
  1 sibling, 1 reply; 64+ messages in thread
From: Matthew Wilcox @ 2008-04-12 20:47 UTC (permalink / raw)
  To: Roland Dreier
  Cc: Peter Zijlstra, Ingo Oeser, Daniel Walker, linux-kernel,
	Ingo Molnar, Linus Torvalds

On Sat, Apr 12, 2008 at 12:53:30PM -0700, Roland Dreier wrote:
> Just make sure you don't forget the history of completions...  As
> Linus said long ago (http://lwn.net/2001/0802/a/lt-completions.php3):
> 
>   In case anybody cares, the race was that Linux semaphores only protect the
>   accesses _inside_ the semaphore, while the accesses by the semaphores
>   themselves can "race" in the internal implementation. That helps make an
>   efficient implementation, but it means that the race was:

Yes, that text appears in the URL I provided in the mail that started
this thread ;-)

The semaphore rewrite I did does not have this problem (it's less
efficient than the hand-optimised assembler, but much more maintainable).
You're supposed to be using mutexes if you want efficiency anyway.

-- 
Intel are signing my paycheques ... these opinions are still mine
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours.  We can't possibly take such
a retrograde step."

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-12 12:24 ` Peter Zijlstra
  2008-04-12 17:26   ` Matthew Wilcox
@ 2008-04-13  7:05   ` Ingo Molnar
  2008-04-13 12:52     ` Matthew Wilcox
  1 sibling, 1 reply; 64+ messages in thread
From: Ingo Molnar @ 2008-04-13  7:05 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Matthew Wilcox, linux-kernel, Linus Torvalds


* Peter Zijlstra <peterz@infradead.org> wrote:

> On Fri, 2008-04-11 at 15:00 -0600, Matthew Wilcox wrote:
> 
> > So does it make sense to retain the completion as a primitive in 
> > Linux?
> 
> My take on it is the opposite: kill off semaphores and leave the 
> completions around.

exactly. I first misread the subject line because i parsed it as 
"replace semaphores with completions" and i thought "cool", then did i 
realize that the patch does it the other way around. Converting 
completions to semaphores just makes no sense.

Besides all your arguments later in this thread about how problematic 
locking primitives semaphores are because they are so vague (which are 
all correct), there's also another aspect: completions are faster a bit 
in theory, because they know that they will schedule most of the time - 
while semaphores assume that they will _not_ schedule. (And that's 
exactly because the intent of the developer when using a completion is 
crystal clear.)

	Ingo

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-12 20:47       ` Matthew Wilcox
@ 2008-04-13  7:08         ` Ingo Molnar
  2008-04-13 12:57           ` Matthew Wilcox
  2008-04-13 14:55           ` Bart Van Assche
  0 siblings, 2 replies; 64+ messages in thread
From: Ingo Molnar @ 2008-04-13  7:08 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Roland Dreier, Peter Zijlstra, Ingo Oeser, Daniel Walker,
	linux-kernel, Linus Torvalds


* Matthew Wilcox <matthew@wil.cx> wrote:

> Yes, that text appears in the URL I provided in the mail that started 
> this thread ;-)
> 
> The semaphore rewrite I did does not have this problem (it's less 
> efficient than the hand-optimised assembler, but much more 
> maintainable). You're supposed to be using mutexes if you want 
> efficiency anyway.

but semaphores will be _removed_, _completely_. Rewriting them in 
generic C code is just the first step towards that - it consolidates all 
the myriads of semaphore implementations that Linux has spread out.

your proposed change to change completions to semaphores is totally 
backwards and prolongs an API we want to get rid of. Did you miss this 
aspect of the mutex rewrite, of the semaphore-to-mutex, 
semaphore-to-completions and semaphore-to-rwsem conversions?

	Ingo

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-13  7:05   ` Ingo Molnar
@ 2008-04-13 12:52     ` Matthew Wilcox
  2008-04-14 15:41       ` Ingo Molnar
  2008-04-14 16:54       ` Jens Axboe
  0 siblings, 2 replies; 64+ messages in thread
From: Matthew Wilcox @ 2008-04-13 12:52 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Peter Zijlstra, linux-kernel, Linus Torvalds

On Sun, Apr 13, 2008 at 09:05:13AM +0200, Ingo Molnar wrote:
> there's also another aspect: completions are faster a bit 
> in theory, because they know that they will schedule most of the time - 
> while semaphores assume that they will _not_ schedule. (And that's 
> exactly because the intent of the developer when using a completion is 
> crystal clear.)

In practice though, the current implementation is slower.  Of course,
that's fixable, and I strongly suspect that the current users of
completions simply don't care about speed -- the normal use of
completions is in startup and shutdown paths where a millisecond extra
isn't going to be noticable.

-- 
Intel are signing my paycheques ... these opinions are still mine
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours.  We can't possibly take such
a retrograde step."

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-13  7:08         ` Ingo Molnar
@ 2008-04-13 12:57           ` Matthew Wilcox
  2008-04-14 15:39             ` Ingo Molnar
  2008-04-13 14:55           ` Bart Van Assche
  1 sibling, 1 reply; 64+ messages in thread
From: Matthew Wilcox @ 2008-04-13 12:57 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Roland Dreier, Peter Zijlstra, Ingo Oeser, Daniel Walker,
	linux-kernel, Linus Torvalds

On Sun, Apr 13, 2008 at 09:08:33AM +0200, Ingo Molnar wrote:
> but semaphores will be _removed_, _completely_. Rewriting them in 
> generic C code is just the first step towards that - it consolidates all 
> the myriads of semaphore implementations that Linux has spread out.

I think you're dreaming.  We have 11 places in the tree that currently
demand to use a counting semaphore (with 'n' plausibly greater than 1).
If we remove the facility, these places are going to invent their own
weird locking schemes.

> your proposed change to change completions to semaphores is totally 
> backwards and prolongs an API we want to get rid of. Did you miss this 
> aspect of the mutex rewrite, of the semaphore-to-mutex, 
> semaphore-to-completions and semaphore-to-rwsem conversions?

Yes -- because it wasn't raised at the time for semaphore-to-completions.
It was raised for semaphore-to-mutex, of course.  And I agree that we
/want/ to be rid of counting semaphores, but I don't see a way to do it.
I do believe tht we should be converting all the semaphore users to
mutexes that we can (eg scx200_wdt.c), and converting semaphore users to
completions where that makes sense (eg libusual.c).  But there's still
the other dozen places to support.

-- 
Intel are signing my paycheques ... these opinions are still mine
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours.  We can't possibly take such
a retrograde step."

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-12 19:53     ` Roland Dreier
  2008-04-12 20:47       ` Matthew Wilcox
@ 2008-04-13 13:55       ` Bart Van Assche
  2008-04-13 14:22         ` Matthew Wilcox
  1 sibling, 1 reply; 64+ messages in thread
From: Bart Van Assche @ 2008-04-13 13:55 UTC (permalink / raw)
  To: Roland Dreier
  Cc: Matthew Wilcox, Peter Zijlstra, Ingo Oeser, Daniel Walker,
	linux-kernel, Ingo Molnar, Linus Torvalds

On Sat, Apr 12, 2008 at 9:53 PM, Roland Dreier <rdreier@cisco.com> wrote:
>  Just make sure you don't forget the history of completions...  As
>  Linus said long ago (http://lwn.net/2001/0802/a/lt-completions.php3):
>
>   In case anybody cares, the race was that Linux semaphores only protect the
>   accesses _inside_ the semaphore, while the accesses by the semaphores
>   themselves can "race" in the internal implementation. That helps make an
>   efficient implementation, but it means that the race was:
>
>             cpu #1                       cpu #2
>
>             DECLARE_MUTEX_LOCKED(sem);
>             ..
>             down(&sem);                   up(&sem);
>             return;
>                                           wake_up(&sem.wait) /*BOOM*/

Thanks for bringing this back to attention -- I wasn't aware of the
message you cited.

My opinion about the above race is that this race has nothing to do
with the semaphore concept, but that the race is caused by the way in
which the semaphore object is used. Using any object after it has been
destroyed is asking for trouble.

Bart.

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-13 13:55       ` [PATCH] Replace completions with semaphores Bart Van Assche
@ 2008-04-13 14:22         ` Matthew Wilcox
  0 siblings, 0 replies; 64+ messages in thread
From: Matthew Wilcox @ 2008-04-13 14:22 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: Roland Dreier, Peter Zijlstra, Ingo Oeser, Daniel Walker,
	linux-kernel, Ingo Molnar, Linus Torvalds

On Sun, Apr 13, 2008 at 03:55:45PM +0200, Bart Van Assche wrote:
> On Sat, Apr 12, 2008 at 9:53 PM, Roland Dreier <rdreier@cisco.com> wrote:
> >  Just make sure you don't forget the history of completions...  As
> >  Linus said long ago (http://lwn.net/2001/0802/a/lt-completions.php3):
> >
> >   In case anybody cares, the race was that Linux semaphores only protect the
> >   accesses _inside_ the semaphore, while the accesses by the semaphores
> >   themselves can "race" in the internal implementation. That helps make an
> >   efficient implementation, but it means that the race was:
> >
> >             cpu #1                       cpu #2
> >
> >             DECLARE_MUTEX_LOCKED(sem);
> >             ..
> >             down(&sem);                   up(&sem);
> >             return;
> >                                           wake_up(&sem.wait) /*BOOM*/
> 
> Thanks for bringing this back to attention -- I wasn't aware of the
> message you cited.
> 
> My opinion about the above race is that this race has nothing to do
> with the semaphore concept, but that the race is caused by the way in
> which the semaphore object is used. Using any object after it has been
> destroyed is asking for trouble.

I think you need to re-read more carefully.

The users of the semaphore were doing nothing wrong.  They were not
using the object after it was destroyed.

The i386 implementation of the semaphore was calling wake_up() after
setting the counter to allow cpu #0 to proceed.  That was faster for the
common case, but had this problem.  completions were careful not to do
that, and the semaphore implementation I wrote doesn't do that either.

-- 
Intel are signing my paycheques ... these opinions are still mine
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours.  We can't possibly take such
a retrograde step."

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-13  7:08         ` Ingo Molnar
  2008-04-13 12:57           ` Matthew Wilcox
@ 2008-04-13 14:55           ` Bart Van Assche
  2008-04-14 17:12             ` API documentation (was [PATCH] Replace completions with semaphores) Jonathan Corbet
  1 sibling, 1 reply; 64+ messages in thread
From: Bart Van Assche @ 2008-04-13 14:55 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Matthew Wilcox, Roland Dreier, Peter Zijlstra, Ingo Oeser,
	Daniel Walker, linux-kernel, Linus Torvalds

On Sun, Apr 13, 2008 at 9:08 AM, Ingo Molnar <mingo@elte.hu> wrote:
>
>  but semaphores will be _removed_, _completely_. Rewriting them in
>  generic C code is just the first step towards that - it consolidates all
>  the myriads of semaphore implementations that Linux has spread out.
>
>  your proposed change to change completions to semaphores is totally
>  backwards and prolongs an API we want to get rid of. Did you miss this
>  aspect of the mutex rewrite, of the semaphore-to-mutex,
>  semaphore-to-completions and semaphore-to-rwsem conversions?

I know the semaphore-to-mutex rewrites were essential a.o. to make
further merging of the real-time tree into the mainstream Linux kernel
possible. But the semaphore concept is more powerful than the mutex
concept: semaphores can be used to let one thread wait for a state
change reported by another thread. Should all state-change-waiting be
implemented via wait_event*() functions ?

One of the strengths of the Linux kernel is that the barrier for new
developers is low: in theory anyone familiar with the C programming
language can start writing kernel drivers. Most people still learn
kernel development via the third edition of the "Linux Device Drivers"
book, and with some luck or some help, they come across an overview of
the 2.6 kernel API changes (http://lwn.net/Articles/2.6-kernel-api/).
The LWN book is getting outdated after all the 2.6 kernel API changes,
and the page with 2.6 kernel API changes was last updated six months
ago. Where can a kernel developer find up to date information about
kernel programming ?

Bart.

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-13 12:57           ` Matthew Wilcox
@ 2008-04-14 15:39             ` Ingo Molnar
  2008-04-14 15:58               ` Roland Dreier
  0 siblings, 1 reply; 64+ messages in thread
From: Ingo Molnar @ 2008-04-14 15:39 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Roland Dreier, Peter Zijlstra, Ingo Oeser, Daniel Walker,
	linux-kernel, Linus Torvalds


* Matthew Wilcox <matthew@wil.cx> wrote:

> On Sun, Apr 13, 2008 at 09:08:33AM +0200, Ingo Molnar wrote:
> > but semaphores will be _removed_, _completely_. Rewriting them in 
> > generic C code is just the first step towards that - it consolidates all 
> > the myriads of semaphore implementations that Linux has spread out.
> 
> I think you're dreaming. [...]

no, i'm hoping for a simpler and thus more maintainable kernel.

> [...]  We have 11 places in the tree that currently demand to use a 
> counting semaphore (with 'n' plausibly greater than 1). [...]

which ones exactly are these places that demand the use of a counting 
semaphore? I cannot think of a single place where it's the best choice, 
let alone one where it's the only choice.

And if there's just 5 isolated places in the kernel that would need a 
different locking primitive then i say that it's probably not worth it. 
Locking is at the center of every data structure and the clarity and 
simplicity of basic data structure operations is paramount IMHO. 

Semaphores are i think a relic from the days when people thought that 
parallelism can be best achieved via complexity and rafinery of locking 
primitives - regardless of how usable and debuggable those APIs end up 
to be. Today we can go extremely parallel with very simple and robust 
primitives.

> [...] If we remove the facility, these places are going to invent 
> their own weird locking schemes.

i'm not worried about weird locking anymore - lockdep tends to expose 
them rather nicely and the lack of lockdep coverage for open-coded 
locking schemes tends to be a barrier as well. The past 2 years have 
shown that people want lockdep coverage for just about everything where 
it can be applied: workqueues, special locks, etc. (there's even 
questions about whether it could be extended to cover user-space locks.)

	Ingo

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-13 12:52     ` Matthew Wilcox
@ 2008-04-14 15:41       ` Ingo Molnar
  2008-04-14 17:46         ` Matthew Wilcox
  2008-04-14 16:54       ` Jens Axboe
  1 sibling, 1 reply; 64+ messages in thread
From: Ingo Molnar @ 2008-04-14 15:41 UTC (permalink / raw)
  To: Matthew Wilcox; +Cc: Peter Zijlstra, linux-kernel, Linus Torvalds


* Matthew Wilcox <matthew@wil.cx> wrote:

> On Sun, Apr 13, 2008 at 09:05:13AM +0200, Ingo Molnar wrote:
> > there's also another aspect: completions are faster a bit 
> > in theory, because they know that they will schedule most of the time - 
> > while semaphores assume that they will _not_ schedule. (And that's 
> > exactly because the intent of the developer when using a completion is 
> > crystal clear.)
> 
> In practice though, the current implementation is slower. [...]

any URL to benchmarks?

> [...] Of course, that's fixable, and I strongly suspect that the 
> current users of completions simply don't care about speed -- the 
> normal use of completions is in startup and shutdown paths where a 
> millisecond extra isn't going to be noticable.

completions and semaphores act in the sub-microsecond range, not in the 
milliseconds range.

	Ingo

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-14 15:39             ` Ingo Molnar
@ 2008-04-14 15:58               ` Roland Dreier
  2008-04-14 16:32                 ` Peter Zijlstra
  0 siblings, 1 reply; 64+ messages in thread
From: Roland Dreier @ 2008-04-14 15:58 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Matthew Wilcox, Peter Zijlstra, Ingo Oeser, Daniel Walker,
	linux-kernel, Linus Torvalds

 > which ones exactly are these places that demand the use of a counting 
 > semaphore? I cannot think of a single place where it's the best choice, 
 > let alone one where it's the only choice.

Two of the places that use semaphores are drivers/infiniband/hw/mthca
and drivers/net/mlx4 -- in both cases, the device firmware allows up to
"N" outstanding firmware commands to be in flight, and the driver uses a
semaphore to handle issuing firmware commands.  That is, down() when we
want to issue a command, and up() when the firmware responds that the
command is complete.

What would you suggest as a better way to code this?  This is an honest
question -- there probably is a more elegant way to handle this
situation and I really would like to learn about it.

Also, the argument that removing semaphores makes the kernel as a whole
better does make sense to me; I wouldn't be opposed to basically
open-coding semaphores in terms of wait_event() in the driver or
something like that, but I wouldn't say that such an implementation is
locally more readable or maintainable if we look only at the driver
code.

 - R.

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-14 15:58               ` Roland Dreier
@ 2008-04-14 16:32                 ` Peter Zijlstra
  2008-04-14 16:56                   ` Arjan van de Ven
  2008-04-14 17:46                   ` Andi Kleen
  0 siblings, 2 replies; 64+ messages in thread
From: Peter Zijlstra @ 2008-04-14 16:32 UTC (permalink / raw)
  To: Roland Dreier
  Cc: Ingo Molnar, Matthew Wilcox, Ingo Oeser, Daniel Walker,
	linux-kernel, Linus Torvalds

On Mon, 2008-04-14 at 08:58 -0700, Roland Dreier wrote:
> > which ones exactly are these places that demand the use of a counting 
>  > semaphore? I cannot think of a single place where it's the best choice, 
>  > let alone one where it's the only choice.
> 
> Two of the places that use semaphores are drivers/infiniband/hw/mthca
> and drivers/net/mlx4 -- in both cases, the device firmware allows up to
> "N" outstanding firmware commands to be in flight, and the driver uses a
> semaphore to handle issuing firmware commands.  That is, down() when we
> want to issue a command, and up() when the firmware responds that the
> command is complete.
> 
> What would you suggest as a better way to code this?  This is an honest
> question -- there probably is a more elegant way to handle this
> situation and I really would like to learn about it.
> 
> Also, the argument that removing semaphores makes the kernel as a whole
> better does make sense to me; I wouldn't be opposed to basically
> open-coding semaphores in terms of wait_event() in the driver or
> something like that, but I wouldn't say that such an implementation is
> locally more readable or maintainable if we look only at the driver
> code.

Yeah, I would open code it.  But this is indeed a sane usage of the
counting semaphore because there is no priority inversion.


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-13 12:52     ` Matthew Wilcox
  2008-04-14 15:41       ` Ingo Molnar
@ 2008-04-14 16:54       ` Jens Axboe
  1 sibling, 0 replies; 64+ messages in thread
From: Jens Axboe @ 2008-04-14 16:54 UTC (permalink / raw)
  To: Matthew Wilcox; +Cc: Ingo Molnar, Peter Zijlstra, linux-kernel, Linus Torvalds

On Sun, Apr 13 2008, Matthew Wilcox wrote:
> On Sun, Apr 13, 2008 at 09:05:13AM +0200, Ingo Molnar wrote:
> > there's also another aspect: completions are faster a bit 
> > in theory, because they know that they will schedule most of the time - 
> > while semaphores assume that they will _not_ schedule. (And that's 
> > exactly because the intent of the developer when using a completion is 
> > crystal clear.)
> 
> In practice though, the current implementation is slower.  Of course,
> that's fixable, and I strongly suspect that the current users of
> completions simply don't care about speed -- the normal use of
> completions is in startup and shutdown paths where a millisecond extra
> isn't going to be noticable.

I'd be surprised if it was in that range, milisecond range would
definetely mean that something was completely bused in that area :-).
IOW, I'd be surprised if you can measure much of a difference, even in
microbencmarks.

And performance does matter somewhat, it's not ONLY used in
startup/shutdown scenarios. The block layer uses it for sync requests,
for instance.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-14 16:32                 ` Peter Zijlstra
@ 2008-04-14 16:56                   ` Arjan van de Ven
  2008-04-14 17:50                     ` Matthew Wilcox
  2008-04-14 17:46                   ` Andi Kleen
  1 sibling, 1 reply; 64+ messages in thread
From: Arjan van de Ven @ 2008-04-14 16:56 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Roland Dreier, Ingo Molnar, Matthew Wilcox, Ingo Oeser,
	Daniel Walker, linux-kernel, Linus Torvalds

On Mon, 14 Apr 2008 18:32:28 +0200
Peter Zijlstra <peterz@infradead.org> wrote:

> On Mon, 2008-04-14 at 08:58 -0700, Roland Dreier wrote:
> > > which ones exactly are these places that demand the use of a
> > > counting 
> >  > semaphore? I cannot think of a single place where it's the best
> >  > choice, let alone one where it's the only choice.
> > 
> > Two of the places that use semaphores are
> > drivers/infiniband/hw/mthca and drivers/net/mlx4 -- in both cases,
> > the device firmware allows up to "N" outstanding firmware commands
> > to be in flight, and the driver uses a semaphore to handle issuing
> > firmware commands.  That is, down() when we want to issue a
> > command, and up() when the firmware responds that the command is
> > complete.
> > 
> > What would you suggest as a better way to code this?  This is an
> > honest question -- there probably is a more elegant way to handle
> > this situation and I really would like to learn about it.
> > 
> > Also, the argument that removing semaphores makes the kernel as a
> > whole better does make sense to me; I wouldn't be opposed to
> > basically open-coding semaphores in terms of wait_event() in the
> > driver or something like that, but I wouldn't say that such an
> > implementation is locally more readable or maintainable if we look
> > only at the driver code.
> 
> Yeah, I would open code it.  But this is indeed a sane usage of the
> counting semaphore because there is no priority inversion.

Maybe we need a "counter" primitive instead?
>From a conceptual point of view that even makes sense

(the implementation can be pretty much the current semaphore one of course)

-- 
If you want to reach me at my work email, use arjan@linux.intel.com
For development, discussion and tips for power savings, 
visit http://www.lesswatts.org

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: API documentation (was [PATCH] Replace completions with semaphores)
  2008-04-13 14:55           ` Bart Van Assche
@ 2008-04-14 17:12             ` Jonathan Corbet
  2008-04-14 17:33               ` Peter Zijlstra
  0 siblings, 1 reply; 64+ messages in thread
From: Jonathan Corbet @ 2008-04-14 17:12 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: Matthew Wilcox, Roland Dreier, Peter Zijlstra, Ingo Oeser,
	Daniel Walker, linux-kernel, Linus Torvalds, Ingo Molnar

Bart Van Assche <bart.vanassche@gmail.com> wrote:

> The LWN book is getting outdated after all the 2.6 kernel API changes,
> and the page with 2.6 kernel API changes was last updated six months
> ago. Where can a kernel developer find up to date information about
> kernel programming ?

The failure to update the API changes page is just me not managing to
get around to it.  I'll do my best to take care of that in the next few
days.  Apologies for that.

Updating LDD (which isn't really "the LWN book" though it's hosted here)
will take a little longer.  I'd like to find a way to produce an LDD4
with quality at least as good as LDD3, but which doesn't fill the world
with immediately-obsolete bricks of dead trees.  Still working on it...

jon

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: API documentation (was [PATCH] Replace completions with semaphores)
  2008-04-14 17:12             ` API documentation (was [PATCH] Replace completions with semaphores) Jonathan Corbet
@ 2008-04-14 17:33               ` Peter Zijlstra
  2008-04-14 18:38                 ` Bart Van Assche
  0 siblings, 1 reply; 64+ messages in thread
From: Peter Zijlstra @ 2008-04-14 17:33 UTC (permalink / raw)
  To: Jonathan Corbet
  Cc: Bart Van Assche, Matthew Wilcox, Roland Dreier, Ingo Oeser,
	Daniel Walker, linux-kernel, Linus Torvalds, Ingo Molnar

On Mon, 2008-04-14 at 11:12 -0600, Jonathan Corbet wrote:
> Bart Van Assche <bart.vanassche@gmail.com> wrote:
> 
> > The LWN book is getting outdated after all the 2.6 kernel API changes,
> > and the page with 2.6 kernel API changes was last updated six months
> > ago. Where can a kernel developer find up to date information about
> > kernel programming ?
> 
> The failure to update the API changes page is just me not managing to
> get around to it.  I'll do my best to take care of that in the next few
> days.  Apologies for that.
> 
> Updating LDD (which isn't really "the LWN book" though it's hosted here)
> will take a little longer.  I'd like to find a way to produce an LDD4
> with quality at least as good as LDD3, but which doesn't fill the world
> with immediately-obsolete bricks of dead trees.  Still working on it...

Books should only be used to obtain the general picture, any details
will be instantly-obsolete, esp at the pace Linux changes.

Most of the concepts from LDD3 are still valid, many of the details are
dead wrong.

Can't we make LDD4 a high level book, explcitly mentioning how people
should go about obtaining details? Like go ask on #kernelnewbies and the
sorts.

The thing I always tell #kernelnewbies people is to look at a related
driver (of course that kite doesn't always fly). Another good way to
learn stuff is to just read the implementation.

A 'trick' that is often useful is to look in git to see how something
was changed, provided you knew how to do it some time in the past.




^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-14 16:32                 ` Peter Zijlstra
  2008-04-14 16:56                   ` Arjan van de Ven
@ 2008-04-14 17:46                   ` Andi Kleen
  2008-04-14 17:54                     ` Peter Zijlstra
  1 sibling, 1 reply; 64+ messages in thread
From: Andi Kleen @ 2008-04-14 17:46 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Roland Dreier, Ingo Molnar, Matthew Wilcox, Ingo Oeser,
	Daniel Walker, linux-kernel, Linus Torvalds

Peter Zijlstra <peterz@infradead.org> writes:
>
> Yeah, I would open code it.  But this is indeed a sane usage of the
> counting semaphore because there is no priority inversion.

But when you open code that, how is it different from just having
semaphores? 

-Andi

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-14 15:41       ` Ingo Molnar
@ 2008-04-14 17:46         ` Matthew Wilcox
  0 siblings, 0 replies; 64+ messages in thread
From: Matthew Wilcox @ 2008-04-14 17:46 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Peter Zijlstra, linux-kernel, Linus Torvalds

On Mon, Apr 14, 2008 at 05:41:29PM +0200, Ingo Molnar wrote:
> * Matthew Wilcox <matthew@wil.cx> wrote:
> > On Sun, Apr 13, 2008 at 09:05:13AM +0200, Ingo Molnar wrote:
> > > there's also another aspect: completions are faster a bit 
> > > in theory, because they know that they will schedule most of the time - 
> > > while semaphores assume that they will _not_ schedule. (And that's 
> > > exactly because the intent of the developer when using a completion is 
> > > crystal clear.)
> > 
> > In practice though, the current implementation is slower. [...]
> 
> any URL to benchmarks?

No -- just reading the code.

> > [...] Of course, that's fixable, and I strongly suspect that the 
> > current users of completions simply don't care about speed -- the 
> > normal use of completions is in startup and shutdown paths where a 
> > millisecond extra isn't going to be noticable.
> 
> completions and semaphores act in the sub-microsecond range, not in the 
> milliseconds range.

I've clearly misled both you and Jens, for which I apologise.  My point
was "even if they were a millisecond slower, nobody would notice",
rather than "I've measured it and they're a millisecond slower" or even
"from eyeballing it, I estimate they're about a millisecond slower".

-- 
Intel are signing my paycheques ... these opinions are still mine
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours.  We can't possibly take such
a retrograde step."

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-14 16:56                   ` Arjan van de Ven
@ 2008-04-14 17:50                     ` Matthew Wilcox
  0 siblings, 0 replies; 64+ messages in thread
From: Matthew Wilcox @ 2008-04-14 17:50 UTC (permalink / raw)
  To: Arjan van de Ven
  Cc: Peter Zijlstra, Roland Dreier, Ingo Molnar, Ingo Oeser,
	Daniel Walker, linux-kernel, Linus Torvalds

On Mon, Apr 14, 2008 at 09:56:38AM -0700, Arjan van de Ven wrote:
> On Mon, 14 Apr 2008 18:32:28 +0200
> Peter Zijlstra <peterz@infradead.org> wrote:
> > Yeah, I would open code it.  But this is indeed a sane usage of the
> > counting semaphore because there is no priority inversion.
> 
> Maybe we need a "counter" primitive instead?
> From a conceptual point of view that even makes sense
> 
> (the implementation can be pretty much the current semaphore one of course)

I'm only too happy to rename semaphore.c to atomic_counter.c and do
appropriate renames.  Or maybe 'kcounter' would be more in vogue for a
name ;-)  I'd like a name that implies sleeping, but I'm not able to
think of one right now.

Then semaphores and completions each become wrappers around counters
and everybody's happy.  Right?

-- 
Intel are signing my paycheques ... these opinions are still mine
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours.  We can't possibly take such
a retrograde step."

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-14 17:46                   ` Andi Kleen
@ 2008-04-14 17:54                     ` Peter Zijlstra
  2008-04-14 18:09                       ` Daniel Walker
                                         ` (2 more replies)
  0 siblings, 3 replies; 64+ messages in thread
From: Peter Zijlstra @ 2008-04-14 17:54 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Roland Dreier, Ingo Molnar, Matthew Wilcox, Ingo Oeser,
	Daniel Walker, linux-kernel, Linus Torvalds

On Mon, 2008-04-14 at 19:46 +0200, Andi Kleen wrote:
> Peter Zijlstra <peterz@infradead.org> writes:
> >
> > Yeah, I would open code it.  But this is indeed a sane usage of the
> > counting semaphore because there is no priority inversion.
> 
> But when you open code that, how is it different from just having
> semaphores? 

Because we can then eventually get rid of semaphores, so those people
cannot mistakenly use them. Its just too easy to create prio inversion
with them around.


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-14 17:54                     ` Peter Zijlstra
@ 2008-04-14 18:09                       ` Daniel Walker
  2008-04-14 19:16                       ` Andi Kleen
  2008-04-14 19:16                       ` Alan Cox
  2 siblings, 0 replies; 64+ messages in thread
From: Daniel Walker @ 2008-04-14 18:09 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Andi Kleen, Roland Dreier, Ingo Molnar, Matthew Wilcox,
	Ingo Oeser, linux-kernel, Linus Torvalds


On Mon, 2008-04-14 at 19:54 +0200, Peter Zijlstra wrote:
> On Mon, 2008-04-14 at 19:46 +0200, Andi Kleen wrote:
> > Peter Zijlstra <peterz@infradead.org> writes:
> > >
> > > Yeah, I would open code it.  But this is indeed a sane usage of the
> > > counting semaphore because there is no priority inversion.
> > 
> > But when you open code that, how is it different from just having
> > semaphores? 
> 
> Because we can then eventually get rid of semaphores, so those people
> cannot mistakenly use them. Its just too easy to create prio inversion
> with them around.

I'm not for open coding anything .. This "writes in flight" type code
happens often enough we should have something that covers it..
completions come fairly close (since they're just like locked
semaphores) only it's not easy to initialize them to allow multiple
completes before the waiting starts..

Daniel


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: API documentation (was [PATCH] Replace completions with semaphores)
  2008-04-14 17:33               ` Peter Zijlstra
@ 2008-04-14 18:38                 ` Bart Van Assche
  0 siblings, 0 replies; 64+ messages in thread
From: Bart Van Assche @ 2008-04-14 18:38 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Jonathan Corbet, Matthew Wilcox, Roland Dreier, Ingo Oeser,
	Daniel Walker, linux-kernel, Linus Torvalds, Ingo Molnar

On Mon, Apr 14, 2008 at 7:33 PM, Peter Zijlstra <peterz@infradead.org> wrote:
>  Books should only be used to obtain the general picture, any details
>  will be instantly-obsolete, esp at the pace Linux changes.
>
>  Most of the concepts from LDD3 are still valid, many of the details are
>  dead wrong.
>
>  Can't we make LDD4 a high level book, explcitly mentioning how people
>  should go about obtaining details? Like go ask on #kernelnewbies and the
>  sorts.
>
>  The thing I always tell #kernelnewbies people is to look at a related
>  driver (of course that kite doesn't always fly). Another good way to
>  learn stuff is to just read the implementation.
>
>  A 'trick' that is often useful is to look in git to see how something
>  was changed, provided you knew how to do it some time in the past.

What I remember from the first time I wrote a network driver is that
reading the chapter in LDD3 about network drivers was the fastest way
to get started. Learning how to write a network driver by reverse
engineering existing drivers would have required much more time. My
opinion is that mentioning all relevant details in a book makes sense.

Bart.

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-14 17:54                     ` Peter Zijlstra
  2008-04-14 18:09                       ` Daniel Walker
@ 2008-04-14 19:16                       ` Andi Kleen
  2008-04-15  6:18                         ` Bart Van Assche
  2008-04-14 19:16                       ` Alan Cox
  2 siblings, 1 reply; 64+ messages in thread
From: Andi Kleen @ 2008-04-14 19:16 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Roland Dreier, Ingo Molnar, Matthew Wilcox, Ingo Oeser,
	Daniel Walker, linux-kernel, Linus Torvalds

Peter Zijlstra wrote:
> On Mon, 2008-04-14 at 19:46 +0200, Andi Kleen wrote:
>   
>> Peter Zijlstra <peterz@infradead.org> writes:
>>     
>>> Yeah, I would open code it.  But this is indeed a sane usage of the
>>> counting semaphore because there is no priority inversion.
>>>       
>> But when you open code that, how is it different from just having
>> semaphores? 
>>     
>
> Because we can then eventually get rid of semaphores, so those people
> cannot mistakenly use them. Its just too easy to create prio inversion
> with them around.
>   

But then you end up with lots of likely subtly buggy open coded
implementations. Also not good.

For me it sounds like you just want to rename semaphores to some other
name that people don't use them for normal locking?

-Andi




^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-14 17:54                     ` Peter Zijlstra
  2008-04-14 18:09                       ` Daniel Walker
  2008-04-14 19:16                       ` Andi Kleen
@ 2008-04-14 19:16                       ` Alan Cox
  2 siblings, 0 replies; 64+ messages in thread
From: Alan Cox @ 2008-04-14 19:16 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Andi Kleen, Roland Dreier, Ingo Molnar, Matthew Wilcox,
	Ingo Oeser, Daniel Walker, linux-kernel, Linus Torvalds


> Because we can then eventually get rid of semaphores, so those people
> cannot mistakenly use them. Its just too easy to create prio inversion
> with them around.

And it'll be far harder to even find the priority inversion problems in
the open coded alternatives.

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-14 19:16                       ` Andi Kleen
@ 2008-04-15  6:18                         ` Bart Van Assche
  2008-04-15  6:46                           ` Peter Zijlstra
  0 siblings, 1 reply; 64+ messages in thread
From: Bart Van Assche @ 2008-04-15  6:18 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Peter Zijlstra, Roland Dreier, Ingo Molnar, Matthew Wilcox,
	Ingo Oeser, Daniel Walker, linux-kernel, Linus Torvalds

On Mon, Apr 14, 2008 at 9:16 PM, Andi Kleen <andi@firstfloor.org> wrote:
>  For me it sounds like you just want to rename semaphores to some other
>  name that people don't use them for normal locking?

Would it really be a good idea to give a synchronization concept that
behaves exactly like a semaphore another name than "semaphore" ? The
semaphore concept is well known and is taught in every computer
science course.

Bart.

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-15  6:18                         ` Bart Van Assche
@ 2008-04-15  6:46                           ` Peter Zijlstra
  2008-04-15  7:17                             ` Bart Van Assche
  0 siblings, 1 reply; 64+ messages in thread
From: Peter Zijlstra @ 2008-04-15  6:46 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: Andi Kleen, Roland Dreier, Ingo Molnar, Matthew Wilcox,
	Ingo Oeser, Daniel Walker, linux-kernel, Linus Torvalds

On Tue, 2008-04-15 at 08:18 +0200, Bart Van Assche wrote:
> On Mon, Apr 14, 2008 at 9:16 PM, Andi Kleen <andi@firstfloor.org> wrote:
> >  For me it sounds like you just want to rename semaphores to some other
> >  name that people don't use them for normal locking?
> 
> Would it really be a good idea to give a synchronization concept that
> behaves exactly like a semaphore another name than "semaphore" ? The
> semaphore concept is well known and is taught in every computer
> science course.

Are the ramifications wrt priority inversion taught? Is it made clear
that its hard to validate because there is no clear resource owner?

Afaik, non of these subjects are touched upon in the CS-101 courses and
that is exactly the problem. So you can say they are not well know, they
are just widely misunderstood.

And yes, if there are more hand a very few such users it doesn't make
sense to keep them open coded.


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-15  6:46                           ` Peter Zijlstra
@ 2008-04-15  7:17                             ` Bart Van Assche
  2008-04-15  8:44                               ` Peter Zijlstra
  0 siblings, 1 reply; 64+ messages in thread
From: Bart Van Assche @ 2008-04-15  7:17 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Andi Kleen, Roland Dreier, Ingo Molnar, Matthew Wilcox,
	Ingo Oeser, Daniel Walker, linux-kernel, Linus Torvalds

On Tue, Apr 15, 2008 at 8:46 AM, Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Tue, 2008-04-15 at 08:18 +0200, Bart Van Assche wrote:
>  > Would it really be a good idea to give a synchronization concept that
>  > behaves exactly like a semaphore another name than "semaphore" ? The
>  > semaphore concept is well known and is taught in every computer
>  > science course.
>
>  Are the ramifications wrt priority inversion taught? Is it made clear
>  that its hard to validate because there is no clear resource owner?
>
>  Afaik, non of these subjects are touched upon in the CS-101 courses and
>  that is exactly the problem. So you can say they are not well know, they
>  are just widely misunderstood.
>
>  And yes, if there are more hand a very few such users it doesn't make
>  sense to keep them open coded.

Regarding semaphores and priority inversion: I have never recommended
the use of semaphores over mutexes, all I recommended is to keep the
name "semaphore" for something that behaves like a semaphore. There
might be better ways to discourage the use of the semaphore API, e.g.
letting the compiler print a warning every time a semaphore function
is called unless one or another #define has been enabled.

Regarding priority inheritance: does the above mean that you consider
priority inheritance as an optimal solution for realizing real-time
behavior in the kernel ? Are you aware of the fundamental problems
associated with priority inheritance ? These issues are well explained
in Victor Yodaiken's paper "Against priority inheritance". See also
http://www.linuxdevices.com/files/misc/yodaiken-july02.pdf .

Bart.

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-15  7:17                             ` Bart Van Assche
@ 2008-04-15  8:44                               ` Peter Zijlstra
  2008-04-15 13:15                                 ` Bart Van Assche
  2008-04-15 16:09                                 ` Linus Torvalds
  0 siblings, 2 replies; 64+ messages in thread
From: Peter Zijlstra @ 2008-04-15  8:44 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: Andi Kleen, Roland Dreier, Ingo Molnar, Matthew Wilcox,
	Ingo Oeser, Daniel Walker, linux-kernel, Linus Torvalds

On Tue, 2008-04-15 at 09:17 +0200, Bart Van Assche wrote:
> On Tue, Apr 15, 2008 at 8:46 AM, Peter Zijlstra <peterz@infradead.org> wrote:
> >
> > On Tue, 2008-04-15 at 08:18 +0200, Bart Van Assche wrote:
> >  > Would it really be a good idea to give a synchronization concept that
> >  > behaves exactly like a semaphore another name than "semaphore" ? The
> >  > semaphore concept is well known and is taught in every computer
> >  > science course.
> >
> >  Are the ramifications wrt priority inversion taught? Is it made clear
> >  that its hard to validate because there is no clear resource owner?
> >
> >  Afaik, non of these subjects are touched upon in the CS-101 courses and
> >  that is exactly the problem. So you can say they are not well know, they
> >  are just widely misunderstood.
> >
> >  And yes, if there are more hand a very few such users it doesn't make
> >  sense to keep them open coded.
> 
> Regarding semaphores and priority inversion: I have never recommended
> the use of semaphores over mutexes, all I recommended is to keep the
> name "semaphore" for something that behaves like a semaphore. There
> might be better ways to discourage the use of the semaphore API, e.g.
> letting the compiler print a warning every time a semaphore function
> is called unless one or another #define has been enabled.

That sounds horrible; I really prefer targeted replacements like
completions that make it clear what they're supposed to be used for.

> Regarding priority inheritance: does the above mean that you consider
> priority inheritance as an optimal solution for realizing real-time
> behavior in the kernel ? Are you aware of the fundamental problems
> associated with priority inheritance ? These issues are well explained
> in Victor Yodaiken's paper "Against priority inheritance". See also
> http://www.linuxdevices.com/files/misc/yodaiken-july02.pdf .

Priority inheritance isn't ideal, but comming from a general purpose
kernel that wasn't build from scratch to accomodate hard realtime its
basically the only option.

Also things like lockdep are a real help to a lot of developers - loads
of locking bugs never make it into the kernel because of it.


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-15  8:44                               ` Peter Zijlstra
@ 2008-04-15 13:15                                 ` Bart Van Assche
  2008-04-15 16:09                                 ` Linus Torvalds
  1 sibling, 0 replies; 64+ messages in thread
From: Bart Van Assche @ 2008-04-15 13:15 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Andi Kleen, Roland Dreier, Ingo Molnar, Matthew Wilcox,
	Ingo Oeser, Daniel Walker, linux-kernel, Linus Torvalds

On Tue, Apr 15, 2008 at 10:44 AM, Peter Zijlstra <peterz@infradead.org> wrote:
>  Also things like lockdep are a real help to a lot of developers - loads
>  of locking bugs never make it into the kernel because of it.

I agree that lockdep is a real help for kernel developers. But please
keep in mind that locking order checking has its limitations. While
locking order checking can detect certain very important classes of
deadlocks, it can't detect all classes of possible deadlocks. Consider
e.g. the example mentioned by Roland Dreier, where semaphores are used
to count the number of elements in a queue. Deadlocks triggered by
waiting for a certain semaphore value can't be detected by
lockdep-style algorithms. And renaming semaphores into something else
won't help.

Bart.

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-15  8:44                               ` Peter Zijlstra
  2008-04-15 13:15                                 ` Bart Van Assche
@ 2008-04-15 16:09                                 ` Linus Torvalds
  2008-04-15 16:27                                   ` Andi Kleen
  1 sibling, 1 reply; 64+ messages in thread
From: Linus Torvalds @ 2008-04-15 16:09 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Bart Van Assche, Andi Kleen, Roland Dreier, Ingo Molnar,
	Matthew Wilcox, Ingo Oeser, Daniel Walker, linux-kernel



On Tue, 15 Apr 2008, Peter Zijlstra wrote:
> 
> That sounds horrible; I really prefer targeted replacements like
> completions that make it clear what they're supposed to be used for.

I agree. I'd rather _keep_ the completions, and get rid of the semaphores, 
so I think the whole fundamental patch here under discussion is broken.

We should do:
 - continue replacing semaphores that are used purely for mutual exclusion 
   with mutexes.
 - probably add support for completions to do counting
 - and then eventually just do the _reverse_ of what the current patch 
   does, namely replace the existing counting semaphore usage with 
   completions.

Hmm?

		Linus

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-15 16:09                                 ` Linus Torvalds
@ 2008-04-15 16:27                                   ` Andi Kleen
  2008-04-15 16:57                                     ` Linus Torvalds
  0 siblings, 1 reply; 64+ messages in thread
From: Andi Kleen @ 2008-04-15 16:27 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Peter Zijlstra, Bart Van Assche, Roland Dreier, Ingo Molnar,
	Matthew Wilcox, Ingo Oeser, Daniel Walker, linux-kernel


>  - probably add support for completions to do counting

But that's just a semaphore, isn't it?

-Andi

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-15 16:27                                   ` Andi Kleen
@ 2008-04-15 16:57                                     ` Linus Torvalds
  2008-04-15 17:05                                       ` Ingo Molnar
  2008-04-15 17:15                                       ` [PATCH] Replace completions with semaphores Andi Kleen
  0 siblings, 2 replies; 64+ messages in thread
From: Linus Torvalds @ 2008-04-15 16:57 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Peter Zijlstra, Bart Van Assche, Roland Dreier, Ingo Molnar,
	Matthew Wilcox, Ingo Oeser, Daniel Walker, linux-kernel



On Tue, 15 Apr 2008, Andi Kleen wrote:
> 
> >  - probably add support for completions to do counting
> 
> But that's just a semaphore, isn't it?

Exactly. But the point here is:

 - nobody should use semaphores anyway (use mutexes)
 - making *more* code use semaphores is wrong
 - completions have a different _mental_ model

IOW, this is not about implementation issues. It's about how you think 
about the operations.

We should _not_ implement completions as semaphores, simply because we 
want to get *rid* of semaphores some day. 

So rather than this long and involved patch series that first makes 
semaphores generic, and then makes them be used as completions, I'd much 
rather just skip this whole pointless exercise entirely. 

Why have "generic semaphores" at all, if we want to get rid of them? 

		Linus

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-15 16:57                                     ` Linus Torvalds
@ 2008-04-15 17:05                                       ` Ingo Molnar
  2008-04-15 18:50                                         ` Matthew Wilcox
  2008-04-15 17:15                                       ` [PATCH] Replace completions with semaphores Andi Kleen
  1 sibling, 1 reply; 64+ messages in thread
From: Ingo Molnar @ 2008-04-15 17:05 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Andi Kleen, Peter Zijlstra, Bart Van Assche, Roland Dreier,
	Matthew Wilcox, Ingo Oeser, Daniel Walker, linux-kernel


* Linus Torvalds <torvalds@linux-foundation.org> wrote:

> Why have "generic semaphores" at all, if we want to get rid of them?

i very much agree with the "get rid of semaphores" argument - the reason 
why i initially supported the "move to generic semaphores" step was 
because i saw it basically as the precursor to full removal: it is the 
removal of semaphores from all architectures - with a small generic 
compatibility wrapper to handle the remaining few uses of semaphores.

i got thoroughly surprised by the "increase the scope of semaphores" 
angle to the patchset though, and in hindsight i'd rather see neither of 
those generalizations and see semaphores die a slow but sure natural 
death than to see their prolongation :-/

perhaps the 'remove all semaphores from all architectures' privilege 
should be a final step kept for the lucky guy who manages to get rid of 
the last remaining semaphore use in the kernel? :)

	Ingo

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-15 16:57                                     ` Linus Torvalds
  2008-04-15 17:05                                       ` Ingo Molnar
@ 2008-04-15 17:15                                       ` Andi Kleen
  2008-04-15 17:26                                         ` Linus Torvalds
  1 sibling, 1 reply; 64+ messages in thread
From: Andi Kleen @ 2008-04-15 17:15 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Peter Zijlstra, Bart Van Assche, Roland Dreier, Ingo Molnar,
	Matthew Wilcox, Ingo Oeser, Daniel Walker, linux-kernel

Linus Torvalds wrote:
> 
> On Tue, 15 Apr 2008, Andi Kleen wrote:
>>>  - probably add support for completions to do counting
>> But that's just a semaphore, isn't it?
> 
> Exactly. But the point here is:
> 
>  - nobody should use semaphores anyway (use mutexes)

For normal locks. But if you have N number of outstanding
events you need to wait for the semaphore is the right primitive.

And it seems there is a not high but non trivial number
of places in the kernel who have a legitimate need for this.

>  - making *more* code use semaphores is wrong
>  - completions have a different _mental_ model
>
> IOW, this is not about implementation issues. It's about how you think 
> about the operations.

Ok so you just want to rename it.

Fine for me. I always found up() and down() unintuitive anyways
(but it's admittedly better than "P" and "V" which some other systems use)

> We should _not_ implement completions as semaphores, simply because we 
> want to get *rid* of semaphores some day. 
> 
> So rather than this long and involved patch series that first makes 
> semaphores generic, and then makes them be used as completions, I'd much 
> rather just skip this whole pointless exercise entirely. 
> 
> Why have "generic semaphores" at all, if we want to get rid of them? 

Because we still "counted completions" for some things and that's the
same code?

Rather i suspect the real problem is not the name, but just not sure
it gets abused. That is largely more a review problem and as far as I
can figure out basically all the usual reviewers take care of that
anyways. But renaming it also probably wouldn't hurt.

[IMHO I always thought we should have a maintained single "list of
things for reviewers to watch out for" list somewhere]

-Andi


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-15 17:15                                       ` [PATCH] Replace completions with semaphores Andi Kleen
@ 2008-04-15 17:26                                         ` Linus Torvalds
  2008-04-15 17:41                                           ` Matthew Wilcox
  0 siblings, 1 reply; 64+ messages in thread
From: Linus Torvalds @ 2008-04-15 17:26 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Peter Zijlstra, Bart Van Assche, Roland Dreier, Ingo Molnar,
	Matthew Wilcox, Ingo Oeser, Daniel Walker, linux-kernel



On Tue, 15 Apr 2008, Andi Kleen wrote:
> 
> For normal locks. But if you have N number of outstanding
> events you need to wait for the semaphore is the right primitive.

Right. Except if you were to just use completions instead (which could be 
trivially extended to do that).

MUCH more trivial than this complex series.

(You may think that the "Replace completions with semaphores" patch is not 
very complicated, but it *is* - it depends very intimately on the big 
patch-series that basically turns semaphores into what completions are 
now!)

In other words, what makes me not like this is hat we first turn 
semaphores into the generic code (which is largely what completions were: 
just a special case of the generic semaphores!) and then turns completions 
into these things. That just doesn't make any sense to me!

			Linus

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-15 17:26                                         ` Linus Torvalds
@ 2008-04-15 17:41                                           ` Matthew Wilcox
  2008-04-15 18:14                                             ` Linus Torvalds
  0 siblings, 1 reply; 64+ messages in thread
From: Matthew Wilcox @ 2008-04-15 17:41 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Andi Kleen, Peter Zijlstra, Bart Van Assche, Roland Dreier,
	Ingo Molnar, Ingo Oeser, Daniel Walker, linux-kernel

On Tue, Apr 15, 2008 at 10:26:21AM -0700, Linus Torvalds wrote:
> MUCH more trivial than this complex series.
> 
> (You may think that the "Replace completions with semaphores" patch is not 
> very complicated, but it *is* - it depends very intimately on the big 
> patch-series that basically turns semaphores into what completions are 
> now!)

complex?  big?  The big bits are dealing with renaming asm/semaphore.h to
linux/semaphore.h, and I've dropped those out now.  There's a couple of
up-front patches which add inclusions of asm/semaphore.h to files which
were missing it.  Then I add the new semaphore implementation, delete
the old ones, and add the down_timeout() and down_killable() functions.

> In other words, what makes me not like this is hat we first turn 
> semaphores into the generic code (which is largely what completions were: 
> just a special case of the generic semaphores!) and then turns completions 
> into these things. That just doesn't make any sense to me!

Blame me for not realising that completions were semaphores under a
different name.

-- 
Intel are signing my paycheques ... these opinions are still mine
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours.  We can't possibly take such
a retrograde step."

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-15 17:41                                           ` Matthew Wilcox
@ 2008-04-15 18:14                                             ` Linus Torvalds
  2008-04-16 16:07                                               ` Ingo Oeser
  0 siblings, 1 reply; 64+ messages in thread
From: Linus Torvalds @ 2008-04-15 18:14 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Andi Kleen, Peter Zijlstra, Bart Van Assche, Roland Dreier,
	Ingo Molnar, Ingo Oeser, Daniel Walker, linux-kernel



On Tue, 15 Apr 2008, Matthew Wilcox wrote:
> 
> > In other words, what makes me not like this is hat we first turn 
> > semaphores into the generic code (which is largely what completions were: 
> > just a special case of the generic semaphores!) and then turns completions 
> > into these things. That just doesn't make any sense to me!
> 
> Blame me for not realising that completions were semaphores under a
> different name.

The origin of completions is literally the semaphore code - just 
simplified to use spinlocks and be usable as just a mutex. We used to use 
semaphores, and because of the subtle race with lockless semaphores I 
wrote that stupid completion code as a "generic semaphore with a very 
specific usage schenario" and called them "completions".

The completions _could_ have been extended/used as mutex semaphores, but 
the difference was really the mental model for them. That then limited the 
implementation of them: the functions working on completions are defined 
on purpose to be limited - it doesn't really have "up()" and "down()" 
functions: "complete()" is really a up(), but "wait_for_completion()" is 
more like a "wait_until_I_could_do_a_trydown()" function.

Would it make sense to use completions for countable events too? Yeah. In 
fact, we have some things that really would like to do counting, both in 
the sense of "wait for <n> events to all complete" _and_ in the sense of 
"allow up to <n> events to be outstanding". Both of which could be done as 
a counting function (just make "complete" increment the counter, and then 
make "wait for <n> events" initialize it to negative, while "allow <n>
outstanding events" would be a positive counter, and make 
"wait_for_completion()" basically be a "decrement and wait until it 
is zero".

IOW, completions() really follow the same patterns as semaphores, and it 
*does* make sense to just have one single code-base. But if we want to 
make semaphores go away, I think that it would be better to implement 
semaphores in terms of "extended completions" rather than the other way 
around. That way, we could one day really get rid of semaphores entirely.

		Linus

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-15 17:05                                       ` Ingo Molnar
@ 2008-04-15 18:50                                         ` Matthew Wilcox
  2008-04-16 12:37                                           ` Ingo Molnar
  0 siblings, 1 reply; 64+ messages in thread
From: Matthew Wilcox @ 2008-04-15 18:50 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, Andi Kleen, Peter Zijlstra, Bart Van Assche,
	Roland Dreier, Ingo Oeser, Daniel Walker, linux-kernel

On Tue, Apr 15, 2008 at 07:05:56PM +0200, Ingo Molnar wrote:
> i very much agree with the "get rid of semaphores" argument - the reason 
> why i initially supported the "move to generic semaphores" step was 
> because i saw it basically as the precursor to full removal: it is the 
> removal of semaphores from all architectures - with a small generic 
> compatibility wrapper to handle the remaining few uses of semaphores.

Hm.  I thought you initially supported it because it deleted so much
code.  I don't want to go and add down_killable() to each architecture
again, and you were pretty enthusiastic about adding down_killable().

> i got thoroughly surprised by the "increase the scope of semaphores" 
> angle to the patchset though, and in hindsight i'd rather see neither of 
> those generalizations and see semaphores die a slow but sure natural 
> death than to see their prolongation :-/

I'm fully in favour of reducing the number of semaphore users, and
eventually eliminating them.  Arjan and I discussed a way to do that
just now ... I'll write some code, see how it looks.

-- 
Intel are signing my paycheques ... these opinions are still mine
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours.  We can't possibly take such
a retrograde step."

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-15 18:50                                         ` Matthew Wilcox
@ 2008-04-16 12:37                                           ` Ingo Molnar
  2008-04-16 12:50                                             ` Andi Kleen
  0 siblings, 1 reply; 64+ messages in thread
From: Ingo Molnar @ 2008-04-16 12:37 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Linus Torvalds, Andi Kleen, Peter Zijlstra, Bart Van Assche,
	Roland Dreier, Ingo Oeser, Daniel Walker, linux-kernel


* Matthew Wilcox <matthew@wil.cx> wrote:

> On Tue, Apr 15, 2008 at 07:05:56PM +0200, Ingo Molnar wrote:
>
> > i very much agree with the "get rid of semaphores" argument - the 
> > reason why i initially supported the "move to generic semaphores" 
> > step was because i saw it basically as the precursor to full 
> > removal: it is the removal of semaphores from all architectures - 
> > with a small generic compatibility wrapper to handle the remaining 
> > few uses of semaphores.
> 
> Hm.  I thought you initially supported it because it deleted so much 
> code. [...]

... sorry, but i always thought of semaphores to be removed completely.

> [...]  I don't want to go and add down_killable() to each architecture 
> again, and you were pretty enthusiastic about adding down_killable().

... the killable sleeps should and are already propagated everywhere - i 
never thought of them as a semaphore-only feature.

killable sleeps are probably the next best thing to true 
interruptability.

btw., has anyone thought about killable sync/fsync syscalls - would that 
surprise too many programs?

> > i got thoroughly surprised by the "increase the scope of semaphores" 
> > angle to the patchset though, and in hindsight i'd rather see 
> > neither of those generalizations and see semaphores die a slow but 
> > sure natural death than to see their prolongation :-/
> 
> I'm fully in favour of reducing the number of semaphore users, and 
> eventually eliminating them.  Arjan and I discussed a way to do that 
> just now ... I'll write some code, see how it looks.

cool!

	Ingo

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-16 12:37                                           ` Ingo Molnar
@ 2008-04-16 12:50                                             ` Andi Kleen
  2008-04-16 12:59                                               ` Killable stat/readdir Matthew Wilcox
  0 siblings, 1 reply; 64+ messages in thread
From: Andi Kleen @ 2008-04-16 12:50 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Matthew Wilcox, Linus Torvalds, Peter Zijlstra, Bart Van Assche,
	Roland Dreier, Ingo Oeser, Daniel Walker, linux-kernel


> btw., has anyone thought about killable sync/fsync syscalls - would that 
> surprise too many programs?

killable stat and readdir would be even more important I would say.

When sync/fsync takes too long it's typically just a kernel bug
of some sort (like that long running MM starvation issue that stalls
writes on some kinds of background activity) that should be really just
fixed.

But handling down network servers which hit stat/readdir etc. is
a real situation not explained by a bug.

For example a standard situation that hits me regularly is that
I save something in firefox on a different server and then later
turn that machine off. Then next time I try to save something
in firefox it first blocks forever in stat()ing that down directory.

-Andi


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Killable stat/readdir
  2008-04-16 12:50                                             ` Andi Kleen
@ 2008-04-16 12:59                                               ` Matthew Wilcox
  0 siblings, 0 replies; 64+ messages in thread
From: Matthew Wilcox @ 2008-04-16 12:59 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Ingo Molnar, Linus Torvalds, Peter Zijlstra, Bart Van Assche,
	Roland Dreier, Ingo Oeser, Daniel Walker, linux-kernel

On Wed, Apr 16, 2008 at 02:50:34PM +0200, Andi Kleen wrote:
> killable stat and readdir would be even more important I would say.
> 
> When sync/fsync takes too long it's typically just a kernel bug
> of some sort (like that long running MM starvation issue that stalls
> writes on some kinds of background activity) that should be really just
> fixed.
> 
> But handling down network servers which hit stat/readdir etc. is
> a real situation not explained by a bug.
> 
> For example a standard situation that hits me regularly is that
> I save something in firefox on a different server and then later
> turn that machine off. Then next time I try to save something
> in firefox it first blocks forever in stat()ing that down directory.

I think that's fixed now.  Can you check with 2.6.25-rc1 or later?
If it's unkillable, can you let me know the wchan for that process?

-- 
Intel are signing my paycheques ... these opinions are still mine
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours.  We can't possibly take such
a retrograde step."

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-15 18:14                                             ` Linus Torvalds
@ 2008-04-16 16:07                                               ` Ingo Oeser
  2008-04-16 16:16                                                 ` Matthew Wilcox
  0 siblings, 1 reply; 64+ messages in thread
From: Ingo Oeser @ 2008-04-16 16:07 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Matthew Wilcox, Andi Kleen, Peter Zijlstra, Bart Van Assche,
	Roland Dreier, Ingo Molnar, Daniel Walker, linux-kernel

On Tuesday 15 April 2008, Linus Torvalds wrote:
> Would it make sense to use completions for countable events too? Yeah. In 
> fact, we have some things that really would like to do counting, both in 
> the sense of "wait for <n> events to all complete" _and_ in the sense of 
> "allow up to <n> events to be outstanding". Both of which could be done as 
> a counting function (just make "complete" increment the counter, and then 
> make "wait for <n> events" initialize it to negative, while "allow <n>
> outstanding events" would be a positive counter, and make 
> "wait_for_completion()" basically be a "decrement and wait until it 

Hmm, why not introduce a provide_slots(), 
wait_for_slot() and free_slot() API?

provide_slots() :=  initialize a resource with N available slots
wait_for_slot() :=  wait for a slot in countable resource to be free
free_slot() :=  make a slot available

That would:
- fit the use case nicely
- could be optimized for countable resources
  with upper limits
- allow to measure slot utilization
- ...
- is not called semaphore :-)

And isn't this the same problem (called scheduling) as:
- keeping N cpus busy
- keeping N (disk) spindles busy
- keeping N nic transmit queues busy
- ...

???

But maybe I oversimplify here :-)


Best Regards

Ingo Oeser

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-16 16:07                                               ` Ingo Oeser
@ 2008-04-16 16:16                                                 ` Matthew Wilcox
  2008-04-16 16:31                                                   ` Oliver Neukum
  0 siblings, 1 reply; 64+ messages in thread
From: Matthew Wilcox @ 2008-04-16 16:16 UTC (permalink / raw)
  To: Ingo Oeser
  Cc: Linus Torvalds, Andi Kleen, Peter Zijlstra, Bart Van Assche,
	Roland Dreier, Ingo Molnar, Daniel Walker, linux-kernel

On Wed, Apr 16, 2008 at 06:07:24PM +0200, Ingo Oeser wrote:
> Hmm, why not introduce a provide_slots(), 
> wait_for_slot() and free_slot() API?
> 
> provide_slots() :=  initialize a resource with N available slots
> wait_for_slot() :=  wait for a slot in countable resource to be free
> free_slot() :=  make a slot available
> 
> That would:
> - fit the use case nicely
> - could be optimized for countable resources
>   with upper limits
> - allow to measure slot utilization
> - ...
> - is not called semaphore :-)

The kcounter API I'm working on has something in common with this.  But
it adds the more important feature:

 - Is debuggable

I've also researched the current users of semaphores and completions and
the API has to be extended a few different ways to take account of some
reasonable usage patterns.

The basic idea is that you get back a cookie from the kcounter_claim()
which you have to hand to the kcounter_release() function so it
knows which one you released.  It's similar to mutex debugging except
that mutexes can use the thread_info pointer as an implicit cookie.
Completions and semaphores have usage patterns where the resource is
released from interrupt context.

I'm not finished yet, but this is the current API:

extern void kcounter_init(struct kcounter *kc, unsigned int count);

extern long __must_check kcounter_claim(struct kcounter *kc);
extern long __must_check kcounter_claim_interruptible(struct kcounter *kc);
extern long __must_check kcounter_claim_timeout(struct kcounter *kc,
                                                long jiffies);
extern long __must_check kcounter_claim_interruptible_timeout(
                                struct kcounter *kc, long jiffies);
extern long __must_check kcounter_try_claim(struct kcounter *kc);
extern int __must_check kcounter_has_resources(struct kcounter *kc);
extern void kcounter_release(struct kcounter *kc, long resource);

#ifdef CONFIG_DEBUG_KCOUNTER
extern void kcounter_add_resource(struct kcounter *kc);
extern int __must_check kcounter_remove_resource(struct kcounter *kc);
#else
#define kcounter_add_resource(kc)       kcounter_release(kc, 0)
#define kcounter_remove_resource(kc)    kcounter_claim(kc)
#endif
extern void kcounter_add_all_resources(struct kcounter *kc);

The kcounter_add_resource()/kcounter_remove_resource() API is for where
we're actually adding new slots rather than releasing an already-acquired
slot.  Without debugging they're the same thing, but conceptually,
they're a different thing.

> And isn't this the same problem (called scheduling) as:
> - keeping N cpus busy
> - keeping N (disk) spindles busy
> - keeping N nic transmit queues busy
> - ...

It's not nearly as complex.  Each of those uses domain-specific knowledge
to optimise the solution.

-- 
Intel are signing my paycheques ... these opinions are still mine
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours.  We can't possibly take such
a retrograde step."

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-16 16:16                                                 ` Matthew Wilcox
@ 2008-04-16 16:31                                                   ` Oliver Neukum
  2008-04-16 16:34                                                     ` Matthew Wilcox
  2008-04-16 16:50                                                     ` Arjan van de Ven
  0 siblings, 2 replies; 64+ messages in thread
From: Oliver Neukum @ 2008-04-16 16:31 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Ingo Oeser, Linus Torvalds, Andi Kleen, Peter Zijlstra,
	Bart Van Assche, Roland Dreier, Ingo Molnar, Daniel Walker,
	linux-kernel

Am Mittwoch, 16. April 2008 18:16:52 schrieb Matthew Wilcox:
> The basic idea is that you get back a cookie from the kcounter_claim()
> which you have to hand to the kcounter_release() function so it
> knows which one you released.  It's similar to mutex debugging except

So in addition to the kcounter we need to save a token in a data structure?
In fact, there must be a data structure that can house that token. So you
can no longer live with a pointer just to a device descriptor, but every
individual use of a resource must have an associated data structure?

	Regards
		Oliver


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-16 16:31                                                   ` Oliver Neukum
@ 2008-04-16 16:34                                                     ` Matthew Wilcox
  2008-04-16 16:42                                                       ` Oliver Neukum
  2008-04-16 16:50                                                     ` Arjan van de Ven
  1 sibling, 1 reply; 64+ messages in thread
From: Matthew Wilcox @ 2008-04-16 16:34 UTC (permalink / raw)
  To: Oliver Neukum
  Cc: Ingo Oeser, Linus Torvalds, Andi Kleen, Peter Zijlstra,
	Bart Van Assche, Roland Dreier, Ingo Molnar, Daniel Walker,
	linux-kernel

On Wed, Apr 16, 2008 at 06:31:08PM +0200, Oliver Neukum wrote:
> Am Mittwoch, 16. April 2008 18:16:52 schrieb Matthew Wilcox:
> > The basic idea is that you get back a cookie from the kcounter_claim()
> > which you have to hand to the kcounter_release() function so it
> > knows which one you released. ?It's similar to mutex debugging except
> 
> So in addition to the kcounter we need to save a token in a data structure?
> In fact, there must be a data structure that can house that token. So you
> can no longer live with a pointer just to a device descriptor, but every
> individual use of a resource must have an associated data structure?

That's right.  Do you have an example where this would be inconvenient?
I couldn't find one.  For example, with USB, you could place one in the
struct urb.

-- 
Intel are signing my paycheques ... these opinions are still mine
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours.  We can't possibly take such
a retrograde step."

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-16 16:34                                                     ` Matthew Wilcox
@ 2008-04-16 16:42                                                       ` Oliver Neukum
  2008-04-16 16:44                                                         ` Matthew Wilcox
  0 siblings, 1 reply; 64+ messages in thread
From: Oliver Neukum @ 2008-04-16 16:42 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Ingo Oeser, Linus Torvalds, Andi Kleen, Peter Zijlstra,
	Bart Van Assche, Roland Dreier, Ingo Molnar, Daniel Walker,
	linux-kernel

Am Mittwoch, 16. April 2008 18:34:14 schrieb Matthew Wilcox:
> On Wed, Apr 16, 2008 at 06:31:08PM +0200, Oliver Neukum wrote:
> > Am Mittwoch, 16. April 2008 18:16:52 schrieb Matthew Wilcox:
> > > The basic idea is that you get back a cookie from the kcounter_claim()
> > > which you have to hand to the kcounter_release() function so it
> > > knows which one you released. ?It's similar to mutex debugging except
> > 
> > So in addition to the kcounter we need to save a token in a data structure?
> > In fact, there must be a data structure that can house that token. So you
> > can no longer live with a pointer just to a device descriptor, but every
> > individual use of a resource must have an associated data structure?
> 
> That's right.  Do you have an example where this would be inconvenient?
> I couldn't find one.  For example, with USB, you could place one in the
> struct urb.

That's a data structure we really want to shrink. And furthermore, the needs
of the use cases should shape the locking primitives, not the reverse.

	Regards
		Oliver

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-16 16:42                                                       ` Oliver Neukum
@ 2008-04-16 16:44                                                         ` Matthew Wilcox
  2008-04-16 16:47                                                           ` Roland Dreier
  0 siblings, 1 reply; 64+ messages in thread
From: Matthew Wilcox @ 2008-04-16 16:44 UTC (permalink / raw)
  To: Oliver Neukum
  Cc: Ingo Oeser, Linus Torvalds, Andi Kleen, Peter Zijlstra,
	Bart Van Assche, Roland Dreier, Ingo Molnar, Daniel Walker,
	linux-kernel

On Wed, Apr 16, 2008 at 06:42:50PM +0200, Oliver Neukum wrote:
> Am Mittwoch, 16. April 2008 18:34:14 schrieb Matthew Wilcox:
> > On Wed, Apr 16, 2008 at 06:31:08PM +0200, Oliver Neukum wrote:
> > > Am Mittwoch, 16. April 2008 18:16:52 schrieb Matthew Wilcox:
> > > > The basic idea is that you get back a cookie from the kcounter_claim()
> > > > which you have to hand to the kcounter_release() function so it
> > > > knows which one you released. ?It's similar to mutex debugging except
> > > 
> > > So in addition to the kcounter we need to save a token in a data structure?
> > > In fact, there must be a data structure that can house that token. So you
> > > can no longer live with a pointer just to a device descriptor, but every
> > > individual use of a resource must have an associated data structure?
> > 
> > That's right.  Do you have an example where this would be inconvenient?
> > I couldn't find one.  For example, with USB, you could place one in the
> > struct urb.
> 
> That's a data structure we really want to shrink. And furthermore, the needs
> of the use cases should shape the locking primitives, not the reverse.

The cookies aren't checked by the kcounter implementation if
CONFIG_DEBUG_KCOUNTER isn't set.  So you can avoid storing them if it's
*really* that important to shrink your data structures.

You'll still have to check the return value from the various
kcounter_claim* APIs for errors, of course.

-- 
Intel are signing my paycheques ... these opinions are still mine
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours.  We can't possibly take such
a retrograde step."

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-16 16:44                                                         ` Matthew Wilcox
@ 2008-04-16 16:47                                                           ` Roland Dreier
  0 siblings, 0 replies; 64+ messages in thread
From: Roland Dreier @ 2008-04-16 16:47 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Oliver Neukum, Ingo Oeser, Linus Torvalds, Andi Kleen,
	Peter Zijlstra, Bart Van Assche, Ingo Molnar, Daniel Walker,
	linux-kernel

 > The cookies aren't checked by the kcounter implementation if
 > CONFIG_DEBUG_KCOUNTER isn't set.  So you can avoid storing them if it's
 > *really* that important to shrink your data structures.

I guess if anyone cared we could have something analogous to
DECLARE_PCI_UNMAP_ADDR() et al that compiles to nothing if
CONFIG_DEBUG_KCOUNTER isn't set.  But I'm not sure it's worth it.
Certainly for the mthca/mlx4 cases of semaphore use there's no problem
holding onto a cookie.

 - R.

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-16 16:31                                                   ` Oliver Neukum
  2008-04-16 16:34                                                     ` Matthew Wilcox
@ 2008-04-16 16:50                                                     ` Arjan van de Ven
  2008-04-16 16:58                                                       ` Matthew Wilcox
  1 sibling, 1 reply; 64+ messages in thread
From: Arjan van de Ven @ 2008-04-16 16:50 UTC (permalink / raw)
  To: Oliver Neukum
  Cc: Matthew Wilcox, Ingo Oeser, Linus Torvalds, Andi Kleen,
	Peter Zijlstra, Bart Van Assche, Roland Dreier, Ingo Molnar,
	Daniel Walker, linux-kernel

On Wed, 16 Apr 2008 18:31:08 +0200
Oliver Neukum <oliver@neukum.org> wrote:

> Am Mittwoch, 16. April 2008 18:16:52 schrieb Matthew Wilcox:
> > The basic idea is that you get back a cookie from the
> > kcounter_claim() which you have to hand to the kcounter_release()
> > function so it knows which one you released.  It's similar to mutex
> > debugging except
> 
> So in addition to the kcounter we need to save a token in a data
> structure? In fact, there must be a data structure that can house
> that token. So you can no longer live with a pointer just to a device
> descriptor, but every individual use of a resource must have an
> associated data structure?

yup. For kcounters there is a clear owner for each "slot".
[This is the part that makes it debugable again)

Now for non-debug builds, the space taken for the token can be.. zero 
depending on how we define the types for it.

> 
> 	Regards
> 		Oliver
> 
> --
> To unsubscribe from this list: send the line "unsubscribe
> linux-kernel" in the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/


-- 
If you want to reach me at my work email, use arjan@linux.intel.com
For development, discussion and tips for power savings, 
visit http://www.lesswatts.org

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-16 16:50                                                     ` Arjan van de Ven
@ 2008-04-16 16:58                                                       ` Matthew Wilcox
  2008-04-16 17:08                                                         ` Arjan van de Ven
  0 siblings, 1 reply; 64+ messages in thread
From: Matthew Wilcox @ 2008-04-16 16:58 UTC (permalink / raw)
  To: Arjan van de Ven
  Cc: Oliver Neukum, Ingo Oeser, Linus Torvalds, Andi Kleen,
	Peter Zijlstra, Bart Van Assche, Roland Dreier, Ingo Molnar,
	Daniel Walker, linux-kernel

On Wed, Apr 16, 2008 at 09:50:26AM -0700, Arjan van de Ven wrote:
> yup. For kcounters there is a clear owner for each "slot".
> [This is the part that makes it debugable again)
> 
> Now for non-debug builds, the space taken for the token can be.. zero 
> depending on how we define the types for it.

Hm.  That might be worth changing the API for.  Right now, I'm
overloading the return value of kcounter_claim with an errno and a
cookie.  Maybe we should go for:

extern int __must_check kcounter_claim(struct kcounter *kc, kcounter_cookie_t *resource);

with the return value being 0 or errno and on success, kcounter_claim
writes a cookie to 'resource'?

-- 
Intel are signing my paycheques ... these opinions are still mine
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours.  We can't possibly take such
a retrograde step."

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-16 16:58                                                       ` Matthew Wilcox
@ 2008-04-16 17:08                                                         ` Arjan van de Ven
  2008-04-16 17:12                                                           ` Matthew Wilcox
  2008-04-16 18:10                                                           ` Matthew Wilcox
  0 siblings, 2 replies; 64+ messages in thread
From: Arjan van de Ven @ 2008-04-16 17:08 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Oliver Neukum, Ingo Oeser, Linus Torvalds, Andi Kleen,
	Peter Zijlstra, Bart Van Assche, Roland Dreier, Ingo Molnar,
	Daniel Walker, linux-kernel

On Wed, 16 Apr 2008 10:58:11 -0600
Matthew Wilcox <matthew@wil.cx> wrote:

> On Wed, Apr 16, 2008 at 09:50:26AM -0700, Arjan van de Ven wrote:
> > yup. For kcounters there is a clear owner for each "slot".
> > [This is the part that makes it debugable again)
> > 
> > Now for non-debug builds, the space taken for the token can be..
> > zero depending on how we define the types for it.
> 
> Hm.  That might be worth changing the API for.  Right now, I'm
> overloading the return value of kcounter_claim with an errno and a
> cookie.  Maybe we should go for:
> 
> extern int __must_check kcounter_claim(struct kcounter *kc,
> kcounter_cookie_t *resource);
> 
> with the return value being 0 or errno and on success, kcounter_claim
> writes a cookie to 'resource'?

or we return a cookie, and define some sort of "cookie_is_error()" thing

> 


-- 
If you want to reach me at my work email, use arjan@linux.intel.com
For development, discussion and tips for power savings, 
visit http://www.lesswatts.org

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-16 17:08                                                         ` Arjan van de Ven
@ 2008-04-16 17:12                                                           ` Matthew Wilcox
  2008-04-16 18:10                                                           ` Matthew Wilcox
  1 sibling, 0 replies; 64+ messages in thread
From: Matthew Wilcox @ 2008-04-16 17:12 UTC (permalink / raw)
  To: Arjan van de Ven
  Cc: Oliver Neukum, Ingo Oeser, Linus Torvalds, Andi Kleen,
	Peter Zijlstra, Bart Van Assche, Roland Dreier, Ingo Molnar,
	Daniel Walker, linux-kernel

On Wed, Apr 16, 2008 at 10:08:49AM -0700, Arjan van de Ven wrote:
> or we return a cookie, and define some sort of "cookie_is_error()" thing

I was considering that, but then we can't rely on __must_check to warn
whether people are actually checking the errno or just storing the
cookie somewhere.

-- 
Intel are signing my paycheques ... these opinions are still mine
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours.  We can't possibly take such
a retrograde step."

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
  2008-04-16 17:08                                                         ` Arjan van de Ven
  2008-04-16 17:12                                                           ` Matthew Wilcox
@ 2008-04-16 18:10                                                           ` Matthew Wilcox
  1 sibling, 0 replies; 64+ messages in thread
From: Matthew Wilcox @ 2008-04-16 18:10 UTC (permalink / raw)
  To: Arjan van de Ven
  Cc: Oliver Neukum, Ingo Oeser, Linus Torvalds, Andi Kleen,
	Peter Zijlstra, Bart Van Assche, Roland Dreier, Ingo Molnar,
	Daniel Walker, linux-kernel


OK, here's the first version that even compiles.  It's not completed --
a few functions need to be written, and I certainly haven't tried
to convert anything to use it.  Also, documentation would be good.
But it's useful for people to take pot-shots at the API and suggest
other things that are worth debugging checks.

One thing I want to add is a 'check if we can free this kcounter'.
ie are there any outstanding resources claimed.  And I'm not checking
for a double-release of a resource yet (trivial to add, but I need to
grab lunch).

commit 4496d41a540897039138a7d67dbef51f96dd0b09
Author: Matthew Wilcox <matthew@wil.cx>
Date:   Wed Apr 16 14:04:28 2008 -0400

    kcounters

diff --git a/include/linux/kcounter.h b/include/linux/kcounter.h
new file mode 100644
index 0000000..35f83ed
--- /dev/null
+++ b/include/linux/kcounter.h
@@ -0,0 +1,58 @@
+#ifndef __LINUX_KCOUNTER_H
+#define __LINUX_KCOUNTER_H
+/*
+ * include/linux/kcounter.h
+ *
+ * Copyright (c) 2008 Intel Corporation
+ * Author: Matthew Wilcox <willy@linux.intel.com>
+ *
+ * Distributed under the terms of the GNU GPL, version 2
+ *
+ * kcounters model a situation where you have a certain number of resources
+ * and tasks must sleep to acquire a resource if there are none available.
+ * New resources may be added or existing ones removed.
+ */
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
+#ifdef CONFIG_DEBUG_KCOUNTER
+typedef struct { struct kcounter_owner *owner; } kcounter_cookie_t;
+#else
+typedef struct { } kcounter_cookie_t;
+#endif
+
+struct kcounter {
+	spinlock_t		lock;
+	unsigned int		count;
+	struct list_head	wait_list;
+#ifdef CONFIG_DEBUG_KCOUNTER
+	unsigned int		size;
+	struct kcounter_owner	*owners;
+#endif
+};
+
+extern void kcounter_init(struct kcounter *kc, unsigned int count);
+
+extern int __must_check kcounter_claim(struct kcounter *, kcounter_cookie_t *);
+extern int __must_check kcounter_claim_interruptible(struct kcounter *,
+				kcounter_cookie_t *);
+extern int __must_check kcounter_claim_timeout(struct kcounter *,
+				kcounter_cookie_t *, long jiffies);
+extern int __must_check kcounter_claim_interruptible_timeout(struct kcounter *,
+				kcounter_cookie_t *, long jiffies);
+extern int __must_check kcounter_try_claim(struct kcounter *kc,
+				kcounter_cookie_t *);
+extern int __must_check kcounter_has_free_resources(struct kcounter *kc);
+extern void kcounter_release(struct kcounter *, kcounter_cookie_t *);
+
+#ifdef CONFIG_DEBUG_KCOUNTER
+extern void kcounter_add_resource(struct kcounter *kc);
+extern int __must_check kcounter_remove_resource(struct kcounter *kc);
+#else
+#define kcounter_add_resource(kc)	kcounter_release(kc, NULL)
+#define kcounter_remove_resource(kc)	kcounter_claim(kc, NULL)
+#endif
+extern void kcounter_add_all_resources(struct kcounter *kc);
+
+#endif
diff --git a/kernel/kcounter.c b/kernel/kcounter.c
new file mode 100644
index 0000000..d290edc
--- /dev/null
+++ b/kernel/kcounter.c
@@ -0,0 +1,272 @@
+/*
+ * kernel/kcounter.c
+ *
+ * Copyright (c) 2008 Intel Corporation
+ * Author: Matthew Wilcox <willy@linux.intel.com>
+ *
+ * Distributed under the terms of the GNU GPL, version 2
+ */
+
+#include <linux/kcounter.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+
+#ifdef CONFIG_DEBUG_KCOUNTER
+static void kcounter_debug_init(struct kcounter *, unsigned int count);
+static void kcounter_debug_claim(struct kcounter *, kcounter_cookie_t *);
+static void kcounter_debug_release(struct kcounter *, kcounter_cookie_t *);
+#else
+#define kcounter_debug_init(kc, count)		do { } while (0)
+#define kcounter_debug_claim(kc, resource)	do { } while (0)
+#define kcounter_debug_release(kc, resource)	do { } while (0)
+#endif
+
+static noinline int __kc_claim(struct kcounter *kc);
+static noinline int __kc_claim_interruptible(struct kcounter *kc);
+static noinline int __kc_claim_timeout(struct kcounter *kc, long jiffies);
+static noinline int __kc_claim_interruptible_timeout(struct kcounter *kc, long jiffies);
+static noinline void __kc_release(struct kcounter *kc);
+
+void kcounter_init(struct kcounter *kc, unsigned int count)
+{
+	spin_lock_init(&kc->lock);
+	kc->count = count;
+	INIT_LIST_HEAD(&kc->wait_list);
+	kcounter_debug_init(kc, count);
+}
+EXPORT_SYMBOL(kcounter_init);
+
+int kcounter_claim(struct kcounter *kc, kcounter_cookie_t *resource)
+{
+	unsigned long flags;
+	int result = 0;
+
+	might_sleep();
+	spin_lock_irqsave(&kc->lock, flags);
+	if (kc->count > 0)
+		kc->count--;
+	else
+		result = __kc_claim(kc);
+	if (!result)
+		kcounter_debug_claim(kc, resource);
+	spin_unlock_irqrestore(&kc->lock, flags);
+
+	return result;
+}
+EXPORT_SYMBOL(kcounter_claim);
+
+int kcounter_claim_interruptible(struct kcounter *kc, kcounter_cookie_t *resource)
+{
+	unsigned long flags;
+	int result = 0;
+
+	might_sleep();
+	spin_lock_irqsave(&kc->lock, flags);
+	if (kc->count > 0)
+		kc->count--;
+	else
+		result = __kc_claim_interruptible(kc);
+	if (!result)
+		kcounter_debug_claim(kc, resource);
+	spin_unlock_irqrestore(&kc->lock, flags);
+
+	return result;
+}
+EXPORT_SYMBOL(kcounter_claim_interruptible);
+
+int kcounter_claim_timeout(struct kcounter *kc, kcounter_cookie_t *resource, long jiffies)
+{
+	unsigned long flags;
+	int result = 0;
+
+	might_sleep();
+	spin_lock_irqsave(&kc->lock, flags);
+	if (kc->count > 0)
+		kc->count--;
+	else
+		result = __kc_claim_timeout(kc, jiffies);
+	if (!result)
+		kcounter_debug_claim(kc, resource);
+	spin_unlock_irqrestore(&kc->lock, flags);
+
+	return result;
+}
+EXPORT_SYMBOL(kcounter_claim_timeout);
+
+int kcounter_claim_interruptible_timeout(struct kcounter *kc, kcounter_cookie_t *resource, long jiffies)
+{
+	unsigned long flags;
+	int result = 0;
+
+	might_sleep();
+	spin_lock_irqsave(&kc->lock, flags);
+	if (kc->count > 0)
+		kc->count--;
+	else
+		result = __kc_claim_interruptible_timeout(kc, jiffies);
+	if (!result)
+		kcounter_debug_claim(kc, resource);
+	spin_unlock_irqrestore(&kc->lock, flags);
+
+	return result;
+}
+EXPORT_SYMBOL(kcounter_claim_interruptible_timeout);
+
+void kcounter_release(struct kcounter *kc, kcounter_cookie_t *resource)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kc->lock, flags);
+	kcounter_debug_release(kc, resource);
+	if (list_empty(&kc->wait_list))
+		kc->count++;
+	else
+		__kc_release(kc);
+	spin_unlock_irqrestore(&kc->lock, flags);
+}
+
+/* Functions for the contended case */
+
+struct kcounter_waiter {
+	struct list_head list;
+	struct task_struct *task;
+	int up;
+};
+
+/*
+ * Because this function is inlined, the 'state' parameter will be
+ * constant, and thus optimised away by the compiler.  Likewise the
+ * 'timeout' parameter for the cases without timeouts.
+ */
+static inline int __sched __kc_claim_common(struct kcounter *kc,
+						long state, long timeout)
+{
+	struct task_struct *task = current;
+	struct kcounter_waiter waiter;
+
+	list_add_tail(&waiter.list, &kc->wait_list);
+	waiter.task = task;
+	waiter.up = 0;
+
+	for (;;) {
+		if (state == TASK_INTERRUPTIBLE && signal_pending(task))
+			goto interrupted;
+		if (state == TASK_KILLABLE && fatal_signal_pending(task))
+			goto interrupted;
+		if (timeout == 0)
+			goto timed_out;
+		__set_task_state(task, state);
+		spin_unlock_irq(&kc->lock);
+		timeout = schedule_timeout(timeout);
+		spin_lock_irq(&kc->lock);
+		if (waiter.up)
+			return 0;
+	}
+
+ timed_out:
+	list_del(&waiter.list);
+	return -ETIME;
+
+ interrupted:
+	list_del(&waiter.list);
+	return -EINTR;
+}
+
+static noinline int __sched __kc_claim(struct kcounter *kc)
+{
+	return __kc_claim_common(kc, TASK_KILLABLE, MAX_SCHEDULE_TIMEOUT);
+}
+
+static noinline int __sched __kc_claim_interruptible(struct kcounter *kc)
+{
+	return __kc_claim_common(kc, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
+}
+
+static noinline int __sched __kc_claim_timeout(struct kcounter *kc,
+							long jiffies)
+{
+	return __kc_claim_common(kc, TASK_KILLABLE, jiffies);
+}
+
+static noinline int __sched __kc_claim_interruptible_timeout(
+					struct kcounter *kc, long jiffies)
+{
+	return __kc_claim_common(kc, TASK_INTERRUPTIBLE, jiffies);
+}
+
+static noinline void __sched __kc_release(struct kcounter *kc)
+{
+	struct kcounter_waiter *waiter = list_first_entry(&kc->wait_list,
+						struct kcounter_waiter, list);
+	list_del(&waiter->list);
+	waiter->up = 1;
+	wake_up_process(waiter->task);
+}
+
+#ifdef CONFIG_DEBUG_KCOUNTER
+
+#include <linux/debug_locks.h>
+#include <linux/sched.h>
+
+struct kcounter_owner {
+	struct kcounter_owner	*next;
+	struct thread_info	*thread;
+	const char		*name;
+	void			*magic;
+};
+
+static void kcounter_init_owner(struct kcounter_owner *owner)
+{
+	owner->next = NULL;
+	owner->thread = NULL;
+	owner->name = NULL;
+	owner->magic = owner;
+}
+
+static void kcounter_debug_init(struct kcounter *kc, unsigned int count)
+{
+	int i;
+	struct kcounter_owner *owner, **ptr;
+	kc->size = count;
+	ptr = &kc->owners;
+	for (i = 0; i < count; i++) {
+		owner = kmalloc(sizeof(*owner), GFP_KERNEL | __GFP_NOFAIL);
+		kcounter_init_owner(owner);
+		*ptr = owner;
+		ptr = &owner->next;
+	}
+}
+
+void kcounter_debug_claim(struct kcounter *kc, kcounter_cookie_t *resource)
+{
+	struct kcounter_owner *owner;
+
+	for (owner = kc->owners; owner != NULL; owner = owner->next) {
+		if (owner->thread)
+			continue;
+		owner->thread = task_thread_info(current);
+		resource->owner = owner;
+	}
+
+	/*
+	 * No free slots found, but we were going to return success.
+	 * This is an internal bug in the kcounter implementation.
+	 */
+	BUG();
+}
+
+void kcounter_debug_release(struct kcounter *kc, kcounter_cookie_t *resource)
+{
+	struct kcounter_owner *owner;
+
+	for (owner = kc->owners; owner != NULL; owner = owner->next) {
+		if (owner != resource->owner)
+			continue;
+		owner->thread = NULL;
+		return;
+	}
+
+	/* The resource doesn't belong to this kcounter */
+	DEBUG_LOCKS_WARN_ON(1);
+}
+#endif
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 0796c1a..926293d 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -265,6 +265,13 @@ config DEBUG_MUTEXES
 	 This feature allows mutex semantics violations to be detected and
 	 reported.
 
+config DEBUG_KCOUNTER
+	bool "Kcounter debugging: basic checks"
+	depends on DEBUG_KERNEL
+	help
+	  Enabling this option allows the kernel to detect some basic
+	  misuses of the kcounter feature.
+
 config DEBUG_SEMAPHORE
 	bool "Semaphore debugging"
 	depends on DEBUG_KERNEL

-- 
Intel are signing my paycheques ... these opinions are still mine
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours.  We can't possibly take such
a retrograde step."

^ permalink raw reply related	[flat|nested] 64+ messages in thread

* Re: [PATCH] Replace completions with semaphores
       [not found]                   ` <aiMGu-7f0-7@gated-at.bofh.it>
@ 2008-04-16 10:22                     ` Bodo Eggert
  0 siblings, 0 replies; 64+ messages in thread
From: Bodo Eggert @ 2008-04-16 10:22 UTC (permalink / raw)
  To: Bart Van Assche, Andi Kleen, Peter Zijlstra, Roland Dreier,
	Ingo Molnar, Matthew Wilcox, Ingo Oeser, Daniel Walker,
	linux-kernel, Linus Torvalds

Bart Van Assche <bart.vanassche@gmail.com> wrote:
> On Mon, Apr 14, 2008 at 9:16 PM, Andi Kleen <andi@firstfloor.org> wrote:

>>  For me it sounds like you just want to rename semaphores to some other
>>  name that people don't use them for normal locking?
> 
> Would it really be a good idea to give a synchronization concept that
> behaves exactly like a semaphore another name than "semaphore" ? The
> semaphore concept is well known and is taught in every computer
> science course.

What about calling it counting_semaphore, if it's to be used for this purpose?


^ permalink raw reply	[flat|nested] 64+ messages in thread

end of thread, other threads:[~2008-04-16 18:10 UTC | newest]

Thread overview: 64+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-04-11 21:00 [PATCH] Replace completions with semaphores Matthew Wilcox
2008-04-12  6:43 ` Daniel Walker
2008-04-12 10:31   ` Ingo Oeser
2008-04-12 12:24 ` Peter Zijlstra
2008-04-12 17:26   ` Matthew Wilcox
2008-04-12 18:01     ` Daniel Walker
2008-04-12 18:05     ` Peter Zijlstra
2008-04-12 19:04       ` Matthew Wilcox
2008-04-12 19:16         ` Peter Zijlstra
2008-04-12 19:53     ` Roland Dreier
2008-04-12 20:47       ` Matthew Wilcox
2008-04-13  7:08         ` Ingo Molnar
2008-04-13 12:57           ` Matthew Wilcox
2008-04-14 15:39             ` Ingo Molnar
2008-04-14 15:58               ` Roland Dreier
2008-04-14 16:32                 ` Peter Zijlstra
2008-04-14 16:56                   ` Arjan van de Ven
2008-04-14 17:50                     ` Matthew Wilcox
2008-04-14 17:46                   ` Andi Kleen
2008-04-14 17:54                     ` Peter Zijlstra
2008-04-14 18:09                       ` Daniel Walker
2008-04-14 19:16                       ` Andi Kleen
2008-04-15  6:18                         ` Bart Van Assche
2008-04-15  6:46                           ` Peter Zijlstra
2008-04-15  7:17                             ` Bart Van Assche
2008-04-15  8:44                               ` Peter Zijlstra
2008-04-15 13:15                                 ` Bart Van Assche
2008-04-15 16:09                                 ` Linus Torvalds
2008-04-15 16:27                                   ` Andi Kleen
2008-04-15 16:57                                     ` Linus Torvalds
2008-04-15 17:05                                       ` Ingo Molnar
2008-04-15 18:50                                         ` Matthew Wilcox
2008-04-16 12:37                                           ` Ingo Molnar
2008-04-16 12:50                                             ` Andi Kleen
2008-04-16 12:59                                               ` Killable stat/readdir Matthew Wilcox
2008-04-15 17:15                                       ` [PATCH] Replace completions with semaphores Andi Kleen
2008-04-15 17:26                                         ` Linus Torvalds
2008-04-15 17:41                                           ` Matthew Wilcox
2008-04-15 18:14                                             ` Linus Torvalds
2008-04-16 16:07                                               ` Ingo Oeser
2008-04-16 16:16                                                 ` Matthew Wilcox
2008-04-16 16:31                                                   ` Oliver Neukum
2008-04-16 16:34                                                     ` Matthew Wilcox
2008-04-16 16:42                                                       ` Oliver Neukum
2008-04-16 16:44                                                         ` Matthew Wilcox
2008-04-16 16:47                                                           ` Roland Dreier
2008-04-16 16:50                                                     ` Arjan van de Ven
2008-04-16 16:58                                                       ` Matthew Wilcox
2008-04-16 17:08                                                         ` Arjan van de Ven
2008-04-16 17:12                                                           ` Matthew Wilcox
2008-04-16 18:10                                                           ` Matthew Wilcox
2008-04-14 19:16                       ` Alan Cox
2008-04-13 14:55           ` Bart Van Assche
2008-04-14 17:12             ` API documentation (was [PATCH] Replace completions with semaphores) Jonathan Corbet
2008-04-14 17:33               ` Peter Zijlstra
2008-04-14 18:38                 ` Bart Van Assche
2008-04-13 13:55       ` [PATCH] Replace completions with semaphores Bart Van Assche
2008-04-13 14:22         ` Matthew Wilcox
2008-04-13  7:05   ` Ingo Molnar
2008-04-13 12:52     ` Matthew Wilcox
2008-04-14 15:41       ` Ingo Molnar
2008-04-14 17:46         ` Matthew Wilcox
2008-04-14 16:54       ` Jens Axboe
     [not found] <ahyFC-2bu-25@gated-at.bofh.it>
     [not found] ` <ahUPJ-8rN-1@gated-at.bofh.it>
     [not found]   ` <ai4vO-2Rp-19@gated-at.bofh.it>
     [not found]     ` <ai9Yw-5uh-7@gated-at.bofh.it>
     [not found]       ` <aiz6z-OZ-31@gated-at.bofh.it>
     [not found]         ` <aizgp-151-25@gated-at.bofh.it>
     [not found]           ` <aizSW-2Ar-17@gated-at.bofh.it>
     [not found]             ` <aiAYF-5e7-19@gated-at.bofh.it>
     [not found]               ` <aiB8o-5A7-17@gated-at.bofh.it>
     [not found]                 ` <aiCnR-7k-39@gated-at.bofh.it>
     [not found]                   ` <aiMGu-7f0-7@gated-at.bofh.it>
2008-04-16 10:22                     ` Bodo Eggert

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).