LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [PATCH] Add FUTEX_CMP_REQUEUE futex op
@ 2004-05-20 9:38 Jakub Jelinek
2004-05-20 22:52 ` Andrew Morton
` (2 more replies)
0 siblings, 3 replies; 24+ messages in thread
From: Jakub Jelinek @ 2004-05-20 9:38 UTC (permalink / raw)
To: linux-kernel; +Cc: mingo
Hi!
FUTEX_REQUEUE operation has been added to the kernel mainly to improve
pthread_cond_broadcast which previously used FUTEX_WAKE INT_MAX op.
pthread_cond_broadcast releases internal condvar mutex before FUTEX_REQUEUE
operation, as otherwise the woken up thread most likely immediately sleeps
again on the internal condvar mutex until the broadcasting thread releases
it.
Unfortunately this is racy and causes e.g.
http://sources.redhat.com/cgi-bin/cvsweb.cgi/libc/nptl/tst-cond16.c?rev=1.1&content-type=text/x-cvsweb-markup&cvsroot=glibc
to hang on SMP.
http://listman.redhat.com/archives/phil-list/2004-May/msg00023.html
contains analysis how the hang happens, the problem is if any thread
does pthread_cond_*wait in between releasing of the internal condvar
mutex and FUTEX_REQUEUE operation, a wrong thread might be awaken (and
immediately go to sleep again because it doesn't satisfy conditions for
returning from pthread_cond_*wait) while the right thread requeued on
the associated mutex and there would be nobody to wake that thread up.
The patch below extends FUTEX_REQUEUE operation with something FUTEX_WAIT
already uses:
FUTEX_CMP_REQUEUE is passed an additional argument which is the expected
value of *futex. Kernel then while holding the futex locks checks if
*futex != expected and returns -EAGAIN in that case, while if it is equal,
continues with a normal FUTEX_REQUEUE operation.
If the syscall returns -EAGAIN, NPTL can fall back to FUTEX_WAKE INT_MAX
operation which doesn't have this problem, but is less efficient, while
in the likely case that nobody hit the (small) window the efficient
FUTEX_REQUEUE operation is used.
--- linux-2.6.5/include/linux/futex.h.jj 2004-04-04 05:37:36.000000000 +0200
+++ linux-2.6.5/include/linux/futex.h 2004-05-05 09:57:09.200306101 +0200
@@ -8,9 +8,10 @@
#define FUTEX_WAKE (1)
#define FUTEX_FD (2)
#define FUTEX_REQUEUE (3)
-
+#define FUTEX_CMP_REQUEUE (4)
long do_futex(unsigned long uaddr, int op, int val,
- unsigned long timeout, unsigned long uaddr2, int val2);
+ unsigned long timeout, unsigned long uaddr2, int val2,
+ int val3);
#endif
--- linux-2.6.5/kernel/futex.c.jj 2004-04-04 05:36:52.000000000 +0200
+++ linux-2.6.5/kernel/futex.c 2004-05-05 12:23:33.481048623 +0200
@@ -96,6 +96,7 @@ struct futex_q {
*/
struct futex_hash_bucket {
spinlock_t lock;
+ unsigned int nqueued;
struct list_head chain;
};
@@ -318,13 +319,14 @@ out:
* physical page.
*/
static int futex_requeue(unsigned long uaddr1, unsigned long uaddr2,
- int nr_wake, int nr_requeue)
+ int nr_wake, int nr_requeue, int *valp)
{
union futex_key key1, key2;
struct futex_hash_bucket *bh1, *bh2;
struct list_head *head1;
struct futex_q *this, *next;
int ret, drop_count = 0;
+ unsigned int nqueued;
down_read(¤t->mm->mmap_sem);
@@ -338,12 +340,33 @@ static int futex_requeue(unsigned long u
bh1 = hash_futex(&key1);
bh2 = hash_futex(&key2);
+ nqueued = bh1->nqueued;
+ if (likely (valp != NULL)) {
+ int curval;
+
+ smp_mb ();
+
+ if (get_user(curval, (int *)uaddr1) != 0) {
+ ret = -EFAULT;
+ goto out;
+ }
+ if (curval != *valp) {
+ ret = -EAGAIN;
+ goto out;
+ }
+ }
+
if (bh1 < bh2)
spin_lock(&bh1->lock);
spin_lock(&bh2->lock);
if (bh1 > bh2)
spin_lock(&bh1->lock);
+ if (unlikely (nqueued != bh1->nqueued && valp != NULL)) {
+ ret = -EAGAIN;
+ goto out_unlock;
+ }
+
head1 = &bh1->chain;
list_for_each_entry_safe(this, next, head1, list) {
if (!match_futex (&this->key, &key1))
@@ -365,6 +388,7 @@ static int futex_requeue(unsigned long u
}
}
+out_unlock:
spin_unlock(&bh1->lock);
if (bh1 != bh2)
spin_unlock(&bh2->lock);
@@ -398,6 +422,7 @@ static void queue_me(struct futex_q *q,
q->lock_ptr = &bh->lock;
spin_lock(&bh->lock);
+ bh->nqueued++;
list_add_tail(&q->list, &bh->chain);
spin_unlock(&bh->lock);
}
@@ -625,7 +650,7 @@ out:
}
long do_futex(unsigned long uaddr, int op, int val, unsigned long timeout,
- unsigned long uaddr2, int val2)
+ unsigned long uaddr2, int val2, int val3)
{
int ret;
@@ -641,7 +666,10 @@ long do_futex(unsigned long uaddr, int o
ret = futex_fd(uaddr, val);
break;
case FUTEX_REQUEUE:
- ret = futex_requeue(uaddr, uaddr2, val, val2);
+ ret = futex_requeue(uaddr, uaddr2, val, val2, NULL);
+ break;
+ case FUTEX_CMP_REQUEUE:
+ ret = futex_requeue(uaddr, uaddr2, val, val2, &val3);
break;
default:
ret = -ENOSYS;
@@ -651,7 +679,8 @@ long do_futex(unsigned long uaddr, int o
asmlinkage long sys_futex(u32 __user *uaddr, int op, int val,
- struct timespec __user *utime, u32 __user *uaddr2)
+ struct timespec __user *utime, u32 __user *uaddr2,
+ int val3)
{
struct timespec t;
unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
@@ -665,11 +694,11 @@ asmlinkage long sys_futex(u32 __user *ua
/*
* requeue parameter in 'utime' if op == FUTEX_REQUEUE.
*/
- if (op == FUTEX_REQUEUE)
+ if (op >= FUTEX_REQUEUE)
val2 = (int) (long) utime;
return do_futex((unsigned long)uaddr, op, val, timeout,
- (unsigned long)uaddr2, val2);
+ (unsigned long)uaddr2, val2, val3);
}
static struct super_block *
--- linux-2.6.5/kernel/compat.c.jj 2004-04-04 05:37:07.000000000 +0200
+++ linux-2.6.5/kernel/compat.c 2004-05-05 09:56:36.761119626 +0200
@@ -208,7 +208,7 @@ asmlinkage long compat_sys_sigprocmask(i
#ifdef CONFIG_FUTEX
asmlinkage long compat_sys_futex(u32 *uaddr, int op, int val,
- struct compat_timespec *utime, u32 *uaddr2)
+ struct compat_timespec *utime, u32 *uaddr2, int val3)
{
struct timespec t;
unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
@@ -219,11 +219,11 @@ asmlinkage long compat_sys_futex(u32 *ua
return -EFAULT;
timeout = timespec_to_jiffies(&t) + 1;
}
- if (op == FUTEX_REQUEUE)
+ if (op >= FUTEX_REQUEUE)
val2 = (int) (long) utime;
return do_futex((unsigned long)uaddr, op, val, timeout,
- (unsigned long)uaddr2, val2);
+ (unsigned long)uaddr2, val2, val3);
}
#endif
Jakub
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] Add FUTEX_CMP_REQUEUE futex op
2004-05-20 9:38 [PATCH] Add FUTEX_CMP_REQUEUE futex op Jakub Jelinek
@ 2004-05-20 22:52 ` Andrew Morton
2004-05-21 6:06 ` Ulrich Drepper
2004-05-21 7:05 ` Ingo Molnar
2004-05-21 23:05 ` Andrew Morton
2004-05-29 3:13 ` Rusty Russell
2 siblings, 2 replies; 24+ messages in thread
From: Andrew Morton @ 2004-05-20 22:52 UTC (permalink / raw)
To: Jakub Jelinek; +Cc: linux-kernel, mingo
Jakub Jelinek <jakub@redhat.com> wrote:
>
> asmlinkage long sys_futex(u32 __user *uaddr, int op, int val,
> - struct timespec __user *utime, u32 __user *uaddr2)
> + struct timespec __user *utime, u32 __user *uaddr2,
> + int val3)
Is it safe to go adding a new argument to an existing syscall in this manner?
It'll work OK on x86 because of the stack layout but is the same true of
all other supported architectures?
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] Add FUTEX_CMP_REQUEUE futex op
2004-05-20 22:52 ` Andrew Morton
@ 2004-05-21 6:06 ` Ulrich Drepper
2004-05-21 6:36 ` Andrew Morton
2004-05-21 7:05 ` Ingo Molnar
1 sibling, 1 reply; 24+ messages in thread
From: Ulrich Drepper @ 2004-05-21 6:06 UTC (permalink / raw)
To: Andrew Morton; +Cc: Jakub Jelinek, linux-kernel, mingo
Andrew Morton wrote:
> Is it safe to go adding a new argument to an existing syscall in this manner?
Yes. This is a multiplexed syscall and the opcode decides which syscall
parameter is used.
> It'll work OK on x86 because of the stack layout but is the same true of
> all other supported architectures?
We add parameters at the end. This does not influence how previous
values are passed. And especially for syscalls it makes no difference.
--
➧ Ulrich Drepper ➧ Red Hat, Inc. ➧ 444 Castro St ➧ Mountain View, CA ❖
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] Add FUTEX_CMP_REQUEUE futex op
2004-05-21 6:06 ` Ulrich Drepper
@ 2004-05-21 6:36 ` Andrew Morton
2004-05-21 7:15 ` Ulrich Drepper
2004-05-21 7:43 ` Jakub Jelinek
0 siblings, 2 replies; 24+ messages in thread
From: Andrew Morton @ 2004-05-21 6:36 UTC (permalink / raw)
To: Ulrich Drepper; +Cc: jakub, linux-kernel, mingo
Ulrich Drepper <drepper@redhat.com> wrote:
>
> Andrew Morton wrote:
>
> > Is it safe to go adding a new argument to an existing syscall in this manner?
>
> Yes. This is a multiplexed syscall and the opcode decides which syscall
> parameter is used.
>
Of course.
> > It'll work OK on x86 because of the stack layout but is the same true of
> > all other supported architectures?
>
> We add parameters at the end. This does not influence how previous
> values are passed. And especially for syscalls it makes no difference.
>
what we're effectively doing is:
void foo(int a, int b, int c)
{
}
and from another compilation unit:
foo(a, b);
and we're expecting the a's and b's to line up across all architectures and
compiler options. I thought that on some architectures that only works out
if the function has a vararg declaration.
Does it do the right thing on stack-grows-up machines?
If the compiler passes the first few args via registers and the rest on the
stack, are we sure that it won't at some level of complexity decide to pass
_all_ the args on the stack? It's free to do so, I think.
I have a vague memory of getting bitten by this trick once...
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] Add FUTEX_CMP_REQUEUE futex op
2004-05-20 22:52 ` Andrew Morton
2004-05-21 6:06 ` Ulrich Drepper
@ 2004-05-21 7:05 ` Ingo Molnar
1 sibling, 0 replies; 24+ messages in thread
From: Ingo Molnar @ 2004-05-21 7:05 UTC (permalink / raw)
To: Andrew Morton; +Cc: Jakub Jelinek, linux-kernel
On Thu, 20 May 2004, Andrew Morton wrote:
> > asmlinkage long sys_futex(u32 __user *uaddr, int op, int val,
> > - struct timespec __user *utime, u32 __user *uaddr2)
> > + struct timespec __user *utime, u32 __user *uaddr2,
> > + int val3)
>
> Is it safe to go adding a new argument to an existing syscall in this manner?
>
> It'll work OK on x86 because of the stack layout but is the same true of
> all other supported architectures?
we added a new futex paramater once before (the original requeue patch) -
so if it broke anything, it didnt break loud enough for anyone to notice
:-|
Ingo
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] Add FUTEX_CMP_REQUEUE futex op
2004-05-21 6:36 ` Andrew Morton
@ 2004-05-21 7:15 ` Ulrich Drepper
2004-05-21 7:43 ` Jakub Jelinek
1 sibling, 0 replies; 24+ messages in thread
From: Ulrich Drepper @ 2004-05-21 7:15 UTC (permalink / raw)
To: Andrew Morton; +Cc: jakub, linux-kernel, mingo
Andrew Morton wrote:
> and we're expecting the a's and b's to line up across all architectures and
> compiler options. I thought that on some architectures that only works out
> if the function has a vararg declaration.
I never heard that.
> Does it do the right thing on stack-grows-up machines?
Would be only HP/PA and I don't see this to be a problem.
> If the compiler passes the first few args via registers and the rest on the
> stack, are we sure that it won't at some level of complexity decide to pass
> _all_ the args on the stack? It's free to do so, I think.
This is not how the calling conventions are designed. If registers are
used they happens unconditional of the remainder of the parameter list.
The stack is used as an overflow.
> I have a vague memory of getting bitten by this trick once...
I don't and, as Ingo mentioned, we already did it before.
--
➧ Ulrich Drepper ➧ Red Hat, Inc. ➧ 444 Castro St ➧ Mountain View, CA ❖
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] Add FUTEX_CMP_REQUEUE futex op
2004-05-21 6:36 ` Andrew Morton
2004-05-21 7:15 ` Ulrich Drepper
@ 2004-05-21 7:43 ` Jakub Jelinek
2004-05-22 16:58 ` Arnd Bergmann
1 sibling, 1 reply; 24+ messages in thread
From: Jakub Jelinek @ 2004-05-21 7:43 UTC (permalink / raw)
To: Andrew Morton; +Cc: Ulrich Drepper, linux-kernel, mingo
On Thu, May 20, 2004 at 11:36:39PM -0700, Andrew Morton wrote:
> > > It'll work OK on x86 because of the stack layout but is the same true of
> > > all other supported architectures?
> >
> > We add parameters at the end. This does not influence how previous
> > values are passed. And especially for syscalls it makes no difference.
> >
>
> what we're effectively doing is:
>
> void foo(int a, int b, int c)
> {
> }
>
> and from another compilation unit:
>
> foo(a, b);
>
> and we're expecting the a's and b's to line up across all architectures and
> compiler options. I thought that on some architectures that only works out
> if the function has a vararg declaration.
The kernel syscall ABI is on many arches different from the compiler ABI.
Adding an argument this way certainly works on the architectures I'm
familiar with (i386, x86_64, ia64, ppc, ppc64, s390, s390x, sparc, sparc64,
alpha). I believe arm will work too, don't keep track of the rest of
arches.
Well, for s390/s390x there is a problem that it doesn't allow (yet) 6
argument syscalls at all, so one possibility for s390* is adding a wrapper around
sys_futex which will take the 5th and 6th arguments for FUTEX_CMP_REQUEUE
from a structure pointed to by 5th argument and pass that to sys_futex.
If some weirdo arch has problems with this, it can certainly deal with it in
its architecture wrapper.
Jakub
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] Add FUTEX_CMP_REQUEUE futex op
2004-05-20 9:38 [PATCH] Add FUTEX_CMP_REQUEUE futex op Jakub Jelinek
2004-05-20 22:52 ` Andrew Morton
@ 2004-05-21 23:05 ` Andrew Morton
2004-05-22 10:10 ` Ingo Oeser
2004-05-23 17:33 ` Jakub Jelinek
2004-05-29 3:13 ` Rusty Russell
2 siblings, 2 replies; 24+ messages in thread
From: Andrew Morton @ 2004-05-21 23:05 UTC (permalink / raw)
To: Jakub Jelinek; +Cc: linux-kernel, mingo
Jakub Jelinek <jakub@redhat.com> wrote:
>
> The patch below extends FUTEX_REQUEUE operation with something FUTEX_WAIT
> already uses:
> FUTEX_CMP_REQUEUE is passed an additional argument which is the expected
> value of *futex. Kernel then while holding the futex locks checks if
^^^^^^^^^^^^^^^^^^^^^^^
But in your patch the check is happening _prior_ to taking the futex locks.
> *futex != expected and returns -EAGAIN in that case, while if it is equal,
> continues with a normal FUTEX_REQUEUE operation.
> If the syscall returns -EAGAIN, NPTL can fall back to FUTEX_WAKE INT_MAX
> operation which doesn't have this problem, but is less efficient, while
> in the likely case that nobody hit the (small) window the efficient
> FUTEX_REQUEUE operation is used.
You've added an smp_mb(). These things are becoming like lock_kernel() -
hard for the reader to discern what it's protecting against.
Please always include a comment when adding a barrier like this.
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] Add FUTEX_CMP_REQUEUE futex op
2004-05-21 23:05 ` Andrew Morton
@ 2004-05-22 10:10 ` Ingo Oeser
2004-05-23 17:33 ` Jakub Jelinek
1 sibling, 0 replies; 24+ messages in thread
From: Ingo Oeser @ 2004-05-22 10:10 UTC (permalink / raw)
To: linux-kernel; +Cc: Andrew Morton, Jakub Jelinek, mingo
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1
On Saturday 22 May 2004 01:05, Andrew Morton wrote:
> You've added an smp_mb(). These things are becoming like lock_kernel() -
> hard for the reader to discern what it's protecting against.
>
> Please always include a comment when adding a barrier like this.
What about including this in the API?
sth. like
#define smp_mb(why) do { \
static __debugdata__ char __lock_reason[] = why; \
} while(0)
would be sufficient. __debugdata__ section
will then be stripped from compressed image but not vmlinux.
Is there another way to force developers to comment? ;-)
Regards
Ingo Oeser
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.2.4 (GNU/Linux)
iD8DBQFArycyU56oYWuOrkARAljxAKDNE04H2/zr+rZcu1SAR3x19rUtHgCgwqKV
+Mpszd42hQ7hDQjphDobNRk=
=gh0g
-----END PGP SIGNATURE-----
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] Add FUTEX_CMP_REQUEUE futex op
2004-05-21 7:43 ` Jakub Jelinek
@ 2004-05-22 16:58 ` Arnd Bergmann
2004-05-24 7:34 ` Jakub Jelinek
0 siblings, 1 reply; 24+ messages in thread
From: Arnd Bergmann @ 2004-05-22 16:58 UTC (permalink / raw)
To: Jakub Jelinek
Cc: Andrew Morton, Ulrich Drepper, linux-kernel, mingo, Martin Schwidefsky
[-- Attachment #1: Type: text/plain, Size: 6446 bytes --]
On Friday 21 May 2004 09:43, Jakub Jelinek wrote:
> Well, for s390/s390x there is a problem that it doesn't allow (yet) 6
> argument syscalls at all, so one possibility for s390* is adding a wrapper around
> sys_futex which will take the 5th and 6th arguments for FUTEX_CMP_REQUEUE
> from a structure pointed to by 5th argument and pass that to sys_futex.
I would really prefer not re-inventing brain-damage like ipc_kludge. I
have tried to do a special s390_futex_cmp_requeue() syscall, which is
still somewhat ugly. At least it has the advantage that it does not break
the de-facto calling conventions for s390 syscalls that either pass
up to five arguments in r2 to r6 or everything in an array pointed to
by r2.
Martin and Jakub, does the patch below look ok to you or should we do
it in yet another way?
Arnd <><
===== arch/s390/kernel/compat_linux.c 1.21 vs edited =====
--- 1.21/arch/s390/kernel/compat_linux.c Sat May 22 06:31:44 2004
+++ edited/arch/s390/kernel/compat_linux.c Sat May 22 17:34:21 2004
@@ -58,6 +58,7 @@
#include <linux/compat.h>
#include <linux/vfs.h>
#include <linux/ptrace.h>
+#include <linux/futex.h>
#include <asm/types.h>
#include <asm/ipc.h>
@@ -1262,4 +1263,16 @@
ret = put_user (ktimer_id, timer_id);
return ret;
+}
+
+extern asmlinkage long compat_sys_futex(u32 *uaddr, int op, int val,
+ struct compat_timespec *utime, u32 *uaddr2, int val3);
+
+asmlinkage long compat_s390_futex(u32 *uaddr, int op, int val,
+ struct compat_timespec *utime, u32 *uaddr2, int val3)
+{
+ if (op == FUTEX_CMP_REQUEUE)
+ return -EINVAL;
+
+ return compat_sys_futex(uaddr, op, val, utime, uaddr2, 0);
}
===== arch/s390/kernel/compat_wrapper.S 1.13 vs edited =====
--- 1.13/arch/s390/kernel/compat_wrapper.S Sat May 22 06:31:44 2004
+++ edited/arch/s390/kernel/compat_wrapper.S Sat May 22 18:16:33 2004
@@ -1090,14 +1090,14 @@
llgtr %r3,%r3 # struct stat64 *
jg sys32_fstat64 # branch to system call
- .globl compat_sys_futex_wrapper
-compat_sys_futex_wrapper:
+ .globl compat_s390_futex_wrapper
+compat_s390_futex_wrapper:
llgtr %r2,%r2 # u32 *
lgfr %r3,%r3 # int
lgfr %r4,%r4 # int
llgtr %r5,%r5 # struct compat_timespec *
llgtr %r6,%r6 # u32 *
- jg compat_sys_futex # branch to system call
+ jg compat_s390_futex # branch to system call
.globl sys32_setxattr_wrapper
sys32_setxattr_wrapper:
@@ -1404,3 +1404,12 @@
llgtr %r3,%r3 # struct compat_mq_attr *
llgtr %r4,%r4 # struct compat_mq_attr *
jg compat_sys_mq_getsetattr
+
+ .globl compat_s390_futex_cmp_requeue_wrapper
+compat_s390_futex_cmp_requeue_wrapper:
+ llgtr %r2,%r2 # u32 *
+ llgtr %r3,%r3 # u32 *
+ lgfr %r4,%r4 # int
+ lgfr %r5,%r5 # int
+ lgfr %r6,%r6 # int
+ jg s390_futex_cmp_requeue
===== arch/s390/kernel/sys_s390.c 1.14 vs edited =====
--- 1.14/arch/s390/kernel/sys_s390.c Sat May 22 06:31:44 2004
+++ edited/arch/s390/kernel/sys_s390.c Sat May 22 17:31:57 2004
@@ -26,9 +26,8 @@
#include <linux/mman.h>
#include <linux/file.h>
#include <linux/utsname.h>
-#ifdef CONFIG_ARCH_S390X
#include <linux/personality.h>
-#endif /* CONFIG_ARCH_S390X */
+#include <linux/futex.h>
#include <asm/uaccess.h>
#include <asm/ipc.h>
@@ -265,3 +264,20 @@
return sys_fadvise64_64(a.fd, a.offset, a.len, a.advice);
}
+asmlinkage long
+s390_futex(u32 __user *uaddr, int op, int val,
+ struct timespec __user *utime, u32 __user *uaddr2)
+{
+ if (op == FUTEX_CMP_REQUEUE)
+ return -EINVAL;
+
+ return sys_futex(uaddr, op, val, utime, uaddr2, 0);
+}
+
+asmlinkage long
+s390_futex_cmp_requeue(u32 __user *uaddr, u32 __user *uaddr2,
+ int nr_wake, int nr_requeue, int val)
+{
+ return do_futex((unsigned long) uaddr, FUTEX_CMP_REQUEUE,
+ nr_wake, 0, (unsigned long)uaddr2, nr_requeue, val);
+}
===== arch/s390/kernel/syscalls.S 1.12 vs edited =====
--- 1.12/arch/s390/kernel/syscalls.S Sat May 22 06:34:19 2004
+++ edited/arch/s390/kernel/syscalls.S Sat May 22 18:15:24 2004
@@ -246,7 +246,7 @@
SYSCALL(sys_fremovexattr,sys_fremovexattr,sys32_fremovexattr_wrapper) /* 235 */
SYSCALL(sys_gettid,sys_gettid,sys_gettid)
SYSCALL(sys_tkill,sys_tkill,sys_tkill)
-SYSCALL(sys_futex,sys_futex,compat_sys_futex_wrapper)
+SYSCALL(s390_futex,s390_futex,compat_s390_futex_wrapper)
SYSCALL(sys_sched_setaffinity,sys_sched_setaffinity,sys32_sched_setaffinity_wrapper)
SYSCALL(sys_sched_getaffinity,sys_sched_getaffinity,sys32_sched_getaffinity_wrapper) /* 240 */
SYSCALL(sys_tgkill,sys_tgkill,sys_tgkill)
@@ -285,3 +285,4 @@
SYSCALL(sys_mq_timedreceive,sys_mq_timedreceive,compat_sys_mq_timedreceive_wrapper)
SYSCALL(sys_mq_notify,sys_mq_notify,compat_sys_mq_notify_wrapper)
SYSCALL(sys_mq_getsetattr,sys_mq_getsetattr,compat_sys_mq_getsetattr_wrapper)
+SYSCALL(s390_futex_cmp_requeue,s390_futex_cmp_requeue,compat_s390_futex_cmp_requeue_wrapper)
===== include/asm-s390/unistd.h 1.26 vs edited =====
--- 1.26/include/asm-s390/unistd.h Sat May 22 06:34:45 2004
+++ edited/include/asm-s390/unistd.h Sat May 22 17:08:01 2004
@@ -269,8 +269,9 @@
#define __NR_mq_timedreceive 274
#define __NR_mq_notify 275
#define __NR_mq_getsetattr 276
+#define __NR_futex_cmp_requeue 277
-#define NR_syscalls 277
+#define NR_syscalls 278
/*
* There are some system calls that are not present on 64 bit, some
===== include/linux/syscalls.h 1.6 vs edited =====
--- 1.6/include/linux/syscalls.h Sat May 22 06:31:47 2004
+++ edited/include/linux/syscalls.h Sat May 22 17:32:58 2004
@@ -165,7 +165,8 @@
asmlinkage long sys_waitpid(pid_t pid, unsigned int __user *stat_addr, int options);
asmlinkage long sys_set_tid_address(int __user *tidptr);
asmlinkage long sys_futex(u32 __user *uaddr, int op, int val,
- struct timespec __user *utime, u32 __user *uaddr2);
+ struct timespec __user *utime, u32 __user *uaddr2,
+ int val3);
asmlinkage long sys_init_module(void __user *umod, unsigned long len,
const char __user *uargs);
===== kernel/fork.c 1.169 vs edited =====
--- 1.169/kernel/fork.c Sat May 22 06:32:19 2004
+++ edited/kernel/fork.c Sat May 22 17:35:46 2004
@@ -516,7 +516,7 @@
* not set up a proper pointer then tough luck.
*/
put_user(0, tidptr);
- sys_futex(tidptr, FUTEX_WAKE, 1, NULL, NULL);
+ sys_futex(tidptr, FUTEX_WAKE, 1, NULL, NULL, 0);
}
}
[-- Attachment #2: signature --]
[-- Type: application/pgp-signature, Size: 189 bytes --]
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] Add FUTEX_CMP_REQUEUE futex op
2004-05-21 23:05 ` Andrew Morton
2004-05-22 10:10 ` Ingo Oeser
@ 2004-05-23 17:33 ` Jakub Jelinek
1 sibling, 0 replies; 24+ messages in thread
From: Jakub Jelinek @ 2004-05-23 17:33 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-kernel, mingo
On Fri, May 21, 2004 at 04:05:10PM -0700, Andrew Morton wrote:
> Jakub Jelinek <jakub@redhat.com> wrote:
> >
> > The patch below extends FUTEX_REQUEUE operation with something FUTEX_WAIT
> > already uses:
> > FUTEX_CMP_REQUEUE is passed an additional argument which is the expected
> > value of *futex. Kernel then while holding the futex locks checks if
>
> ^^^^^^^^^^^^^^^^^^^^^^^
>
> But in your patch the check is happening _prior_ to taking the futex locks.
Sure, that's because I'm trying to avoid get_user while holding spinlock, as
get_user may sleep.
> You've added an smp_mb(). These things are becoming like lock_kernel() -
> hard for the reader to discern what it's protecting against.
>
> Please always include a comment when adding a barrier like this.
Comment added below.
--- linux-2.6.5/include/linux/futex.h.jj 2004-04-04 05:37:36.000000000 +0200
+++ linux-2.6.5/include/linux/futex.h 2004-05-05 09:57:09.200306101 +0200
@@ -8,9 +8,10 @@
#define FUTEX_WAKE (1)
#define FUTEX_FD (2)
#define FUTEX_REQUEUE (3)
-
+#define FUTEX_CMP_REQUEUE (4)
long do_futex(unsigned long uaddr, int op, int val,
- unsigned long timeout, unsigned long uaddr2, int val2);
+ unsigned long timeout, unsigned long uaddr2, int val2,
+ int val3);
#endif
--- linux-2.6.5/kernel/futex.c.jj 2004-04-04 05:36:52.000000000 +0200
+++ linux-2.6.5/kernel/futex.c 2004-05-05 12:23:33.481048623 +0200
@@ -96,6 +96,7 @@ struct futex_q {
*/
struct futex_hash_bucket {
spinlock_t lock;
+ unsigned int nqueued;
struct list_head chain;
};
@@ -318,13 +319,14 @@ out:
* physical page.
*/
static int futex_requeue(unsigned long uaddr1, unsigned long uaddr2,
- int nr_wake, int nr_requeue)
+ int nr_wake, int nr_requeue, int *valp)
{
union futex_key key1, key2;
struct futex_hash_bucket *bh1, *bh2;
struct list_head *head1;
struct futex_q *this, *next;
int ret, drop_count = 0;
+ unsigned int nqueued;
down_read(¤t->mm->mmap_sem);
@@ -338,12 +340,41 @@ static int futex_requeue(unsigned long u
bh1 = hash_futex(&key1);
bh2 = hash_futex(&key2);
+ nqueued = bh1->nqueued;
+ if (likely (valp != NULL)) {
+ int curval;
+
+ /* In order to avoid doing get_user while
+ holding bh1->lock and bh2->lock, nqueued
+ (monotonically increasing field) must be first
+ read, then *uaddr1 fetched from userland and
+ after acquiring lock nqueued field compared with
+ the stored value. The smp_mb () below
+ makes sure that bh1->nqueued is read from memory
+ before *uaddr1. */
+ smp_mb ();
+
+ if (get_user(curval, (int *)uaddr1) != 0) {
+ ret = -EFAULT;
+ goto out;
+ }
+ if (curval != *valp) {
+ ret = -EAGAIN;
+ goto out;
+ }
+ }
+
if (bh1 < bh2)
spin_lock(&bh1->lock);
spin_lock(&bh2->lock);
if (bh1 > bh2)
spin_lock(&bh1->lock);
+ if (unlikely (nqueued != bh1->nqueued && valp != NULL)) {
+ ret = -EAGAIN;
+ goto out_unlock;
+ }
+
head1 = &bh1->chain;
list_for_each_entry_safe(this, next, head1, list) {
if (!match_futex (&this->key, &key1))
@@ -365,6 +396,7 @@ static int futex_requeue(unsigned long u
}
}
+out_unlock:
spin_unlock(&bh1->lock);
if (bh1 != bh2)
spin_unlock(&bh2->lock);
@@ -398,6 +430,7 @@ static void queue_me(struct futex_q *q,
q->lock_ptr = &bh->lock;
spin_lock(&bh->lock);
+ bh->nqueued++;
list_add_tail(&q->list, &bh->chain);
spin_unlock(&bh->lock);
}
@@ -625,7 +658,7 @@ out:
}
long do_futex(unsigned long uaddr, int op, int val, unsigned long timeout,
- unsigned long uaddr2, int val2)
+ unsigned long uaddr2, int val2, int val3)
{
int ret;
@@ -641,7 +674,10 @@ long do_futex(unsigned long uaddr, int o
ret = futex_fd(uaddr, val);
break;
case FUTEX_REQUEUE:
- ret = futex_requeue(uaddr, uaddr2, val, val2);
+ ret = futex_requeue(uaddr, uaddr2, val, val2, NULL);
+ break;
+ case FUTEX_CMP_REQUEUE:
+ ret = futex_requeue(uaddr, uaddr2, val, val2, &val3);
break;
default:
ret = -ENOSYS;
@@ -651,7 +687,8 @@ long do_futex(unsigned long uaddr, int o
asmlinkage long sys_futex(u32 __user *uaddr, int op, int val,
- struct timespec __user *utime, u32 __user *uaddr2)
+ struct timespec __user *utime, u32 __user *uaddr2,
+ int val3)
{
struct timespec t;
unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
@@ -665,11 +702,11 @@ asmlinkage long sys_futex(u32 __user *ua
/*
* requeue parameter in 'utime' if op == FUTEX_REQUEUE.
*/
- if (op == FUTEX_REQUEUE)
+ if (op >= FUTEX_REQUEUE)
val2 = (int) (long) utime;
return do_futex((unsigned long)uaddr, op, val, timeout,
- (unsigned long)uaddr2, val2);
+ (unsigned long)uaddr2, val2, val3);
}
static struct super_block *
--- linux-2.6.5/kernel/compat.c.jj 2004-04-04 05:37:07.000000000 +0200
+++ linux-2.6.5/kernel/compat.c 2004-05-05 09:56:36.761119626 +0200
@@ -208,7 +208,7 @@ asmlinkage long compat_sys_sigprocmask(i
#ifdef CONFIG_FUTEX
asmlinkage long compat_sys_futex(u32 *uaddr, int op, int val,
- struct compat_timespec *utime, u32 *uaddr2)
+ struct compat_timespec *utime, u32 *uaddr2, int val3)
{
struct timespec t;
unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
@@ -219,11 +219,11 @@ asmlinkage long compat_sys_futex(u32 *ua
return -EFAULT;
timeout = timespec_to_jiffies(&t) + 1;
}
- if (op == FUTEX_REQUEUE)
+ if (op >= FUTEX_REQUEUE)
val2 = (int) (long) utime;
return do_futex((unsigned long)uaddr, op, val, timeout,
- (unsigned long)uaddr2, val2);
+ (unsigned long)uaddr2, val2, val3);
}
#endif
Jakub
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] Add FUTEX_CMP_REQUEUE futex op
2004-05-22 16:58 ` Arnd Bergmann
@ 2004-05-24 7:34 ` Jakub Jelinek
2004-05-24 8:12 ` Andrew Morton
2004-05-24 17:34 ` Arnd Bergmann
0 siblings, 2 replies; 24+ messages in thread
From: Jakub Jelinek @ 2004-05-24 7:34 UTC (permalink / raw)
To: Arnd Bergmann
Cc: Andrew Morton, Ulrich Drepper, linux-kernel, mingo, Martin Schwidefsky
On Sat, May 22, 2004 at 06:58:41PM +0200, Arnd Bergmann wrote:
> On Friday 21 May 2004 09:43, Jakub Jelinek wrote:
> > Well, for s390/s390x there is a problem that it doesn't allow (yet) 6
> > argument syscalls at all, so one possibility for s390* is adding a wrapper around
> > sys_futex which will take the 5th and 6th arguments for FUTEX_CMP_REQUEUE
> > from a structure pointed to by 5th argument and pass that to sys_futex.
>
> I would really prefer not re-inventing brain-damage like ipc_kludge. I
> have tried to do a special s390_futex_cmp_requeue() syscall, which is
> still somewhat ugly. At least it has the advantage that it does not break
> the de-facto calling conventions for s390 syscalls that either pass
> up to five arguments in r2 to r6 or everything in an array pointed to
> by r2.
>
> Martin and Jakub, does the patch below look ok to you or should we do
> it in yet another way?
My personal preference is:
1) change s390* entry*.S so that all syscalls can use up to 6 arguments,
otherwise we'll have the same problem every now and then
(last syscall before this one was fadvise64_64 if I remember well)
2) allow sys_futex to have 6 arguments (the 6th on the stack, get_user'ed
from an s390 wrapper)
3) special structure passed in 5th argument for FUTEX_CMP_REQUEUE
4) your solution
(you have a special wrapper around futex anyway, so why not to handle
it there already and create completely new syscall).
That said, NPTL can deal with any of these variants and the decision
is up to Martin I think (assuming the base patch gets accepted, that is).
http://listman.redhat.com/archives/phil-list/2004-May/msg00031.html
is the latest version of the userland part of FUTEX_CMP_REQUEUE changes
(against CVS glibc) and there futex_requeue () macro is defined
in arch specific headers, so nptl/sysdeps/unix/sysv/linux/s390/lowlevellock.h
can do there its own black magic.
Jakub
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] Add FUTEX_CMP_REQUEUE futex op
2004-05-24 7:34 ` Jakub Jelinek
@ 2004-05-24 8:12 ` Andrew Morton
2004-05-24 8:19 ` Jakub Jelinek
2004-05-24 8:27 ` bert hubert
2004-05-24 17:34 ` Arnd Bergmann
1 sibling, 2 replies; 24+ messages in thread
From: Andrew Morton @ 2004-05-24 8:12 UTC (permalink / raw)
To: Jakub Jelinek
Cc: arnd, drepper, linux-kernel, mingo, schwidefsky, bert hubert
Jakub Jelinek <jakub@redhat.com> wrote:
>
> That said, NPTL can deal with any of these variants and the decision
> is up to Martin I think (assuming the base patch gets accepted, that is).
Well the race is real and does need a kernel fix, so I queued it up. I
guess if the new argument to sys_futex() breaks some architecture they can
independently add a new syscall for it, or send a fix. Mutter.
It's a bit of a shame that you need to be a rocket scientist to
understand the futex syscall interface. Bert, are you still maintaining
the manpage? If so, is there enough info here to update it?
FUTEX_REQUEUE operation has been added to the kernel mainly to improve
pthread_cond_broadcast which previously used FUTEX_WAKE INT_MAX op.
pthread_cond_broadcast releases internal condvar mutex before FUTEX_REQUEUE
operation, as otherwise the woken up thread most likely immediately sleeps
again on the internal condvar mutex until the broadcasting thread releases it.
Unfortunately this is racy and causes e.g.
http://sources.redhat.com/cgi-bin/cvsweb.cgi/libc/nptl/tst-cond16.c?rev=1.1&content-type=text/x-cvsweb-markup&cvsroot=glibc
to hang on SMP.
http://listman.redhat.com/archives/phil-list/2004-May/msg00023.html contains
analysis how the hang happens, the problem is if any thread does
pthread_cond_*wait in between releasing of the internal condvar mutex and
FUTEX_REQUEUE operation, a wrong thread might be awaken (and immediately go to
sleep again because it doesn't satisfy conditions for returning from
pthread_cond_*wait) while the right thread requeued on the associated mutex
and there would be nobody to wake that thread up.
The patch below extends FUTEX_REQUEUE operation with something FUTEX_WAIT
already uses:
FUTEX_CMP_REQUEUE is passed an additional argument which is the expected value
of *futex. Kernel then while holding the futex locks checks if *futex !=
expected and returns -EAGAIN in that case, while if it is equal, continues
with a normal FUTEX_REQUEUE operation. If the syscall returns -EAGAIN, NPTL
can fall back to FUTEX_WAKE INT_MAX operation which doesn't have this problem,
but is less efficient, while in the likely case that nobody hit the (small)
window the efficient FUTEX_REQUEUE operation is used.
---
25-akpm/include/linux/futex.h | 5 ++--
25-akpm/kernel/compat.c | 6 ++---
25-akpm/kernel/futex.c | 49 ++++++++++++++++++++++++++++++++++++------
3 files changed, 49 insertions(+), 11 deletions(-)
diff -puN include/linux/futex.h~add-futex_cmp_requeue-futex-op include/linux/futex.h
--- 25/include/linux/futex.h~add-futex_cmp_requeue-futex-op 2004-05-23 11:15:27.460533968 -0700
+++ 25-akpm/include/linux/futex.h 2004-05-23 11:15:27.467532904 -0700
@@ -8,9 +8,10 @@
#define FUTEX_WAKE (1)
#define FUTEX_FD (2)
#define FUTEX_REQUEUE (3)
-
+#define FUTEX_CMP_REQUEUE (4)
long do_futex(unsigned long uaddr, int op, int val,
- unsigned long timeout, unsigned long uaddr2, int val2);
+ unsigned long timeout, unsigned long uaddr2, int val2,
+ int val3);
#endif
diff -puN kernel/compat.c~add-futex_cmp_requeue-futex-op kernel/compat.c
--- 25/kernel/compat.c~add-futex_cmp_requeue-futex-op 2004-05-23 11:15:27.462533664 -0700
+++ 25-akpm/kernel/compat.c 2004-05-23 11:15:27.470532448 -0700
@@ -208,7 +208,7 @@ asmlinkage long compat_sys_sigprocmask(i
#ifdef CONFIG_FUTEX
asmlinkage long compat_sys_futex(u32 *uaddr, int op, int val,
- struct compat_timespec *utime, u32 *uaddr2)
+ struct compat_timespec *utime, u32 *uaddr2, int val3)
{
struct timespec t;
unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
@@ -219,11 +219,11 @@ asmlinkage long compat_sys_futex(u32 *ua
return -EFAULT;
timeout = timespec_to_jiffies(&t) + 1;
}
- if (op == FUTEX_REQUEUE)
+ if (op >= FUTEX_REQUEUE)
val2 = (int) (long) utime;
return do_futex((unsigned long)uaddr, op, val, timeout,
- (unsigned long)uaddr2, val2);
+ (unsigned long)uaddr2, val2, val3);
}
#endif
diff -puN kernel/futex.c~add-futex_cmp_requeue-futex-op kernel/futex.c
--- 25/kernel/futex.c~add-futex_cmp_requeue-futex-op 2004-05-23 11:15:27.463533512 -0700
+++ 25-akpm/kernel/futex.c 2004-05-23 11:15:27.469532600 -0700
@@ -96,6 +96,7 @@ struct futex_q {
*/
struct futex_hash_bucket {
spinlock_t lock;
+ unsigned int nqueued;
struct list_head chain;
};
@@ -318,13 +319,14 @@ out:
* physical page.
*/
static int futex_requeue(unsigned long uaddr1, unsigned long uaddr2,
- int nr_wake, int nr_requeue)
+ int nr_wake, int nr_requeue, int *valp)
{
union futex_key key1, key2;
struct futex_hash_bucket *bh1, *bh2;
struct list_head *head1;
struct futex_q *this, *next;
int ret, drop_count = 0;
+ unsigned int nqueued;
down_read(¤t->mm->mmap_sem);
@@ -338,12 +340,41 @@ static int futex_requeue(unsigned long u
bh1 = hash_futex(&key1);
bh2 = hash_futex(&key2);
+ nqueued = bh1->nqueued;
+ if (likely(valp != NULL)) {
+ int curval;
+
+ /* In order to avoid doing get_user while
+ holding bh1->lock and bh2->lock, nqueued
+ (monotonically increasing field) must be first
+ read, then *uaddr1 fetched from userland and
+ after acquiring lock nqueued field compared with
+ the stored value. The smp_mb () below
+ makes sure that bh1->nqueued is read from memory
+ before *uaddr1. */
+ smp_mb();
+
+ if (get_user(curval, (int *)uaddr1) != 0) {
+ ret = -EFAULT;
+ goto out;
+ }
+ if (curval != *valp) {
+ ret = -EAGAIN;
+ goto out;
+ }
+ }
+
if (bh1 < bh2)
spin_lock(&bh1->lock);
spin_lock(&bh2->lock);
if (bh1 > bh2)
spin_lock(&bh1->lock);
+ if (unlikely(nqueued != bh1->nqueued && valp != NULL)) {
+ ret = -EAGAIN;
+ goto out_unlock;
+ }
+
head1 = &bh1->chain;
list_for_each_entry_safe(this, next, head1, list) {
if (!match_futex (&this->key, &key1))
@@ -365,6 +396,7 @@ static int futex_requeue(unsigned long u
}
}
+out_unlock:
spin_unlock(&bh1->lock);
if (bh1 != bh2)
spin_unlock(&bh2->lock);
@@ -398,6 +430,7 @@ static void queue_me(struct futex_q *q,
q->lock_ptr = &bh->lock;
spin_lock(&bh->lock);
+ bh->nqueued++;
list_add_tail(&q->list, &bh->chain);
spin_unlock(&bh->lock);
}
@@ -625,7 +658,7 @@ out:
}
long do_futex(unsigned long uaddr, int op, int val, unsigned long timeout,
- unsigned long uaddr2, int val2)
+ unsigned long uaddr2, int val2, int val3)
{
int ret;
@@ -641,7 +674,10 @@ long do_futex(unsigned long uaddr, int o
ret = futex_fd(uaddr, val);
break;
case FUTEX_REQUEUE:
- ret = futex_requeue(uaddr, uaddr2, val, val2);
+ ret = futex_requeue(uaddr, uaddr2, val, val2, NULL);
+ break;
+ case FUTEX_CMP_REQUEUE:
+ ret = futex_requeue(uaddr, uaddr2, val, val2, &val3);
break;
default:
ret = -ENOSYS;
@@ -651,7 +687,8 @@ long do_futex(unsigned long uaddr, int o
asmlinkage long sys_futex(u32 __user *uaddr, int op, int val,
- struct timespec __user *utime, u32 __user *uaddr2)
+ struct timespec __user *utime, u32 __user *uaddr2,
+ int val3)
{
struct timespec t;
unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
@@ -665,11 +702,11 @@ asmlinkage long sys_futex(u32 __user *ua
/*
* requeue parameter in 'utime' if op == FUTEX_REQUEUE.
*/
- if (op == FUTEX_REQUEUE)
+ if (op >= FUTEX_REQUEUE)
val2 = (int) (long) utime;
return do_futex((unsigned long)uaddr, op, val, timeout,
- (unsigned long)uaddr2, val2);
+ (unsigned long)uaddr2, val2, val3);
}
static struct super_block *
_
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] Add FUTEX_CMP_REQUEUE futex op
2004-05-24 8:12 ` Andrew Morton
@ 2004-05-24 8:19 ` Jakub Jelinek
2004-05-28 13:09 ` DOCUMENTATION " bert hubert
2004-05-24 8:27 ` bert hubert
1 sibling, 1 reply; 24+ messages in thread
From: Jakub Jelinek @ 2004-05-24 8:19 UTC (permalink / raw)
To: Andrew Morton
Cc: arnd, drepper, linux-kernel, mingo, schwidefsky, bert hubert
On Mon, May 24, 2004 at 01:12:03AM -0700, Andrew Morton wrote:
> Jakub Jelinek <jakub@redhat.com> wrote:
> >
> > That said, NPTL can deal with any of these variants and the decision
> > is up to Martin I think (assuming the base patch gets accepted, that is).
>
> Well the race is real and does need a kernel fix, so I queued it up. I
> guess if the new argument to sys_futex() breaks some architecture they can
> independently add a new syscall for it, or send a fix. Mutter.
>
> It's a bit of a shame that you need to be a rocket scientist to
> understand the futex syscall interface. Bert, are you still maintaining
> the manpage? If so, is there enough info here to update it?
The latest futex(2) or futex(4) manpage I saw doesn't mention FUTEX_REQUEUE
at all.
Also, any futex man page should probably SEE ALSO Ulrich's futex paper:
http://people.redhat.com/drepper/futex.pdf
which helps understanding how to successfully use futexes, because
it is certainly not trivial.
Jakub
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] Add FUTEX_CMP_REQUEUE futex op
2004-05-24 8:12 ` Andrew Morton
2004-05-24 8:19 ` Jakub Jelinek
@ 2004-05-24 8:27 ` bert hubert
1 sibling, 0 replies; 24+ messages in thread
From: bert hubert @ 2004-05-24 8:27 UTC (permalink / raw)
To: Andrew Morton
Cc: Jakub Jelinek, arnd, drepper, linux-kernel, mingo, schwidefsky
On Mon, May 24, 2004 at 01:12:03AM -0700, Andrew Morton wrote:
> Jakub Jelinek <jakub@redhat.com> wrote:
> >
> > That said, NPTL can deal with any of these variants and the decision
> > is up to Martin I think (assuming the base patch gets accepted, that is).
>
> Well the race is real and does need a kernel fix, so I queued it up. I
> guess if the new argument to sys_futex() breaks some architecture they can
> independently add a new syscall for it, or send a fix. Mutter.
>
> It's a bit of a shame that you need to be a rocket scientist to
> understand the futex syscall interface. Bert, are you still maintaining
> the manpage? If so, is there enough info here to update it?
I'll spend some time with the manpage tomorrow. Some other comments have
been queued too and I'll process them. By that time, I'll hand over the
manpages to aeb.
Thanks for the heads up.
--
http://www.PowerDNS.com Open source, database driven DNS Software
http://lartc.org Linux Advanced Routing & Traffic Control HOWTO
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] Add FUTEX_CMP_REQUEUE futex op
2004-05-24 7:34 ` Jakub Jelinek
2004-05-24 8:12 ` Andrew Morton
@ 2004-05-24 17:34 ` Arnd Bergmann
1 sibling, 0 replies; 24+ messages in thread
From: Arnd Bergmann @ 2004-05-24 17:34 UTC (permalink / raw)
To: Jakub Jelinek
Cc: Andrew Morton, Ulrich Drepper, linux-kernel, mingo, Martin Schwidefsky
[-- Attachment #1: Type: text/plain, Size: 5806 bytes --]
On Monday 24 May 2004 09:34, Jakub Jelinek wrote:
> My personal preference is:
> 1) change s390* entry*.S so that all syscalls can use up to 6 arguments,
> otherwise we'll have the same problem every now and then
> (last syscall before this one was fadvise64_64 if I remember well)
> 2) allow sys_futex to have 6 arguments (the 6th on the stack, get_user'ed
> from an s390 wrapper)
> 3) special structure passed in 5th argument for FUTEX_CMP_REQUEUE
> 4) your solution
> (you have a special wrapper around futex anyway, so why not to handle
> it there already and create completely new syscall).
I have talked to Martin about it (he won't be online for one more week),
and he proposed a variant of 2): sys_futex, and possibly further new
syscalls that might get added, pass their sixth argument in %r7,
which is easier to get by than the user stack. Unlike doing it in
the standard system call path, this won't cause any overhead for
the other syscalls, but we need some wrappers.
I haven't tested this, because I'm not too familiar with glibc
changes. Jakub, if you are happy with this approach, can you
add the respective glibc stuff and try if this is any good?
Arnd <><
[PATCH] Fix FUTEX_CMP_REQUEUE on s390
From: Arnd Bergmann <arnd@arndb.de>
System calls normally can not have more than five arguments on s390
because of ABI restrictions, but the addition of the FUTEX_CMP_REQUEUE
added a sixth argument to sys_futex. This patch extends the regular
syscall conventions by an extra argument register for sys_futex.
The regular method of passing all arguments on the user stack for six
argument syscalls on s390 does not work here because it would break
the existing usage of futex.
I'm also fixing the sys_futex prototype in <linux/syscalls.h> to make
it possible to call it from s390_futex.
===== arch/s390/kernel/compat_linux.c 1.21 vs edited =====
--- 1.21/arch/s390/kernel/compat_linux.c Sat May 22 06:31:44 2004
+++ edited/arch/s390/kernel/compat_linux.c Mon May 24 18:51:26 2004
@@ -1263,3 +1263,16 @@
return ret;
}
+
+asmlinkage long compat_sys_futex(u32 *uaddr, int op, int val,
+ struct compat_timespec *utime, u32 *uaddr2, int val3);
+
+asmlinkage long
+sys32_futex(u32 __user *uaddr, int op, int val,
+ struct compat_timespec __user *utime, u32 __user *uaddr2)
+{
+ struct pt_regs *regs;
+
+ regs = __KSTK_PTREGS(current);
+ return compat_sys_futex(uaddr, op, val, utime, uaddr2, regs->gprs[7]);
+}
===== arch/s390/kernel/compat_wrapper.S 1.13 vs edited =====
--- 1.13/arch/s390/kernel/compat_wrapper.S Sat May 22 06:31:44 2004
+++ edited/arch/s390/kernel/compat_wrapper.S Mon May 24 19:04:15 2004
@@ -1090,14 +1090,14 @@
llgtr %r3,%r3 # struct stat64 *
jg sys32_fstat64 # branch to system call
- .globl compat_sys_futex_wrapper
-compat_sys_futex_wrapper:
+ .globl sys32_futex_wrapper
+sys32_futex_wrapper:
llgtr %r2,%r2 # u32 *
lgfr %r3,%r3 # int
lgfr %r4,%r4 # int
llgtr %r5,%r5 # struct compat_timespec *
llgtr %r6,%r6 # u32 *
- jg compat_sys_futex # branch to system call
+ jg sys32_futex # branch to system call
.globl sys32_setxattr_wrapper
sys32_setxattr_wrapper:
===== arch/s390/kernel/sys_s390.c 1.14 vs edited =====
--- 1.14/arch/s390/kernel/sys_s390.c Sat May 22 06:31:44 2004
+++ edited/arch/s390/kernel/sys_s390.c Mon May 24 18:51:26 2004
@@ -265,3 +265,12 @@
return sys_fadvise64_64(a.fd, a.offset, a.len, a.advice);
}
+asmlinkage long
+s390_futex(u32 __user *uaddr, int op, int val,
+ struct timespec __user *utime, u32 __user *uaddr2)
+{
+ struct pt_regs *regs;
+
+ regs = __KSTK_PTREGS(current);
+ return sys_futex(uaddr, op, val, utime, uaddr2, regs->gprs[7]);
+}
===== arch/s390/kernel/syscalls.S 1.12 vs edited =====
--- 1.12/arch/s390/kernel/syscalls.S Sat May 22 06:34:19 2004
+++ edited/arch/s390/kernel/syscalls.S Mon May 24 19:03:28 2004
@@ -246,7 +246,7 @@
SYSCALL(sys_fremovexattr,sys_fremovexattr,sys32_fremovexattr_wrapper) /* 235 */
SYSCALL(sys_gettid,sys_gettid,sys_gettid)
SYSCALL(sys_tkill,sys_tkill,sys_tkill)
-SYSCALL(sys_futex,sys_futex,compat_sys_futex_wrapper)
+SYSCALL(s390_futex,s390_futex,sys32_futex_wrapper)
SYSCALL(sys_sched_setaffinity,sys_sched_setaffinity,sys32_sched_setaffinity_wrapper)
SYSCALL(sys_sched_getaffinity,sys_sched_getaffinity,sys32_sched_getaffinity_wrapper) /* 240 */
SYSCALL(sys_tgkill,sys_tgkill,sys_tgkill)
===== include/linux/syscalls.h 1.6 vs edited =====
--- 1.6/include/linux/syscalls.h Sat May 22 06:31:47 2004
+++ edited/include/linux/syscalls.h Mon May 24 18:51:26 2004
@@ -165,7 +165,7 @@
asmlinkage long sys_waitpid(pid_t pid, unsigned int __user *stat_addr, int options);
asmlinkage long sys_set_tid_address(int __user *tidptr);
asmlinkage long sys_futex(u32 __user *uaddr, int op, int val,
- struct timespec __user *utime, u32 __user *uaddr2);
+ struct timespec __user *utime, u32 __user *uaddr2, int val3);
asmlinkage long sys_init_module(void __user *umod, unsigned long len,
const char __user *uargs);
===== kernel/fork.c 1.169 vs edited =====
--- 1.169/kernel/fork.c Sat May 22 06:32:19 2004
+++ edited/kernel/fork.c Mon May 24 18:51:26 2004
@@ -516,7 +516,7 @@
* not set up a proper pointer then tough luck.
*/
put_user(0, tidptr);
- sys_futex(tidptr, FUTEX_WAKE, 1, NULL, NULL);
+ sys_futex(tidptr, FUTEX_WAKE, 1, NULL, NULL, 0);
}
}
===== kernel/futex.c 1.40 vs edited =====
--- 1.40/kernel/futex.c Sat May 22 09:03:35 2004
+++ edited/kernel/futex.c Mon May 24 18:51:26 2004
@@ -38,6 +38,7 @@
#include <linux/futex.h>
#include <linux/mount.h>
#include <linux/pagemap.h>
+#include <linux/syscalls.h>
#define FUTEX_HASHBITS 8
[-- Attachment #2: signature --]
[-- Type: application/pgp-signature, Size: 189 bytes --]
^ permalink raw reply [flat|nested] 24+ messages in thread
* DOCUMENTATION Re: [PATCH] Add FUTEX_CMP_REQUEUE futex op
2004-05-24 8:19 ` Jakub Jelinek
@ 2004-05-28 13:09 ` bert hubert
2004-05-28 14:02 ` Jakub Jelinek
0 siblings, 1 reply; 24+ messages in thread
From: bert hubert @ 2004-05-28 13:09 UTC (permalink / raw)
To: Jakub Jelinek
Cc: Andrew Morton, arnd, drepper, linux-kernel, mingo, schwidefsky
> > It's a bit of a shame that you need to be a rocket scientist to
> > understand the futex syscall interface. Bert, are you still maintaining
> > the manpage? If so, is there enough info here to update it?
>
> The latest futex(2) or futex(4) manpage I saw doesn't mention FUTEX_REQUEUE
> at all.
Now fixed, please see http://ds9a.nl/futex-manpages - but please realise I'm
somewhat out of my depth. Comments welcome.
Futexes have mutated into complicated things, I wonder if this was the last
of the changes needed.
The big change in the manpages is the addition of FUTEX_REQUEUE and
FUTEX_CMP_REQUEUE. Furthermore, I realised that the futex system call does
not return EAGAIN etc, it returns -EAGAIN. I guesstimated that CMP_REQUEUE
will be merged before 2.6.7.
To clarify the overloaded situation wrt the futex syscall, I split it up in
three prototypes.
Ulrich, does/will glibc provide a futex(2) function? Or should people just
call the syscall themselves?
There were also some complaints I did not address futexfs but as far as I
can see, it is a kernel internal matter of no interest to userspace coders?
> Also, any futex man page should probably SEE ALSO Ulrich's futex paper:
> http://people.redhat.com/drepper/futex.pdf
Referenced, thanks!
--
http://www.PowerDNS.com Open source, database driven DNS Software
http://lartc.org Linux Advanced Routing & Traffic Control HOWTO
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: DOCUMENTATION Re: [PATCH] Add FUTEX_CMP_REQUEUE futex op
2004-05-28 13:09 ` DOCUMENTATION " bert hubert
@ 2004-05-28 14:02 ` Jakub Jelinek
2004-05-28 15:39 ` bert hubert
0 siblings, 1 reply; 24+ messages in thread
From: Jakub Jelinek @ 2004-05-28 14:02 UTC (permalink / raw)
To: bert hubert, Andrew Morton, arnd, drepper, linux-kernel, mingo,
schwidefsky
On Fri, May 28, 2004 at 03:09:35PM +0200, bert hubert wrote:
> > > It's a bit of a shame that you need to be a rocket scientist to
> > > understand the futex syscall interface. Bert, are you still maintaining
> > > the manpage? If so, is there enough info here to update it?
> >
> > The latest futex(2) or futex(4) manpage I saw doesn't mention FUTEX_REQUEUE
> > at all.
>
> Now fixed, please see http://ds9a.nl/futex-manpages - but please realise I'm
> somewhat out of my depth. Comments welcome.
>
> Futexes have mutated into complicated things, I wonder if this was the last
> of the changes needed.
>
> The big change in the manpages is the addition of FUTEX_REQUEUE and
> FUTEX_CMP_REQUEUE. Furthermore, I realised that the futex system call does
> not return EAGAIN etc, it returns -EAGAIN. I guesstimated that CMP_REQUEUE
futex behaves like most of the other syscalls, i.e. on error returns -1
and sets errno to the error value, like EAGAIN, EFAULT etc.
This is done in a userland wrapper around the syscall, how the kernel tells
userland an error happened and what errno value should be set is an
architecture specific implementation detail.
The man pages in the 2nd section document the behaviour of the userland
wrappers (see e.g. read(2)), not what the kernel actually returns.
> Ulrich, does/will glibc provide a futex(2) function? Or should people just
> call the syscall themselves?
glibc doesn't provide a futex(2) function, so at least ATM people can use
#include <sys/syscall.h>
syscall (SYS_futex, &futex, FUTEX_xyz, ...) or come up with their own
assembly to invoke the syscall.
Jakub
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: DOCUMENTATION Re: [PATCH] Add FUTEX_CMP_REQUEUE futex op
2004-05-28 14:02 ` Jakub Jelinek
@ 2004-05-28 15:39 ` bert hubert
0 siblings, 0 replies; 24+ messages in thread
From: bert hubert @ 2004-05-28 15:39 UTC (permalink / raw)
To: Jakub Jelinek; +Cc: arnd, drepper, jamie, linux-kernel, mingo, schwidefsky
On Fri, May 28, 2004 at 10:02:34AM -0400, Jakub Jelinek wrote:
> The man pages in the 2nd section document the behaviour of the userland
> wrappers (see e.g. read(2)), not what the kernel actually returns.
Ok - but in the absence of futex(2), is it perhaps better to move this page
to futex(9)? Or merge it into futex(4)?
> > Ulrich, does/will glibc provide a futex(2) function? Or should people just
> > call the syscall themselves?
>
> glibc doesn't provide a futex(2) function, so at least ATM people can use
> #include <sys/syscall.h>
If it will provide one, I'll change the page to be in line with how futex(2)
will probably work, with errno etc.
Regarding the verbiage, I need input about the use of the word 'process', as
I'm not sure if there is a generic word covering both processes and threads.
Jamie suggests 'tasks', but that is kernel lingo and of little use for
userspace developers. Suggestions?
Thanks.
bert
--
http://www.PowerDNS.com Open source, database driven DNS Software
http://lartc.org Linux Advanced Routing & Traffic Control HOWTO
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] Add FUTEX_CMP_REQUEUE futex op
2004-05-20 9:38 [PATCH] Add FUTEX_CMP_REQUEUE futex op Jakub Jelinek
2004-05-20 22:52 ` Andrew Morton
2004-05-21 23:05 ` Andrew Morton
@ 2004-05-29 3:13 ` Rusty Russell
2 siblings, 0 replies; 24+ messages in thread
From: Rusty Russell @ 2004-05-29 3:13 UTC (permalink / raw)
To: Jakub Jelinek; +Cc: lkml - Kernel Mailing List, mingo, Andrew Morton
On Thu, 2004-05-20 at 19:38, Jakub Jelinek wrote:
> Hi!
>
> FUTEX_REQUEUE operation has been added to the kernel mainly to improve
> pthread_cond_broadcast which previously used FUTEX_WAKE INT_MAX op.
For the record, I wasn't happy about adding FUTEX_REQUEUE to optimize
for suboptimal apps using the horrid pthreads interface, but I
understand the benchmarking realities.
I'm certainly way less than thrilled to discover that it doesn't work,
and we need to implement FUTEX_CMP_REQUEUE, and of course can't get rid
of FUTEX_REQUEUE.
The base futex concept and interface is simple (although the
implementation w/ the Linux mm subsystem proved interesting in the
corner cases); it's increasingly becoming a horror with these kind of
hacks.
8(
Rusty.
--
Anyone who quotes me in their signature is an idiot -- Rusty Russell
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] Add FUTEX_CMP_REQUEUE futex op
2004-06-09 20:04 ` Pete Zaitcev
2004-06-09 22:08 ` Arnd Bergmann
@ 2004-06-11 8:35 ` Martin Schwidefsky
1 sibling, 0 replies; 24+ messages in thread
From: Martin Schwidefsky @ 2004-06-11 8:35 UTC (permalink / raw)
To: Pete Zaitcev; +Cc: linux-kernel, zaitcev
> > --- linux-2.6/arch/s390/kernel/compat_wrapper.S Mon Jun 7
16:07:24 2004
> > +++ linux-2.6-s390/arch/s390/kernel/compat_wrapper.S Mon Jun
7 16:07:53 2004
> > @@ -1097,6 +1097,8 @@
> > lgfr %r4,%r4 # int
> > llgtr %r5,%r5 #
struct compat_timespec *
> > llgtr %r6,%r6 #
u32 *
> > + lgf %r0,164(%r15) # int
> > + stg %r0,160(%r15)
> > jg compat_sys_futex # branch to system call
> >
> > .globl sys32_setxattr_wrapper
>
> Is it just me, or this could he above stand a use of STACK_FRAME_OVERHEAD
> instead of 160? I envision a time when Ulrich Weigand comes out with
> a gcc -fkernel, and at that time we'll need all such references
> configurable.
No, the offset of the arguments > 5 on the stack is 96 bytes on 31 bit and
160 bytes on 64 bit, period. This won't change because it is ABI relevant.
> > diff -urN linux-2.6/include/asm-s390/ptrace.h linux-2.6-s390/include/asm-s390/ptrace.h
> > --- linux-2.6/include/asm-s390/ptrace.h Mon May 10 04:32:54 2004
> > +++ linux-2.6-s390/include/asm-s390/ptrace.h Mon Jun 7 16:07:53 2004
> > @@ -303,6 +303,7 @@
> > */
> > struct pt_regs
> > {
> > + unsigned long args[1];
> > psw_t psw;
>
> This worries me, together with
> (__u32*)((addr_t) &__KSTK_PTREGS(child)->psw
>
> Why not to place the necessary word outside of the struct?
> It just logically doesn't belong. Might be just as easy to
> do that mvc to other place.
Well my first implementation had the arguments outside the pt_regs. But
I came to the conclusion that this is wrong. First of all pt_regs is
supposed to describe what is put on the stack during a system call and
this includes the additional parameter. The second, more important
thing is that without a structure describing exactly what's put on the
stack it is impossible for lcrash or crash to decode the system entry
stack frame. This is something we definitly want to have.
I though about the implications of the pt_regs changes for some time.
Since the pt_regs structure is solely used to put the register on the
kernel stack I don't see a problem. Every other structure is independent
of it (at least in the 2.6 kernels).
blue skies,
Martin
Linux/390 Design & Development, IBM Deutschland Entwicklung GmbH
Schönaicherstr. 220, D-71032 Böblingen, Telefon: 49 - (0)7031 - 16-2247
E-Mail: schwidefsky@de.ibm.com
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] Add FUTEX_CMP_REQUEUE futex op
2004-06-09 20:04 ` Pete Zaitcev
@ 2004-06-09 22:08 ` Arnd Bergmann
2004-06-11 8:35 ` Martin Schwidefsky
1 sibling, 0 replies; 24+ messages in thread
From: Arnd Bergmann @ 2004-06-09 22:08 UTC (permalink / raw)
To: Pete Zaitcev; +Cc: Martin Schwidefsky, linux-kernel
On Wednesday 09 June 2004 22:04, Pete Zaitcev wrote:
> Is it just me, or this could he above stand a use of STACK_FRAME_OVERHEAD
> instead of 160? I envision a time when Ulrich Weigand comes out with
> a gcc -fkernel, and at that time we'll need all such references
> configurable.
It wouldn't hurt, but even if we get -mkernel support in gcc, that doesn't
mean that the stack frame size has to change: You can easily have %r15
point to 160 bytes above the register save area without actually using all
that space for saving registers. The only thing that would need to change is
the location of the backchain pointer.
> Why not to place the necessary word outside of the struct?
> It just logically doesn't belong. Might be just as easy to
> do that mvc to other place.
That actually was what Martin tried in his first implementation (well, the
last one before the one he submitted). It didn't work out because some code
relied on the stack starting right after pt_regs. Martin can probably clarify
that on Friday.
Arnd <><
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] Add FUTEX_CMP_REQUEUE futex op
[not found] <mailman.1086629984.12568.linux-kernel2news@redhat.com>
@ 2004-06-09 20:04 ` Pete Zaitcev
2004-06-09 22:08 ` Arnd Bergmann
2004-06-11 8:35 ` Martin Schwidefsky
0 siblings, 2 replies; 24+ messages in thread
From: Pete Zaitcev @ 2004-06-09 20:04 UTC (permalink / raw)
To: Martin Schwidefsky; +Cc: linux-kernel, zaitcev
On Mon, 7 Jun 2004 18:03:49 +0200
Martin Schwidefsky <schwidefsky@de.ibm.com> wrote:
> --- linux-2.6/arch/s390/kernel/compat_wrapper.S Mon Jun 7 16:07:24 2004
> +++ linux-2.6-s390/arch/s390/kernel/compat_wrapper.S Mon Jun 7 16:07:53 2004
> @@ -1097,6 +1097,8 @@
> lgfr %r4,%r4 # int
> llgtr %r5,%r5 # struct compat_timespec *
> llgtr %r6,%r6 # u32 *
> + lgf %r0,164(%r15) # int
> + stg %r0,160(%r15)
> jg compat_sys_futex # branch to system call
>
> .globl sys32_setxattr_wrapper
Is it just me, or this could he above stand a use of STACK_FRAME_OVERHEAD
instead of 160? I envision a time when Ulrich Weigand comes out with
a gcc -fkernel, and at that time we'll need all such references
configurable.
> diff -urN linux-2.6/include/asm-s390/ptrace.h linux-2.6-s390/include/asm-s390/ptrace.h
> --- linux-2.6/include/asm-s390/ptrace.h Mon May 10 04:32:54 2004
> +++ linux-2.6-s390/include/asm-s390/ptrace.h Mon Jun 7 16:07:53 2004
> @@ -303,6 +303,7 @@
> */
> struct pt_regs
> {
> + unsigned long args[1];
> psw_t psw;
This worries me, together with
(__u32*)((addr_t) &__KSTK_PTREGS(child)->psw
Why not to place the necessary word outside of the struct?
It just logically doesn't belong. Might be just as easy to
do that mvc to other place.
I think I'll try to scope such an implemenation.
-- Pete
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] Add FUTEX_CMP_REQUEUE futex op
@ 2004-06-07 16:03 Martin Schwidefsky
0 siblings, 0 replies; 24+ messages in thread
From: Martin Schwidefsky @ 2004-06-07 16:03 UTC (permalink / raw)
To: arnd; +Cc: jakub, akpm, drepper, linux-kernel, mingo
> > My personal preference is:
> > 1) change s390* entry*.S so that all syscalls can use up to 6 arguments,
> > otherwise we'll have the same problem every now and then
> > (last syscall before this one was fadvise64_64 if I remember well)
> > 2) allow sys_futex to have 6 arguments (the 6th on the stack, get_user'ed
> > from an s390 wrapper)
> > 3) special structure passed in 5th argument for FUTEX_CMP_REQUEUE
> > 4) your solution
> > (you have a special wrapper around futex anyway, so why not to handle
> > it there already and create completely new syscall).
>
> I have talked to Martin about it (he won't be online for one more week),
> and he proposed a variant of 2): sys_futex, and possibly further new
> syscalls that might get added, pass their sixth argument in %r7,
> which is easier to get by than the user stack. Unlike doing it in
> the standard system call path, this won't cause any overhead for
> the other syscalls, but we need some wrappers.
I prefer to store the additional argument in %r7 for ALL system calls
to avoid addtitional system call wrappers. That way new system calls
with 6 arguments can be added without problems. The store for the
additional 6th argument can be hidden in a agi slot so this is pretty
much the perfect solution.
blue skies,
Martin.
---
[PATCH] s390: add support for 6 system call arguments (FUTEX_CMP_REQUEUE).
This patch adds support for 6 system call arguments on s390. The
first exploiter of this will be the sys_futex system call for the
FUTEX_CMP_REQUEUE operation. The idea is simple: use register %r7
for the 6th argument. This can be extended to 7/8/9/... arguments
if there ever will be the need for it. To call the system call
function in the kernel the additional arguments needs to get stored
on the stack. 8 bytes are added to the head of struct pt_regs. %r7
is stored to the additional field for all system calls. The store
is hidden in a address-generation-interlock slot, it doesn't slow
down the system call path.
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
diffstat:
arch/s390/kernel/asm-offsets.c | 1 +
arch/s390/kernel/compat_wrapper.S | 2 ++
arch/s390/kernel/entry.S | 10 +++++++---
arch/s390/kernel/entry64.S | 10 +++++++---
arch/s390/kernel/ptrace.c | 10 +++++-----
include/asm-s390/ptrace.h | 1 +
6 files changed, 23 insertions(+), 11 deletions(-)
diff -urN linux-2.6/arch/s390/kernel/asm-offsets.c linux-2.6-s390/arch/s390/kernel/asm-offsets.c
--- linux-2.6/arch/s390/kernel/asm-offsets.c Mon May 10 04:32:26 2004
+++ linux-2.6-s390/arch/s390/kernel/asm-offsets.c Mon Jun 7 16:10:03 2004
@@ -32,6 +32,7 @@
DEFINE(__TI_cpu, offsetof(struct thread_info, cpu),);
DEFINE(__TI_precount, offsetof(struct thread_info, preempt_count),);
BLANK();
+ DEFINE(__PT_ARGS, offsetof(struct pt_regs, args),);
DEFINE(__PT_PSW, offsetof(struct pt_regs, psw),);
DEFINE(__PT_GPRS, offsetof(struct pt_regs, gprs),);
DEFINE(__PT_ORIG_GPR2, offsetof(struct pt_regs, orig_gpr2),);
diff -urN linux-2.6/arch/s390/kernel/compat_wrapper.S linux-2.6-s390/arch/s390/kernel/compat_wrapper.S
--- linux-2.6/arch/s390/kernel/compat_wrapper.S Mon Jun 7 16:07:24 2004
+++ linux-2.6-s390/arch/s390/kernel/compat_wrapper.S Mon Jun 7 16:07:53 2004
@@ -1097,6 +1097,8 @@
lgfr %r4,%r4 # int
llgtr %r5,%r5 # struct compat_timespec *
llgtr %r6,%r6 # u32 *
+ lgf %r0,164(%r15) # int
+ stg %r0,160(%r15)
jg compat_sys_futex # branch to system call
.globl sys32_setxattr_wrapper
diff -urN linux-2.6/arch/s390/kernel/entry.S linux-2.6-s390/arch/s390/kernel/entry.S
--- linux-2.6/arch/s390/kernel/entry.S Mon Jun 7 16:07:24 2004
+++ linux-2.6-s390/arch/s390/kernel/entry.S Mon Jun 7 16:11:55 2004
@@ -24,7 +24,8 @@
* Stack layout for the system_call stack entry.
* The first few entries are identical to the user_regs_struct.
*/
-SP_PTREGS = STACK_FRAME_OVERHEAD
+SP_PTREGS = STACK_FRAME_OVERHEAD
+SP_ARGS = STACK_FRAME_OVERHEAD + __PT_ARGS
SP_PSW = STACK_FRAME_OVERHEAD + __PT_PSW
SP_R0 = STACK_FRAME_OVERHEAD + __PT_GPRS
SP_R1 = STACK_FRAME_OVERHEAD + __PT_GPRS + 4
@@ -230,12 +231,14 @@
sysc_enter:
GET_THREAD_INFO # load pointer to task_struct to R9
sla %r7,2 # *4 and test for svc 0
- bnz BASED(sysc_do_restart) # svc number > 0
+ bnz BASED(sysc_nr_ok) # svc number > 0
# svc 0: system call number in %r1
cl %r1,BASED(.Lnr_syscalls)
- bnl BASED(sysc_do_restart)
+ bnl BASED(sysc_nr_ok)
lr %r7,%r1 # copy svc number to %r7
sla %r7,2 # *4
+sysc_nr_ok:
+ mvc SP_ARGS(4,%r15),SP_R7(%r15)
sysc_do_restart:
tm __TI_flags+3(%r9),(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT)
l %r8,sys_call_table-system_call(%r7,%r13) # get system call addr.
@@ -510,6 +513,7 @@
lr %r7,%r1 # copy svc number to %r7
sla %r7,2 # *4
pgm_svcstd:
+ mvc SP_ARGS(4,%r15),SP_R7(%r15)
tm __TI_flags+3(%r9),(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT)
l %r8,sys_call_table-system_call(%r7,%r13) # get system call addr.
bnz BASED(pgm_tracesys)
diff -urN linux-2.6/arch/s390/kernel/entry64.S linux-2.6-s390/arch/s390/kernel/entry64.S
--- linux-2.6/arch/s390/kernel/entry64.S Mon Jun 7 16:07:24 2004
+++ linux-2.6-s390/arch/s390/kernel/entry64.S Mon Jun 7 16:13:17 2004
@@ -24,7 +24,8 @@
* Stack layout for the system_call stack entry.
* The first few entries are identical to the user_regs_struct.
*/
-SP_PTREGS = STACK_FRAME_OVERHEAD
+SP_PTREGS = STACK_FRAME_OVERHEAD
+SP_ARGS = STACK_FRAME_OVERHEAD + __PT_ARGS
SP_PSW = STACK_FRAME_OVERHEAD + __PT_PSW
SP_R0 = STACK_FRAME_OVERHEAD + __PT_GPRS
SP_R1 = STACK_FRAME_OVERHEAD + __PT_GPRS + 8
@@ -214,13 +215,15 @@
sysc_enter:
GET_THREAD_INFO # load pointer to task_struct to R9
slag %r7,%r7,2 # *4 and test for svc 0
- jnz sysc_do_restart
+ jnz sysc_nr_ok
# svc 0: system call number in %r1
lghi %r0,NR_syscalls
clr %r1,%r0
- jnl sysc_do_restart
+ jnl sysc_nr_ok
lgfr %r7,%r1 # clear high word in r1
slag %r7,%r7,2 # svc 0: system call number in %r1
+sysc_nr_ok:
+ mvc SP_ARGS(8,%r15),SP_R7(%r15)
sysc_do_restart:
larl %r10,sys_call_table
#ifdef CONFIG_S390_SUPPORT
@@ -542,6 +545,7 @@
clr %r1,%r0
slag %r7,%r1,2
pgm_svcstd:
+ mvc SP_ARGS(8,%r15),SP_R7(%r15)
larl %r10,sys_call_table
#ifdef CONFIG_S390_SUPPORT
tm SP_PSW+3(%r15),0x01 # are we running in 31 bit mode ?
diff -urN linux-2.6/arch/s390/kernel/ptrace.c linux-2.6-s390/arch/s390/kernel/ptrace.c
--- linux-2.6/arch/s390/kernel/ptrace.c Mon May 10 04:33:19 2004
+++ linux-2.6-s390/arch/s390/kernel/ptrace.c Mon Jun 7 16:07:53 2004
@@ -141,7 +141,7 @@
/*
* psw and gprs are stored on the stack
*/
- tmp = *(addr_t *)((addr_t) __KSTK_PTREGS(child) + addr);
+ tmp = *(addr_t *)((addr_t) &__KSTK_PTREGS(child)->psw + addr);
if (addr == (addr_t) &dummy->regs.psw.mask)
/* Remove per bit from user psw. */
tmp &= ~PSW_MASK_PER;
@@ -215,7 +215,7 @@
high order bit but older gdb's rely on it */
data |= PSW_ADDR_AMODE;
#endif
- *(addr_t *)((addr_t) __KSTK_PTREGS(child) + addr) = data;
+ *(addr_t *)((addr_t) &__KSTK_PTREGS(child)->psw + addr) = data;
} else if (addr < (addr_t) (&dummy->regs.orig_gpr2)) {
/*
@@ -360,7 +360,7 @@
PSW32_ADDR_AMODE31;
} else {
/* gpr 0-15 */
- tmp = *(__u32 *)((addr_t) __KSTK_PTREGS(child) +
+ tmp = *(__u32 *)((addr_t) &__KSTK_PTREGS(child)->psw +
addr*2 + 4);
}
} else if (addr < (addr_t) (&dummy32->regs.orig_gpr2)) {
@@ -439,8 +439,8 @@
(__u64) tmp & PSW32_ADDR_INSN;
} else {
/* gpr 0-15 */
- *(__u32*)((addr_t) __KSTK_PTREGS(child) + addr*2 + 4) =
- tmp;
+ *(__u32*)((addr_t) &__KSTK_PTREGS(child)->psw
+ + addr*2 + 4) = tmp;
}
} else if (addr < (addr_t) (&dummy32->regs.orig_gpr2)) {
/*
diff -urN linux-2.6/include/asm-s390/ptrace.h linux-2.6-s390/include/asm-s390/ptrace.h
--- linux-2.6/include/asm-s390/ptrace.h Mon May 10 04:32:54 2004
+++ linux-2.6-s390/include/asm-s390/ptrace.h Mon Jun 7 16:07:53 2004
@@ -303,6 +303,7 @@
*/
struct pt_regs
{
+ unsigned long args[1];
psw_t psw;
unsigned long gprs[NUM_GPRS];
unsigned long orig_gpr2;
^ permalink raw reply [flat|nested] 24+ messages in thread
end of thread, other threads:[~2004-06-11 8:35 UTC | newest]
Thread overview: 24+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2004-05-20 9:38 [PATCH] Add FUTEX_CMP_REQUEUE futex op Jakub Jelinek
2004-05-20 22:52 ` Andrew Morton
2004-05-21 6:06 ` Ulrich Drepper
2004-05-21 6:36 ` Andrew Morton
2004-05-21 7:15 ` Ulrich Drepper
2004-05-21 7:43 ` Jakub Jelinek
2004-05-22 16:58 ` Arnd Bergmann
2004-05-24 7:34 ` Jakub Jelinek
2004-05-24 8:12 ` Andrew Morton
2004-05-24 8:19 ` Jakub Jelinek
2004-05-28 13:09 ` DOCUMENTATION " bert hubert
2004-05-28 14:02 ` Jakub Jelinek
2004-05-28 15:39 ` bert hubert
2004-05-24 8:27 ` bert hubert
2004-05-24 17:34 ` Arnd Bergmann
2004-05-21 7:05 ` Ingo Molnar
2004-05-21 23:05 ` Andrew Morton
2004-05-22 10:10 ` Ingo Oeser
2004-05-23 17:33 ` Jakub Jelinek
2004-05-29 3:13 ` Rusty Russell
2004-06-07 16:03 Martin Schwidefsky
[not found] <mailman.1086629984.12568.linux-kernel2news@redhat.com>
2004-06-09 20:04 ` Pete Zaitcev
2004-06-09 22:08 ` Arnd Bergmann
2004-06-11 8:35 ` Martin Schwidefsky
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).