LKML Archive on lore.kernel.org
* [PATCH 4/16] LTTng 0.6.36 for 2.6.18 : atomic UP operations on SMP
@ 2006-11-24 21:55 Mathieu Desnoyers
  2006-11-27 16:56 ` Christoph Hellwig
  0 siblings, 1 reply; 13+ messages in thread

From: Mathieu Desnoyers @ 2006-11-24 21:55 UTC (permalink / raw)
To: linux-kernel, Christoph Hellwig, Andrew Morton, Ingo Molnar,
    Greg Kroah-Hartman, Thomas Gleixner, Tom Zanussi, Karim Yaghmour,
    Paul Mundt, Jes Sorensen, Richard J Moore, Martin J. Bligh,
    Michel Dagenais, Douglas Niehaus, ltt-dev, systemtap

This patch adds a UP flavor of SMP operations which is intended to provide
atomic modification of per-cpu data without suffering from the LOCK prefix or
memory barrier performance cost. Note that extreme care must be taken when
accessing this data from different CPUs : smp_wmb() and smp_rmb() must be used
explicitly. As this last scenario happens very rarely in LTTng, it provides an
interesting performance gain.

Some tests to see the speedup given by using atomic-up.h on per-cpu variables.

Non-LOCKed atomic ops that I now use on SMP :

A test run on a 3GHz Pentium 4 shows that (20000 loops) :

irq save/restore pair          210.60 ns
cmpxchg                         49.46 ns (76 % speedup)
cmpxchg-up (no lock prefix)      9.00 ns (95 % speedup)

On my 3GHz Pentium 4, it takes 255.83ns to log a 4-byte event when the LOCK
prefix is used (without atomic-up). When I enable my modified version, it
drops to 205.63ns. Therefore, the relative change is :
(205.63-255.83)/255.83 * 100% = -19.62 %, i.e. a 19.62 % reduction in event
write time.
(Test : 3*20000 loops of 4-byte event log in flight recorder mode)

patch04-2.6.18-lttng-core-0.6.36-atomic_up.diff

Signed-off-by : Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>

--BEGIN--
--- /dev/null +++ b/include/asm-i386/atomic-up.h @@ -0,0 +1,229 @@ +#ifndef __ARCH_I386_ATOMIC_UP__ +#define __ARCH_I386_ATOMIC_UP__ + +#include <linux/compiler.h> +#include <asm/processor.h> +#include <asm/atomic.h> + +/* + * atomic_up variants insure operation atomicity only if the variable is not + * shared between cpus. This is useful to have per-cpu atomic operations to + * protect from contexts like non-maskable interrupts without the LOCK prefix + * performance cost. + */ + +/** + * atomic_up_add - add integer to atomic variable + * @i: integer value to add + * @v: pointer of type atomic_t + * + * Atomically adds @i to @v. + */ +static __inline__ void atomic_up_add(int i, atomic_t *v) +{ + __asm__ __volatile__( + "addl %1,%0" + :"+m" (v->counter) + :"ir" (i)); +} + +/** + * atomic_up_sub - subtract the atomic variable + * @i: integer value to subtract + * @v: pointer of type atomic_t + * + * Atomically subtracts @i from @v. + */ +static __inline__ void atomic_up_sub(int i, atomic_t *v) +{ + __asm__ __volatile__( + "subl %1,%0" + :"+m" (v->counter) + :"ir" (i)); +} + +/** + * atomic_up_sub_and_test - subtract value from variable and test result + * @i: integer value to subtract + * @v: pointer of type atomic_t + * + * Atomically subtracts @i from @v and returns + * true if the result is zero, or false for all + * other cases. + */ +static __inline__ int atomic_up_sub_and_test(int i, atomic_t *v) +{ + unsigned char c; + + __asm__ __volatile__( + "subl %2,%0; sete %1" + :"+m" (v->counter), "=qm" (c) + :"ir" (i) : "memory"); + return c; +} + +/** + * atomic_up_inc - increment atomic variable + * @v: pointer of type atomic_t + * + * Atomically increments @v by 1.
+ */ +static __inline__ void atomic_up_inc(atomic_t *v) +{ + __asm__ __volatile__( + "incl %0" + :"+m" (v->counter)); +} + +/** + * atomic_up_dec - decrement atomic variable + * @v: pointer of type atomic_t + * + * Atomically decrements @v by 1. + */ +static __inline__ void atomic_up_dec(atomic_t *v) +{ + __asm__ __volatile__( + "decl %0" + :"+m" (v->counter)); +} + +/** + * atomic_up_dec_and_test - decrement and test + * @v: pointer of type atomic_t + * + * Atomically decrements @v by 1 and + * returns true if the result is 0, or false for all other + * cases. + */ +static __inline__ int atomic_up_dec_and_test(atomic_t *v) +{ + unsigned char c; + + __asm__ __volatile__( + "decl %0; sete %1" + :"+m" (v->counter), "=qm" (c) + : : "memory"); + return c != 0; +} + +/** + * atomic_up_inc_and_test - increment and test + * @v: pointer of type atomic_t + * + * Atomically increments @v by 1 + * and returns true if the result is zero, or false for all + * other cases. + */ +static __inline__ int atomic_up_inc_and_test(atomic_t *v) +{ + unsigned char c; + + __asm__ __volatile__( + "incl %0; sete %1" + :"+m" (v->counter), "=qm" (c) + : : "memory"); + return c != 0; +} + +/** + * atomic_up_add_negative - add and test if negative + * @v: pointer of type atomic_t + * @i: integer value to add + * + * Atomically adds @i to @v and returns true + * if the result is negative, or false when + * result is greater than or equal to zero. + */ +static __inline__ int atomic_up_add_negative(int i, atomic_t *v) +{ + unsigned char c; + + __asm__ __volatile__( + "addl %2,%0; sets %1" + :"+m" (v->counter), "=qm" (c) + :"ir" (i) : "memory"); + return c; +} + +/** + * atomic_up_add_return - add and return + * @v: pointer of type atomic_t + * @i: integer value to add + * + * Atomically adds @i to @v and returns @i + @v + */ +static __inline__ int atomic_up_add_return(int i, atomic_t *v) +{ + int __i; +#ifdef CONFIG_M386 + unsigned long flags; + if(unlikely(boot_cpu_data.x86==3)) + goto no_xadd; +#endif + /* Modern 486+ processor */ + __i = i; + __asm__ __volatile__( + "xaddl %0, %1;" + :"=r"(i) + :"m"(v->counter), "0"(i)); + return i + __i; + +#ifdef CONFIG_M386 +no_xadd: /* Legacy 386 processor */ + local_irq_save(flags); + __i = atomic_up_read(v); + atomic_up_set(v, i + __i); + local_irq_restore(flags); + return i + __i; +#endif +} + +static __inline__ int atomic_up_sub_return(int i, atomic_t *v) +{ + return atomic_up_add_return(-i,v); +} + +#define atomic_up_cmpxchg(v, old, new) ((int)cmpxchg_up(&((v)->counter), \ + old, new)) +/* xchg always has a LOCK prefix */ +#define atomic_up_xchg(v, new) (xchg(&((v)->counter), new)) + +/** + * atomic_up_add_unless - add unless the number is a given value + * @v: pointer of type atomic_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as it was not @u. + * Returns non-zero if @v was not @u, and zero otherwise. 
+ */ +#define atomic_up_add_unless(v, a, u) \ +({ \ + int c, old; \ + c = atomic_read(v); \ + for (;;) { \ + if (unlikely(c == (u))) \ + break; \ + old = atomic_up_cmpxchg((v), c, c + (a)); \ + if (likely(old == c)) \ + break; \ + c = old; \ + } \ + c != (u); \ +}) +#define atomic_up_inc_not_zero(v) atomic_up_add_unless((v), 1, 0) + +#define atomic_up_inc_return(v) (atomic_up_add_return(1,v)) +#define atomic_up_dec_return(v) (atomic_up_sub_return(1,v)) + +/* These are x86-specific, used by some header files */ +#define atomic_up_clear_mask(mask, addr) \ +__asm__ __volatile__("andl %0,%1" \ +: : "r" (~(mask)),"m" (*addr) : "memory") + +#define atomic_up_set_mask(mask, addr) \ +__asm__ __volatile__("orl %0,%1" \ +: : "r" (mask),"m" (*(addr)) : "memory") + +#endif --- a/include/asm-i386/system.h +++ b/include/asm-i386/system.h @@ -267,6 +267,9 @@ #define __HAVE_ARCH_CMPXCHG 1 #define cmpxchg(ptr,o,n)\ ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\ (unsigned long)(n),sizeof(*(ptr)))) +#define cmpxchg_up(ptr,o,n)\ + ((__typeof__(*(ptr)))__cmpxchg_up((ptr),(unsigned long)(o),\ + (unsigned long)(n),sizeof(*(ptr)))) #endif static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, @@ -296,6 +299,33 @@ static inline unsigned long __cmpxchg(vo return old; } +static inline unsigned long __cmpxchg_up(volatile void *ptr, unsigned long old, + unsigned long new, int size) +{ + unsigned long prev; + switch (size) { + case 1: + __asm__ __volatile__("cmpxchgb %b1,%2" + : "=a"(prev) + : "q"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 2: + __asm__ __volatile__("cmpxchgw %w1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 4: + __asm__ __volatile__("cmpxchgl %1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + } + return old; +} + #ifndef CONFIG_X86_CMPXCHG /* * Building a kernel capable running on 80386. It may be necessary to @@ -332,6 +362,17 @@ ({ \ (unsigned long)(n), sizeof(*(ptr))); \ __ret; \ }) +#define cmpxchg_up(ptr,o,n) \ +({ \ + __typeof__(*(ptr)) __ret; \ + if (likely(boot_cpu_data.x86 > 3)) \ + __ret = __cmpxchg_up((ptr), (unsigned long)(o), \ + (unsigned long)(n), sizeof(*(ptr))); \ + else \ + __ret = cmpxchg_386((ptr), (unsigned long)(o), \ + (unsigned long)(n), sizeof(*(ptr))); \ + __ret; \ +}) #endif #ifdef CONFIG_X86_CMPXCHG64 @@ -350,10 +391,26 @@ static inline unsigned long long __cmpxc return prev; } +static inline unsigned long long __cmpxchg64_up(volatile void *ptr, unsigned long long old, + unsigned long long new) +{ + unsigned long long prev; + __asm__ __volatile__("cmpxchg8b %3" + : "=A"(prev) + : "b"((unsigned long)new), + "c"((unsigned long)(new >> 32)), + "m"(*__xg(ptr)), + "0"(old) + : "memory"); + return prev; +} + #define cmpxchg64(ptr,o,n)\ ((__typeof__(*(ptr)))__cmpxchg64((ptr),(unsigned long long)(o),\ (unsigned long long)(n))) - +#define cmpxchg64_up(ptr,o,n)\ + ((__typeof__(*(ptr)))__cmpxchg64_up((ptr),(unsigned long long)(o),\ + (unsigned long long)(n))) #endif /* --- /dev/null +++ b/include/asm-x86_64/atomic-up.h @@ -0,0 +1,375 @@ +#ifndef __ARCH_X86_64_ATOMIC_UP__ +#define __ARCH_X86_64_ATOMIC_UP__ + +#include <asm/alternative.h> +#include <asm/atomic.h> + +/* + * atomic_up variants insure operation atomicity only if the variable is not + * shared between cpus. This is useful to have per-cpu atomic operations to + * protect from contexts like non-maskable interrupts without the LOCK prefix + * performance cost. 
+ */ + +/** + * atomic_up_add - add integer to atomic variable + * @i: integer value to add + * @v: pointer of type atomic_t + * + * Atomically adds @i to @v. + */ +static __inline__ void atomic_up_add(int i, atomic_t *v) +{ + __asm__ __volatile__( + "addl %1,%0" + :"=m" (v->counter) + :"ir" (i), "m" (v->counter)); +} + +/** + * atomic_up_sub - subtract the atomic variable + * @i: integer value to subtract + * @v: pointer of type atomic_t + * + * Atomically subtracts @i from @v. + */ +static __inline__ void atomic_up_sub(int i, atomic_t *v) +{ + __asm__ __volatile__( + "subl %1,%0" + :"=m" (v->counter) + :"ir" (i), "m" (v->counter)); +} + +/** + * atomic_up_sub_and_test - subtract value from variable and test result + * @i: integer value to subtract + * @v: pointer of type atomic_t + * + * Atomically subtracts @i from @v and returns + * true if the result is zero, or false for all + * other cases. + */ +static __inline__ int atomic_up_sub_and_test(int i, atomic_t *v) +{ + unsigned char c; + + __asm__ __volatile__( + "subl %2,%0; sete %1" + :"=m" (v->counter), "=qm" (c) + :"ir" (i), "m" (v->counter) : "memory"); + return c; +} + +/** + * atomic_up_inc - increment atomic variable + * @v: pointer of type atomic_t + * + * Atomically increments @v by 1. + */ +static __inline__ void atomic_up_inc(atomic_t *v) +{ + __asm__ __volatile__( + "incl %0" + :"=m" (v->counter) + :"m" (v->counter)); +} + +/** + * atomic_up_dec - decrement atomic variable + * @v: pointer of type atomic_t + * + * Atomically decrements @v by 1. + */ +static __inline__ void atomic_up_dec(atomic_t *v) +{ + __asm__ __volatile__( + "decl %0" + :"=m" (v->counter) + :"m" (v->counter)); +} + +/** + * atomic_up_dec_and_test - decrement and test + * @v: pointer of type atomic_t + * + * Atomically decrements @v by 1 and + * returns true if the result is 0, or false for all other + * cases. + */ +static __inline__ int atomic_up_dec_and_test(atomic_t *v) +{ + unsigned char c; + + __asm__ __volatile__( + "decl %0; sete %1" + :"=m" (v->counter), "=qm" (c) + :"m" (v->counter) : "memory"); + return c != 0; +} + +/** + * atomic_up_inc_and_test - increment and test + * @v: pointer of type atomic_t + * + * Atomically increments @v by 1 + * and returns true if the result is zero, or false for all + * other cases. + */ +static __inline__ int atomic_up_inc_and_test(atomic_t *v) +{ + unsigned char c; + + __asm__ __volatile__( + "incl %0; sete %1" + :"=m" (v->counter), "=qm" (c) + :"m" (v->counter) : "memory"); + return c != 0; +} + +/** + * atomic_up_add_negative - add and test if negative + * @i: integer value to add + * @v: pointer of type atomic_t + * + * Atomically adds @i to @v and returns true + * if the result is negative, or false when + * result is greater than or equal to zero. 
+ */ +static __inline__ int atomic_up_add_negative(int i, atomic_t *v) +{ + unsigned char c; + + __asm__ __volatile__( + "addl %2,%0; sets %1" + :"=m" (v->counter), "=qm" (c) + :"ir" (i), "m" (v->counter) : "memory"); + return c; +} + +/** + * atomic_up_add_return - add and return + * @i: integer value to add + * @v: pointer of type atomic_t + * + * Atomically adds @i to @v and returns @i + @v + */ +static __inline__ int atomic_up_add_return(int i, atomic_t *v) +{ + int __i = i; + __asm__ __volatile__( + "xaddl %0, %1;" + :"=r"(i) + :"m"(v->counter), "0"(i)); + return i + __i; +} + +static __inline__ int atomic_up_sub_return(int i, atomic_t *v) +{ + return atomic_up_add_return(-i,v); +} + +#define atomic_up_inc_return(v) (atomic_up_add_return(1,v)) +#define atomic_up_dec_return(v) (atomic_up_sub_return(1,v)) + +/** + * atomic64_up_add - add integer to atomic64 variable + * @i: integer value to add + * @v: pointer to type atomic64_t + * + * Atomically adds @i to @v. + */ +static __inline__ void atomic64_up_add(long i, atomic64_t *v) +{ + __asm__ __volatile__( + "addq %1,%0" + :"=m" (v->counter) + :"ir" (i), "m" (v->counter)); +} + +/** + * atomic64_up_sub - subtract the atomic64 variable + * @i: integer value to subtract + * @v: pointer to type atomic64_t + * + * Atomically subtracts @i from @v. + */ +static __inline__ void atomic64_up_sub(long i, atomic64_t *v) +{ + __asm__ __volatile__( + "subq %1,%0" + :"=m" (v->counter) + :"ir" (i), "m" (v->counter)); +} + +/** + * atomic64_up_sub_and_test - subtract value from variable and test result + * @i: integer value to subtract + * @v: pointer to type atomic64_t + * + * Atomically subtracts @i from @v and returns + * true if the result is zero, or false for all + * other cases. + */ +static __inline__ int atomic64_up_sub_and_test(long i, atomic64_t *v) +{ + unsigned char c; + + __asm__ __volatile__( + "subq %2,%0; sete %1" + :"=m" (v->counter), "=qm" (c) + :"ir" (i), "m" (v->counter) : "memory"); + return c; +} + +/** + * atomic64_up_inc - increment atomic64 variable + * @v: pointer to type atomic64_t + * + * Atomically increments @v by 1. + */ +static __inline__ void atomic64_up_inc(atomic64_t *v) +{ + __asm__ __volatile__( + "incq %0" + :"=m" (v->counter) + :"m" (v->counter)); +} + +/** + * atomic64_up_dec - decrement atomic64 variable + * @v: pointer to type atomic64_t + * + * Atomically decrements @v by 1. + */ +static __inline__ void atomic64_up_dec(atomic64_t *v) +{ + __asm__ __volatile__( + "decq %0" + :"=m" (v->counter) + :"m" (v->counter)); +} + +/** + * atomic64_up_dec_and_test - decrement and test + * @v: pointer to type atomic64_t + * + * Atomically decrements @v by 1 and + * returns true if the result is 0, or false for all other + * cases. + */ +static __inline__ int atomic64_up_dec_and_test(atomic64_t *v) +{ + unsigned char c; + + __asm__ __volatile__( + "decq %0; sete %1" + :"=m" (v->counter), "=qm" (c) + :"m" (v->counter) : "memory"); + return c != 0; +} + +/** + * atomic64_up_inc_and_test - increment and test + * @v: pointer to type atomic64_t + * + * Atomically increments @v by 1 + * and returns true if the result is zero, or false for all + * other cases. 
+ */ +static __inline__ int atomic64_up_inc_and_test(atomic64_t *v) +{ + unsigned char c; + + __asm__ __volatile__( + "incq %0; sete %1" + :"=m" (v->counter), "=qm" (c) + :"m" (v->counter) : "memory"); + return c != 0; +} + +/** + * atomic64_up_add_negative - add and test if negative + * @i: integer value to add + * @v: pointer to type atomic64_t + * + * Atomically adds @i to @v and returns true + * if the result is negative, or false when + * result is greater than or equal to zero. + */ +static __inline__ int atomic64_up_add_negative(long i, atomic64_t *v) +{ + unsigned char c; + + __asm__ __volatile__( + "addq %2,%0; sets %1" + :"=m" (v->counter), "=qm" (c) + :"ir" (i), "m" (v->counter) : "memory"); + return c; +} + +/** + * atomic64_up_add_return - add and return + * @i: integer value to add + * @v: pointer to type atomic64_t + * + * Atomically adds @i to @v and returns @i + @v + */ +static __inline__ long atomic64_up_add_return(long i, atomic64_t *v) +{ + long __i = i; + __asm__ __volatile__( + "xaddq %0, %1;" + :"=r"(i) + :"m"(v->counter), "0"(i)); + return i + __i; +} + +static __inline__ long atomic64_up_sub_return(long i, atomic64_t *v) +{ + return atomic64_up_add_return(-i,v); +} + +#define atomic64_up_inc_return(v) (atomic64_up_add_return(1,v)) +#define atomic64_up_dec_return(v) (atomic64_up_sub_return(1,v)) + +#define atomic_up_cmpxchg(v, old, new) ((int)cmpxchg_up(&((v)->counter), \ + old, new)) +/* Always has a lock prefix anyway */ +#define atomic_up_xchg(v, new) (xchg(&((v)->counter), new)) + +/** + * atomic_up_add_unless - add unless the number is a given value + * @v: pointer of type atomic_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as it was not @u. + * Returns non-zero if @v was not @u, and zero otherwise. 
+ */ +#define atomic_up_add_unless(v, a, u) \ +({ \ + int c, old; \ + c = atomic_read(v); \ + for (;;) { \ + if (unlikely(c == (u))) \ + break; \ + old = atomic_up_cmpxchg((v), c, c + (a)); \ + if (likely(old == c)) \ + break; \ + c = old; \ + } \ + c != (u); \ +}) +#define atomic_up_inc_not_zero(v) atomic_up_add_unless((v), 1, 0) + +/* These are x86-specific, used by some header files */ +#define atomic_up_clear_mask(mask, addr) \ +__asm__ __volatile__("andl %0,%1" \ +: : "r" (~(mask)),"m" (*addr) : "memory") + +#define atomic_up_set_mask(mask, addr) \ +__asm__ __volatile__("orl %0,%1" \ +: : "r" ((unsigned)mask),"m" (*(addr)) : "memory") + +#endif --- a/include/asm-x86_64/system.h +++ b/include/asm-x86_64/system.h @@ -208,9 +208,45 @@ static inline unsigned long __cmpxchg(vo return old; } +static inline unsigned long __cmpxchg_up(volatile void *ptr, unsigned long old, + unsigned long new, int size) +{ + unsigned long prev; + switch (size) { + case 1: + __asm__ __volatile__("cmpxchgb %b1,%2" + : "=a"(prev) + : "q"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 2: + __asm__ __volatile__("cmpxchgw %w1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 4: + __asm__ __volatile__("cmpxchgl %k1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 8: + __asm__ __volatile__("cmpxchgq %1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + } + return old; +} + #define cmpxchg(ptr,o,n)\ ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\ (unsigned long)(n),sizeof(*(ptr)))) +#define cmpxchg_up(ptr,o,n)\ + ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\ + (unsigned long)(n),sizeof(*(ptr)))) #ifdef CONFIG_SMP #define smp_mb() mb() --- /dev/null +++ b/include/asm-powerpc/atomic-up.h @@ -0,0 +1,380 @@ +#ifndef _ASM_POWERPC_ATOMIC_UP_H_ +#define _ASM_POWERPC_ATOMIC_UP_H_ + +#ifdef __KERNEL__ +#include <linux/compiler.h> +#include <asm/synch.h> +#include <asm/asm-compat.h> +#include <asm/atomic.h> + +/* + * atomic_up variants insure operation atomicity only if the variable is not + * shared between cpus. This is useful to have per-cpu atomic operations to + * protect from contexts like non-maskable interrupts without the LOCK prefix + * performance cost. + */ + +static __inline__ void atomic_up_add(int a, atomic_t *v) +{ + int t; + + __asm__ __volatile__( +"1: lwarx %0,0,%3 # atomic_up_add\n\ + add %0,%2,%0\n" + PPC405_ERR77(0,%3) +" stwcx. %0,0,%3 \n\ + bne- 1b" + : "=&r" (t), "+m" (v->counter) + : "r" (a), "r" (&v->counter) + : "cc"); +} + +static __inline__ int atomic_up_add_return(int a, atomic_t *v) +{ + int t; + + __asm__ __volatile__( +"1: lwarx %0,0,%2 # atomic_up_add_return\n\ + add %0,%1,%0\n" + PPC405_ERR77(0,%2) +" stwcx. %0,0,%2 \n\ + bne- 1b" + : "=&r" (t) + : "r" (a), "r" (&v->counter) + : "cc", "memory"); + + return t; +} + +#define atomic_up_add_negative(a, v) (atomic_up_add_return((a), (v)) < 0) + +static __inline__ void atomic_up_sub(int a, atomic_t *v) +{ + int t; + + __asm__ __volatile__( +"1: lwarx %0,0,%3 # atomic_up_sub\n\ + subf %0,%2,%0\n" + PPC405_ERR77(0,%3) +" stwcx. %0,0,%3 \n\ + bne- 1b" + : "=&r" (t), "+m" (v->counter) + : "r" (a), "r" (&v->counter) + : "cc"); +} + +static __inline__ int atomic_up_sub_return(int a, atomic_t *v) +{ + int t; + + __asm__ __volatile__( +"1: lwarx %0,0,%2 # atomic_up_sub_return\n\ + subf %0,%1,%0\n" + PPC405_ERR77(0,%2) +" stwcx. 
%0,0,%2 \n\ + bne- 1b" + : "=&r" (t) + : "r" (a), "r" (&v->counter) + : "cc", "memory"); + + return t; +} + +static __inline__ void atomic_up_inc(atomic_t *v) +{ + int t; + + __asm__ __volatile__( +"1: lwarx %0,0,%2 # atomic_up_inc\n\ + addic %0,%0,1\n" + PPC405_ERR77(0,%2) +" stwcx. %0,0,%2 \n\ + bne- 1b" + : "=&r" (t), "+m" (v->counter) + : "r" (&v->counter) + : "cc"); +} + +static __inline__ int atomic_up_inc_return(atomic_t *v) +{ + int t; + + __asm__ __volatile__( +"1: lwarx %0,0,%1 # atomic_up_inc_return\n\ + addic %0,%0,1\n" + PPC405_ERR77(0,%1) +" stwcx. %0,0,%1 \n\ + bne- 1b" + : "=&r" (t) + : "r" (&v->counter) + : "cc", "memory"); + + return t; +} + +/* + * atomic_up_inc_and_test - increment and test + * @v: pointer of type atomic_t + * + * Atomically increments @v by 1 + * and returns true if the result is zero, or false for all + * other cases. + */ +#define atomic_up_inc_and_test(v) (atomic_up_inc_return(v) == 0) + +static __inline__ void atomic_up_dec(atomic_t *v) +{ + int t; + + __asm__ __volatile__( +"1: lwarx %0,0,%2 # atomic_up_dec\n\ + addic %0,%0,-1\n" + PPC405_ERR77(0,%2)\ +" stwcx. %0,0,%2\n\ + bne- 1b" + : "=&r" (t), "+m" (v->counter) + : "r" (&v->counter) + : "cc"); +} + +static __inline__ int atomic_up_dec_return(atomic_t *v) +{ + int t; + + __asm__ __volatile__( +"1: lwarx %0,0,%1 # atomic_up_dec_return\n\ + addic %0,%0,-1\n" + PPC405_ERR77(0,%1) +" stwcx. %0,0,%1\n\ + bne- 1b" + : "=&r" (t) + : "r" (&v->counter) + : "cc", "memory"); + + return t; +} + +#define atomic_up_cmpxchg(v, o, n) ((int)cmpxchg_up(&((v)->counter), (o), (n))) +#define atomic_up_xchg(v, new) (xchg_up(&((v)->counter), new)) + +/** + * atomic_up_add_unless - add unless the number is a given value + * @v: pointer of type atomic_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as it was not @u. + * Returns non-zero if @v was not @u, and zero otherwise. + */ +static __inline__ int atomic_up_add_unless(atomic_t *v, int a, int u) +{ + int t; + + __asm__ __volatile__ ( +"1: lwarx %0,0,%1 # atomic_up_add_unless\n\ + cmpw 0,%0,%3 \n\ + beq- 2f \n\ + add %0,%2,%0 \n" + PPC405_ERR77(0,%2) +" stwcx. %0,0,%1 \n\ + bne- 1b \n" +" subf %0,%2,%0 \n\ +2:" + : "=&r" (t) + : "r" (&v->counter), "r" (a), "r" (u) + : "cc", "memory"); + + return t != u; +} + +#define atomic_up_inc_not_zero(v) atomic_up_add_unless((v), 1, 0) + +#define atomic_up_sub_and_test(a, v) (atomic_up_sub_return((a), (v)) == 0) +#define atomic_up_dec_and_test(v) (atomic_up_dec_return((v)) == 0) + +/* + * Atomically test *v and decrement if it is greater than 0. + * The function returns the old value of *v minus 1. + */ +static __inline__ int atomic_up_dec_if_positive(atomic_t *v) +{ + int t; + + __asm__ __volatile__( +"1: lwarx %0,0,%1 # atomic_up_dec_if_positive\n\ + addic. %0,%0,-1\n\ + blt- 2f\n" + PPC405_ERR77(0,%1) +" stwcx. %0,0,%1\n\ + bne- 1b" + "\n\ +2:" : "=&r" (t) + : "r" (&v->counter) + : "cc", "memory"); + + return t; +} + +#ifdef __powerpc64__ + +static __inline__ void atomic64_up_add(long a, atomic64_t *v) +{ + long t; + + __asm__ __volatile__( +"1: ldarx %0,0,%3 # atomic64_up_add\n\ + add %0,%2,%0\n\ + stdcx. %0,0,%3 \n\ + bne- 1b" + : "=&r" (t), "+m" (v->counter) + : "r" (a), "r" (&v->counter) + : "cc"); +} + +static __inline__ long atomic64_up_add_return(long a, atomic64_t *v) +{ + long t; + + __asm__ __volatile__( +"1: ldarx %0,0,%2 # atomic64_up_add_return\n\ + add %0,%1,%0\n\ + stdcx. 
%0,0,%2 \n\ + bne- 1b" + : "=&r" (t) + : "r" (a), "r" (&v->counter) + : "cc", "memory"); + + return t; +} + +#define atomic64_up_add_negative(a, v) (atomic64_up_add_return((a), (v)) < 0) + +static __inline__ void atomic64_up_sub(long a, atomic64_t *v) +{ + long t; + + __asm__ __volatile__( +"1: ldarx %0,0,%3 # atomic64_up_sub\n\ + subf %0,%2,%0\n\ + stdcx. %0,0,%3 \n\ + bne- 1b" + : "=&r" (t), "+m" (v->counter) + : "r" (a), "r" (&v->counter) + : "cc"); +} + +static __inline__ long atomic64_up_sub_return(long a, atomic64_t *v) +{ + long t; + + __asm__ __volatile__( +"1: ldarx %0,0,%2 # atomic64_up_sub_return\n\ + subf %0,%1,%0\n\ + stdcx. %0,0,%2 \n\ + bne- 1b" + : "=&r" (t) + : "r" (a), "r" (&v->counter) + : "cc", "memory"); + + return t; +} + +static __inline__ void atomic64_up_inc(atomic64_t *v) +{ + long t; + + __asm__ __volatile__( +"1: ldarx %0,0,%2 # atomic64_up_inc\n\ + addic %0,%0,1\n\ + stdcx. %0,0,%2 \n\ + bne- 1b" + : "=&r" (t), "+m" (v->counter) + : "r" (&v->counter) + : "cc"); +} + +static __inline__ long atomic64_up_inc_return(atomic64_t *v) +{ + long t; + + __asm__ __volatile__( +"1: ldarx %0,0,%1 # atomic64_up_inc_return\n\ + addic %0,%0,1\n\ + stdcx. %0,0,%1 \n\ + bne- 1b" + : "=&r" (t) + : "r" (&v->counter) + : "cc", "memory"); + + return t; +} + +/* + * atomic64_up_inc_and_test - increment and test + * @v: pointer of type atomic64_t + * + * Atomically increments @v by 1 + * and returns true if the result is zero, or false for all + * other cases. + */ +#define atomic64_up_inc_and_test(v) (atomic64_up_inc_return(v) == 0) + +static __inline__ void atomic64_up_dec(atomic64_t *v) +{ + long t; + + __asm__ __volatile__( +"1: ldarx %0,0,%2 # atomic64_up_dec\n\ + addic %0,%0,-1\n\ + stdcx. %0,0,%2\n\ + bne- 1b" + : "=&r" (t), "+m" (v->counter) + : "r" (&v->counter) + : "cc"); +} + +static __inline__ long atomic64_up_dec_return(atomic64_t *v) +{ + long t; + + __asm__ __volatile__( +"1: ldarx %0,0,%1 # atomic64_up_dec_return\n\ + addic %0,%0,-1\n\ + stdcx. %0,0,%1\n\ + bne- 1b" + : "=&r" (t) + : "r" (&v->counter) + : "cc", "memory"); + + return t; +} + +#define atomic64_up_sub_and_test(a, v) (atomic64_up_sub_return((a), (v)) == 0) +#define atomic64_up_dec_and_test(v) (atomic64_up_dec_return((v)) == 0) + +/* + * Atomically test *v and decrement if it is greater than 0. + * The function returns the old value of *v minus 1. + */ +static __inline__ long atomic64_up_dec_if_positive(atomic64_t *v) +{ + long t; + + __asm__ __volatile__( +"1: ldarx %0,0,%1 # atomic64_up_dec_if_positive\n\ + addic. %0,%0,-1\n\ + blt- 2f\n\ + stdcx. %0,0,%1\n\ + bne- 1b" + "\n\ +2:" : "=&r" (t) + : "r" (&v->counter) + : "cc", "memory"); + + return t; +} + +#endif /* __powerpc64__ */ + +#endif /* __KERNEL__ */ +#endif /* _ASM_POWERPC_ATOMIC_UP_H_ */ --- a/include/asm-powerpc/system.h +++ b/include/asm-powerpc/system.h @@ -235,6 +235,29 @@ __xchg_u32(volatile void *p, unsigned lo return prev; } +/* + * Atomic exchange + * + * Changes the memory location '*ptr' to be val and returns + * the previous value stored there. + */ +static __inline__ unsigned long +__xchg_u32_up(volatile void *p, unsigned long val) +{ + unsigned long prev; + + __asm__ __volatile__( +"1: lwarx %0,0,%2 \n" + PPC405_ERR77(0,%2) +" stwcx. 
%3,0,%2 \n\ + bne- 1b" + : "=&r" (prev), "+m" (*(volatile unsigned int *)p) + : "r" (p), "r" (val) + : "cc", "memory"); + + return prev; +} + #ifdef CONFIG_PPC64 static __inline__ unsigned long __xchg_u64(volatile void *p, unsigned long val) @@ -254,6 +277,23 @@ __xchg_u64(volatile void *p, unsigned lo return prev; } + +static __inline__ unsigned long +__xchg_u64_up(volatile void *p, unsigned long val) +{ + unsigned long prev; + + __asm__ __volatile__( +"1: ldarx %0,0,%2 \n" + PPC405_ERR77(0,%2) +" stdcx. %3,0,%2 \n\ + bne- 1b" + : "=&r" (prev), "+m" (*(volatile unsigned long *)p) + : "r" (p), "r" (val) + : "cc", "memory"); + + return prev; +} #endif /* @@ -277,12 +317,32 @@ #endif return x; } +static __inline__ unsigned long +__xchg_up(volatile void *ptr, unsigned long x, unsigned int size) +{ + switch (size) { + case 4: + return __xchg_u32_up(ptr, x); +#ifdef CONFIG_PPC64 + case 8: + return __xchg_u64_up(ptr, x); +#endif + } + __xchg_called_with_bad_pointer(); + return x; +} #define xchg(ptr,x) \ ({ \ __typeof__(*(ptr)) _x_ = (x); \ (__typeof__(*(ptr))) __xchg((ptr), (unsigned long)_x_, sizeof(*(ptr))); \ }) +#define xchg_up(ptr,x) \ + ({ \ + __typeof__(*(ptr)) _x_ = (x); \ + (__typeof__(*(ptr))) __xchg_up((ptr), (unsigned long)_x_, sizeof(*(ptr))); \ + }) + #define tas(ptr) (xchg((ptr),1)) /* @@ -314,6 +374,27 @@ __cmpxchg_u32(volatile unsigned int *p, return prev; } +static __inline__ unsigned long +__cmpxchg_u32_up(volatile unsigned int *p, unsigned long old, unsigned long new) +{ + unsigned int prev; + + __asm__ __volatile__ ( +"1: lwarx %0,0,%2 # __cmpxchg_u32\n\ + cmpw 0,%0,%3\n\ + bne- 2f\n" + PPC405_ERR77(0,%2) +" stwcx. %4,0,%2\n\ + bne- 1b" + "\n\ +2:" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (old), "r" (new) + : "cc", "memory"); + + return prev; +} + #ifdef CONFIG_PPC64 static __inline__ unsigned long __cmpxchg_u64(volatile unsigned long *p, unsigned long old, unsigned long new) @@ -336,6 +417,26 @@ __cmpxchg_u64(volatile unsigned long *p, return prev; } + +static __inline__ unsigned long +__cmpxchg_u64_up(volatile unsigned long *p, unsigned long old, unsigned long new) +{ + unsigned long prev; + + __asm__ __volatile__ ( +"1: ldarx %0,0,%2 # __cmpxchg_u64\n\ + cmpd 0,%0,%3\n\ + bne- 2f\n\ + stdcx. %4,0,%2\n\ + bne- 1b" + "\n\ +2:" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (old), "r" (new) + : "cc", "memory"); + + return prev; +} #endif /* This function doesn't exist, so you'll get a linker error @@ -358,6 +459,22 @@ #endif return old; } +static __inline__ unsigned long +__cmpxchg_up(volatile void *ptr, unsigned long old, unsigned long new, + unsigned int size) +{ + switch (size) { + case 4: + return __cmpxchg_u32_up(ptr, old, new); +#ifdef CONFIG_PPC64 + case 8: + return __cmpxchg_u64_up(ptr, old, new); +#endif + } + __cmpxchg_called_with_bad_pointer(); + return old; +} + #define cmpxchg(ptr,o,n) \ ({ \ __typeof__(*(ptr)) _o_ = (o); \ @@ -366,6 +483,15 @@ #define cmpxchg(ptr,o,n) \ (unsigned long)_n_, sizeof(*(ptr))); \ }) + +#define cmpxchg_up(ptr,o,n) \ + ({ \ + __typeof__(*(ptr)) _o_ = (o); \ + __typeof__(*(ptr)) _n_ = (n); \ + (__typeof__(*(ptr))) __cmpxchg_up((ptr), (unsigned long)_o_, \ + (unsigned long)_n_, sizeof(*(ptr))); \ + }) + #ifdef CONFIG_PPC64 /* * We handle most unaligned accesses in hardware. 
On the other hand --- /dev/null +++ b/include/asm-arm/atomic-up.h @@ -0,0 +1,6 @@ +#ifndef _ASM_ATOMIC_UP_H +#define _ASM_ATOMIC_UP_H + +#include <asm-generic/atomic-up.h> + +#endif --- /dev/null +++ b/include/asm-mips/atomic-up.h @@ -0,0 +1,6 @@ +#ifndef _ASM_ATOMIC_UP_H +#define _ASM_ATOMIC_UP_H + +#include <asm-generic/atomic-up.h> + +#endif --- /dev/null +++ b/include/asm-generic/atomic-up.h @@ -0,0 +1,49 @@ +#ifndef _ASM_GENERIC_ATOMIC_UP_H +#define _ASM_GENERIC_ATOMIC_UP_H + +/* Uniprocessor atomic operations. + * + * The generic version of up-only atomic ops falls back on atomic.h. + */ + +#include <asm/atomic.h> + +#define atomic_up_add atomic_add +#define atomic_up_sub atomic_sub +#define atomic_up_add_return atomic_add_return +#define atomic_up_sub_return atomic_sub_return +#define atomic_up_sub_if_positive atomic_sub_if_positive +#define atomic_up_cmpxchg atomic_cmpxchg +#define atomic_up_xchg atomic_xchg +#define atomic_up_add_unless atomic_add_unless +#define atomic_up_inc_not_zero atomic_inc_not_zero +#define atomic_up_dec_return atomic_dec_return +#define atomic_up_inc_return atomic_inc_return +#define atomic_up_sub_and_test atomic_sub_and_test +#define atomic_up_inc_and_test atomic_inc_and_test +#define atomic_up_dec_and_test atomic_dec_and_test +#define atomic_up_dec_if_positive atomic_dec_if_positive +#define atomic_up_inc atomic_inc +#define atomic_up_dec atomic_dec +#define atomic_up_add_negative atomic_add_negative + +#ifdef CONFIG_64BIT + +#define atomic64_up_add atomic64_add +#define atomic64_up_sub atomic64_sub +#define atomic64_up_add_return atomic64_add_return +#define atomic64_up_sub_return atomic64_sub_return +#define atomic64_up_sub_if_positive atomic64_sub_if_positive +#define atomic64_up_dec_return atomic64_dec_return +#define atomic64_up_inc_return atomic64_inc_return +#define atomic64_up_sub_and_test atomic64_sub_and_test +#define atomic64_up_inc_and_test atomic64_inc_and_test +#define atomic64_up_dec_and_test atomic64_dec_and_test +#define atomic64_up_dec_if_positive atomic64_dec_if_positive +#define atomic64_up_inc atomic64_inc +#define atomic64_up_dec atomic64_dec +#define atomic64_up_add_negative atomic64_add_negative + +#endif /* CONFIG_64BIT */ + +#endif /* _ASM_GENERIC_ATOMIC_UP_H */ --END-- OpenPGP public key: http://krystal.dyndns.org:8080/key/compudj.gpg Key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68 ^ permalink raw reply [flat|nested] 13+ messages in thread
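
As a concrete illustration of the usage pattern described in the patch header (per-cpu data updated only by its owning CPU, with explicit smp_wmb()/smp_rmb() on the rare cross-CPU access), here is a minimal sketch built on the proposed atomic_up primitives. It is not part of the patch; the per-cpu variable and function names are purely illustrative, and it assumes the per-cpu helpers of a 2.6.18-era kernel.

#include <linux/percpu.h>
#include <asm/atomic-up.h>	/* added by this patch on i386/x86_64/powerpc */

static DEFINE_PER_CPU(atomic_t, nr_events);	/* hypothetical per-cpu counter */

/* Fast path: runs on the local CPU only (tracing probe, possibly in NMI
 * context), so no LOCK prefix is needed for atomicity. */
static void record_event(void)
{
	atomic_up_inc(&__get_cpu_var(nr_events));
	smp_wmb();	/* make the update visible before any later shared store */
}

/* Rare path: another CPU reads the counter, e.g. when flushing buffers. */
static int read_events(int cpu)
{
	smp_rmb();	/* pair with the writer's smp_wmb() */
	return atomic_read(&per_cpu(nr_events, cpu));
}

On i386 the increment above compiles down to a plain incl with no LOCK prefix, which is where the performance gain quoted in the benchmark comes from.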
* Re: [PATCH 4/16] LTTng 0.6.36 for 2.6.18 : atomic UP operations on SMP
  2006-11-24 21:55 [PATCH 4/16] LTTng 0.6.36 for 2.6.18 : atomic UP operations on SMP Mathieu Desnoyers
@ 2006-11-27 16:56 ` Christoph Hellwig
  2006-12-01 3:11 ` [PATCH 1/2] atomic.h atomic64_t standardization Mathieu Desnoyers
  2006-12-01 3:14 ` [PATCH 2/2] local.h modifications Mathieu Desnoyers
  0 siblings, 2 replies; 13+ messages in thread

From: Christoph Hellwig @ 2006-11-27 16:56 UTC (permalink / raw)
To: Mathieu Desnoyers
Cc: linux-kernel, Christoph Hellwig, Andrew Morton, Ingo Molnar,
    Greg Kroah-Hartman, Thomas Gleixner, Tom Zanussi, Karim Yaghmour,
    Paul Mundt, Jes Sorensen, Richard J Moore, Martin J. Bligh,
    Michel Dagenais, Douglas Niehaus, ltt-dev, systemtap

On Fri, Nov 24, 2006 at 04:55:18PM -0500, Mathieu Desnoyers wrote:
> This patch adds a UP flavor of SMP operations which is intended to provide
> atomic modification of per-cpu data without suffering from the LOCK prefix or
> memory barrier performance cost. Note that extreme care must be taken when
> accessing this data from different CPUs : smp_wmb() and smp_rmb() must be used
> explicitly. As this last scenario happens very rarely in LTTng, it provides a

We already have local_t in asm/local.h for this purpose. Unfortunately
several architecture implementations are rather suboptimal, but I'm sure
the architecture maintainers would be interested in patches to optimize
the various implementations.

^ permalink raw reply [flat|nested] 13+ messages in thread
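
For reference, the existing local_t interface Christoph mentions can already express the same fast path. A rough sketch, assuming a 2.6.18-era asm/local.h; the variable and function names are illustrative only:

#include <linux/percpu.h>
#include <asm/local.h>

static DEFINE_PER_CPU(local_t, nr_hits);	/* hypothetical per-cpu counter */

/* Atomic with respect to the local CPU (interrupts, NMIs) only. */
static void count_hit(void)
{
	local_inc(&__get_cpu_var(nr_hits));
}

static long snapshot(int cpu)
{
	return local_read(&per_cpu(nr_hits, cpu));
}

Christoph's caveat is that, at the time, several architectures implemented local_t as a thin wrapper around the fully locked atomic ops, so the fast path above is only cheap where the architecture provides a real local implementation.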
* [PATCH 1/2] atomic.h atomic64_t standardization 2006-11-27 16:56 ` Christoph Hellwig @ 2006-12-01 3:11 ` Mathieu Desnoyers 2006-12-01 3:24 ` Mathieu Desnoyers 2006-12-01 3:34 ` [PATCH 1/2] atomic.h atomic64_t standardization Paul Mundt 2006-12-01 3:14 ` [PATCH 2/2] local.h modifications Mathieu Desnoyers 1 sibling, 2 replies; 13+ messages in thread From: Mathieu Desnoyers @ 2006-12-01 3:11 UTC (permalink / raw) To: Christoph Hellwig, linux-kernel, Andrew Morton, Ingo Molnar, Greg Kroah-Hartman, Thomas Gleixner, Tom Zanussi, Karim Yaghmour, Paul Mundt, Jes Sorensen, Richard J Moore, Martin J. Bligh, Michel Dagenais, Douglas Niehaus, ltt-dev, systemtap * Christoph Hellwig (hch@infradead.org) wrote: > We already have local_t in asm/local.h for this purposed. Unfortunately > several architecture implementations are rather suboptimal, but I'm sure > the architecture maintainers would be interested in patches to optimize > the various implementations. > Hi Christoph, I just implemented some modifications over i386, x86_64, powerpc, mips and arm implementations (2.6.18) of atomic.h to add the atomic64_cmpxchg primitives. They are required for proper asm-generic/atomic.h atomic_long_t type. This patch also adds missing primitives to asm-generic/atomic.h. local.h modifications for these architectures follows in the next post. Mathieu ---BEGIN--- --- a/include/asm-i386/atomic.h +++ b/include/asm-i386/atomic.h @@ -207,8 +207,9 @@ static __inline__ int atomic_sub_return( return atomic_add_return(-i,v); } -#define atomic_cmpxchg(v, old, new) ((int)cmpxchg(&((v)->counter), old, new)) -#define atomic_xchg(v, new) (xchg(&((v)->counter), new)) +#define atomic_cmpxchg(v, old, new) \ + ((__typeof__((v)->counter))cmpxchg(&((v)->counter), (old), (new))) +#define atomic_xchg(v, new) (xchg(&((v)->counter), (new))) /** * atomic_add_unless - add unless the number is a given value @@ -221,7 +222,7 @@ #define atomic_xchg(v, new) (xchg(&((v)- */ #define atomic_add_unless(v, a, u) \ ({ \ - int c, old; \ + __typeof__((v)->counter) c, old; \ c = atomic_read(v); \ for (;;) { \ if (unlikely(c == (u))) \ --- a/include/asm-i386/system.h +++ b/include/asm-i386/system.h @@ -267,6 +267,9 @@ #define __HAVE_ARCH_CMPXCHG 1 #define cmpxchg(ptr,o,n)\ ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\ (unsigned long)(n),sizeof(*(ptr)))) +#define cmpxchg_local(ptr,o,n)\ + ((__typeof__(*(ptr)))__cmpxchg_local((ptr),(unsigned long)(o),\ + (unsigned long)(n),sizeof(*(ptr)))) #endif static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, @@ -296,6 +299,33 @@ static inline unsigned long __cmpxchg(vo return old; } +static inline unsigned long __cmpxchg_local(volatile void *ptr, + unsigned long old, unsigned long new, int size) +{ + unsigned long prev; + switch (size) { + case 1: + __asm__ __volatile__("cmpxchgb %b1,%2" + : "=a"(prev) + : "q"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 2: + __asm__ __volatile__("cmpxchgw %w1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 4: + __asm__ __volatile__("cmpxchgl %1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + } + return old; +} + #ifndef CONFIG_X86_CMPXCHG /* * Building a kernel capable running on 80386. 
It may be necessary to @@ -332,6 +362,17 @@ ({ \ (unsigned long)(n), sizeof(*(ptr))); \ __ret; \ }) +#define cmpxchg_local(ptr,o,n) \ +({ \ + __typeof__(*(ptr)) __ret; \ + if (likely(boot_cpu_data.x86 > 3)) \ + __ret = __cmpxchg_local((ptr), (unsigned long)(o), \ + (unsigned long)(n), sizeof(*(ptr))); \ + else \ + __ret = cmpxchg_386((ptr), (unsigned long)(o), \ + (unsigned long)(n), sizeof(*(ptr))); \ + __ret; \ +}) #endif #ifdef CONFIG_X86_CMPXCHG64 @@ -350,10 +391,26 @@ static inline unsigned long long __cmpxc return prev; } +static inline unsigned long long __cmpxchg64_local(volatile void *ptr, + unsigned long long old, unsigned long long new) +{ + unsigned long long prev; + __asm__ __volatile__("cmpxchg8b %3" + : "=A"(prev) + : "b"((unsigned long)new), + "c"((unsigned long)(new >> 32)), + "m"(*__xg(ptr)), + "0"(old) + : "memory"); + return prev; +} + #define cmpxchg64(ptr,o,n)\ ((__typeof__(*(ptr)))__cmpxchg64((ptr),(unsigned long long)(o),\ (unsigned long long)(n))) - +#define cmpxchg64_local(ptr,o,n)\ + ((__typeof__(*(ptr)))__cmpxchg64_local((ptr),(unsigned long long)(o),\ + (unsigned long long)(n))) #endif /* --- a/include/asm-x86_64/atomic.h +++ b/include/asm-x86_64/atomic.h @@ -388,7 +388,12 @@ static __inline__ long atomic64_sub_retu #define atomic64_inc_return(v) (atomic64_add_return(1,v)) #define atomic64_dec_return(v) (atomic64_sub_return(1,v)) -#define atomic_cmpxchg(v, old, new) ((int)cmpxchg(&((v)->counter), old, new)) +#define atomic64_cmpxchg(v, old, new) \ + ((__typeof__((v)->counter))cmpxchg(&((v)->counter), old, new)) +#define atomic64_xchg(v, new) (xchg(&((v)->counter), new)) + +#define atomic_cmpxchg(v, old, new) \ + ((__typeof__((v)->counter))cmpxchg(&((v)->counter), old, new)) #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) /** @@ -402,7 +407,7 @@ #define atomic_xchg(v, new) (xchg(&((v)- */ #define atomic_add_unless(v, a, u) \ ({ \ - int c, old; \ + __typeof__((v)->counter) c, old; \ c = atomic_read(v); \ for (;;) { \ if (unlikely(c == (u))) \ @@ -416,6 +421,9 @@ ({ \ }) #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) +#define atomic64_add_unless(v, a, u) atomic_add_unless((v), (a), (u)) +#define atomic64_inc_not_zero(v) atomic_inc_not_zero((v)) + /* These are x86-specific, used by some header files */ #define atomic_clear_mask(mask, addr) \ __asm__ __volatile__(LOCK_PREFIX "andl %0,%1" \ --- a/include/asm-x86_64/system.h +++ b/include/asm-x86_64/system.h @@ -208,9 +208,45 @@ static inline unsigned long __cmpxchg(vo return old; } +static inline unsigned long __cmpxchg_local(volatile void *ptr, + unsigned long old, unsigned long new, int size) +{ + unsigned long prev; + switch (size) { + case 1: + __asm__ __volatile__("cmpxchgb %b1,%2" + : "=a"(prev) + : "q"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 2: + __asm__ __volatile__("cmpxchgw %w1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 4: + __asm__ __volatile__("cmpxchgl %k1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 8: + __asm__ __volatile__("cmpxchgq %1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + } + return old; +} + #define cmpxchg(ptr,o,n)\ ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\ (unsigned long)(n),sizeof(*(ptr)))) +#define cmpxchg_local(ptr,o,n)\ + ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\ + (unsigned long)(n),sizeof(*(ptr)))) #ifdef CONFIG_SMP #define smp_mb() mb() --- 
a/include/asm-powerpc/atomic.h +++ b/include/asm-powerpc/atomic.h @@ -165,7 +165,8 @@ static __inline__ int atomic_dec_return( return t; } -#define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n))) +#define atomic_cmpxchg(v, o, n) \ + ((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n))) #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) /** @@ -411,6 +412,44 @@ static __inline__ long atomic64_dec_if_p return t; } +#define atomic64_cmpxchg(v, o, n) \ + ((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n))) +#define atomic64_xchg(v, new) (xchg(&((v)->counter), new)) + +/** + * atomic64_add_unless - add unless the number is a given value + * @v: pointer of type atomic64_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as it was not @u. + * Returns non-zero if @v was not @u, and zero otherwise. + */ +static __inline__ int atomic64_add_unless(atomic_t *v, long a, long u) +{ + long t; + + __asm__ __volatile__ ( + LWSYNC_ON_SMP +"1: ldarx %0,0,%1 # atomic_add_unless\n\ + cmpd 0,%0,%3 \n\ + beq- 2f \n\ + add %0,%2,%0 \n" + PPC405_ERR77(0,%2) +" stdcx. %0,0,%1 \n\ + bne- 1b \n" + ISYNC_ON_SMP +" subf %0,%2,%0 \n\ +2:" + : "=&r" (t) + : "r" (&v->counter), "r" (a), "r" (u) + : "cc", "memory"); + + return t != u; +} + +#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) + #endif /* __powerpc64__ */ #include <asm-generic/atomic.h> --- a/include/asm-powerpc/system.h +++ b/include/asm-powerpc/system.h @@ -235,6 +235,29 @@ __xchg_u32(volatile void *p, unsigned lo return prev; } +/* + * Atomic exchange + * + * Changes the memory location '*ptr' to be val and returns + * the previous value stored there. + */ +static __inline__ unsigned long +__xchg_u32_local(volatile void *p, unsigned long val) +{ + unsigned long prev; + + __asm__ __volatile__( +"1: lwarx %0,0,%2 \n" + PPC405_ERR77(0,%2) +" stwcx. %3,0,%2 \n\ + bne- 1b" + : "=&r" (prev), "+m" (*(volatile unsigned int *)p) + : "r" (p), "r" (val) + : "cc", "memory"); + + return prev; +} + #ifdef CONFIG_PPC64 static __inline__ unsigned long __xchg_u64(volatile void *p, unsigned long val) @@ -254,6 +277,23 @@ __xchg_u64(volatile void *p, unsigned lo return prev; } + +static __inline__ unsigned long +__xchg_u64_local(volatile void *p, unsigned long val) +{ + unsigned long prev; + + __asm__ __volatile__( +"1: ldarx %0,0,%2 \n" + PPC405_ERR77(0,%2) +" stdcx. 
%3,0,%2 \n\ + bne- 1b" + : "=&r" (prev), "+m" (*(volatile unsigned long *)p) + : "r" (p), "r" (val) + : "cc", "memory"); + + return prev; +} #endif /* @@ -277,12 +317,33 @@ #endif return x; } +static __inline__ unsigned long +__xchg_local(volatile void *ptr, unsigned long x, unsigned int size) +{ + switch (size) { + case 4: + return __xchg_u32_local(ptr, x); +#ifdef CONFIG_PPC64 + case 8: + return __xchg_u64_local(ptr, x); +#endif + } + __xchg_called_with_bad_pointer(); + return x; +} #define xchg(ptr,x) \ ({ \ __typeof__(*(ptr)) _x_ = (x); \ (__typeof__(*(ptr))) __xchg((ptr), (unsigned long)_x_, sizeof(*(ptr))); \ }) +#define xchg_local(ptr,x) \ + ({ \ + __typeof__(*(ptr)) _x_ = (x); \ + (__typeof__(*(ptr))) __xchg_local((ptr), \ + (unsigned long)_x_, sizeof(*(ptr))); \ + }) + #define tas(ptr) (xchg((ptr),1)) /* @@ -314,6 +375,28 @@ __cmpxchg_u32(volatile unsigned int *p, return prev; } +static __inline__ unsigned long +__cmpxchg_u32_local(volatile unsigned int *p, unsigned long old, + unsigned long new) +{ + unsigned int prev; + + __asm__ __volatile__ ( +"1: lwarx %0,0,%2 # __cmpxchg_u32\n\ + cmpw 0,%0,%3\n\ + bne- 2f\n" + PPC405_ERR77(0,%2) +" stwcx. %4,0,%2\n\ + bne- 1b" + "\n\ +2:" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (old), "r" (new) + : "cc", "memory"); + + return prev; +} + #ifdef CONFIG_PPC64 static __inline__ unsigned long __cmpxchg_u64(volatile unsigned long *p, unsigned long old, unsigned long new) @@ -336,6 +419,27 @@ __cmpxchg_u64(volatile unsigned long *p, return prev; } + +static __inline__ unsigned long +__cmpxchg_u64_local(volatile unsigned long *p, unsigned long old, + unsigned long new) +{ + unsigned long prev; + + __asm__ __volatile__ ( +"1: ldarx %0,0,%2 # __cmpxchg_u64\n\ + cmpd 0,%0,%3\n\ + bne- 2f\n\ + stdcx. %4,0,%2\n\ + bne- 1b" + "\n\ +2:" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (old), "r" (new) + : "cc", "memory"); + + return prev; +} #endif /* This function doesn't exist, so you'll get a linker error @@ -358,6 +462,22 @@ #endif return old; } +static __inline__ unsigned long +__cmpxchg_local(volatile void *ptr, unsigned long old, unsigned long new, + unsigned int size) +{ + switch (size) { + case 4: + return __cmpxchg_u32_local(ptr, old, new); +#ifdef CONFIG_PPC64 + case 8: + return __cmpxchg_u64_local(ptr, old, new); +#endif + } + __cmpxchg_called_with_bad_pointer(); + return old; +} + #define cmpxchg(ptr,o,n) \ ({ \ __typeof__(*(ptr)) _o_ = (o); \ @@ -366,6 +486,15 @@ #define cmpxchg(ptr,o,n) \ (unsigned long)_n_, sizeof(*(ptr))); \ }) + +#define cmpxchg_local(ptr,o,n) \ + ({ \ + __typeof__(*(ptr)) _o_ = (o); \ + __typeof__(*(ptr)) _n_ = (n); \ + (__typeof__(*(ptr))) __cmpxchg_local((ptr), (unsigned long)_o_, \ + (unsigned long)_n_, sizeof(*(ptr))); \ + }) + #ifdef CONFIG_PPC64 /* * We handle most unaligned accesses in hardware. 
On the other hand --- a/include/asm-arm/atomic.h +++ b/include/asm-arm/atomic.h @@ -185,6 +185,7 @@ static inline int atomic_add_unless(atom c = old; return c != u; } + #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) #define atomic_add(i, v) (void) atomic_add_return(i, v) --- a/include/asm-mips/atomic.h +++ b/include/asm-mips/atomic.h @@ -292,8 +292,9 @@ static __inline__ int atomic_sub_if_posi return result; } -#define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n))) -#define atomic_xchg(v, new) (xchg(&((v)->counter), new)) +#define atomic_cmpxchg(v, o, n) \ + (((__typeof__((v)->counter)))cmpxchg(&((v)->counter), (o), (n))) +#define atomic_xchg(v, new) (xchg(&((v)->counter), (new))) /** * atomic_add_unless - add unless the number is a given value @@ -306,7 +307,7 @@ #define atomic_xchg(v, new) (xchg(&((v)- */ #define atomic_add_unless(v, a, u) \ ({ \ - int c, old; \ + __typeof__((v)->counter) c, old; \ c = atomic_read(v); \ while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c) \ c = old; \ @@ -646,6 +647,29 @@ static __inline__ long atomic64_sub_if_p return result; } +#define atomic64_cmpxchg(v, o, n) \ + (((__typeof__((v)->counter)))cmpxchg(&((v)->counter), (o), (n))) +#define atomic64_xchg(v, new) (xchg(&((v)->counter), (new))) + +/** + * atomic64_add_unless - add unless the number is a given value + * @v: pointer of type atomic64_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as it was not @u. + * Returns non-zero if @v was not @u, and zero otherwise. + */ +#define atomic64_add_unless(v, a, u) \ +({ \ + __typeof__((v)->counter) c, old; \ + c = atomic_read(v); \ + while (c != (u) && (old = atomic64_cmpxchg((v), c, c + (a))) != c) \ + c = old; \ + c != (u); \ +}) +#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) + #define atomic64_dec_return(v) atomic64_sub_return(1,(v)) #define atomic64_inc_return(v) atomic64_add_return(1,(v)) --- a/include/asm-generic/atomic.h +++ b/include/asm-generic/atomic.h @@ -66,6 +66,79 @@ static inline void atomic_long_sub(long atomic64_sub(i, v); } +static inline int atomic_long_sub_and_test(long i, atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return atomic64_sub_and_test(i, v); +} + +static inline int atomic_long_dec_and_test(atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return atomic64_dec_and_test(v); +} + +static inline int atomic_long_inc_and_test(atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return atomic64_inc_and_test(v); +} + +static inline int atomic_long_add_negative(long i, atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return atomic64_add_negative(i, v); +} + +static inline long atomic_long_add_return(long i, atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return atomic64_add_return(i, v); +} + +static inline long atomic_long_sub_return(long i, atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return atomic64_sub_return(i, v); +} + +static inline long atomic_long_inc_return(atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return atomic64_inc_return(v); +} + +static inline long atomic_long_dec_return(atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return atomic64_dec_return(v); +} + +#if 0 +/* Atomic add unless is only effective on atomic_t on powerpc (at least) */ +static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u) +{ + atomic_t *v = (atomic_t *)l; + + return atomic_add_unless(v, a, u); +} + +static 
inline long atomic_long_inc_not_zero(atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return atomic_inc_not_zero(v); +} +#endif //0 + #else typedef atomic_t atomic_long_t; @@ -113,5 +186,80 @@ static inline void atomic_long_sub(long atomic_sub(i, v); } +static inline int atomic_long_sub_and_test(long i, atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return atomic_sub_and_test(i, v); +} + +static inline int atomic_long_dec_and_test(atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return atomic_dec_and_test(v); +} + +static inline int atomic_long_inc_and_test(atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return atomic_inc_and_test(v); +} + +static inline int atomic_long_add_negative(long i, atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return atomic_add_negative(i, v); +} + +static inline long atomic_long_add_return(long i, atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return atomic_add_return(i, v); +} + +static inline long atomic_long_sub_return(long i, atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return atomic_sub_return(i, v); +} + +static inline long atomic_long_inc_return(atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return atomic_inc_return(v); +} + +static inline long atomic_long_dec_return(atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return atomic_dec_return(v); +} + +static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u) +{ + atomic_t *v = (atomic_t *)l; + + return atomic_add_unless(v, a, u); +} + +static inline long atomic_long_inc_not_zero(atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return atomic_inc_not_zero(v); +} + #endif + +#define atomic_long_cmpxchg(l, old, new) \ + ((__typeof__((l)->counter))cmpxchg(&((l)->counter), (old), (new))) +#define atomic_long_xchg(l, new) (xchg(&((l)->counter), (new))) + #endif ---END--- OpenPGP public key: http://krystal.dyndns.org:8080/key/compudj.gpg Key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68 ^ permalink raw reply [flat|nested] 13+ messages in thread
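
The cmpxchg_local()/cmpxchg64_local() primitives introduced above lend themselves to a per-cpu reservation loop. A hypothetical sketch follows; the variable and function names are illustrative, and the loop assumes the value is only ever modified by its owning CPU:

#include <linux/percpu.h>
#include <asm/system.h>		/* cmpxchg_local() after this patch */

static DEFINE_PER_CPU(unsigned long, write_offset);	/* hypothetical */

/* Reserve 'len' bytes in a per-cpu buffer without a LOCK prefix. The
 * compare-and-exchange retries if an interrupt or NMI on this CPU updated
 * the offset in the meantime; it is NOT safe against writers on other CPUs. */
static unsigned long reserve_slot(size_t len)
{
	unsigned long *offset = &__get_cpu_var(write_offset);
	unsigned long old, new;

	do {
		old = *offset;
		new = old + len;
	} while (cmpxchg_local(offset, old, new) != old);

	return old;	/* start of the reserved region */
}

The same loop written with plain cmpxchg() would be equally correct; the only difference is the LOCK prefix, which is exactly the cost the _local variants avoid.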
* Re: [PATCH 1/2] atomic.h atomic64_t standardization 2006-12-01 3:11 ` [PATCH 1/2] atomic.h atomic64_t standardization Mathieu Desnoyers @ 2006-12-01 3:24 ` Mathieu Desnoyers 2006-12-01 22:19 ` Mathieu Desnoyers 2006-12-01 3:34 ` [PATCH 1/2] atomic.h atomic64_t standardization Paul Mundt 1 sibling, 1 reply; 13+ messages in thread From: Mathieu Desnoyers @ 2006-12-01 3:24 UTC (permalink / raw) To: Christoph Hellwig, linux-kernel, Andrew Morton, Ingo Molnar, Greg Kroah-Hartman, Thomas Gleixner, Tom Zanussi, Karim Yaghmour, Paul Mundt, Jes Sorensen, Richard J Moore, Martin J. Bligh, Michel Dagenais, Douglas Niehaus, ltt-dev, systemtap New version, fixes PowerPC typo. --- a/include/asm-i386/atomic.h +++ b/include/asm-i386/atomic.h @@ -207,8 +207,9 @@ static __inline__ int atomic_sub_return( return atomic_add_return(-i,v); } -#define atomic_cmpxchg(v, old, new) ((int)cmpxchg(&((v)->counter), old, new)) -#define atomic_xchg(v, new) (xchg(&((v)->counter), new)) +#define atomic_cmpxchg(v, old, new) \ + ((__typeof__((v)->counter))cmpxchg(&((v)->counter), (old), (new))) +#define atomic_xchg(v, new) (xchg(&((v)->counter), (new))) /** * atomic_add_unless - add unless the number is a given value @@ -221,7 +222,7 @@ #define atomic_xchg(v, new) (xchg(&((v)- */ #define atomic_add_unless(v, a, u) \ ({ \ - int c, old; \ + __typeof__((v)->counter) c, old; \ c = atomic_read(v); \ for (;;) { \ if (unlikely(c == (u))) \ --- a/include/asm-i386/system.h +++ b/include/asm-i386/system.h @@ -267,6 +267,9 @@ #define __HAVE_ARCH_CMPXCHG 1 #define cmpxchg(ptr,o,n)\ ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\ (unsigned long)(n),sizeof(*(ptr)))) +#define cmpxchg_local(ptr,o,n)\ + ((__typeof__(*(ptr)))__cmpxchg_local((ptr),(unsigned long)(o),\ + (unsigned long)(n),sizeof(*(ptr)))) #endif static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, @@ -296,6 +299,33 @@ static inline unsigned long __cmpxchg(vo return old; } +static inline unsigned long __cmpxchg_local(volatile void *ptr, + unsigned long old, unsigned long new, int size) +{ + unsigned long prev; + switch (size) { + case 1: + __asm__ __volatile__("cmpxchgb %b1,%2" + : "=a"(prev) + : "q"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 2: + __asm__ __volatile__("cmpxchgw %w1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 4: + __asm__ __volatile__("cmpxchgl %1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + } + return old; +} + #ifndef CONFIG_X86_CMPXCHG /* * Building a kernel capable running on 80386. 
It may be necessary to @@ -332,6 +362,17 @@ ({ \ (unsigned long)(n), sizeof(*(ptr))); \ __ret; \ }) +#define cmpxchg_local(ptr,o,n) \ +({ \ + __typeof__(*(ptr)) __ret; \ + if (likely(boot_cpu_data.x86 > 3)) \ + __ret = __cmpxchg_local((ptr), (unsigned long)(o), \ + (unsigned long)(n), sizeof(*(ptr))); \ + else \ + __ret = cmpxchg_386((ptr), (unsigned long)(o), \ + (unsigned long)(n), sizeof(*(ptr))); \ + __ret; \ +}) #endif #ifdef CONFIG_X86_CMPXCHG64 @@ -350,10 +391,26 @@ static inline unsigned long long __cmpxc return prev; } +static inline unsigned long long __cmpxchg64_local(volatile void *ptr, + unsigned long long old, unsigned long long new) +{ + unsigned long long prev; + __asm__ __volatile__("cmpxchg8b %3" + : "=A"(prev) + : "b"((unsigned long)new), + "c"((unsigned long)(new >> 32)), + "m"(*__xg(ptr)), + "0"(old) + : "memory"); + return prev; +} + #define cmpxchg64(ptr,o,n)\ ((__typeof__(*(ptr)))__cmpxchg64((ptr),(unsigned long long)(o),\ (unsigned long long)(n))) - +#define cmpxchg64_local(ptr,o,n)\ + ((__typeof__(*(ptr)))__cmpxchg64_local((ptr),(unsigned long long)(o),\ + (unsigned long long)(n))) #endif /* --- a/include/asm-x86_64/atomic.h +++ b/include/asm-x86_64/atomic.h @@ -388,7 +388,12 @@ static __inline__ long atomic64_sub_retu #define atomic64_inc_return(v) (atomic64_add_return(1,v)) #define atomic64_dec_return(v) (atomic64_sub_return(1,v)) -#define atomic_cmpxchg(v, old, new) ((int)cmpxchg(&((v)->counter), old, new)) +#define atomic64_cmpxchg(v, old, new) \ + ((__typeof__((v)->counter))cmpxchg(&((v)->counter), old, new)) +#define atomic64_xchg(v, new) (xchg(&((v)->counter), new)) + +#define atomic_cmpxchg(v, old, new) \ + ((__typeof__((v)->counter))cmpxchg(&((v)->counter), old, new)) #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) /** @@ -402,7 +407,7 @@ #define atomic_xchg(v, new) (xchg(&((v)- */ #define atomic_add_unless(v, a, u) \ ({ \ - int c, old; \ + __typeof__((v)->counter) c, old; \ c = atomic_read(v); \ for (;;) { \ if (unlikely(c == (u))) \ @@ -416,6 +421,9 @@ ({ \ }) #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) +#define atomic64_add_unless(v, a, u) atomic_add_unless((v), (a), (u)) +#define atomic64_inc_not_zero(v) atomic_inc_not_zero((v)) + /* These are x86-specific, used by some header files */ #define atomic_clear_mask(mask, addr) \ __asm__ __volatile__(LOCK_PREFIX "andl %0,%1" \ --- a/include/asm-x86_64/system.h +++ b/include/asm-x86_64/system.h @@ -208,9 +208,45 @@ static inline unsigned long __cmpxchg(vo return old; } +static inline unsigned long __cmpxchg_local(volatile void *ptr, + unsigned long old, unsigned long new, int size) +{ + unsigned long prev; + switch (size) { + case 1: + __asm__ __volatile__("cmpxchgb %b1,%2" + : "=a"(prev) + : "q"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 2: + __asm__ __volatile__("cmpxchgw %w1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 4: + __asm__ __volatile__("cmpxchgl %k1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 8: + __asm__ __volatile__("cmpxchgq %1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + } + return old; +} + #define cmpxchg(ptr,o,n)\ ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\ (unsigned long)(n),sizeof(*(ptr)))) +#define cmpxchg_local(ptr,o,n)\ + ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\ + (unsigned long)(n),sizeof(*(ptr)))) #ifdef CONFIG_SMP #define smp_mb() mb() --- 
a/include/asm-powerpc/atomic.h +++ b/include/asm-powerpc/atomic.h @@ -165,7 +165,8 @@ static __inline__ int atomic_dec_return( return t; } -#define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n))) +#define atomic_cmpxchg(v, o, n) \ + ((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n))) #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) /** @@ -411,6 +412,44 @@ static __inline__ long atomic64_dec_if_p return t; } +#define atomic64_cmpxchg(v, o, n) \ + ((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n))) +#define atomic64_xchg(v, new) (xchg(&((v)->counter), new)) + +/** + * atomic64_add_unless - add unless the number is a given value + * @v: pointer of type atomic64_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as it was not @u. + * Returns non-zero if @v was not @u, and zero otherwise. + */ +static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u) +{ + long t; + + __asm__ __volatile__ ( + LWSYNC_ON_SMP +"1: ldarx %0,0,%1 # atomic_add_unless\n\ + cmpd 0,%0,%3 \n\ + beq- 2f \n\ + add %0,%2,%0 \n" + PPC405_ERR77(0,%2) +" stdcx. %0,0,%1 \n\ + bne- 1b \n" + ISYNC_ON_SMP +" subf %0,%2,%0 \n\ +2:" + : "=&r" (t) + : "r" (&v->counter), "r" (a), "r" (u) + : "cc", "memory"); + + return t != u; +} + +#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) + #endif /* __powerpc64__ */ #include <asm-generic/atomic.h> --- a/include/asm-powerpc/system.h +++ b/include/asm-powerpc/system.h @@ -235,6 +235,29 @@ __xchg_u32(volatile void *p, unsigned lo return prev; } +/* + * Atomic exchange + * + * Changes the memory location '*ptr' to be val and returns + * the previous value stored there. + */ +static __inline__ unsigned long +__xchg_u32_local(volatile void *p, unsigned long val) +{ + unsigned long prev; + + __asm__ __volatile__( +"1: lwarx %0,0,%2 \n" + PPC405_ERR77(0,%2) +" stwcx. %3,0,%2 \n\ + bne- 1b" + : "=&r" (prev), "+m" (*(volatile unsigned int *)p) + : "r" (p), "r" (val) + : "cc", "memory"); + + return prev; +} + #ifdef CONFIG_PPC64 static __inline__ unsigned long __xchg_u64(volatile void *p, unsigned long val) @@ -254,6 +277,23 @@ __xchg_u64(volatile void *p, unsigned lo return prev; } + +static __inline__ unsigned long +__xchg_u64_local(volatile void *p, unsigned long val) +{ + unsigned long prev; + + __asm__ __volatile__( +"1: ldarx %0,0,%2 \n" + PPC405_ERR77(0,%2) +" stdcx. 
%3,0,%2 \n\ + bne- 1b" + : "=&r" (prev), "+m" (*(volatile unsigned long *)p) + : "r" (p), "r" (val) + : "cc", "memory"); + + return prev; +} #endif /* @@ -277,12 +317,33 @@ #endif return x; } +static __inline__ unsigned long +__xchg_local(volatile void *ptr, unsigned long x, unsigned int size) +{ + switch (size) { + case 4: + return __xchg_u32_local(ptr, x); +#ifdef CONFIG_PPC64 + case 8: + return __xchg_u64_local(ptr, x); +#endif + } + __xchg_called_with_bad_pointer(); + return x; +} #define xchg(ptr,x) \ ({ \ __typeof__(*(ptr)) _x_ = (x); \ (__typeof__(*(ptr))) __xchg((ptr), (unsigned long)_x_, sizeof(*(ptr))); \ }) +#define xchg_local(ptr,x) \ + ({ \ + __typeof__(*(ptr)) _x_ = (x); \ + (__typeof__(*(ptr))) __xchg_local((ptr), \ + (unsigned long)_x_, sizeof(*(ptr))); \ + }) + #define tas(ptr) (xchg((ptr),1)) /* @@ -314,6 +375,28 @@ __cmpxchg_u32(volatile unsigned int *p, return prev; } +static __inline__ unsigned long +__cmpxchg_u32_local(volatile unsigned int *p, unsigned long old, + unsigned long new) +{ + unsigned int prev; + + __asm__ __volatile__ ( +"1: lwarx %0,0,%2 # __cmpxchg_u32\n\ + cmpw 0,%0,%3\n\ + bne- 2f\n" + PPC405_ERR77(0,%2) +" stwcx. %4,0,%2\n\ + bne- 1b" + "\n\ +2:" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (old), "r" (new) + : "cc", "memory"); + + return prev; +} + #ifdef CONFIG_PPC64 static __inline__ unsigned long __cmpxchg_u64(volatile unsigned long *p, unsigned long old, unsigned long new) @@ -336,6 +419,27 @@ __cmpxchg_u64(volatile unsigned long *p, return prev; } + +static __inline__ unsigned long +__cmpxchg_u64_local(volatile unsigned long *p, unsigned long old, + unsigned long new) +{ + unsigned long prev; + + __asm__ __volatile__ ( +"1: ldarx %0,0,%2 # __cmpxchg_u64\n\ + cmpd 0,%0,%3\n\ + bne- 2f\n\ + stdcx. %4,0,%2\n\ + bne- 1b" + "\n\ +2:" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (old), "r" (new) + : "cc", "memory"); + + return prev; +} #endif /* This function doesn't exist, so you'll get a linker error @@ -358,6 +462,22 @@ #endif return old; } +static __inline__ unsigned long +__cmpxchg_local(volatile void *ptr, unsigned long old, unsigned long new, + unsigned int size) +{ + switch (size) { + case 4: + return __cmpxchg_u32_local(ptr, old, new); +#ifdef CONFIG_PPC64 + case 8: + return __cmpxchg_u64_local(ptr, old, new); +#endif + } + __cmpxchg_called_with_bad_pointer(); + return old; +} + #define cmpxchg(ptr,o,n) \ ({ \ __typeof__(*(ptr)) _o_ = (o); \ @@ -366,6 +486,15 @@ #define cmpxchg(ptr,o,n) \ (unsigned long)_n_, sizeof(*(ptr))); \ }) + +#define cmpxchg_local(ptr,o,n) \ + ({ \ + __typeof__(*(ptr)) _o_ = (o); \ + __typeof__(*(ptr)) _n_ = (n); \ + (__typeof__(*(ptr))) __cmpxchg_local((ptr), (unsigned long)_o_, \ + (unsigned long)_n_, sizeof(*(ptr))); \ + }) + #ifdef CONFIG_PPC64 /* * We handle most unaligned accesses in hardware. 
On the other hand --- a/include/asm-arm/atomic.h +++ b/include/asm-arm/atomic.h @@ -185,6 +185,7 @@ static inline int atomic_add_unless(atom c = old; return c != u; } + #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) #define atomic_add(i, v) (void) atomic_add_return(i, v) --- a/include/asm-mips/atomic.h +++ b/include/asm-mips/atomic.h @@ -292,8 +292,9 @@ static __inline__ int atomic_sub_if_posi return result; } -#define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n))) -#define atomic_xchg(v, new) (xchg(&((v)->counter), new)) +#define atomic_cmpxchg(v, o, n) \ + (((__typeof__((v)->counter)))cmpxchg(&((v)->counter), (o), (n))) +#define atomic_xchg(v, new) (xchg(&((v)->counter), (new))) /** * atomic_add_unless - add unless the number is a given value @@ -306,7 +307,7 @@ #define atomic_xchg(v, new) (xchg(&((v)- */ #define atomic_add_unless(v, a, u) \ ({ \ - int c, old; \ + __typeof__((v)->counter) c, old; \ c = atomic_read(v); \ while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c) \ c = old; \ @@ -646,6 +647,29 @@ static __inline__ long atomic64_sub_if_p return result; } +#define atomic64_cmpxchg(v, o, n) \ + (((__typeof__((v)->counter)))cmpxchg(&((v)->counter), (o), (n))) +#define atomic64_xchg(v, new) (xchg(&((v)->counter), (new))) + +/** + * atomic64_add_unless - add unless the number is a given value + * @v: pointer of type atomic64_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as it was not @u. + * Returns non-zero if @v was not @u, and zero otherwise. + */ +#define atomic64_add_unless(v, a, u) \ +({ \ + __typeof__((v)->counter) c, old; \ + c = atomic_read(v); \ + while (c != (u) && (old = atomic64_cmpxchg((v), c, c + (a))) != c) \ + c = old; \ + c != (u); \ +}) +#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) + #define atomic64_dec_return(v) atomic64_sub_return(1,(v)) #define atomic64_inc_return(v) atomic64_add_return(1,(v)) --- a/include/asm-generic/atomic.h +++ b/include/asm-generic/atomic.h @@ -66,6 +66,79 @@ static inline void atomic_long_sub(long atomic64_sub(i, v); } +static inline int atomic_long_sub_and_test(long i, atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return atomic64_sub_and_test(i, v); +} + +static inline int atomic_long_dec_and_test(atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return atomic64_dec_and_test(v); +} + +static inline int atomic_long_inc_and_test(atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return atomic64_inc_and_test(v); +} + +static inline int atomic_long_add_negative(long i, atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return atomic64_add_negative(i, v); +} + +static inline long atomic_long_add_return(long i, atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return atomic64_add_return(i, v); +} + +static inline long atomic_long_sub_return(long i, atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return atomic64_sub_return(i, v); +} + +static inline long atomic_long_inc_return(atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return atomic64_inc_return(v); +} + +static inline long atomic_long_dec_return(atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return atomic64_dec_return(v); +} + +#if 0 +/* Atomic add unless is only effective on atomic_t on powerpc (at least) */ +static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u) +{ + atomic_t *v = (atomic_t *)l; + + return atomic_add_unless(v, a, u); +} + +static 
inline long atomic_long_inc_not_zero(atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return atomic_inc_not_zero(v); +} +#endif //0 + #else typedef atomic_t atomic_long_t; @@ -113,5 +186,80 @@ static inline void atomic_long_sub(long atomic_sub(i, v); } +static inline int atomic_long_sub_and_test(long i, atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return atomic_sub_and_test(i, v); +} + +static inline int atomic_long_dec_and_test(atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return atomic_dec_and_test(v); +} + +static inline int atomic_long_inc_and_test(atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return atomic_inc_and_test(v); +} + +static inline int atomic_long_add_negative(long i, atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return atomic_add_negative(i, v); +} + +static inline long atomic_long_add_return(long i, atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return atomic_add_return(i, v); +} + +static inline long atomic_long_sub_return(long i, atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return atomic_sub_return(i, v); +} + +static inline long atomic_long_inc_return(atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return atomic_inc_return(v); +} + +static inline long atomic_long_dec_return(atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return atomic_dec_return(v); +} + +static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u) +{ + atomic_t *v = (atomic_t *)l; + + return atomic_add_unless(v, a, u); +} + +static inline long atomic_long_inc_not_zero(atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return atomic_inc_not_zero(v); +} + #endif + +#define atomic_long_cmpxchg(l, old, new) \ + ((__typeof__((l)->counter))cmpxchg(&((l)->counter), (old), (new))) +#define atomic_long_xchg(l, new) (xchg(&((l)->counter), (new))) + #endif OpenPGP public key: http://krystal.dyndns.org:8080/key/compudj.gpg Key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68 ^ permalink raw reply [flat|nested] 13+ messages in thread
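The cmpxchg_local()/xchg_local() primitives added above are intended for data that is only ever modified from its owning CPU, such as per-cpu tracing buffers, where a LOCK-prefixed or fully fenced operation is wasted cost. Below is a minimal, hypothetical sketch of that usage pattern; the per-cpu variable and the reserve_slot() helper are invented for illustration, and the caller is assumed to have preemption disabled.

	/* Hypothetical usage sketch; not part of the posted patch. */
	#include <linux/percpu.h>
	#include <asm/system.h>

	static DEFINE_PER_CPU(unsigned long, slot_count);

	static unsigned long reserve_slot(void)
	{
		unsigned long *count = &__get_cpu_var(slot_count);
		unsigned long old, new;

		do {
			old = *count;		/* only this CPU ever writes it */
			new = old + 1;
		} while (cmpxchg_local(count, old, new) != old);

		return old;
	}

cmpxchg_local() is atomic only with respect to code running on the same CPU (interrupt handlers included); it provides no atomicity or ordering across CPUs, so any cross-CPU reader of slot_count still needs its own memory barriers.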
* Re: [PATCH 1/2] atomic.h atomic64_t standardization 2006-12-01 3:24 ` Mathieu Desnoyers @ 2006-12-01 22:19 ` Mathieu Desnoyers 2006-12-02 0:43 ` Nick Piggin 2006-12-05 17:08 ` [PATCH 1/2] atomic.h atomic64_t standardization for 2.6.19 Mathieu Desnoyers 0 siblings, 2 replies; 13+ messages in thread From: Mathieu Desnoyers @ 2006-12-01 22:19 UTC (permalink / raw) To: Christoph Hellwig, linux-kernel Cc: Andrew Morton, Ingo Molnar, Greg Kroah-Hartman, Thomas Gleixner, Tom Zanussi, Karim Yaghmour, Paul Mundt, Jes Sorensen, Richard J Moore, Martin J. Bligh, Michel Dagenais, Douglas Niehaus, ltt-dev, systemtap Hi, I finalized the work for atomic64_t cmpxchg and atomic64_add_unless on all architectures. asm-generic/atomic.h atomic_long_t is also streamlined. Review is welcome. Mathieu ---BEGIN--- --- a/include/asm-alpha/atomic.h +++ b/include/asm-alpha/atomic.h @@ -175,19 +175,64 @@ static __inline__ long atomic64_sub_retu return result; } -#define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n))) +#define atomic64_cmpxchg(v, old, new) \ + ((__typeof__((v)->counter))cmpxchg(&((v)->counter), old, new)) +#define atomic64_xchg(v, new) (xchg(&((v)->counter), new)) + +#define atomic_cmpxchg(v, old, new) \ + ((__typeof__((v)->counter))cmpxchg(&((v)->counter), old, new)) #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) +/** + * atomic_add_unless - add unless the number is a given value + * @v: pointer of type atomic_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as it was not @u. + * Returns non-zero if @v was not @u, and zero otherwise. + */ #define atomic_add_unless(v, a, u) \ ({ \ - int c, old; \ + __typeof__((v)->counter) c, old; \ c = atomic_read(v); \ - while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c) \ + for (;;) { \ + if (unlikely(c == (u))) \ + break; \ + old = atomic_cmpxchg((v), c, c + (a)); \ + if (likely(old == c)) \ + break; \ c = old; \ + } \ c != (u); \ }) #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) +/** + * atomic64_add_unless - add unless the number is a given value + * @v: pointer of type atomic64_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as it was not @u. + * Returns non-zero if @v was not @u, and zero otherwise. 
+ */ +#define atomic64_add_unless(v, a, u) \ +({ \ + __typeof__((v)->counter) c, old; \ + c = atomic64_read(v); \ + for (;;) { \ + if (unlikely(c == (u))) \ + break; \ + old = atomic64_cmpxchg((v), c, c + (a)); \ + if (likely(old == c)) \ + break; \ + c = old; \ + } \ + c != (u); \ +}) +#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) + #define atomic_add_negative(a, v) (atomic_add_return((a), (v)) < 0) #define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0) --- a/include/asm-alpha/system.h +++ b/include/asm-alpha/system.h @@ -443,6 +443,111 @@ #define xchg(ptr,x) \ (__typeof__(*(ptr))) __xchg((ptr), (unsigned long)_x_, sizeof(*(ptr))); \ }) +static inline unsigned long +__xchg_u8_local(volatile char *m, unsigned long val) +{ + unsigned long ret, tmp, addr64; + + __asm__ __volatile__( + " andnot %4,7,%3\n" + " insbl %1,%4,%1\n" + "1: ldq_l %2,0(%3)\n" + " extbl %2,%4,%0\n" + " mskbl %2,%4,%2\n" + " or %1,%2,%2\n" + " stq_c %2,0(%3)\n" + " beq %2,2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + : "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64) + : "r" ((long)m), "1" (val) : "memory"); + + return ret; +} + +static inline unsigned long +__xchg_u16_local(volatile short *m, unsigned long val) +{ + unsigned long ret, tmp, addr64; + + __asm__ __volatile__( + " andnot %4,7,%3\n" + " inswl %1,%4,%1\n" + "1: ldq_l %2,0(%3)\n" + " extwl %2,%4,%0\n" + " mskwl %2,%4,%2\n" + " or %1,%2,%2\n" + " stq_c %2,0(%3)\n" + " beq %2,2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + : "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64) + : "r" ((long)m), "1" (val) : "memory"); + + return ret; +} + +static inline unsigned long +__xchg_u32_local(volatile int *m, unsigned long val) +{ + unsigned long dummy; + + __asm__ __volatile__( + "1: ldl_l %0,%4\n" + " bis $31,%3,%1\n" + " stl_c %1,%2\n" + " beq %1,2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + : "=&r" (val), "=&r" (dummy), "=m" (*m) + : "rI" (val), "m" (*m) : "memory"); + + return val; +} + +static inline unsigned long +__xchg_u64_local(volatile long *m, unsigned long val) +{ + unsigned long dummy; + + __asm__ __volatile__( + "1: ldq_l %0,%4\n" + " bis $31,%3,%1\n" + " stq_c %1,%2\n" + " beq %1,2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + : "=&r" (val), "=&r" (dummy), "=m" (*m) + : "rI" (val), "m" (*m) : "memory"); + + return val; +} + +#define __xchg_local(ptr, x, size) \ +({ \ + unsigned long __xchg__res; \ + volatile void *__xchg__ptr = (ptr); \ + switch (size) { \ + case 1: __xchg__res = __xchg_u8_local(__xchg__ptr, x); break; \ + case 2: __xchg__res = __xchg_u16_local(__xchg__ptr, x); break; \ + case 4: __xchg__res = __xchg_u32_local(__xchg__ptr, x); break; \ + case 8: __xchg__res = __xchg_u64_local(__xchg__ptr, x); break; \ + default: __xchg_called_with_bad_pointer(); __xchg__res = x; \ + } \ + __xchg__res; \ +}) + +#define xchg_local(ptr,x) \ + ({ \ + __typeof__(*(ptr)) _x_ = (x); \ + (__typeof__(*(ptr))) __xchg_local((ptr), (unsigned long)_x_, \ + sizeof(*(ptr))); \ + }) + #define tas(ptr) (xchg((ptr),1)) @@ -596,6 +701,128 @@ #define cmpxchg(ptr,o,n) \ (unsigned long)_n_, sizeof(*(ptr))); \ }) +static inline unsigned long +__cmpxchg_u8_local(volatile char *m, long old, long new) +{ + unsigned long prev, tmp, cmp, addr64; + + __asm__ __volatile__( + " andnot %5,7,%4\n" + " insbl %1,%5,%1\n" + "1: ldq_l %2,0(%4)\n" + " extbl %2,%5,%0\n" + " cmpeq %0,%6,%3\n" + " beq %3,2f\n" + " mskbl %2,%5,%2\n" + " or %1,%2,%2\n" + " stq_c %2,0(%4)\n" + " beq %2,3f\n" + "2:\n" + ".subsection 
2\n" + "3: br 1b\n" + ".previous" + : "=&r" (prev), "=&r" (new), "=&r" (tmp), "=&r" (cmp), "=&r" (addr64) + : "r" ((long)m), "Ir" (old), "1" (new) : "memory"); + + return prev; +} + +static inline unsigned long +__cmpxchg_u16_local(volatile short *m, long old, long new) +{ + unsigned long prev, tmp, cmp, addr64; + + __asm__ __volatile__( + " andnot %5,7,%4\n" + " inswl %1,%5,%1\n" + "1: ldq_l %2,0(%4)\n" + " extwl %2,%5,%0\n" + " cmpeq %0,%6,%3\n" + " beq %3,2f\n" + " mskwl %2,%5,%2\n" + " or %1,%2,%2\n" + " stq_c %2,0(%4)\n" + " beq %2,3f\n" + "2:\n" + ".subsection 2\n" + "3: br 1b\n" + ".previous" + : "=&r" (prev), "=&r" (new), "=&r" (tmp), "=&r" (cmp), "=&r" (addr64) + : "r" ((long)m), "Ir" (old), "1" (new) : "memory"); + + return prev; +} + +static inline unsigned long +__cmpxchg_u32_local(volatile int *m, int old, int new) +{ + unsigned long prev, cmp; + + __asm__ __volatile__( + "1: ldl_l %0,%5\n" + " cmpeq %0,%3,%1\n" + " beq %1,2f\n" + " mov %4,%1\n" + " stl_c %1,%2\n" + " beq %1,3f\n" + "2:\n" + ".subsection 2\n" + "3: br 1b\n" + ".previous" + : "=&r"(prev), "=&r"(cmp), "=m"(*m) + : "r"((long) old), "r"(new), "m"(*m) : "memory"); + + return prev; +} + +static inline unsigned long +__cmpxchg_u64_local(volatile long *m, unsigned long old, unsigned long new) +{ + unsigned long prev, cmp; + + __asm__ __volatile__( + "1: ldq_l %0,%5\n" + " cmpeq %0,%3,%1\n" + " beq %1,2f\n" + " mov %4,%1\n" + " stq_c %1,%2\n" + " beq %1,3f\n" + "2:\n" + ".subsection 2\n" + "3: br 1b\n" + ".previous" + : "=&r"(prev), "=&r"(cmp), "=m"(*m) + : "r"((long) old), "r"(new), "m"(*m) : "memory"); + + return prev; +} + +static __always_inline unsigned long +__cmpxchg_local(volatile void *ptr, unsigned long old, unsigned long new, + int size) +{ + switch (size) { + case 1: + return __cmpxchg_u8_local(ptr, old, new); + case 2: + return __cmpxchg_u16_local(ptr, old, new); + case 4: + return __cmpxchg_u32_local(ptr, old, new); + case 8: + return __cmpxchg_u64_local(ptr, old, new); + } + __cmpxchg_called_with_bad_pointer(); + return old; +} + +#define cmpxchg_local(ptr,o,n) \ + ({ \ + __typeof__(*(ptr)) _o_ = (o); \ + __typeof__(*(ptr)) _n_ = (n); \ + (__typeof__(*(ptr))) __cmpxchg_local((ptr), (unsigned long)_o_, \ + (unsigned long)_n_, sizeof(*(ptr))); \ + }) + #endif /* __ASSEMBLY__ */ #define arch_align_stack(x) (x) --- a/include/asm-arm/atomic.h +++ b/include/asm-arm/atomic.h @@ -185,6 +185,7 @@ static inline int atomic_add_unless(atom c = old; return c != u; } + #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) #define atomic_add(i, v) (void) atomic_add_return(i, v) --- a/include/asm-generic/atomic.h +++ b/include/asm-generic/atomic.h @@ -66,6 +66,76 @@ static inline void atomic_long_sub(long atomic64_sub(i, v); } +static inline int atomic_long_sub_and_test(long i, atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return (long)atomic64_sub_and_test(i, v); +} + +static inline int atomic_long_dec_and_test(atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return (long)atomic64_dec_and_test(v); +} + +static inline int atomic_long_inc_and_test(atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return (long)atomic64_inc_and_test(v); +} + +static inline int atomic_long_add_negative(long i, atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return (long)atomic64_add_negative(i, v); +} + +static inline long atomic_long_add_return(long i, atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return (long)atomic64_add_return(i, v); +} + +static inline long 
atomic_long_sub_return(long i, atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return (long)atomic64_sub_return(i, v); +} + +static inline long atomic_long_inc_return(atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return (long)atomic64_inc_return(v); +} + +static inline long atomic_long_dec_return(atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return (long)atomic64_dec_return(v); +} + +static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u) +{ + atomic64_t *v = (atomic64_t *)l; + + return (long)atomic64_add_unless(v, a, u); +} + +static inline long atomic_long_inc_not_zero(atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return (long)atomic64_inc_not_zero(v); +} + #else typedef atomic_t atomic_long_t; @@ -113,5 +183,80 @@ static inline void atomic_long_sub(long atomic_sub(i, v); } +static inline int atomic_long_sub_and_test(long i, atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return atomic_sub_and_test(i, v); +} + +static inline int atomic_long_dec_and_test(atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return atomic_dec_and_test(v); +} + +static inline int atomic_long_inc_and_test(atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return atomic_inc_and_test(v); +} + +static inline int atomic_long_add_negative(long i, atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return atomic_add_negative(i, v); +} + +static inline long atomic_long_add_return(long i, atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return (long)atomic_add_return(i, v); +} + +static inline long atomic_long_sub_return(long i, atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return (long)atomic_sub_return(i, v); +} + +static inline long atomic_long_inc_return(atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return (long)atomic_inc_return(v); +} + +static inline long atomic_long_dec_return(atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return (long)atomic_dec_return(v); +} + +static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u) +{ + atomic_t *v = (atomic_t *)l; + + return (long)atomic_add_unless(v, a, u); +} + +static inline long atomic_long_inc_not_zero(atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return (long)atomic_inc_not_zero(v); +} + #endif + +#define atomic_long_cmpxchg(l, old, new) \ + ((long)cmpxchg(&((l)->counter), (old), (new))) +#define atomic_long_xchg(l, new) (xchg(&((l)->counter), (new))) + #endif --- a/include/asm-i386/atomic.h +++ b/include/asm-i386/atomic.h @@ -207,8 +207,9 @@ static __inline__ int atomic_sub_return( return atomic_add_return(-i,v); } -#define atomic_cmpxchg(v, old, new) ((int)cmpxchg(&((v)->counter), old, new)) -#define atomic_xchg(v, new) (xchg(&((v)->counter), new)) +#define atomic_cmpxchg(v, old, new) \ + ((__typeof__((v)->counter))cmpxchg(&((v)->counter), (old), (new))) +#define atomic_xchg(v, new) (xchg(&((v)->counter), (new))) /** * atomic_add_unless - add unless the number is a given value @@ -221,7 +222,7 @@ #define atomic_xchg(v, new) (xchg(&((v)- */ #define atomic_add_unless(v, a, u) \ ({ \ - int c, old; \ + __typeof__((v)->counter) c, old; \ c = atomic_read(v); \ for (;;) { \ if (unlikely(c == (u))) \ --- a/include/asm-i386/system.h +++ b/include/asm-i386/system.h @@ -267,6 +267,9 @@ #define __HAVE_ARCH_CMPXCHG 1 #define cmpxchg(ptr,o,n)\ ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\ (unsigned long)(n),sizeof(*(ptr)))) +#define cmpxchg_local(ptr,o,n)\ + 
((__typeof__(*(ptr)))__cmpxchg_local((ptr),(unsigned long)(o),\ + (unsigned long)(n),sizeof(*(ptr)))) #endif static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, @@ -296,6 +299,33 @@ static inline unsigned long __cmpxchg(vo return old; } +static inline unsigned long __cmpxchg_local(volatile void *ptr, + unsigned long old, unsigned long new, int size) +{ + unsigned long prev; + switch (size) { + case 1: + __asm__ __volatile__("cmpxchgb %b1,%2" + : "=a"(prev) + : "q"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 2: + __asm__ __volatile__("cmpxchgw %w1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 4: + __asm__ __volatile__("cmpxchgl %1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + } + return old; +} + #ifndef CONFIG_X86_CMPXCHG /* * Building a kernel capable running on 80386. It may be necessary to @@ -332,6 +362,17 @@ ({ \ (unsigned long)(n), sizeof(*(ptr))); \ __ret; \ }) +#define cmpxchg_local(ptr,o,n) \ +({ \ + __typeof__(*(ptr)) __ret; \ + if (likely(boot_cpu_data.x86 > 3)) \ + __ret = __cmpxchg_local((ptr), (unsigned long)(o), \ + (unsigned long)(n), sizeof(*(ptr))); \ + else \ + __ret = cmpxchg_386((ptr), (unsigned long)(o), \ + (unsigned long)(n), sizeof(*(ptr))); \ + __ret; \ +}) #endif #ifdef CONFIG_X86_CMPXCHG64 @@ -350,10 +391,26 @@ static inline unsigned long long __cmpxc return prev; } +static inline unsigned long long __cmpxchg64_local(volatile void *ptr, + unsigned long long old, unsigned long long new) +{ + unsigned long long prev; + __asm__ __volatile__("cmpxchg8b %3" + : "=A"(prev) + : "b"((unsigned long)new), + "c"((unsigned long)(new >> 32)), + "m"(*__xg(ptr)), + "0"(old) + : "memory"); + return prev; +} + #define cmpxchg64(ptr,o,n)\ ((__typeof__(*(ptr)))__cmpxchg64((ptr),(unsigned long long)(o),\ (unsigned long long)(n))) - +#define cmpxchg64_local(ptr,o,n)\ + ((__typeof__(*(ptr)))__cmpxchg64_local((ptr),(unsigned long long)(o),\ + (unsigned long long)(n))) #endif /* --- a/include/asm-ia64/atomic.h +++ b/include/asm-ia64/atomic.h @@ -88,12 +88,17 @@ ia64_atomic64_sub (__s64 i, atomic64_t * return new; } -#define atomic_cmpxchg(v, old, new) ((int)cmpxchg(&((v)->counter), old, new)) +#define atomic_cmpxchg(v, old, new) \ + ((__typeof__((v)->counter))cmpxchg(&((v)->counter), old, new)) #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) +#define atomic64_cmpxchg(v, old, new) \ + ((__typeof__((v)->counter))cmpxchg(&((v)->counter), old, new)) +#define atomic64_xchg(v, new) (xchg(&((v)->counter), new)) + #define atomic_add_unless(v, a, u) \ ({ \ - int c, old; \ + __typeof__(v->counter) c, old; \ c = atomic_read(v); \ for (;;) { \ if (unlikely(c == (u))) \ @@ -107,6 +112,22 @@ ({ \ }) #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) +#define atomic64_add_unless(v, a, u) \ +({ \ + __typeof__(v->counter) c, old; \ + c = atomic64_read(v); \ + for (;;) { \ + if (unlikely(c == (u))) \ + break; \ + old = atomic64_cmpxchg((v), c, c + (a)); \ + if (likely(old == c)) \ + break; \ + c = old; \ + } \ + c != (u); \ +}) +#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) + #define atomic_add_return(i,v) \ ({ \ int __ia64_aar_i = (i); \ --- a/include/asm-mips/atomic.h +++ b/include/asm-mips/atomic.h @@ -292,8 +292,9 @@ static __inline__ int atomic_sub_if_posi return result; } -#define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n))) -#define atomic_xchg(v, new) (xchg(&((v)->counter), new)) +#define 
atomic_cmpxchg(v, o, n) \ + (((__typeof__((v)->counter)))cmpxchg(&((v)->counter), (o), (n))) +#define atomic_xchg(v, new) (xchg(&((v)->counter), (new))) /** * atomic_add_unless - add unless the number is a given value @@ -306,7 +307,7 @@ #define atomic_xchg(v, new) (xchg(&((v)- */ #define atomic_add_unless(v, a, u) \ ({ \ - int c, old; \ + __typeof__((v)->counter) c, old; \ c = atomic_read(v); \ while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c) \ c = old; \ @@ -646,6 +647,29 @@ static __inline__ long atomic64_sub_if_p return result; } +#define atomic64_cmpxchg(v, o, n) \ + (((__typeof__((v)->counter)))cmpxchg(&((v)->counter), (o), (n))) +#define atomic64_xchg(v, new) (xchg(&((v)->counter), (new))) + +/** + * atomic64_add_unless - add unless the number is a given value + * @v: pointer of type atomic64_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as it was not @u. + * Returns non-zero if @v was not @u, and zero otherwise. + */ +#define atomic64_add_unless(v, a, u) \ +({ \ + __typeof__((v)->counter) c, old; \ + c = atomic_read(v); \ + while (c != (u) && (old = atomic64_cmpxchg((v), c, c + (a))) != c) \ + c = old; \ + c != (u); \ +}) +#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) + #define atomic64_dec_return(v) atomic64_sub_return(1,(v)) #define atomic64_inc_return(v) atomic64_add_return(1,(v)) --- a/include/asm-parisc/atomic.h +++ b/include/asm-parisc/atomic.h @@ -163,7 +163,8 @@ static __inline__ int atomic_read(const } /* exported interface */ -#define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n))) +#define atomic_cmpxchg(v, o, n) \ + ((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n))) #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) /** @@ -177,7 +178,7 @@ #define atomic_xchg(v, new) (xchg(&((v)- */ #define atomic_add_unless(v, a, u) \ ({ \ - int c, old; \ + __typeof__((v)->counter) c, old; \ c = atomic_read(v); \ while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c) \ c = old; \ @@ -270,6 +271,31 @@ #define atomic64_inc_and_test(v) (atomi #define atomic64_dec_and_test(v) (atomic64_dec_return(v) == 0) #define atomic64_sub_and_test(i,v) (atomic64_sub_return((i),(v)) == 0) +/* exported interface */ +#define atomic64_cmpxchg(v, o, n) \ + ((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n))) +#define atomic64_xchg(v, new) (xchg(&((v)->counter), new)) + +/** + * atomic64_add_unless - add unless the number is a given value + * @v: pointer of type atomic64_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as it was not @u. + * Returns non-zero if @v was not @u, and zero otherwise. 
+ */ +#define atomic64_add_unless(v, a, u) \ +({ \ + __typeof__((v)->counter) c, old; \ + c = atomic64_read(v); \ + while (c != (u) && (old = atomic64_cmpxchg((v), c, c + (a))) != c) \ + c = old; \ + c != (u); \ +}) +#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) + + #endif /* __LP64__ */ #include <asm-generic/atomic.h> --- a/include/asm-powerpc/atomic.h +++ b/include/asm-powerpc/atomic.h @@ -165,7 +165,8 @@ static __inline__ int atomic_dec_return( return t; } -#define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n))) +#define atomic_cmpxchg(v, o, n) \ + ((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n))) #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) /** @@ -411,6 +412,44 @@ static __inline__ long atomic64_dec_if_p return t; } +#define atomic64_cmpxchg(v, o, n) \ + ((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n))) +#define atomic64_xchg(v, new) (xchg(&((v)->counter), new)) + +/** + * atomic64_add_unless - add unless the number is a given value + * @v: pointer of type atomic64_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as it was not @u. + * Returns non-zero if @v was not @u, and zero otherwise. + */ +static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u) +{ + long t; + + __asm__ __volatile__ ( + LWSYNC_ON_SMP +"1: ldarx %0,0,%1 # atomic_add_unless\n\ + cmpd 0,%0,%3 \n\ + beq- 2f \n\ + add %0,%2,%0 \n" + PPC405_ERR77(0,%2) +" stdcx. %0,0,%1 \n\ + bne- 1b \n" + ISYNC_ON_SMP +" subf %0,%2,%0 \n\ +2:" + : "=&r" (t) + : "r" (&v->counter), "r" (a), "r" (u) + : "cc", "memory"); + + return t != u; +} + +#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) + #endif /* __powerpc64__ */ #include <asm-generic/atomic.h> --- a/include/asm-powerpc/system.h +++ b/include/asm-powerpc/system.h @@ -235,6 +235,29 @@ __xchg_u32(volatile void *p, unsigned lo return prev; } +/* + * Atomic exchange + * + * Changes the memory location '*ptr' to be val and returns + * the previous value stored there. + */ +static __inline__ unsigned long +__xchg_u32_local(volatile void *p, unsigned long val) +{ + unsigned long prev; + + __asm__ __volatile__( +"1: lwarx %0,0,%2 \n" + PPC405_ERR77(0,%2) +" stwcx. %3,0,%2 \n\ + bne- 1b" + : "=&r" (prev), "+m" (*(volatile unsigned int *)p) + : "r" (p), "r" (val) + : "cc", "memory"); + + return prev; +} + #ifdef CONFIG_PPC64 static __inline__ unsigned long __xchg_u64(volatile void *p, unsigned long val) @@ -254,6 +277,23 @@ __xchg_u64(volatile void *p, unsigned lo return prev; } + +static __inline__ unsigned long +__xchg_u64_local(volatile void *p, unsigned long val) +{ + unsigned long prev; + + __asm__ __volatile__( +"1: ldarx %0,0,%2 \n" + PPC405_ERR77(0,%2) +" stdcx. 
%3,0,%2 \n\ + bne- 1b" + : "=&r" (prev), "+m" (*(volatile unsigned long *)p) + : "r" (p), "r" (val) + : "cc", "memory"); + + return prev; +} #endif /* @@ -277,12 +317,33 @@ #endif return x; } +static __inline__ unsigned long +__xchg_local(volatile void *ptr, unsigned long x, unsigned int size) +{ + switch (size) { + case 4: + return __xchg_u32_local(ptr, x); +#ifdef CONFIG_PPC64 + case 8: + return __xchg_u64_local(ptr, x); +#endif + } + __xchg_called_with_bad_pointer(); + return x; +} #define xchg(ptr,x) \ ({ \ __typeof__(*(ptr)) _x_ = (x); \ (__typeof__(*(ptr))) __xchg((ptr), (unsigned long)_x_, sizeof(*(ptr))); \ }) +#define xchg_local(ptr,x) \ + ({ \ + __typeof__(*(ptr)) _x_ = (x); \ + (__typeof__(*(ptr))) __xchg_local((ptr), \ + (unsigned long)_x_, sizeof(*(ptr))); \ + }) + #define tas(ptr) (xchg((ptr),1)) /* @@ -314,6 +375,28 @@ __cmpxchg_u32(volatile unsigned int *p, return prev; } +static __inline__ unsigned long +__cmpxchg_u32_local(volatile unsigned int *p, unsigned long old, + unsigned long new) +{ + unsigned int prev; + + __asm__ __volatile__ ( +"1: lwarx %0,0,%2 # __cmpxchg_u32\n\ + cmpw 0,%0,%3\n\ + bne- 2f\n" + PPC405_ERR77(0,%2) +" stwcx. %4,0,%2\n\ + bne- 1b" + "\n\ +2:" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (old), "r" (new) + : "cc", "memory"); + + return prev; +} + #ifdef CONFIG_PPC64 static __inline__ unsigned long __cmpxchg_u64(volatile unsigned long *p, unsigned long old, unsigned long new) @@ -336,6 +419,27 @@ __cmpxchg_u64(volatile unsigned long *p, return prev; } + +static __inline__ unsigned long +__cmpxchg_u64_local(volatile unsigned long *p, unsigned long old, + unsigned long new) +{ + unsigned long prev; + + __asm__ __volatile__ ( +"1: ldarx %0,0,%2 # __cmpxchg_u64\n\ + cmpd 0,%0,%3\n\ + bne- 2f\n\ + stdcx. %4,0,%2\n\ + bne- 1b" + "\n\ +2:" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (old), "r" (new) + : "cc", "memory"); + + return prev; +} #endif /* This function doesn't exist, so you'll get a linker error @@ -358,6 +462,22 @@ #endif return old; } +static __inline__ unsigned long +__cmpxchg_local(volatile void *ptr, unsigned long old, unsigned long new, + unsigned int size) +{ + switch (size) { + case 4: + return __cmpxchg_u32_local(ptr, old, new); +#ifdef CONFIG_PPC64 + case 8: + return __cmpxchg_u64_local(ptr, old, new); +#endif + } + __cmpxchg_called_with_bad_pointer(); + return old; +} + #define cmpxchg(ptr,o,n) \ ({ \ __typeof__(*(ptr)) _o_ = (o); \ @@ -366,6 +486,15 @@ #define cmpxchg(ptr,o,n) \ (unsigned long)_n_, sizeof(*(ptr))); \ }) + +#define cmpxchg_local(ptr,o,n) \ + ({ \ + __typeof__(*(ptr)) _o_ = (o); \ + __typeof__(*(ptr)) _n_ = (n); \ + (__typeof__(*(ptr))) __cmpxchg_local((ptr), (unsigned long)_o_, \ + (unsigned long)_n_, sizeof(*(ptr))); \ + }) + #ifdef CONFIG_PPC64 /* * We handle most unaligned accesses in hardware. 
On the other hand --- a/include/asm-sparc64/atomic.h +++ b/include/asm-sparc64/atomic.h @@ -70,12 +70,13 @@ #define atomic64_dec(v) atomic64_sub(1, #define atomic_add_negative(i, v) (atomic_add_ret(i, v) < 0) #define atomic64_add_negative(i, v) (atomic64_add_ret(i, v) < 0) -#define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n))) +#define atomic_cmpxchg(v, o, n) \ + ((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n))) #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) #define atomic_add_unless(v, a, u) \ ({ \ - int c, old; \ + __typeof__((v)->counter) c, old; \ c = atomic_read(v); \ for (;;) { \ if (unlikely(c == (u))) \ @@ -89,6 +90,26 @@ ({ \ }) #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) +#define atomic64_cmpxchg(v, o, n) \ + ((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n))) +#define atomic64_xchg(v, new) (xchg(&((v)->counter), new)) + +#define atomic64_add_unless(v, a, u) \ +({ \ + __typeof__((v)->counter) c, old; \ + c = atomic64_read(v); \ + for (;;) { \ + if (unlikely(c == (u))) \ + break; \ + old = atomic64_cmpxchg((v), c, c + (a)); \ + if (likely(old == c)) \ + break; \ + c = old; \ + } \ + likely(c != (u)); \ +}) +#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) + /* Atomic operations are already serializing */ #ifdef CONFIG_SMP #define smp_mb__before_atomic_dec() membar_storeload_loadload(); --- a/include/asm-x86_64/atomic.h +++ b/include/asm-x86_64/atomic.h @@ -388,7 +388,12 @@ static __inline__ long atomic64_sub_retu #define atomic64_inc_return(v) (atomic64_add_return(1,v)) #define atomic64_dec_return(v) (atomic64_sub_return(1,v)) -#define atomic_cmpxchg(v, old, new) ((int)cmpxchg(&((v)->counter), old, new)) +#define atomic64_cmpxchg(v, old, new) \ + ((__typeof__((v)->counter))cmpxchg(&((v)->counter), old, new)) +#define atomic64_xchg(v, new) (xchg(&((v)->counter), new)) + +#define atomic_cmpxchg(v, old, new) \ + ((__typeof__((v)->counter))cmpxchg(&((v)->counter), old, new)) #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) /** @@ -402,7 +407,7 @@ #define atomic_xchg(v, new) (xchg(&((v)- */ #define atomic_add_unless(v, a, u) \ ({ \ - int c, old; \ + __typeof__((v)->counter) c, old; \ c = atomic_read(v); \ for (;;) { \ if (unlikely(c == (u))) \ @@ -416,6 +421,31 @@ ({ \ }) #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) +/** + * atomic64_add_unless - add unless the number is a given value + * @v: pointer of type atomic64_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as it was not @u. + * Returns non-zero if @v was not @u, and zero otherwise. 
+ */ +#define atomic64_add_unless(v, a, u) \ +({ \ + __typeof__((v)->counter) c, old; \ + c = atomic64_read(v); \ + for (;;) { \ + if (unlikely(c == (u))) \ + break; \ + old = atomic64_cmpxchg((v), c, c + (a)); \ + if (likely(old == c)) \ + break; \ + c = old; \ + } \ + c != (u); \ +}) +#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) + /* These are x86-specific, used by some header files */ #define atomic_clear_mask(mask, addr) \ __asm__ __volatile__(LOCK_PREFIX "andl %0,%1" \ --- a/include/asm-x86_64/system.h +++ b/include/asm-x86_64/system.h @@ -208,9 +208,45 @@ static inline unsigned long __cmpxchg(vo return old; } +static inline unsigned long __cmpxchg_local(volatile void *ptr, + unsigned long old, unsigned long new, int size) +{ + unsigned long prev; + switch (size) { + case 1: + __asm__ __volatile__("cmpxchgb %b1,%2" + : "=a"(prev) + : "q"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 2: + __asm__ __volatile__("cmpxchgw %w1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 4: + __asm__ __volatile__("cmpxchgl %k1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 8: + __asm__ __volatile__("cmpxchgq %1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + } + return old; +} + #define cmpxchg(ptr,o,n)\ ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\ (unsigned long)(n),sizeof(*(ptr)))) +#define cmpxchg_local(ptr,o,n)\ + ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\ + (unsigned long)(n),sizeof(*(ptr)))) #ifdef CONFIG_SMP #define smp_mb() mb() ---END--- OpenPGP public key: http://krystal.dyndns.org:8080/key/compudj.gpg Key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68 ^ permalink raw reply [flat|nested] 13+ messages in thread
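With atomic64_cmpxchg(), atomic64_xchg() and atomic64_add_unless() now offered with identical semantics on every 64-bit architecture, generic code can apply the familiar atomic_t idioms to 64-bit counters as well. The following hypothetical sketch shows the usual take-a-reference-unless-already-dying pattern; the structure and function names are invented, and it only builds on a 64-bit architecture since atomic64_t is not defined elsewhere at this point.

	/* Hypothetical example built on the interface standardized above. */
	#include <linux/slab.h>
	#include <asm/atomic.h>

	struct big_object {
		atomic64_t refs;	/* 0 means the object is on its way out */
	};

	static int big_object_get(struct big_object *obj)
	{
		/* Non-zero iff the count was raised from a non-zero value. */
		return atomic64_inc_not_zero(&obj->refs);
	}

	static void big_object_put(struct big_object *obj)
	{
		if (atomic64_dec_and_test(&obj->refs))
			kfree(obj);	/* assumes a kmalloc()ed object */
	}

Using atomic64_inc_not_zero() instead of a separate read, check and increment closes the window in which the count could reach zero, and the object be freed, between the check and the increment.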
* Re: [PATCH 1/2] atomic.h atomic64_t standardization 2006-12-01 22:19 ` Mathieu Desnoyers @ 2006-12-02 0:43 ` Nick Piggin 2006-12-05 17:08 ` [PATCH 1/2] atomic.h atomic64_t standardization for 2.6.19 Mathieu Desnoyers 1 sibling, 0 replies; 13+ messages in thread From: Nick Piggin @ 2006-12-02 0:43 UTC (permalink / raw) To: Mathieu Desnoyers Cc: Christoph Hellwig, linux-kernel, Andrew Morton, Ingo Molnar, Greg Kroah-Hartman, Thomas Gleixner, Tom Zanussi, Karim Yaghmour, Paul Mundt, Jes Sorensen, Richard J Moore, Martin J. Bligh, Michel Dagenais, Douglas Niehaus, ltt-dev, systemtap Mathieu Desnoyers wrote: > Hi, > > I finalized the work for atomic64_t cmpxchg and atomic64_add_unless on all > architectures. asm-generic/atomic.h atomic_long_t is also streamlined. > > Review is welcome. Beautiful! Now I can do the rwsem consolidation. Thanks. Nick -- SUSE Labs, Novell Inc. Send instant messages to your online friends http://au.messenger.yahoo.com ^ permalink raw reply [flat|nested] 13+ messages in thread
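The asm-generic/atomic.h part of the patch is what enables the consolidation Nick refers to: atomic_long_t maps onto atomic_t on 32-bit kernels and atomic64_t on 64-bit kernels, so a single generic implementation can drive a native-word-sized counter. A rough, hypothetical illustration follows; it is not the actual rwsem rewrite, and the function names are invented.

	/* Hypothetical sketch of word-size-neutral generic code. */
	#include <asm/atomic.h>

	static inline long counter_add_sample(atomic_long_t *ctr, long delta)
	{
		/*
		 * Becomes a 32-bit atomic op on 32-bit kernels and a 64-bit
		 * op on 64-bit kernels, with no #ifdef in the caller.
		 */
		return atomic_long_add_return(delta, ctr);
	}

	static inline int counter_retire(atomic_long_t *ctr)
	{
		/* True when the counter reaches zero. */
		return atomic_long_dec_and_test(ctr);
	}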
* Re: [PATCH 1/2] atomic.h atomic64_t standardization for 2.6.19 2006-12-01 22:19 ` Mathieu Desnoyers 2006-12-02 0:43 ` Nick Piggin @ 2006-12-05 17:08 ` Mathieu Desnoyers 1 sibling, 0 replies; 13+ messages in thread From: Mathieu Desnoyers @ 2006-12-05 17:08 UTC (permalink / raw) To: linux-kernel, Andrew Morton Cc: Christoph Hellwig, Nick Piggin, Ingo Molnar, Greg Kroah-Hartman, Martin J. Bligh, Michel Dagenais, ltt-dev, systemtap Hi, Here is the 2.6.19-friendly diff of the atomic.h atomic64_t standardization. As it seems to be useful to at least one locking primitive (rwsem) in addition of LTTng (which is not in the mainline though), I think it should be considered for inclusion. Regards, Mathieu ---BEGIN--- --- a/include/asm-alpha/atomic.h +++ b/include/asm-alpha/atomic.h @@ -175,19 +175,64 @@ static __inline__ long atomic64_sub_retu return result; } -#define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n))) +#define atomic64_cmpxchg(v, old, new) \ + ((__typeof__((v)->counter))cmpxchg(&((v)->counter), old, new)) +#define atomic64_xchg(v, new) (xchg(&((v)->counter), new)) + +#define atomic_cmpxchg(v, old, new) \ + ((__typeof__((v)->counter))cmpxchg(&((v)->counter), old, new)) #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) +/** + * atomic_add_unless - add unless the number is a given value + * @v: pointer of type atomic_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as it was not @u. + * Returns non-zero if @v was not @u, and zero otherwise. + */ #define atomic_add_unless(v, a, u) \ ({ \ - int c, old; \ + __typeof__((v)->counter) c, old; \ c = atomic_read(v); \ - while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c) \ + for (;;) { \ + if (unlikely(c == (u))) \ + break; \ + old = atomic_cmpxchg((v), c, c + (a)); \ + if (likely(old == c)) \ + break; \ c = old; \ + } \ c != (u); \ }) #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) +/** + * atomic64_add_unless - add unless the number is a given value + * @v: pointer of type atomic64_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as it was not @u. + * Returns non-zero if @v was not @u, and zero otherwise. 
+ */ +#define atomic64_add_unless(v, a, u) \ +({ \ + __typeof__((v)->counter) c, old; \ + c = atomic64_read(v); \ + for (;;) { \ + if (unlikely(c == (u))) \ + break; \ + old = atomic64_cmpxchg((v), c, c + (a)); \ + if (likely(old == c)) \ + break; \ + c = old; \ + } \ + c != (u); \ +}) +#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) + #define atomic_add_negative(a, v) (atomic_add_return((a), (v)) < 0) #define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0) --- a/include/asm-alpha/system.h +++ b/include/asm-alpha/system.h @@ -443,6 +443,111 @@ #define xchg(ptr,x) \ (__typeof__(*(ptr))) __xchg((ptr), (unsigned long)_x_, sizeof(*(ptr))); \ }) +static inline unsigned long +__xchg_u8_local(volatile char *m, unsigned long val) +{ + unsigned long ret, tmp, addr64; + + __asm__ __volatile__( + " andnot %4,7,%3\n" + " insbl %1,%4,%1\n" + "1: ldq_l %2,0(%3)\n" + " extbl %2,%4,%0\n" + " mskbl %2,%4,%2\n" + " or %1,%2,%2\n" + " stq_c %2,0(%3)\n" + " beq %2,2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + : "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64) + : "r" ((long)m), "1" (val) : "memory"); + + return ret; +} + +static inline unsigned long +__xchg_u16_local(volatile short *m, unsigned long val) +{ + unsigned long ret, tmp, addr64; + + __asm__ __volatile__( + " andnot %4,7,%3\n" + " inswl %1,%4,%1\n" + "1: ldq_l %2,0(%3)\n" + " extwl %2,%4,%0\n" + " mskwl %2,%4,%2\n" + " or %1,%2,%2\n" + " stq_c %2,0(%3)\n" + " beq %2,2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + : "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64) + : "r" ((long)m), "1" (val) : "memory"); + + return ret; +} + +static inline unsigned long +__xchg_u32_local(volatile int *m, unsigned long val) +{ + unsigned long dummy; + + __asm__ __volatile__( + "1: ldl_l %0,%4\n" + " bis $31,%3,%1\n" + " stl_c %1,%2\n" + " beq %1,2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + : "=&r" (val), "=&r" (dummy), "=m" (*m) + : "rI" (val), "m" (*m) : "memory"); + + return val; +} + +static inline unsigned long +__xchg_u64_local(volatile long *m, unsigned long val) +{ + unsigned long dummy; + + __asm__ __volatile__( + "1: ldq_l %0,%4\n" + " bis $31,%3,%1\n" + " stq_c %1,%2\n" + " beq %1,2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + : "=&r" (val), "=&r" (dummy), "=m" (*m) + : "rI" (val), "m" (*m) : "memory"); + + return val; +} + +#define __xchg_local(ptr, x, size) \ +({ \ + unsigned long __xchg__res; \ + volatile void *__xchg__ptr = (ptr); \ + switch (size) { \ + case 1: __xchg__res = __xchg_u8_local(__xchg__ptr, x); break; \ + case 2: __xchg__res = __xchg_u16_local(__xchg__ptr, x); break; \ + case 4: __xchg__res = __xchg_u32_local(__xchg__ptr, x); break; \ + case 8: __xchg__res = __xchg_u64_local(__xchg__ptr, x); break; \ + default: __xchg_called_with_bad_pointer(); __xchg__res = x; \ + } \ + __xchg__res; \ +}) + +#define xchg_local(ptr,x) \ + ({ \ + __typeof__(*(ptr)) _x_ = (x); \ + (__typeof__(*(ptr))) __xchg_local((ptr), (unsigned long)_x_, \ + sizeof(*(ptr))); \ + }) + #define tas(ptr) (xchg((ptr),1)) @@ -596,6 +701,128 @@ #define cmpxchg(ptr,o,n) \ (unsigned long)_n_, sizeof(*(ptr))); \ }) +static inline unsigned long +__cmpxchg_u8_local(volatile char *m, long old, long new) +{ + unsigned long prev, tmp, cmp, addr64; + + __asm__ __volatile__( + " andnot %5,7,%4\n" + " insbl %1,%5,%1\n" + "1: ldq_l %2,0(%4)\n" + " extbl %2,%5,%0\n" + " cmpeq %0,%6,%3\n" + " beq %3,2f\n" + " mskbl %2,%5,%2\n" + " or %1,%2,%2\n" + " stq_c %2,0(%4)\n" + " beq %2,3f\n" + "2:\n" + ".subsection 
2\n" + "3: br 1b\n" + ".previous" + : "=&r" (prev), "=&r" (new), "=&r" (tmp), "=&r" (cmp), "=&r" (addr64) + : "r" ((long)m), "Ir" (old), "1" (new) : "memory"); + + return prev; +} + +static inline unsigned long +__cmpxchg_u16_local(volatile short *m, long old, long new) +{ + unsigned long prev, tmp, cmp, addr64; + + __asm__ __volatile__( + " andnot %5,7,%4\n" + " inswl %1,%5,%1\n" + "1: ldq_l %2,0(%4)\n" + " extwl %2,%5,%0\n" + " cmpeq %0,%6,%3\n" + " beq %3,2f\n" + " mskwl %2,%5,%2\n" + " or %1,%2,%2\n" + " stq_c %2,0(%4)\n" + " beq %2,3f\n" + "2:\n" + ".subsection 2\n" + "3: br 1b\n" + ".previous" + : "=&r" (prev), "=&r" (new), "=&r" (tmp), "=&r" (cmp), "=&r" (addr64) + : "r" ((long)m), "Ir" (old), "1" (new) : "memory"); + + return prev; +} + +static inline unsigned long +__cmpxchg_u32_local(volatile int *m, int old, int new) +{ + unsigned long prev, cmp; + + __asm__ __volatile__( + "1: ldl_l %0,%5\n" + " cmpeq %0,%3,%1\n" + " beq %1,2f\n" + " mov %4,%1\n" + " stl_c %1,%2\n" + " beq %1,3f\n" + "2:\n" + ".subsection 2\n" + "3: br 1b\n" + ".previous" + : "=&r"(prev), "=&r"(cmp), "=m"(*m) + : "r"((long) old), "r"(new), "m"(*m) : "memory"); + + return prev; +} + +static inline unsigned long +__cmpxchg_u64_local(volatile long *m, unsigned long old, unsigned long new) +{ + unsigned long prev, cmp; + + __asm__ __volatile__( + "1: ldq_l %0,%5\n" + " cmpeq %0,%3,%1\n" + " beq %1,2f\n" + " mov %4,%1\n" + " stq_c %1,%2\n" + " beq %1,3f\n" + "2:\n" + ".subsection 2\n" + "3: br 1b\n" + ".previous" + : "=&r"(prev), "=&r"(cmp), "=m"(*m) + : "r"((long) old), "r"(new), "m"(*m) : "memory"); + + return prev; +} + +static __always_inline unsigned long +__cmpxchg_local(volatile void *ptr, unsigned long old, unsigned long new, + int size) +{ + switch (size) { + case 1: + return __cmpxchg_u8_local(ptr, old, new); + case 2: + return __cmpxchg_u16_local(ptr, old, new); + case 4: + return __cmpxchg_u32_local(ptr, old, new); + case 8: + return __cmpxchg_u64_local(ptr, old, new); + } + __cmpxchg_called_with_bad_pointer(); + return old; +} + +#define cmpxchg_local(ptr,o,n) \ + ({ \ + __typeof__(*(ptr)) _o_ = (o); \ + __typeof__(*(ptr)) _n_ = (n); \ + (__typeof__(*(ptr))) __cmpxchg_local((ptr), (unsigned long)_o_, \ + (unsigned long)_n_, sizeof(*(ptr))); \ + }) + #endif /* __ASSEMBLY__ */ #define arch_align_stack(x) (x) --- a/include/asm-arm/atomic.h +++ b/include/asm-arm/atomic.h @@ -185,6 +185,7 @@ static inline int atomic_add_unless(atom c = old; return c != u; } + #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) #define atomic_add(i, v) (void) atomic_add_return(i, v) --- a/include/asm-generic/atomic.h +++ b/include/asm-generic/atomic.h @@ -66,6 +66,76 @@ static inline void atomic_long_sub(long atomic64_sub(i, v); } +static inline int atomic_long_sub_and_test(long i, atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return (long)atomic64_sub_and_test(i, v); +} + +static inline int atomic_long_dec_and_test(atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return (long)atomic64_dec_and_test(v); +} + +static inline int atomic_long_inc_and_test(atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return (long)atomic64_inc_and_test(v); +} + +static inline int atomic_long_add_negative(long i, atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return (long)atomic64_add_negative(i, v); +} + +static inline long atomic_long_add_return(long i, atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return (long)atomic64_add_return(i, v); +} + +static inline long 
atomic_long_sub_return(long i, atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return (long)atomic64_sub_return(i, v); +} + +static inline long atomic_long_inc_return(atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return (long)atomic64_inc_return(v); +} + +static inline long atomic_long_dec_return(atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return (long)atomic64_dec_return(v); +} + +static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u) +{ + atomic64_t *v = (atomic64_t *)l; + + return (long)atomic64_add_unless(v, a, u); +} + +static inline long atomic_long_inc_not_zero(atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return (long)atomic64_inc_not_zero(v); +} + #else typedef atomic_t atomic_long_t; @@ -113,5 +183,80 @@ static inline void atomic_long_sub(long atomic_sub(i, v); } +static inline int atomic_long_sub_and_test(long i, atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return atomic_sub_and_test(i, v); +} + +static inline int atomic_long_dec_and_test(atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return atomic_dec_and_test(v); +} + +static inline int atomic_long_inc_and_test(atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return atomic_inc_and_test(v); +} + +static inline int atomic_long_add_negative(long i, atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return atomic_add_negative(i, v); +} + +static inline long atomic_long_add_return(long i, atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return (long)atomic_add_return(i, v); +} + +static inline long atomic_long_sub_return(long i, atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return (long)atomic_sub_return(i, v); +} + +static inline long atomic_long_inc_return(atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return (long)atomic_inc_return(v); +} + +static inline long atomic_long_dec_return(atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return (long)atomic_dec_return(v); +} + +static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u) +{ + atomic_t *v = (atomic_t *)l; + + return (long)atomic_add_unless(v, a, u); +} + +static inline long atomic_long_inc_not_zero(atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return (long)atomic_inc_not_zero(v); +} + #endif + +#define atomic_long_cmpxchg(l, old, new) \ + ((long)cmpxchg(&((l)->counter), (old), (new))) +#define atomic_long_xchg(l, new) (xchg(&((l)->counter), (new))) + #endif --- a/include/asm-i386/atomic.h +++ b/include/asm-i386/atomic.h @@ -207,8 +207,9 @@ static __inline__ int atomic_sub_return( return atomic_add_return(-i,v); } -#define atomic_cmpxchg(v, old, new) ((int)cmpxchg(&((v)->counter), old, new)) -#define atomic_xchg(v, new) (xchg(&((v)->counter), new)) +#define atomic_cmpxchg(v, old, new) \ + ((__typeof__((v)->counter))cmpxchg(&((v)->counter), (old), (new))) +#define atomic_xchg(v, new) (xchg(&((v)->counter), (new))) /** * atomic_add_unless - add unless the number is a given value @@ -221,7 +222,7 @@ #define atomic_xchg(v, new) (xchg(&((v)- */ #define atomic_add_unless(v, a, u) \ ({ \ - int c, old; \ + __typeof__((v)->counter) c, old; \ c = atomic_read(v); \ for (;;) { \ if (unlikely(c == (u))) \ --- a/include/asm-i386/system.h +++ b/include/asm-i386/system.h @@ -270,6 +270,9 @@ #define cmpxchg(ptr,o,n)\ #define sync_cmpxchg(ptr,o,n)\ ((__typeof__(*(ptr)))__sync_cmpxchg((ptr),(unsigned long)(o),\ (unsigned long)(n),sizeof(*(ptr)))) +#define cmpxchg_local(ptr,o,n)\ + 
((__typeof__(*(ptr)))__cmpxchg_local((ptr),(unsigned long)(o),\ + (unsigned long)(n),sizeof(*(ptr)))) #endif static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, @@ -332,6 +335,33 @@ static inline unsigned long __sync_cmpxc return old; } +static inline unsigned long __cmpxchg_local(volatile void *ptr, + unsigned long old, unsigned long new, int size) +{ + unsigned long prev; + switch (size) { + case 1: + __asm__ __volatile__("cmpxchgb %b1,%2" + : "=a"(prev) + : "q"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 2: + __asm__ __volatile__("cmpxchgw %w1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 4: + __asm__ __volatile__("cmpxchgl %1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + } + return old; +} + #ifndef CONFIG_X86_CMPXCHG /* * Building a kernel capable running on 80386. It may be necessary to @@ -368,6 +398,17 @@ ({ \ (unsigned long)(n), sizeof(*(ptr))); \ __ret; \ }) +#define cmpxchg_local(ptr,o,n) \ +({ \ + __typeof__(*(ptr)) __ret; \ + if (likely(boot_cpu_data.x86 > 3)) \ + __ret = __cmpxchg_local((ptr), (unsigned long)(o), \ + (unsigned long)(n), sizeof(*(ptr))); \ + else \ + __ret = cmpxchg_386((ptr), (unsigned long)(o), \ + (unsigned long)(n), sizeof(*(ptr))); \ + __ret; \ +}) #endif #ifdef CONFIG_X86_CMPXCHG64 @@ -386,10 +427,26 @@ static inline unsigned long long __cmpxc return prev; } +static inline unsigned long long __cmpxchg64_local(volatile void *ptr, + unsigned long long old, unsigned long long new) +{ + unsigned long long prev; + __asm__ __volatile__("cmpxchg8b %3" + : "=A"(prev) + : "b"((unsigned long)new), + "c"((unsigned long)(new >> 32)), + "m"(*__xg(ptr)), + "0"(old) + : "memory"); + return prev; +} + #define cmpxchg64(ptr,o,n)\ ((__typeof__(*(ptr)))__cmpxchg64((ptr),(unsigned long long)(o),\ (unsigned long long)(n))) - +#define cmpxchg64_local(ptr,o,n)\ + ((__typeof__(*(ptr)))__cmpxchg64_local((ptr),(unsigned long long)(o),\ + (unsigned long long)(n))) #endif /* --- a/include/asm-ia64/atomic.h +++ b/include/asm-ia64/atomic.h @@ -88,12 +88,17 @@ ia64_atomic64_sub (__s64 i, atomic64_t * return new; } -#define atomic_cmpxchg(v, old, new) ((int)cmpxchg(&((v)->counter), old, new)) +#define atomic_cmpxchg(v, old, new) \ + ((__typeof__((v)->counter))cmpxchg(&((v)->counter), old, new)) #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) +#define atomic64_cmpxchg(v, old, new) \ + ((__typeof__((v)->counter))cmpxchg(&((v)->counter), old, new)) +#define atomic64_xchg(v, new) (xchg(&((v)->counter), new)) + #define atomic_add_unless(v, a, u) \ ({ \ - int c, old; \ + __typeof__(v->counter) c, old; \ c = atomic_read(v); \ for (;;) { \ if (unlikely(c == (u))) \ @@ -107,6 +112,22 @@ ({ \ }) #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) +#define atomic64_add_unless(v, a, u) \ +({ \ + __typeof__(v->counter) c, old; \ + c = atomic64_read(v); \ + for (;;) { \ + if (unlikely(c == (u))) \ + break; \ + old = atomic64_cmpxchg((v), c, c + (a)); \ + if (likely(old == c)) \ + break; \ + c = old; \ + } \ + c != (u); \ +}) +#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) + #define atomic_add_return(i,v) \ ({ \ int __ia64_aar_i = (i); \ --- a/include/asm-mips/atomic.h +++ b/include/asm-mips/atomic.h @@ -292,8 +292,9 @@ static __inline__ int atomic_sub_if_posi return result; } -#define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n))) -#define atomic_xchg(v, new) (xchg(&((v)->counter), new)) +#define 
atomic_cmpxchg(v, o, n) \ + (((__typeof__((v)->counter)))cmpxchg(&((v)->counter), (o), (n))) +#define atomic_xchg(v, new) (xchg(&((v)->counter), (new))) /** * atomic_add_unless - add unless the number is a given value @@ -306,7 +307,7 @@ #define atomic_xchg(v, new) (xchg(&((v)- */ #define atomic_add_unless(v, a, u) \ ({ \ - int c, old; \ + __typeof__((v)->counter) c, old; \ c = atomic_read(v); \ while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c) \ c = old; \ @@ -646,6 +647,29 @@ static __inline__ long atomic64_sub_if_p return result; } +#define atomic64_cmpxchg(v, o, n) \ + (((__typeof__((v)->counter)))cmpxchg(&((v)->counter), (o), (n))) +#define atomic64_xchg(v, new) (xchg(&((v)->counter), (new))) + +/** + * atomic64_add_unless - add unless the number is a given value + * @v: pointer of type atomic64_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as it was not @u. + * Returns non-zero if @v was not @u, and zero otherwise. + */ +#define atomic64_add_unless(v, a, u) \ +({ \ + __typeof__((v)->counter) c, old; \ + c = atomic_read(v); \ + while (c != (u) && (old = atomic64_cmpxchg((v), c, c + (a))) != c) \ + c = old; \ + c != (u); \ +}) +#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) + #define atomic64_dec_return(v) atomic64_sub_return(1,(v)) #define atomic64_inc_return(v) atomic64_add_return(1,(v)) --- a/include/asm-parisc/atomic.h +++ b/include/asm-parisc/atomic.h @@ -163,7 +163,8 @@ static __inline__ int atomic_read(const } /* exported interface */ -#define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n))) +#define atomic_cmpxchg(v, o, n) \ + ((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n))) #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) /** @@ -177,7 +178,7 @@ #define atomic_xchg(v, new) (xchg(&((v)- */ #define atomic_add_unless(v, a, u) \ ({ \ - int c, old; \ + __typeof__((v)->counter) c, old; \ c = atomic_read(v); \ while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c) \ c = old; \ @@ -270,6 +271,31 @@ #define atomic64_inc_and_test(v) (atomi #define atomic64_dec_and_test(v) (atomic64_dec_return(v) == 0) #define atomic64_sub_and_test(i,v) (atomic64_sub_return((i),(v)) == 0) +/* exported interface */ +#define atomic64_cmpxchg(v, o, n) \ + ((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n))) +#define atomic64_xchg(v, new) (xchg(&((v)->counter), new)) + +/** + * atomic64_add_unless - add unless the number is a given value + * @v: pointer of type atomic64_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as it was not @u. + * Returns non-zero if @v was not @u, and zero otherwise. 
+ */ +#define atomic64_add_unless(v, a, u) \ +({ \ + __typeof__((v)->counter) c, old; \ + c = atomic64_read(v); \ + while (c != (u) && (old = atomic64_cmpxchg((v), c, c + (a))) != c) \ + c = old; \ + c != (u); \ +}) +#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) + + #endif /* __LP64__ */ #include <asm-generic/atomic.h> --- a/include/asm-powerpc/atomic.h +++ b/include/asm-powerpc/atomic.h @@ -165,7 +165,8 @@ static __inline__ int atomic_dec_return( return t; } -#define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n))) +#define atomic_cmpxchg(v, o, n) \ + ((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n))) #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) /** @@ -411,6 +412,44 @@ static __inline__ long atomic64_dec_if_p return t; } +#define atomic64_cmpxchg(v, o, n) \ + ((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n))) +#define atomic64_xchg(v, new) (xchg(&((v)->counter), new)) + +/** + * atomic64_add_unless - add unless the number is a given value + * @v: pointer of type atomic64_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as it was not @u. + * Returns non-zero if @v was not @u, and zero otherwise. + */ +static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u) +{ + long t; + + __asm__ __volatile__ ( + LWSYNC_ON_SMP +"1: ldarx %0,0,%1 # atomic_add_unless\n\ + cmpd 0,%0,%3 \n\ + beq- 2f \n\ + add %0,%2,%0 \n" + PPC405_ERR77(0,%2) +" stdcx. %0,0,%1 \n\ + bne- 1b \n" + ISYNC_ON_SMP +" subf %0,%2,%0 \n\ +2:" + : "=&r" (t) + : "r" (&v->counter), "r" (a), "r" (u) + : "cc", "memory"); + + return t != u; +} + +#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) + #endif /* __powerpc64__ */ #include <asm-generic/atomic.h> --- a/include/asm-powerpc/system.h +++ b/include/asm-powerpc/system.h @@ -226,6 +226,29 @@ __xchg_u32(volatile void *p, unsigned lo return prev; } +/* + * Atomic exchange + * + * Changes the memory location '*ptr' to be val and returns + * the previous value stored there. + */ +static __inline__ unsigned long +__xchg_u32_local(volatile void *p, unsigned long val) +{ + unsigned long prev; + + __asm__ __volatile__( +"1: lwarx %0,0,%2 \n" + PPC405_ERR77(0,%2) +" stwcx. %3,0,%2 \n\ + bne- 1b" + : "=&r" (prev), "+m" (*(volatile unsigned int *)p) + : "r" (p), "r" (val) + : "cc", "memory"); + + return prev; +} + #ifdef CONFIG_PPC64 static __inline__ unsigned long __xchg_u64(volatile void *p, unsigned long val) @@ -245,6 +268,23 @@ __xchg_u64(volatile void *p, unsigned lo return prev; } + +static __inline__ unsigned long +__xchg_u64_local(volatile void *p, unsigned long val) +{ + unsigned long prev; + + __asm__ __volatile__( +"1: ldarx %0,0,%2 \n" + PPC405_ERR77(0,%2) +" stdcx. 
%3,0,%2 \n\ + bne- 1b" + : "=&r" (prev), "+m" (*(volatile unsigned long *)p) + : "r" (p), "r" (val) + : "cc", "memory"); + + return prev; +} #endif /* @@ -268,12 +308,33 @@ #endif return x; } +static __inline__ unsigned long +__xchg_local(volatile void *ptr, unsigned long x, unsigned int size) +{ + switch (size) { + case 4: + return __xchg_u32_local(ptr, x); +#ifdef CONFIG_PPC64 + case 8: + return __xchg_u64_local(ptr, x); +#endif + } + __xchg_called_with_bad_pointer(); + return x; +} #define xchg(ptr,x) \ ({ \ __typeof__(*(ptr)) _x_ = (x); \ (__typeof__(*(ptr))) __xchg((ptr), (unsigned long)_x_, sizeof(*(ptr))); \ }) +#define xchg_local(ptr,x) \ + ({ \ + __typeof__(*(ptr)) _x_ = (x); \ + (__typeof__(*(ptr))) __xchg_local((ptr), \ + (unsigned long)_x_, sizeof(*(ptr))); \ + }) + #define tas(ptr) (xchg((ptr),1)) /* @@ -305,6 +366,28 @@ __cmpxchg_u32(volatile unsigned int *p, return prev; } +static __inline__ unsigned long +__cmpxchg_u32_local(volatile unsigned int *p, unsigned long old, + unsigned long new) +{ + unsigned int prev; + + __asm__ __volatile__ ( +"1: lwarx %0,0,%2 # __cmpxchg_u32\n\ + cmpw 0,%0,%3\n\ + bne- 2f\n" + PPC405_ERR77(0,%2) +" stwcx. %4,0,%2\n\ + bne- 1b" + "\n\ +2:" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (old), "r" (new) + : "cc", "memory"); + + return prev; +} + #ifdef CONFIG_PPC64 static __inline__ unsigned long __cmpxchg_u64(volatile unsigned long *p, unsigned long old, unsigned long new) @@ -327,6 +410,27 @@ __cmpxchg_u64(volatile unsigned long *p, return prev; } + +static __inline__ unsigned long +__cmpxchg_u64_local(volatile unsigned long *p, unsigned long old, + unsigned long new) +{ + unsigned long prev; + + __asm__ __volatile__ ( +"1: ldarx %0,0,%2 # __cmpxchg_u64\n\ + cmpd 0,%0,%3\n\ + bne- 2f\n\ + stdcx. %4,0,%2\n\ + bne- 1b" + "\n\ +2:" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (old), "r" (new) + : "cc", "memory"); + + return prev; +} #endif /* This function doesn't exist, so you'll get a linker error @@ -349,6 +453,22 @@ #endif return old; } +static __inline__ unsigned long +__cmpxchg_local(volatile void *ptr, unsigned long old, unsigned long new, + unsigned int size) +{ + switch (size) { + case 4: + return __cmpxchg_u32_local(ptr, old, new); +#ifdef CONFIG_PPC64 + case 8: + return __cmpxchg_u64_local(ptr, old, new); +#endif + } + __cmpxchg_called_with_bad_pointer(); + return old; +} + #define cmpxchg(ptr,o,n) \ ({ \ __typeof__(*(ptr)) _o_ = (o); \ @@ -357,6 +477,15 @@ #define cmpxchg(ptr,o,n) \ (unsigned long)_n_, sizeof(*(ptr))); \ }) + +#define cmpxchg_local(ptr,o,n) \ + ({ \ + __typeof__(*(ptr)) _o_ = (o); \ + __typeof__(*(ptr)) _n_ = (n); \ + (__typeof__(*(ptr))) __cmpxchg_local((ptr), (unsigned long)_o_, \ + (unsigned long)_n_, sizeof(*(ptr))); \ + }) + #ifdef CONFIG_PPC64 /* * We handle most unaligned accesses in hardware. 
On the other hand --- a/include/asm-sparc64/atomic.h +++ b/include/asm-sparc64/atomic.h @@ -70,12 +70,13 @@ #define atomic64_dec(v) atomic64_sub(1, #define atomic_add_negative(i, v) (atomic_add_ret(i, v) < 0) #define atomic64_add_negative(i, v) (atomic64_add_ret(i, v) < 0) -#define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n))) +#define atomic_cmpxchg(v, o, n) \ + ((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n))) #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) #define atomic_add_unless(v, a, u) \ ({ \ - int c, old; \ + __typeof__((v)->counter) c, old; \ c = atomic_read(v); \ for (;;) { \ if (unlikely(c == (u))) \ @@ -89,6 +90,26 @@ ({ \ }) #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) +#define atomic64_cmpxchg(v, o, n) \ + ((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n))) +#define atomic64_xchg(v, new) (xchg(&((v)->counter), new)) + +#define atomic64_add_unless(v, a, u) \ +({ \ + __typeof__((v)->counter) c, old; \ + c = atomic64_read(v); \ + for (;;) { \ + if (unlikely(c == (u))) \ + break; \ + old = atomic64_cmpxchg((v), c, c + (a)); \ + if (likely(old == c)) \ + break; \ + c = old; \ + } \ + likely(c != (u)); \ +}) +#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) + /* Atomic operations are already serializing */ #ifdef CONFIG_SMP #define smp_mb__before_atomic_dec() membar_storeload_loadload(); --- a/include/asm-x86_64/atomic.h +++ b/include/asm-x86_64/atomic.h @@ -388,7 +388,12 @@ static __inline__ long atomic64_sub_retu #define atomic64_inc_return(v) (atomic64_add_return(1,v)) #define atomic64_dec_return(v) (atomic64_sub_return(1,v)) -#define atomic_cmpxchg(v, old, new) ((int)cmpxchg(&((v)->counter), old, new)) +#define atomic64_cmpxchg(v, old, new) \ + ((__typeof__((v)->counter))cmpxchg(&((v)->counter), old, new)) +#define atomic64_xchg(v, new) (xchg(&((v)->counter), new)) + +#define atomic_cmpxchg(v, old, new) \ + ((__typeof__((v)->counter))cmpxchg(&((v)->counter), old, new)) #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) /** @@ -402,7 +407,7 @@ #define atomic_xchg(v, new) (xchg(&((v)- */ #define atomic_add_unless(v, a, u) \ ({ \ - int c, old; \ + __typeof__((v)->counter) c, old; \ c = atomic_read(v); \ for (;;) { \ if (unlikely(c == (u))) \ @@ -416,6 +421,31 @@ ({ \ }) #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) +/** + * atomic64_add_unless - add unless the number is a given value + * @v: pointer of type atomic64_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as it was not @u. + * Returns non-zero if @v was not @u, and zero otherwise. 
+ */ +#define atomic64_add_unless(v, a, u) \ +({ \ + __typeof__((v)->counter) c, old; \ + c = atomic64_read(v); \ + for (;;) { \ + if (unlikely(c == (u))) \ + break; \ + old = atomic64_cmpxchg((v), c, c + (a)); \ + if (likely(old == c)) \ + break; \ + c = old; \ + } \ + c != (u); \ +}) +#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) + /* These are x86-specific, used by some header files */ #define atomic_clear_mask(mask, addr) \ __asm__ __volatile__(LOCK_PREFIX "andl %0,%1" \ --- a/include/asm-x86_64/system.h +++ b/include/asm-x86_64/system.h @@ -209,9 +209,45 @@ static inline unsigned long __cmpxchg(vo return old; } +static inline unsigned long __cmpxchg_local(volatile void *ptr, + unsigned long old, unsigned long new, int size) +{ + unsigned long prev; + switch (size) { + case 1: + __asm__ __volatile__("cmpxchgb %b1,%2" + : "=a"(prev) + : "q"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 2: + __asm__ __volatile__("cmpxchgw %w1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 4: + __asm__ __volatile__("cmpxchgl %k1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 8: + __asm__ __volatile__("cmpxchgq %1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + } + return old; +} + #define cmpxchg(ptr,o,n)\ ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\ (unsigned long)(n),sizeof(*(ptr)))) +#define cmpxchg_local(ptr,o,n)\ + ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\ + (unsigned long)(n),sizeof(*(ptr)))) #ifdef CONFIG_SMP #define smp_mb() mb() ---END--- OpenPGP public key: http://krystal.dyndns.org:8080/key/compudj.gpg Key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68 ^ permalink raw reply [flat|nested] 13+ messages in thread
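[Editorial note] All of the *_add_unless() macros introduced in the patch above share the same compare-and-swap retry loop. A minimal plain-C sketch of that loop follows for readers unfamiliar with the idiom; the function name and the GCC __sync_val_compare_and_swap() builtin are stand-ins for the kernel's atomic64_cmpxchg() and are not part of the patch.

/* Sketch of the atomic*_add_unless() retry loop: add 'a' to *counter
 * unless the current value is 'u'.  Returns nonzero if the add happened. */
static int sketch_add_unless(long *counter, long a, long u)
{
        long c = *counter;                      /* like atomic64_read() */

        for (;;) {
                long old;

                if (c == u)                     /* hit the excluded value */
                        return 0;
                /* try c -> c + a; the builtin returns the value it saw */
                old = __sync_val_compare_and_swap(counter, c, c + a);
                if (old == c)                   /* no race, update done */
                        return 1;
                c = old;                        /* lost a race, retry */
        }
}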
* Re: [PATCH 1/2] atomic.h atomic64_t standardization 2006-12-01 3:11 ` [PATCH 1/2] atomic.h atomic64_t standardization Mathieu Desnoyers 2006-12-01 3:24 ` Mathieu Desnoyers @ 2006-12-01 3:34 ` Paul Mundt 2006-12-01 3:41 ` Mathieu Desnoyers 1 sibling, 1 reply; 13+ messages in thread From: Paul Mundt @ 2006-12-01 3:34 UTC (permalink / raw) To: Mathieu Desnoyers Cc: Christoph Hellwig, linux-kernel, Andrew Morton, Ingo Molnar, Greg Kroah-Hartman, Thomas Gleixner, Tom Zanussi, Karim Yaghmour, Jes Sorensen, Richard J Moore, Martin J. Bligh, Michel Dagenais, Douglas Niehaus, ltt-dev, systemtap On Thu, Nov 30, 2006 at 10:11:53PM -0500, Mathieu Desnoyers wrote: > --- a/include/asm-generic/atomic.h > +++ b/include/asm-generic/atomic.h [snip] > +#if 0 > +/* Atomic add unless is only effective on atomic_t on powerpc (at least) */ > +static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u) > +{ > + atomic_t *v = (atomic_t *)l; > + > + return atomic_add_unless(v, a, u); > +} > + > +static inline long atomic_long_inc_not_zero(atomic_long_t *l) > +{ > + atomic_t *v = (atomic_t *)l; > + > + return atomic_inc_not_zero(v); > +} > +#endif //0 > + Why is this in the patch? ^ permalink raw reply [flat|nested] 13+ messages in thread
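[Editorial note] For background on why the #if 0 block is suspect beyond being dead code: in the 64-bit branch of asm-generic/atomic.h, atomic_long_t wraps a 64-bit counter, so falling back on the 32-bit atomic_t helpers would only ever operate on half of the word. The standalone sketch below illustrates the layout mismatch on an LP64 machine; the fake_* types merely mirror the kernel's types for illustration.

#include <stdio.h>

/* Illustrative stand-ins for the kernel types on a 64-bit architecture. */
typedef struct { int counter; } fake_atomic_t;          /* 32-bit counter */
typedef struct { long counter; } fake_atomic64_t;       /* 64-bit counter */
typedef fake_atomic64_t fake_atomic_long_t;

int main(void)
{
        fake_atomic_long_t l = { 0x100000000L };         /* bit 32 set */

        /* The questionable cast: a 32-bit helper sees only one half of
         * the 64-bit value (the low half on little-endian), so it would
         * read the counter as zero and update just 4 of the 8 bytes. */
        fake_atomic_t *v = (fake_atomic_t *)&l;

        printf("64-bit view: %ld, 32-bit view: %d\n", l.counter, v->counter);
        return 0;
}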
* Re: [PATCH 1/2] atomic.h atomic64_t standardization 2006-12-01 3:34 ` [PATCH 1/2] atomic.h atomic64_t standardization Paul Mundt @ 2006-12-01 3:41 ` Mathieu Desnoyers 0 siblings, 0 replies; 13+ messages in thread From: Mathieu Desnoyers @ 2006-12-01 3:41 UTC (permalink / raw) To: Paul Mundt, Christoph Hellwig, linux-kernel, Andrew Morton, Ingo Molnar, Greg Kroah-Hartman, Thomas Gleixner, Tom Zanussi, Karim Yaghmour, Jes Sorensen, Richard J Moore, Martin J. Bligh, Michel Dagenais, Douglas Niehaus, ltt-dev, systemtap * Paul Mundt (lethal@linux-sh.org) wrote: > On Thu, Nov 30, 2006 at 10:11:53PM -0500, Mathieu Desnoyers wrote: > > --- a/include/asm-generic/atomic.h > > +++ b/include/asm-generic/atomic.h > [snip] > > +#if 0 > > +/* Atomic add unless is only effective on atomic_t on powerpc (at least) */ > > +static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u) > > +{ > > + atomic_t *v = (atomic_t *)l; > > + > > + return atomic_add_unless(v, a, u); > > +} > > + > > +static inline long atomic_long_inc_not_zero(atomic_long_t *l) > > +{ > > + atomic_t *v = (atomic_t *)l; > > + > > + return atomic_inc_not_zero(v); > > +} > > +#endif //0 > > + > > Why is this in the patch? > Oops, I forgot to remove these comments after I fixed it in the powerpc code. Code for all other architectures will have to modified too : I just modified i386, x86_64, mips, arm and powerpc. Thanks for reporting. Mathieu Here is the fix : --- a/include/asm-generic/atomic.h +++ b/include/asm-generic/atomic.h @@ -122,22 +122,19 @@ static inline long atomic_long_dec_retur return atomic64_dec_return(v); } -#if 0 -/* Atomic add unless is only effective on atomic_t on powerpc (at least) */ static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u) { - atomic_t *v = (atomic_t *)l; + atomic64_t *v = (atomic64_t *)l; - return atomic_add_unless(v, a, u); + return atomic64_add_unless(v, a, u); } static inline long atomic_long_inc_not_zero(atomic_long_t *l) { - atomic_t *v = (atomic_t *)l; + atomic64_t *v = (atomic64_t *)l; - return atomic_inc_not_zero(v); + return atomic64_inc_not_zero(v); } -#endif //0 #else OpenPGP public key: http://krystal.dyndns.org:8080/key/compudj.gpg Key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68 ^ permalink raw reply [flat|nested] 13+ messages in thread
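[Editorial note] With the fix above, atomic_long_inc_not_zero() covers the full register width on 64-bit kernels. Its usual job is taking a reference only while a count is still non-zero; the sketch below shows that pattern, where the cache_object structure and function names are invented for illustration and only the atomic_long_* calls come from this series.

#include <linux/slab.h>
#include <asm/atomic.h>

struct cache_object {
        atomic_long_t refcount;
        /* ... payload ... */
};

/* Take a reference only while the count is still non-zero, i.e. the
 * object is not already on its way to being freed.  Returns nonzero
 * on success, zero if the caller must not touch the object. */
static int cache_object_tryget(struct cache_object *obj)
{
        return atomic_long_inc_not_zero(&obj->refcount);
}

/* Drop a reference; the last put frees the object. */
static void cache_object_put(struct cache_object *obj)
{
        if (atomic_long_dec_and_test(&obj->refcount))
                kfree(obj);
}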
* [PATCH 2/2] local.h modifications 2006-11-27 16:56 ` Christoph Hellwig 2006-12-01 3:11 ` [PATCH 1/2] atomic.h atomic64_t standardization Mathieu Desnoyers @ 2006-12-01 3:14 ` Mathieu Desnoyers 2006-12-01 3:24 ` Mathieu Desnoyers 1 sibling, 1 reply; 13+ messages in thread From: Mathieu Desnoyers @ 2006-12-01 3:14 UTC (permalink / raw) To: Christoph Hellwig, linux-kernel, Andrew Morton, Ingo Molnar, Greg Kroah-Hartman, Thomas Gleixner, Tom Zanussi, Karim Yaghmour, Paul Mundt, Jes Sorensen, Richard J Moore, Martin J. Bligh, Michel Dagenais, Douglas Niehaus, ltt-dev, systemtap * Christoph Hellwig (hch@infradead.org) wrote: > We already have local_t in asm/local.h for this purposed. Unfortunately > several architecture implementations are rather suboptimal, but I'm sure > the architecture maintainers would be interested in patches to optimize > the various implementations. > Hi Christoph, Here are the local.h modifications for i386, x86_64, powerpc, mips and arm (and asm-generic). It adds support for various per-cpu atomic operations. It applies on 2.6.18. Mathieu ---BEGIN-- --- a/include/asm-i386/atomic.h +++ b/include/asm-i386/atomic.h @@ -207,8 +207,9 @@ static __inline__ int atomic_sub_return( return atomic_add_return(-i,v); } -#define atomic_cmpxchg(v, old, new) ((int)cmpxchg(&((v)->counter), old, new)) -#define atomic_xchg(v, new) (xchg(&((v)->counter), new)) +#define atomic_cmpxchg(v, old, new) \ + ((__typeof__((v)->counter))cmpxchg(&((v)->counter), (old), (new))) +#define atomic_xchg(v, new) (xchg(&((v)->counter), (new))) /** * atomic_add_unless - add unless the number is a given value @@ -221,7 +222,7 @@ #define atomic_xchg(v, new) (xchg(&((v)- */ #define atomic_add_unless(v, a, u) \ ({ \ - int c, old; \ + __typeof__((v)->counter) c, old; \ c = atomic_read(v); \ for (;;) { \ if (unlikely(c == (u))) \ --- a/include/asm-x86_64/local.h +++ b/include/asm-x86_64/local.h @@ -45,6 +45,139 @@ static inline void local_sub(long i, loc :"ir" (i), "m" (v->counter)); } +/** + * local_sub_and_test - subtract value from variable and test result + * @i: integer value to subtract + * @v: pointer to type local_t + * + * Atomically subtracts @i from @v and returns + * true if the result is zero, or false for all + * other cases. + */ +static __inline__ int local_sub_and_test(long i, local_t *v) +{ + unsigned char c; + + __asm__ __volatile__( + "subq %2,%0; sete %1" + :"=m" (v->counter), "=qm" (c) + :"ir" (i), "m" (v->counter) : "memory"); + return c; +} + +/** + * local_dec_and_test - decrement and test + * @v: pointer to type local_t + * + * Atomically decrements @v by 1 and + * returns true if the result is 0, or false for all other + * cases. + */ +static __inline__ int local_dec_and_test(local_t *v) +{ + unsigned char c; + + __asm__ __volatile__( + "decq %0; sete %1" + :"=m" (v->counter), "=qm" (c) + :"m" (v->counter) : "memory"); + return c != 0; +} + +/** + * local_inc_and_test - increment and test + * @v: pointer to type local_t + * + * Atomically increments @v by 1 + * and returns true if the result is zero, or false for all + * other cases. 
+ */ +static __inline__ int local_inc_and_test(local_t *v) +{ + unsigned char c; + + __asm__ __volatile__( + "incq %0; sete %1" + :"=m" (v->counter), "=qm" (c) + :"m" (v->counter) : "memory"); + return c != 0; +} + +/** + * local_add_negative - add and test if negative + * @i: integer value to add + * @v: pointer to type local_t + * + * Atomically adds @i to @v and returns true + * if the result is negative, or false when + * result is greater than or equal to zero. + */ +static __inline__ int local_add_negative(long i, local_t *v) +{ + unsigned char c; + + __asm__ __volatile__( + "addq %2,%0; sets %1" + :"=m" (v->counter), "=qm" (c) + :"ir" (i), "m" (v->counter) : "memory"); + return c; +} + +/** + * local_add_return - add and return + * @i: integer value to add + * @v: pointer to type local_t + * + * Atomically adds @i to @v and returns @i + @v + */ +static __inline__ long local_add_return(long i, local_t *v) +{ + long __i = i; + __asm__ __volatile__( + "xaddq %0, %1;" + :"=r"(i) + :"m"(v->counter), "0"(i)); + return i + __i; +} + +static __inline__ long local_sub_return(long i, local_t *v) +{ + return local_add_return(-i,v); +} + +#define local_inc_return(v) (local_add_return(1,v)) +#define local_dec_return(v) (local_sub_return(1,v)) + +#define local_cmpxchg(v, old, new) \ + ((__typeof__((v)->counter))cmpxchg_local(&((v)->counter), (old), (new))) +/* Always has a lock prefix anyway */ +#define local_xchg(v, new) (xchg(&((v)->counter), new)) + +/** + * atomic_up_add_unless - add unless the number is a given value + * @v: pointer of type atomic_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as it was not @u. + * Returns non-zero if @v was not @u, and zero otherwise. + */ +#define local_add_unless(v, a, u) \ +({ \ + __typeof__((v)->counter) c, old; \ + c = local_read(v); \ + for (;;) { \ + if (unlikely(c == (u))) \ + break; \ + old = local_cmpxchg((v), c, c + (a)); \ + if (likely(old == c)) \ + break; \ + c = old; \ + } \ + c != (u); \ +}) +#define local_inc_not_zero(v) local_add_unless((v), 1, 0) + /* On x86-64 these are better than the atomic variants on SMP kernels because they dont use a lock prefix. */ #define __local_inc(l) local_inc(l) @@ -85,4 +218,4 @@ #define __cpu_local_dec(v) cpu_local_dec #define __cpu_local_add(i, v) cpu_local_add((i), (v)) #define __cpu_local_sub(i, v) cpu_local_sub((i), (v)) -#endif /* _ARCH_I386_LOCAL_H */ +#endif /* _ARCH_X8664_LOCAL_H */ --- a/include/asm-powerpc/local.h +++ b/include/asm-powerpc/local.h @@ -1 +1,340 @@ -#include <asm-generic/local.h> +#ifndef _ARCH_POWERPC_LOCAL_H +#define _ARCH_POWERPC_LOCAL_H + +#include <linux/percpu.h> +#include <asm/atomic.h> + +typedef struct +{ + volatile long counter; +} local_t; + +#define LOCAL_INIT(i) { (i) } + +#define local_read(v) ((v)->counter) +#define local_set(v,i) (((v)->counter) = (i)) + +#define local_add(i,l) atomic_long_add((i),(&(l)->a)) +#define local_sub(i,l) atomic_long_sub((i),(&(l)->a)) +#define local_inc(l) atomic_long_inc(&(l)->a) +#define local_dec(l) atomic_long_dec(&(l)->a) + +#ifndef __powerpc64__ + +static __inline__ int local_add_return(int a, local_t *v) +{ + int t; + + __asm__ __volatile__( +"1: lwarx %0,0,%2 # local_add_return\n\ + add %0,%1,%0\n" + PPC405_ERR77(0,%2) +" stwcx. 
%0,0,%2 \n\ + bne- 1b" + : "=&r" (t) + : "r" (a), "r" (&v->counter) + : "cc", "memory"); + + return t; +} + +#define local_add_negative(a, v) (local_add_return((a), (v)) < 0) + +static __inline__ int local_sub_return(int a, local_t *v) +{ + int t; + + __asm__ __volatile__( +"1: lwarx %0,0,%2 # local_sub_return\n\ + subf %0,%1,%0\n" + PPC405_ERR77(0,%2) +" stwcx. %0,0,%2 \n\ + bne- 1b" + : "=&r" (t) + : "r" (a), "r" (&v->counter) + : "cc", "memory"); + + return t; +} + +static __inline__ int local_inc_return(local_t *v) +{ + int t; + + __asm__ __volatile__( +"1: lwarx %0,0,%1 # local_inc_return\n\ + addic %0,%0,1\n" + PPC405_ERR77(0,%1) +" stwcx. %0,0,%1 \n\ + bne- 1b" + : "=&r" (t) + : "r" (&v->counter) + : "cc", "memory"); + + return t; +} + +/* + * local_inc_and_test - increment and test + * @v: pointer of type local_t + * + * Atomically increments @v by 1 + * and returns true if the result is zero, or false for all + * other cases. + */ +#define local_inc_and_test(v) (local_inc_return(v) == 0) + +static __inline__ int local_dec_return(local_t *v) +{ + int t; + + __asm__ __volatile__( +"1: lwarx %0,0,%1 # local_dec_return\n\ + addic %0,%0,-1\n" + PPC405_ERR77(0,%1) +" stwcx. %0,0,%1\n\ + bne- 1b" + : "=&r" (t) + : "r" (&v->counter) + : "cc", "memory"); + + return t; +} + +#define local_cmpxchg(v, o, n) \ + ((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n))) +#define local_xchg(v, new) (xchg(&((v)->counter), new)) + +/** + * local_add_unless - add unless the number is a given value + * @v: pointer of type local_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as it was not @u. + * Returns non-zero if @v was not @u, and zero otherwise. + */ +static __inline__ int local_add_unless(local_t *v, int a, int u) +{ + int t; + + __asm__ __volatile__ ( +"1: lwarx %0,0,%1 # local_add_unless\n\ + cmpw 0,%0,%3 \n\ + beq- 2f \n\ + add %0,%2,%0 \n" + PPC405_ERR77(0,%2) +" stwcx. %0,0,%1 \n\ + bne- 1b \n" +" subf %0,%2,%0 \n\ +2:" + : "=&r" (t) + : "r" (&v->counter), "r" (a), "r" (u) + : "cc", "memory"); + + return t != u; +} + +#define local_inc_not_zero(v) local_add_unless((v), 1, 0) + +#define local_sub_and_test(a, v) (local_sub_return((a), (v)) == 0) +#define local_dec_and_test(v) (local_dec_return((v)) == 0) + +/* + * Atomically test *v and decrement if it is greater than 0. + * The function returns the old value of *v minus 1. + */ +static __inline__ int local_dec_if_positive(local_t *v) +{ + int t; + + __asm__ __volatile__( +"1: lwarx %0,0,%1 # local_dec_if_positive\n\ + addic. %0,%0,-1\n\ + blt- 2f\n" + PPC405_ERR77(0,%1) +" stwcx. %0,0,%1\n\ + bne- 1b" + "\n\ +2:" : "=&r" (t) + : "r" (&v->counter) + : "cc", "memory"); + + return t; +} + +#else /* __powerpc64__ */ + +static __inline__ long local_add_return(long a, local_t *v) +{ + long t; + + __asm__ __volatile__( +"1: ldarx %0,0,%2 # local_add_return\n\ + add %0,%1,%0\n\ + stdcx. %0,0,%2 \n\ + bne- 1b" + : "=&r" (t) + : "r" (a), "r" (&v->counter) + : "cc", "memory"); + + return t; +} + +#define local_add_negative(a, v) (local_add_return((a), (v)) < 0) + +static __inline__ long local_sub_return(long a, local_t *v) +{ + long t; + + __asm__ __volatile__( +"1: ldarx %0,0,%2 # local_sub_return\n\ + subf %0,%1,%0\n\ + stdcx. 
%0,0,%2 \n\ + bne- 1b" + : "=&r" (t) + : "r" (a), "r" (&v->counter) + : "cc", "memory"); + + return t; +} + +static __inline__ long local_inc_return(local_t *v) +{ + long t; + + __asm__ __volatile__( +"1: ldarx %0,0,%1 # local_inc_return\n\ + addic %0,%0,1\n\ + stdcx. %0,0,%1 \n\ + bne- 1b" + : "=&r" (t) + : "r" (&v->counter) + : "cc", "memory"); + + return t; +} + +/* + * local_inc_and_test - increment and test + * @v: pointer of type local_t + * + * Atomically increments @v by 1 + * and returns true if the result is zero, or false for all + * other cases. + */ +#define local_inc_and_test(v) (local_inc_return(v) == 0) + +static __inline__ long local_dec_return(local_t *v) +{ + long t; + + __asm__ __volatile__( +"1: ldarx %0,0,%1 # local_dec_return\n\ + addic %0,%0,-1\n\ + stdcx. %0,0,%1\n\ + bne- 1b" + : "=&r" (t) + : "r" (&v->counter) + : "cc", "memory"); + + return t; +} + +#define local_sub_and_test(a, v) (local_sub_return((a), (v)) == 0) +#define local_dec_and_test(v) (local_dec_return((v)) == 0) + +/* + * Atomically test *v and decrement if it is greater than 0. + * The function returns the old value of *v minus 1. + */ +static __inline__ long local_dec_if_positive(local_t *v) +{ + long t; + + __asm__ __volatile__( +"1: ldarx %0,0,%1 # local_dec_if_positive\n\ + addic. %0,%0,-1\n\ + blt- 2f\n\ + stdcx. %0,0,%1\n\ + bne- 1b" + "\n\ +2:" : "=&r" (t) + : "r" (&v->counter) + : "cc", "memory"); + + return t; +} + +#define local_cmpxchg(v, o, n) \ + ((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n))) +#define local_xchg(v, new) (xchg(&((v)->counter), new)) + +/** + * local_add_unless - add unless the number is a given value + * @v: pointer of type local_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as it was not @u. + * Returns non-zero if @v was not @u, and zero otherwise. + */ +static __inline__ int local_add_unless(atomic_t *v, long a, long u) +{ + long t; + + __asm__ __volatile__ ( +"1: ldarx %0,0,%1 # atomic_add_unless\n\ + cmpd 0,%0,%3 \n\ + beq- 2f \n\ + add %0,%2,%0 \n" + PPC405_ERR77(0,%2) +" stdcx. %0,0,%1 \n\ + bne- 1b \n" +" subf %0,%2,%0 \n\ +2:" + : "=&r" (t) + : "r" (&v->counter), "r" (a), "r" (u) + : "cc", "memory"); + + return t != u; +} + +#define local_inc_not_zero(v) local_add_unless((v), 1, 0) + +#endif /* !__powerpc64__ */ + +/* Use these for per-cpu local_t variables: on some archs they are + * much more efficient than these naive implementations. Note they take + * a variable, not an address. + * + * This could be done better if we moved the per cpu data directly + * after GS. + */ + +/* Need to disable preemption for the cpu local counters otherwise we could + still access a variable of a previous CPU in a non atomic way. 
*/ +#define cpu_local_wrap_v(v) \ + ({ local_t res__; \ + preempt_disable(); \ + res__ = (v); \ + preempt_enable(); \ + res__; }) +#define cpu_local_wrap(v) \ + ({ preempt_disable(); \ + v; \ + preempt_enable(); }) \ + +#define cpu_local_read(v) cpu_local_wrap_v(local_read(&__get_cpu_var(v))) +#define cpu_local_set(v, i) cpu_local_wrap(local_set(&__get_cpu_var(v), (i))) +#define cpu_local_inc(v) cpu_local_wrap(local_inc(&__get_cpu_var(v))) +#define cpu_local_dec(v) cpu_local_wrap(local_dec(&__get_cpu_var(v))) +#define cpu_local_add(i, v) cpu_local_wrap(local_add((i), &__get_cpu_var(v))) +#define cpu_local_sub(i, v) cpu_local_wrap(local_sub((i), &__get_cpu_var(v))) + +#define __cpu_local_inc(v) cpu_local_inc(v) +#define __cpu_local_dec(v) cpu_local_dec(v) +#define __cpu_local_add(i, v) cpu_local_add((i), (v)) +#define __cpu_local_sub(i, v) cpu_local_sub((i), (v)) + +#endif /* _ARCH_POWERPC_LOCAL_H */ --- a/include/asm-mips/local.h +++ b/include/asm-mips/local.h @@ -1,60 +1 @@ -#ifndef _ASM_LOCAL_H -#define _ASM_LOCAL_H - -#include <linux/percpu.h> -#include <asm/atomic.h> - -#ifdef CONFIG_32BIT - -typedef atomic_t local_t; - -#define LOCAL_INIT(i) ATOMIC_INIT(i) -#define local_read(v) atomic_read(v) -#define local_set(v,i) atomic_set(v,i) - -#define local_inc(v) atomic_inc(v) -#define local_dec(v) atomic_dec(v) -#define local_add(i, v) atomic_add(i, v) -#define local_sub(i, v) atomic_sub(i, v) - -#endif - -#ifdef CONFIG_64BIT - -typedef atomic64_t local_t; - -#define LOCAL_INIT(i) ATOMIC64_INIT(i) -#define local_read(v) atomic64_read(v) -#define local_set(v,i) atomic64_set(v,i) - -#define local_inc(v) atomic64_inc(v) -#define local_dec(v) atomic64_dec(v) -#define local_add(i, v) atomic64_add(i, v) -#define local_sub(i, v) atomic64_sub(i, v) - -#endif - -#define __local_inc(v) ((v)->counter++) -#define __local_dec(v) ((v)->counter--) -#define __local_add(i,v) ((v)->counter+=(i)) -#define __local_sub(i,v) ((v)->counter-=(i)) - -/* - * Use these for per-cpu local_t variables: on some archs they are - * much more efficient than these naive implementations. Note they take - * a variable, not an address. 
- */ -#define cpu_local_read(v) local_read(&__get_cpu_var(v)) -#define cpu_local_set(v, i) local_set(&__get_cpu_var(v), (i)) - -#define cpu_local_inc(v) local_inc(&__get_cpu_var(v)) -#define cpu_local_dec(v) local_dec(&__get_cpu_var(v)) -#define cpu_local_add(i, v) local_add((i), &__get_cpu_var(v)) -#define cpu_local_sub(i, v) local_sub((i), &__get_cpu_var(v)) - -#define __cpu_local_inc(v) __local_inc(&__get_cpu_var(v)) -#define __cpu_local_dec(v) __local_dec(&__get_cpu_var(v)) -#define __cpu_local_add(i, v) __local_add((i), &__get_cpu_var(v)) -#define __cpu_local_sub(i, v) __local_sub((i), &__get_cpu_var(v)) - -#endif /* _ASM_LOCAL_H */ +#include <asm-generic/local.h> --- a/include/asm-generic/local.h +++ b/include/asm-generic/local.h @@ -33,6 +33,19 @@ #define local_dec(l) atomic_long_dec(&(l #define local_add(i,l) atomic_long_add((i),(&(l)->a)) #define local_sub(i,l) atomic_long_sub((i),(&(l)->a)) +#define local_sub_and_test(i, l) atomic_long_sub_and_test((i), (&(l)->a)) +#define local_dec_and_test(l) atomic_long_dec_and_test(&(l)->a) +#define local_inc_and_test(l) atomic_long_inc_and_test(&(l)->a) +#define local_add_negative(i, l) atomic_long_add_negative((i), (&(l)->a)) +#define local_add_return(i, l) atomic_long_add_return((i), (&(l)->a)) +#define local_sub_return(i, l) atomic_long_sub_return((i), (&(l)->a)) +#define local_inc_return(l) atomic_long_inc_return(&(l)->a) + +#define local_cmpxchg(l, old, new) atomic_long_cmpxchg((&(l)->a), (old), (new)) +#define local_xchg(l, new) atomic_long_xchg((&(l)->a), (new)) +#define local_add_unless(l, a, u) atomic_long_add_unless((&(l)->a), (a), (u)) +#define local_inc_not_zero(l) atomic_long_inc_not_zero(&(l)->a) + /* Non-atomic variants, ie. preemption disabled and won't be touched * in interrupt, etc. Some archs can optimize this case well. */ #define __local_inc(l) local_set((l), local_read(l) + 1) ---END--- OpenPGP public key: http://krystal.dyndns.org:8080/key/compudj.gpg Key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68 ^ permalink raw reply [flat|nested] 13+ messages in thread
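[Editorial note] The motivation for these local_t operations is cheap per-cpu accounting on the tracing fast path. The usage sketch below is illustrative only; the counter and function names are invented, and only the local_t / cpu_local_* API comes from the patch.

#include <linux/percpu.h>
#include <linux/cpumask.h>
#include <asm/local.h>

static DEFINE_PER_CPU(local_t, dropped_events);

/* Tracing fast path: bump this CPU's counter.  No LOCK prefix (or
 * equivalent barrier) is needed because only the owning CPU modifies
 * its copy; cpu_local_inc() resolves this CPU's variable and, in the
 * arch versions above, keeps preemption disabled around the update. */
static void note_dropped_event(void)
{
        cpu_local_inc(dropped_events);
}

/* Slow path, e.g. when reporting statistics: sum every CPU's counter.
 * A plain local_read() is enough for a remote, approximate snapshot. */
static long total_dropped_events(void)
{
        long sum = 0;
        int cpu;

        for_each_possible_cpu(cpu)
                sum += local_read(&per_cpu(dropped_events, cpu));
        return sum;
}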
* Re: [PATCH 2/2] local.h modifications 2006-12-01 3:14 ` [PATCH 2/2] local.h modifications Mathieu Desnoyers @ 2006-12-01 3:24 ` Mathieu Desnoyers 2006-12-01 22:21 ` Mathieu Desnoyers 0 siblings, 1 reply; 13+ messages in thread From: Mathieu Desnoyers @ 2006-12-01 3:24 UTC (permalink / raw) To: Christoph Hellwig, linux-kernel, Andrew Morton, Ingo Molnar, Greg Kroah-Hartman, Thomas Gleixner, Tom Zanussi, Karim Yaghmour, Paul Mundt, Jes Sorensen, Richard J Moore, Martin J. Bligh, Michel Dagenais, Douglas Niehaus, ltt-dev, systemtap New version, fixes PowerPC typo (cut'n'pasted from the atomic.h typo). Mathieu --- a/include/asm-i386/atomic.h +++ b/include/asm-i386/atomic.h @@ -207,8 +207,9 @@ static __inline__ int atomic_sub_return( return atomic_add_return(-i,v); } -#define atomic_cmpxchg(v, old, new) ((int)cmpxchg(&((v)->counter), old, new)) -#define atomic_xchg(v, new) (xchg(&((v)->counter), new)) +#define atomic_cmpxchg(v, old, new) \ + ((__typeof__((v)->counter))cmpxchg(&((v)->counter), (old), (new))) +#define atomic_xchg(v, new) (xchg(&((v)->counter), (new))) /** * atomic_add_unless - add unless the number is a given value @@ -221,7 +222,7 @@ #define atomic_xchg(v, new) (xchg(&((v)- */ #define atomic_add_unless(v, a, u) \ ({ \ - int c, old; \ + __typeof__((v)->counter) c, old; \ c = atomic_read(v); \ for (;;) { \ if (unlikely(c == (u))) \ --- a/include/asm-x86_64/local.h +++ b/include/asm-x86_64/local.h @@ -45,6 +45,139 @@ static inline void local_sub(long i, loc :"ir" (i), "m" (v->counter)); } +/** + * local_sub_and_test - subtract value from variable and test result + * @i: integer value to subtract + * @v: pointer to type local_t + * + * Atomically subtracts @i from @v and returns + * true if the result is zero, or false for all + * other cases. + */ +static __inline__ int local_sub_and_test(long i, local_t *v) +{ + unsigned char c; + + __asm__ __volatile__( + "subq %2,%0; sete %1" + :"=m" (v->counter), "=qm" (c) + :"ir" (i), "m" (v->counter) : "memory"); + return c; +} + +/** + * local_dec_and_test - decrement and test + * @v: pointer to type local_t + * + * Atomically decrements @v by 1 and + * returns true if the result is 0, or false for all other + * cases. + */ +static __inline__ int local_dec_and_test(local_t *v) +{ + unsigned char c; + + __asm__ __volatile__( + "decq %0; sete %1" + :"=m" (v->counter), "=qm" (c) + :"m" (v->counter) : "memory"); + return c != 0; +} + +/** + * local_inc_and_test - increment and test + * @v: pointer to type local_t + * + * Atomically increments @v by 1 + * and returns true if the result is zero, or false for all + * other cases. + */ +static __inline__ int local_inc_and_test(local_t *v) +{ + unsigned char c; + + __asm__ __volatile__( + "incq %0; sete %1" + :"=m" (v->counter), "=qm" (c) + :"m" (v->counter) : "memory"); + return c != 0; +} + +/** + * local_add_negative - add and test if negative + * @i: integer value to add + * @v: pointer to type local_t + * + * Atomically adds @i to @v and returns true + * if the result is negative, or false when + * result is greater than or equal to zero. 
+ */ +static __inline__ int local_add_negative(long i, local_t *v) +{ + unsigned char c; + + __asm__ __volatile__( + "addq %2,%0; sets %1" + :"=m" (v->counter), "=qm" (c) + :"ir" (i), "m" (v->counter) : "memory"); + return c; +} + +/** + * local_add_return - add and return + * @i: integer value to add + * @v: pointer to type local_t + * + * Atomically adds @i to @v and returns @i + @v + */ +static __inline__ long local_add_return(long i, local_t *v) +{ + long __i = i; + __asm__ __volatile__( + "xaddq %0, %1;" + :"=r"(i) + :"m"(v->counter), "0"(i)); + return i + __i; +} + +static __inline__ long local_sub_return(long i, local_t *v) +{ + return local_add_return(-i,v); +} + +#define local_inc_return(v) (local_add_return(1,v)) +#define local_dec_return(v) (local_sub_return(1,v)) + +#define local_cmpxchg(v, old, new) \ + ((__typeof__((v)->counter))cmpxchg_local(&((v)->counter), (old), (new))) +/* Always has a lock prefix anyway */ +#define local_xchg(v, new) (xchg(&((v)->counter), new)) + +/** + * atomic_up_add_unless - add unless the number is a given value + * @v: pointer of type atomic_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as it was not @u. + * Returns non-zero if @v was not @u, and zero otherwise. + */ +#define local_add_unless(v, a, u) \ +({ \ + __typeof__((v)->counter) c, old; \ + c = local_read(v); \ + for (;;) { \ + if (unlikely(c == (u))) \ + break; \ + old = local_cmpxchg((v), c, c + (a)); \ + if (likely(old == c)) \ + break; \ + c = old; \ + } \ + c != (u); \ +}) +#define local_inc_not_zero(v) local_add_unless((v), 1, 0) + /* On x86-64 these are better than the atomic variants on SMP kernels because they dont use a lock prefix. */ #define __local_inc(l) local_inc(l) @@ -85,4 +218,4 @@ #define __cpu_local_dec(v) cpu_local_dec #define __cpu_local_add(i, v) cpu_local_add((i), (v)) #define __cpu_local_sub(i, v) cpu_local_sub((i), (v)) -#endif /* _ARCH_I386_LOCAL_H */ +#endif /* _ARCH_X8664_LOCAL_H */ --- a/include/asm-powerpc/local.h +++ b/include/asm-powerpc/local.h @@ -1 +1,340 @@ -#include <asm-generic/local.h> +#ifndef _ARCH_POWERPC_LOCAL_H +#define _ARCH_POWERPC_LOCAL_H + +#include <linux/percpu.h> +#include <asm/atomic.h> + +typedef struct +{ + volatile long counter; +} local_t; + +#define LOCAL_INIT(i) { (i) } + +#define local_read(v) ((v)->counter) +#define local_set(v,i) (((v)->counter) = (i)) + +#define local_add(i,l) atomic_long_add((i),(&(l)->a)) +#define local_sub(i,l) atomic_long_sub((i),(&(l)->a)) +#define local_inc(l) atomic_long_inc(&(l)->a) +#define local_dec(l) atomic_long_dec(&(l)->a) + +#ifndef __powerpc64__ + +static __inline__ int local_add_return(int a, local_t *v) +{ + int t; + + __asm__ __volatile__( +"1: lwarx %0,0,%2 # local_add_return\n\ + add %0,%1,%0\n" + PPC405_ERR77(0,%2) +" stwcx. %0,0,%2 \n\ + bne- 1b" + : "=&r" (t) + : "r" (a), "r" (&v->counter) + : "cc", "memory"); + + return t; +} + +#define local_add_negative(a, v) (local_add_return((a), (v)) < 0) + +static __inline__ int local_sub_return(int a, local_t *v) +{ + int t; + + __asm__ __volatile__( +"1: lwarx %0,0,%2 # local_sub_return\n\ + subf %0,%1,%0\n" + PPC405_ERR77(0,%2) +" stwcx. %0,0,%2 \n\ + bne- 1b" + : "=&r" (t) + : "r" (a), "r" (&v->counter) + : "cc", "memory"); + + return t; +} + +static __inline__ int local_inc_return(local_t *v) +{ + int t; + + __asm__ __volatile__( +"1: lwarx %0,0,%1 # local_inc_return\n\ + addic %0,%0,1\n" + PPC405_ERR77(0,%1) +" stwcx. 
%0,0,%1 \n\ + bne- 1b" + : "=&r" (t) + : "r" (&v->counter) + : "cc", "memory"); + + return t; +} + +/* + * local_inc_and_test - increment and test + * @v: pointer of type local_t + * + * Atomically increments @v by 1 + * and returns true if the result is zero, or false for all + * other cases. + */ +#define local_inc_and_test(v) (local_inc_return(v) == 0) + +static __inline__ int local_dec_return(local_t *v) +{ + int t; + + __asm__ __volatile__( +"1: lwarx %0,0,%1 # local_dec_return\n\ + addic %0,%0,-1\n" + PPC405_ERR77(0,%1) +" stwcx. %0,0,%1\n\ + bne- 1b" + : "=&r" (t) + : "r" (&v->counter) + : "cc", "memory"); + + return t; +} + +#define local_cmpxchg(v, o, n) \ + ((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n))) +#define local_xchg(v, new) (xchg(&((v)->counter), new)) + +/** + * local_add_unless - add unless the number is a given value + * @v: pointer of type local_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as it was not @u. + * Returns non-zero if @v was not @u, and zero otherwise. + */ +static __inline__ int local_add_unless(local_t *v, int a, int u) +{ + int t; + + __asm__ __volatile__ ( +"1: lwarx %0,0,%1 # local_add_unless\n\ + cmpw 0,%0,%3 \n\ + beq- 2f \n\ + add %0,%2,%0 \n" + PPC405_ERR77(0,%2) +" stwcx. %0,0,%1 \n\ + bne- 1b \n" +" subf %0,%2,%0 \n\ +2:" + : "=&r" (t) + : "r" (&v->counter), "r" (a), "r" (u) + : "cc", "memory"); + + return t != u; +} + +#define local_inc_not_zero(v) local_add_unless((v), 1, 0) + +#define local_sub_and_test(a, v) (local_sub_return((a), (v)) == 0) +#define local_dec_and_test(v) (local_dec_return((v)) == 0) + +/* + * Atomically test *v and decrement if it is greater than 0. + * The function returns the old value of *v minus 1. + */ +static __inline__ int local_dec_if_positive(local_t *v) +{ + int t; + + __asm__ __volatile__( +"1: lwarx %0,0,%1 # local_dec_if_positive\n\ + addic. %0,%0,-1\n\ + blt- 2f\n" + PPC405_ERR77(0,%1) +" stwcx. %0,0,%1\n\ + bne- 1b" + "\n\ +2:" : "=&r" (t) + : "r" (&v->counter) + : "cc", "memory"); + + return t; +} + +#else /* __powerpc64__ */ + +static __inline__ long local_add_return(long a, local_t *v) +{ + long t; + + __asm__ __volatile__( +"1: ldarx %0,0,%2 # local_add_return\n\ + add %0,%1,%0\n\ + stdcx. %0,0,%2 \n\ + bne- 1b" + : "=&r" (t) + : "r" (a), "r" (&v->counter) + : "cc", "memory"); + + return t; +} + +#define local_add_negative(a, v) (local_add_return((a), (v)) < 0) + +static __inline__ long local_sub_return(long a, local_t *v) +{ + long t; + + __asm__ __volatile__( +"1: ldarx %0,0,%2 # local_sub_return\n\ + subf %0,%1,%0\n\ + stdcx. %0,0,%2 \n\ + bne- 1b" + : "=&r" (t) + : "r" (a), "r" (&v->counter) + : "cc", "memory"); + + return t; +} + +static __inline__ long local_inc_return(local_t *v) +{ + long t; + + __asm__ __volatile__( +"1: ldarx %0,0,%1 # local_inc_return\n\ + addic %0,%0,1\n\ + stdcx. %0,0,%1 \n\ + bne- 1b" + : "=&r" (t) + : "r" (&v->counter) + : "cc", "memory"); + + return t; +} + +/* + * local_inc_and_test - increment and test + * @v: pointer of type local_t + * + * Atomically increments @v by 1 + * and returns true if the result is zero, or false for all + * other cases. + */ +#define local_inc_and_test(v) (local_inc_return(v) == 0) + +static __inline__ long local_dec_return(local_t *v) +{ + long t; + + __asm__ __volatile__( +"1: ldarx %0,0,%1 # local_dec_return\n\ + addic %0,%0,-1\n\ + stdcx. 
%0,0,%1\n\ + bne- 1b" + : "=&r" (t) + : "r" (&v->counter) + : "cc", "memory"); + + return t; +} + +#define local_sub_and_test(a, v) (local_sub_return((a), (v)) == 0) +#define local_dec_and_test(v) (local_dec_return((v)) == 0) + +/* + * Atomically test *v and decrement if it is greater than 0. + * The function returns the old value of *v minus 1. + */ +static __inline__ long local_dec_if_positive(local_t *v) +{ + long t; + + __asm__ __volatile__( +"1: ldarx %0,0,%1 # local_dec_if_positive\n\ + addic. %0,%0,-1\n\ + blt- 2f\n\ + stdcx. %0,0,%1\n\ + bne- 1b" + "\n\ +2:" : "=&r" (t) + : "r" (&v->counter) + : "cc", "memory"); + + return t; +} + +#define local_cmpxchg(v, o, n) \ + ((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n))) +#define local_xchg(v, new) (xchg(&((v)->counter), new)) + +/** + * local_add_unless - add unless the number is a given value + * @v: pointer of type local_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as it was not @u. + * Returns non-zero if @v was not @u, and zero otherwise. + */ +static __inline__ int local_add_unless(local_t *v, long a, long u) +{ + long t; + + __asm__ __volatile__ ( +"1: ldarx %0,0,%1 # atomic_add_unless\n\ + cmpd 0,%0,%3 \n\ + beq- 2f \n\ + add %0,%2,%0 \n" + PPC405_ERR77(0,%2) +" stdcx. %0,0,%1 \n\ + bne- 1b \n" +" subf %0,%2,%0 \n\ +2:" + : "=&r" (t) + : "r" (&v->counter), "r" (a), "r" (u) + : "cc", "memory"); + + return t != u; +} + +#define local_inc_not_zero(v) local_add_unless((v), 1, 0) + +#endif /* !__powerpc64__ */ + +/* Use these for per-cpu local_t variables: on some archs they are + * much more efficient than these naive implementations. Note they take + * a variable, not an address. + * + * This could be done better if we moved the per cpu data directly + * after GS. + */ + +/* Need to disable preemption for the cpu local counters otherwise we could + still access a variable of a previous CPU in a non atomic way. 
*/ +#define cpu_local_wrap_v(v) \ + ({ local_t res__; \ + preempt_disable(); \ + res__ = (v); \ + preempt_enable(); \ + res__; }) +#define cpu_local_wrap(v) \ + ({ preempt_disable(); \ + v; \ + preempt_enable(); }) \ + +#define cpu_local_read(v) cpu_local_wrap_v(local_read(&__get_cpu_var(v))) +#define cpu_local_set(v, i) cpu_local_wrap(local_set(&__get_cpu_var(v), (i))) +#define cpu_local_inc(v) cpu_local_wrap(local_inc(&__get_cpu_var(v))) +#define cpu_local_dec(v) cpu_local_wrap(local_dec(&__get_cpu_var(v))) +#define cpu_local_add(i, v) cpu_local_wrap(local_add((i), &__get_cpu_var(v))) +#define cpu_local_sub(i, v) cpu_local_wrap(local_sub((i), &__get_cpu_var(v))) + +#define __cpu_local_inc(v) cpu_local_inc(v) +#define __cpu_local_dec(v) cpu_local_dec(v) +#define __cpu_local_add(i, v) cpu_local_add((i), (v)) +#define __cpu_local_sub(i, v) cpu_local_sub((i), (v)) + +#endif /* _ARCH_POWERPC_LOCAL_H */ --- a/include/asm-mips/local.h +++ b/include/asm-mips/local.h @@ -1,60 +1 @@ -#ifndef _ASM_LOCAL_H -#define _ASM_LOCAL_H - -#include <linux/percpu.h> -#include <asm/atomic.h> - -#ifdef CONFIG_32BIT - -typedef atomic_t local_t; - -#define LOCAL_INIT(i) ATOMIC_INIT(i) -#define local_read(v) atomic_read(v) -#define local_set(v,i) atomic_set(v,i) - -#define local_inc(v) atomic_inc(v) -#define local_dec(v) atomic_dec(v) -#define local_add(i, v) atomic_add(i, v) -#define local_sub(i, v) atomic_sub(i, v) - -#endif - -#ifdef CONFIG_64BIT - -typedef atomic64_t local_t; - -#define LOCAL_INIT(i) ATOMIC64_INIT(i) -#define local_read(v) atomic64_read(v) -#define local_set(v,i) atomic64_set(v,i) - -#define local_inc(v) atomic64_inc(v) -#define local_dec(v) atomic64_dec(v) -#define local_add(i, v) atomic64_add(i, v) -#define local_sub(i, v) atomic64_sub(i, v) - -#endif - -#define __local_inc(v) ((v)->counter++) -#define __local_dec(v) ((v)->counter--) -#define __local_add(i,v) ((v)->counter+=(i)) -#define __local_sub(i,v) ((v)->counter-=(i)) - -/* - * Use these for per-cpu local_t variables: on some archs they are - * much more efficient than these naive implementations. Note they take - * a variable, not an address. 
- */ -#define cpu_local_read(v) local_read(&__get_cpu_var(v)) -#define cpu_local_set(v, i) local_set(&__get_cpu_var(v), (i)) - -#define cpu_local_inc(v) local_inc(&__get_cpu_var(v)) -#define cpu_local_dec(v) local_dec(&__get_cpu_var(v)) -#define cpu_local_add(i, v) local_add((i), &__get_cpu_var(v)) -#define cpu_local_sub(i, v) local_sub((i), &__get_cpu_var(v)) - -#define __cpu_local_inc(v) __local_inc(&__get_cpu_var(v)) -#define __cpu_local_dec(v) __local_dec(&__get_cpu_var(v)) -#define __cpu_local_add(i, v) __local_add((i), &__get_cpu_var(v)) -#define __cpu_local_sub(i, v) __local_sub((i), &__get_cpu_var(v)) - -#endif /* _ASM_LOCAL_H */ +#include <asm-generic/local.h> --- a/include/asm-generic/local.h +++ b/include/asm-generic/local.h @@ -33,6 +33,19 @@ #define local_dec(l) atomic_long_dec(&(l #define local_add(i,l) atomic_long_add((i),(&(l)->a)) #define local_sub(i,l) atomic_long_sub((i),(&(l)->a)) +#define local_sub_and_test(i, l) atomic_long_sub_and_test((i), (&(l)->a)) +#define local_dec_and_test(l) atomic_long_dec_and_test(&(l)->a) +#define local_inc_and_test(l) atomic_long_inc_and_test(&(l)->a) +#define local_add_negative(i, l) atomic_long_add_negative((i), (&(l)->a)) +#define local_add_return(i, l) atomic_long_add_return((i), (&(l)->a)) +#define local_sub_return(i, l) atomic_long_sub_return((i), (&(l)->a)) +#define local_inc_return(l) atomic_long_inc_return(&(l)->a) + +#define local_cmpxchg(l, old, new) atomic_long_cmpxchg((&(l)->a), (old), (new)) +#define local_xchg(l, new) atomic_long_xchg((&(l)->a), (new)) +#define local_add_unless(l, a, u) atomic_long_add_unless((&(l)->a), (a), (u)) +#define local_inc_not_zero(l) atomic_long_inc_not_zero(&(l)->a) + /* Non-atomic variants, ie. preemption disabled and won't be touched * in interrupt, etc. Some archs can optimize this case well. */ #define __local_inc(l) local_set((l), local_read(l) + 1) OpenPGP public key: http://krystal.dyndns.org:8080/key/compudj.gpg Key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68 ^ permalink raw reply [flat|nested] 13+ messages in thread
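[Editorial note] The cpu_local_wrap*() helpers above exist because resolving this CPU's copy and updating it must happen without a migration in between. When several operations have to hit the same CPU's counter, the usual open-coded form looks roughly like the sketch below; the variable and function names are invented for illustration.

#include <linux/percpu.h>
#include <asm/local.h>

static DEFINE_PER_CPU(local_t, bytes_written);

/* Open-coded equivalent of cpu_local_add(n, bytes_written): preemption
 * stays disabled from the moment this CPU's copy is resolved until the
 * update is done, so the task cannot migrate and modify another CPU's
 * counter with a non-atomic (non-LOCKed) instruction. */
static void account_write(long n)
{
        local_t *l = &get_cpu_var(bytes_written);       /* preempt_disable() */

        local_add(n, l);
        put_cpu_var(bytes_written);                     /* preempt_enable() */
}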
* Re: [PATCH 2/2] local.h modifications 2006-12-01 3:24 ` Mathieu Desnoyers @ 2006-12-01 22:21 ` Mathieu Desnoyers 2006-12-05 17:14 ` [PATCH 2/2] local.h modifications for 2.6.19 Mathieu Desnoyers 0 siblings, 1 reply; 13+ messages in thread From: Mathieu Desnoyers @ 2006-12-01 22:21 UTC (permalink / raw) To: Christoph Hellwig, linux-kernel Cc: Andrew Morton, Ingo Molnar, Greg Kroah-Hartman, Thomas Gleixner, Tom Zanussi, Karim Yaghmour, Paul Mundt, Jes Sorensen, Richard J Moore, Martin J. Bligh, Michel Dagenais, Douglas Niehaus, ltt-dev, systemtap Hi, I also completed the support for all atomic operations in local.h for all architectures. The local_t type is now identical on each architectures : it contains an atomic_long_t field, just like the asm-generic implementation. Please review. Mathieu ---BEGIN--- --- a/include/asm-alpha/local.h +++ b/include/asm-alpha/local.h @@ -4,37 +4,115 @@ #define _ALPHA_LOCAL_H #include <linux/percpu.h> #include <asm/atomic.h> -typedef atomic64_t local_t; +typedef struct +{ + atomic_long_t a; +} local_t; -#define LOCAL_INIT(i) ATOMIC64_INIT(i) -#define local_read(v) atomic64_read(v) -#define local_set(v,i) atomic64_set(v,i) +#define LOCAL_INIT(i) { ATOMIC_LONG_INIT(i) } +#define local_read(l) atomic_long_read(&(l)->a) +#define local_set(l,i) atomic_long_set(&(l)->a, (i)) +#define local_inc(l) atomic_long_inc(&(l)->a) +#define local_dec(l) atomic_long_dec(&(l)->a) +#define local_add(i,l) atomic_long_add((i),(&(l)->a)) +#define local_sub(i,l) atomic_long_sub((i),(&(l)->a)) -#define local_inc(v) atomic64_inc(v) -#define local_dec(v) atomic64_dec(v) -#define local_add(i, v) atomic64_add(i, v) -#define local_sub(i, v) atomic64_sub(i, v) +static __inline__ long local_add_return(long i, local_t * l) +{ + long temp, result; + __asm__ __volatile__( + "1: ldq_l %0,%1\n" + " addq %0,%3,%2\n" + " addq %0,%3,%0\n" + " stq_c %0,%1\n" + " beq %0,2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + :"=&r" (temp), "=m" (l->a.counter), "=&r" (result) + :"Ir" (i), "m" (l->a.counter) : "memory"); + return result; +} -#define __local_inc(v) ((v)->counter++) -#define __local_dec(v) ((v)->counter++) -#define __local_add(i,v) ((v)->counter+=(i)) -#define __local_sub(i,v) ((v)->counter-=(i)) +static __inline__ long local_sub_return(long i, local_t * v) +{ + long temp, result; + __asm__ __volatile__( + "1: ldq_l %0,%1\n" + " subq %0,%3,%2\n" + " subq %0,%3,%0\n" + " stq_c %0,%1\n" + " beq %0,2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + :"=&r" (temp), "=m" (l->a.counter), "=&r" (result) + :"Ir" (i), "m" (l->a.counter) : "memory"); + return result; +} + +#define local_cmpxchg(l, old, new) \ + ((long)cmpxchg_local(&((l)->a.counter), old, new)) +#define local_xchg(l, new) (xchg_local(&((l)->a.counter), new)) + +/** + * local_add_unless - add unless the number is a given value + * @l: pointer of type local_t + * @a: the amount to add to l... + * @u: ...unless l is equal to u. + * + * Atomically adds @a to @l, so long as it was not @u. + * Returns non-zero if @l was not @u, and zero otherwise. 
+ */ +#define local_add_unless(l, a, u) \ +({ \ + long c, old; \ + c = local_read(l); \ + for (;;) { \ + if (unlikely(c == (u))) \ + break; \ + old = local_cmpxchg((l), c, c + (a)); \ + if (likely(old == c)) \ + break; \ + c = old; \ + } \ + c != (u); \ +}) +#define local_inc_not_zero(l) local_add_unless((l), 1, 0) + +#define local_add_negative(a, l) (local_add_return((a), (l)) < 0) + +#define local_dec_return(l) local_sub_return(1,(l)) + +#define local_inc_return(l) local_add_return(1,(l)) + +#define local_sub_and_test(i,l) (local_sub_return((i), (l)) == 0) + +#define local_inc_and_test(l) (local_add_return(1, (l)) == 0) + +#define local_dec_and_test(l) (local_sub_return(1, (l)) == 0) + +/* Verify if faster than atomic ops */ +#define __local_inc(l) ((l)->a.counter++) +#define __local_dec(l) ((l)->a.counter++) +#define __local_add(i,l) ((l)->a.counter+=(i)) +#define __local_sub(i,l) ((l)->a.counter-=(i)) /* Use these for per-cpu local_t variables: on some archs they are * much more efficient than these naive implementations. Note they take * a variable, not an address. */ -#define cpu_local_read(v) local_read(&__get_cpu_var(v)) -#define cpu_local_set(v, i) local_set(&__get_cpu_var(v), (i)) - -#define cpu_local_inc(v) local_inc(&__get_cpu_var(v)) -#define cpu_local_dec(v) local_dec(&__get_cpu_var(v)) -#define cpu_local_add(i, v) local_add((i), &__get_cpu_var(v)) -#define cpu_local_sub(i, v) local_sub((i), &__get_cpu_var(v)) - -#define __cpu_local_inc(v) __local_inc(&__get_cpu_var(v)) -#define __cpu_local_dec(v) __local_dec(&__get_cpu_var(v)) -#define __cpu_local_add(i, v) __local_add((i), &__get_cpu_var(v)) -#define __cpu_local_sub(i, v) __local_sub((i), &__get_cpu_var(v)) +#define cpu_local_read(l) local_read(&__get_cpu_var(l)) +#define cpu_local_set(l, i) local_set(&__get_cpu_var(l), (i)) + +#define cpu_local_inc(l) local_inc(&__get_cpu_var(l)) +#define cpu_local_dec(l) local_dec(&__get_cpu_var(l)) +#define cpu_local_add(i, l) local_add((i), &__get_cpu_var(l)) +#define cpu_local_sub(i, l) local_sub((i), &__get_cpu_var(l)) + +#define __cpu_local_inc(l) __local_inc(&__get_cpu_var(l)) +#define __cpu_local_dec(l) __local_dec(&__get_cpu_var(l)) +#define __cpu_local_add(i, l) __local_add((i), &__get_cpu_var(l)) +#define __cpu_local_sub(i, l) __local_sub((i), &__get_cpu_var(l)) #endif /* _ALPHA_LOCAL_H */ --- a/include/asm-generic/local.h +++ b/include/asm-generic/local.h @@ -33,6 +33,19 @@ #define local_dec(l) atomic_long_dec(&(l #define local_add(i,l) atomic_long_add((i),(&(l)->a)) #define local_sub(i,l) atomic_long_sub((i),(&(l)->a)) +#define local_sub_and_test(i, l) atomic_long_sub_and_test((i), (&(l)->a)) +#define local_dec_and_test(l) atomic_long_dec_and_test(&(l)->a) +#define local_inc_and_test(l) atomic_long_inc_and_test(&(l)->a) +#define local_add_negative(i, l) atomic_long_add_negative((i), (&(l)->a)) +#define local_add_return(i, l) atomic_long_add_return((i), (&(l)->a)) +#define local_sub_return(i, l) atomic_long_sub_return((i), (&(l)->a)) +#define local_inc_return(l) atomic_long_inc_return(&(l)->a) + +#define local_cmpxchg(l, old, new) atomic_long_cmpxchg((&(l)->a), (old), (new)) +#define local_xchg(l, new) atomic_long_xchg((&(l)->a), (new)) +#define local_add_unless(l, a, u) atomic_long_add_unless((&(l)->a), (a), (u)) +#define local_inc_not_zero(l) atomic_long_inc_not_zero(&(l)->a) + /* Non-atomic variants, ie. preemption disabled and won't be touched * in interrupt, etc. Some archs can optimize this case well. 
*/ #define __local_inc(l) local_set((l), local_read(l) + 1) @@ -44,19 +57,19 @@ #define __local_sub(i,l) local_set((l), * much more efficient than these naive implementations. Note they take * a variable (eg. mystruct.foo), not an address. */ -#define cpu_local_read(v) local_read(&__get_cpu_var(v)) -#define cpu_local_set(v, i) local_set(&__get_cpu_var(v), (i)) -#define cpu_local_inc(v) local_inc(&__get_cpu_var(v)) -#define cpu_local_dec(v) local_dec(&__get_cpu_var(v)) -#define cpu_local_add(i, v) local_add((i), &__get_cpu_var(v)) -#define cpu_local_sub(i, v) local_sub((i), &__get_cpu_var(v)) +#define cpu_local_read(l) local_read(&__get_cpu_var(l)) +#define cpu_local_set(l, i) local_set(&__get_cpu_var(l), (i)) +#define cpu_local_inc(l) local_inc(&__get_cpu_var(l)) +#define cpu_local_dec(l) local_dec(&__get_cpu_var(l)) +#define cpu_local_add(i, l) local_add((i), &__get_cpu_var(l)) +#define cpu_local_sub(i, l) local_sub((i), &__get_cpu_var(l)) /* Non-atomic increments, ie. preemption disabled and won't be touched * in interrupt, etc. Some archs can optimize this case well. */ -#define __cpu_local_inc(v) __local_inc(&__get_cpu_var(v)) -#define __cpu_local_dec(v) __local_dec(&__get_cpu_var(v)) -#define __cpu_local_add(i, v) __local_add((i), &__get_cpu_var(v)) -#define __cpu_local_sub(i, v) __local_sub((i), &__get_cpu_var(v)) +#define __cpu_local_inc(l) __local_inc(&__get_cpu_var(l)) +#define __cpu_local_dec(l) __local_dec(&__get_cpu_var(l)) +#define __cpu_local_add(i, l) __local_add((i), &__get_cpu_var(l)) +#define __cpu_local_sub(i, l) __local_sub((i), &__get_cpu_var(l)) #endif /* _ASM_GENERIC_LOCAL_H */ --- a/include/asm-i386/local.h +++ b/include/asm-i386/local.h @@ -2,47 +2,198 @@ #ifndef _ARCH_I386_LOCAL_H #define _ARCH_I386_LOCAL_H #include <linux/percpu.h> +#include <asm/system.h> +#include <asm/atomic.h> typedef struct { - volatile long counter; + atomic_long_t a; } local_t; -#define LOCAL_INIT(i) { (i) } +#define LOCAL_INIT(i) { ATOMIC_LONG_INIT(i) } -#define local_read(v) ((v)->counter) -#define local_set(v,i) (((v)->counter) = (i)) +#define local_read(l) atomic_long_read(&(l)->a) +#define local_set(l,i) atomic_long_set(&(l)->a, (i)) -static __inline__ void local_inc(local_t *v) +static __inline__ void local_inc(local_t *l) { __asm__ __volatile__( "incl %0" - :"+m" (v->counter)); + :"+m" (l->a.counter)); } -static __inline__ void local_dec(local_t *v) +static __inline__ void local_dec(local_t *l) { __asm__ __volatile__( "decl %0" - :"+m" (v->counter)); + :"+m" (l->a.counter)); } -static __inline__ void local_add(long i, local_t *v) +static __inline__ void local_add(long i, local_t *l) { __asm__ __volatile__( "addl %1,%0" - :"+m" (v->counter) + :"+m" (l->a.counter) :"ir" (i)); } -static __inline__ void local_sub(long i, local_t *v) +static __inline__ void local_sub(long i, local_t *l) { __asm__ __volatile__( "subl %1,%0" - :"+m" (v->counter) + :"+m" (l->a.counter) :"ir" (i)); } +/** + * local_sub_and_test - subtract value from variable and test result + * @i: integer value to subtract + * @l: pointer of type local_t + * + * Atomically subtracts @i from @l and returns + * true if the result is zero, or false for all + * other cases. 
+ */ +static __inline__ int local_sub_and_test(long i, local_t *l) +{ + unsigned char c; + + __asm__ __volatile__( + "subl %2,%0; sete %1" + :"+m" (l->a.counter), "=qm" (c) + :"ir" (i) : "memory"); + return c; +} + +/** + * local_dec_and_test - decrement and test + * @l: pointer of type local_t + * + * Atomically decrements @l by 1 and + * returns true if the result is 0, or false for all other + * cases. + */ +static __inline__ int local_dec_and_test(local_t *l) +{ + unsigned char c; + + __asm__ __volatile__( + "decl %0; sete %1" + :"+m" (l->a.counter), "=qm" (c) + : : "memory"); + return c != 0; +} + +/** + * local_inc_and_test - increment and test + * @l: pointer of type local_t + * + * Atomically increments @l by 1 + * and returns true if the result is zero, or false for all + * other cases. + */ +static __inline__ int local_inc_and_test(local_t *l) +{ + unsigned char c; + + __asm__ __volatile__( + "incl %0; sete %1" + :"+m" (l->a.counter), "=qm" (c) + : : "memory"); + return c != 0; +} + +/** + * local_add_negative - add and test if negative + * @l: pointer of type local_t + * @i: integer value to add + * + * Atomically adds @i to @l and returns true + * if the result is negative, or false when + * result is greater than or equal to zero. + */ +static __inline__ int local_add_negative(long i, local_t *l) +{ + unsigned char c; + + __asm__ __volatile__( + "addl %2,%0; sets %1" + :"+m" (l->a.counter), "=qm" (c) + :"ir" (i) : "memory"); + return c; +} + +/** + * local_add_return - add and return + * @l: pointer of type local_t + * @i: integer value to add + * + * Atomically adds @i to @l and returns @i + @l + */ +static __inline__ long local_add_return(long i, local_t *l) +{ + long __i; +#ifdef CONFIG_M386 + unsigned long flags; + if(unlikely(boot_cpu_data.x86==3)) + goto no_xadd; +#endif + /* Modern 486+ processor */ + __i = i; + __asm__ __volatile__( + "xaddl %0, %1;" + :"=r"(i) + :"m"(l->a.counter), "0"(i)); + return i + __i; + +#ifdef CONFIG_M386 +no_xadd: /* Legacy 386 processor */ + local_irq_save(flags); + __i = local_read(l); + local_set(l, i + __i); + local_irq_restore(flags); + return i + __i; +#endif +} + +static __inline__ long local_sub_return(long i, local_t *l) +{ + return local_add_return(-i,l); +} + +#define local_inc_return(l) (local_add_return(1,l)) +#define local_dec_return(l) (local_sub_return(1,l)) + +#define local_cmpxchg(l, o, n) \ + ((long)cmpxchg_local(&((l)->a.counter), (o), (n))) +/* Always has a lock prefix anyway */ +#define local_xchg(l, new) (xchg(&((l)->a.counter), new)) + +/** + * local_add_unless - add unless the number is a given value + * @l: pointer of type local_t + * @a: the amount to add to l... + * @u: ...unless l is equal to u. + * + * Atomically adds @a to @l, so long as it was not @u. + * Returns non-zero if @l was not @u, and zero otherwise. + */ +#define local_add_unless(l, a, u) \ +({ \ + long c, old; \ + c = local_read(l); \ + for (;;) { \ + if (unlikely(c == (u))) \ + break; \ + old = local_cmpxchg((l), c, c + (a)); \ + if (likely(old == c)) \ + break; \ + c = old; \ + } \ + c != (u); \ +}) +#define local_inc_not_zero(l) local_add_unless((l), 1, 0) + /* On x86, these are no better than the atomic variants. */ #define __local_inc(l) local_inc(l) #define __local_dec(l) local_dec(l) @@ -56,27 +207,27 @@ #define __local_sub(i,l) local_sub((i),( /* Need to disable preemption for the cpu local counters otherwise we could still access a variable of a previous CPU in a non atomic way. 
*/ -#define cpu_local_wrap_v(v) \ +#define cpu_local_wrap_v(l) \ ({ local_t res__; \ preempt_disable(); \ - res__ = (v); \ + res__ = (l); \ preempt_enable(); \ res__; }) -#define cpu_local_wrap(v) \ +#define cpu_local_wrap(l) \ ({ preempt_disable(); \ - v; \ + l; \ preempt_enable(); }) \ -#define cpu_local_read(v) cpu_local_wrap_v(local_read(&__get_cpu_var(v))) -#define cpu_local_set(v, i) cpu_local_wrap(local_set(&__get_cpu_var(v), (i))) -#define cpu_local_inc(v) cpu_local_wrap(local_inc(&__get_cpu_var(v))) -#define cpu_local_dec(v) cpu_local_wrap(local_dec(&__get_cpu_var(v))) -#define cpu_local_add(i, v) cpu_local_wrap(local_add((i), &__get_cpu_var(v))) -#define cpu_local_sub(i, v) cpu_local_wrap(local_sub((i), &__get_cpu_var(v))) - -#define __cpu_local_inc(v) cpu_local_inc(v) -#define __cpu_local_dec(v) cpu_local_dec(v) -#define __cpu_local_add(i, v) cpu_local_add((i), (v)) -#define __cpu_local_sub(i, v) cpu_local_sub((i), (v)) +#define cpu_local_read(l) cpu_local_wrap_v(local_read(&__get_cpu_var(l))) +#define cpu_local_set(l, i) cpu_local_wrap(local_set(&__get_cpu_var(l), (i))) +#define cpu_local_inc(l) cpu_local_wrap(local_inc(&__get_cpu_var(l))) +#define cpu_local_dec(l) cpu_local_wrap(local_dec(&__get_cpu_var(l))) +#define cpu_local_add(i, l) cpu_local_wrap(local_add((i), &__get_cpu_var(l))) +#define cpu_local_sub(i, l) cpu_local_wrap(local_sub((i), &__get_cpu_var(l))) + +#define __cpu_local_inc(l) cpu_local_inc(l) +#define __cpu_local_dec(l) cpu_local_dec(l) +#define __cpu_local_add(i, l) cpu_local_add((i), (l)) +#define __cpu_local_sub(i, l) cpu_local_sub((i), (l)) #endif /* _ARCH_I386_LOCAL_H */ --- a/include/asm-ia64/local.h +++ b/include/asm-ia64/local.h @@ -1,50 +1 @@ -#ifndef _ASM_IA64_LOCAL_H -#define _ASM_IA64_LOCAL_H - -/* - * Copyright (C) 2003 Hewlett-Packard Co - * David Mosberger-Tang <davidm@hpl.hp.com> - */ - -#include <linux/percpu.h> - -typedef struct { - atomic64_t val; -} local_t; - -#define LOCAL_INIT(i) ((local_t) { { (i) } }) -#define local_read(l) atomic64_read(&(l)->val) -#define local_set(l, i) atomic64_set(&(l)->val, i) -#define local_inc(l) atomic64_inc(&(l)->val) -#define local_dec(l) atomic64_dec(&(l)->val) -#define local_add(i, l) atomic64_add((i), &(l)->val) -#define local_sub(i, l) atomic64_sub((i), &(l)->val) - -/* Non-atomic variants, i.e., preemption disabled and won't be touched in interrupt, etc. */ - -#define __local_inc(l) (++(l)->val.counter) -#define __local_dec(l) (--(l)->val.counter) -#define __local_add(i,l) ((l)->val.counter += (i)) -#define __local_sub(i,l) ((l)->val.counter -= (i)) - -/* - * Use these for per-cpu local_t variables. Note they take a variable (eg. mystruct.foo), - * not an address. - */ -#define cpu_local_read(v) local_read(&__ia64_per_cpu_var(v)) -#define cpu_local_set(v, i) local_set(&__ia64_per_cpu_var(v), (i)) -#define cpu_local_inc(v) local_inc(&__ia64_per_cpu_var(v)) -#define cpu_local_dec(v) local_dec(&__ia64_per_cpu_var(v)) -#define cpu_local_add(i, v) local_add((i), &__ia64_per_cpu_var(v)) -#define cpu_local_sub(i, v) local_sub((i), &__ia64_per_cpu_var(v)) - -/* - * Non-atomic increments, i.e., preemption disabled and won't be touched in interrupt, - * etc. 
- */ -#define __cpu_local_inc(v) __local_inc(&__ia64_per_cpu_var(v)) -#define __cpu_local_dec(v) __local_dec(&__ia64_per_cpu_var(v)) -#define __cpu_local_add(i, v) __local_add((i), &__ia64_per_cpu_var(v)) -#define __cpu_local_sub(i, v) __local_sub((i), &__ia64_per_cpu_var(v)) - -#endif /* _ASM_IA64_LOCAL_H */ +#include <asm-generic/local.h> --- a/include/asm-mips/local.h +++ b/include/asm-mips/local.h @@ -1,60 +1 @@ -#ifndef _ASM_LOCAL_H -#define _ASM_LOCAL_H - -#include <linux/percpu.h> -#include <asm/atomic.h> - -#ifdef CONFIG_32BIT - -typedef atomic_t local_t; - -#define LOCAL_INIT(i) ATOMIC_INIT(i) -#define local_read(v) atomic_read(v) -#define local_set(v,i) atomic_set(v,i) - -#define local_inc(v) atomic_inc(v) -#define local_dec(v) atomic_dec(v) -#define local_add(i, v) atomic_add(i, v) -#define local_sub(i, v) atomic_sub(i, v) - -#endif - -#ifdef CONFIG_64BIT - -typedef atomic64_t local_t; - -#define LOCAL_INIT(i) ATOMIC64_INIT(i) -#define local_read(v) atomic64_read(v) -#define local_set(v,i) atomic64_set(v,i) - -#define local_inc(v) atomic64_inc(v) -#define local_dec(v) atomic64_dec(v) -#define local_add(i, v) atomic64_add(i, v) -#define local_sub(i, v) atomic64_sub(i, v) - -#endif - -#define __local_inc(v) ((v)->counter++) -#define __local_dec(v) ((v)->counter--) -#define __local_add(i,v) ((v)->counter+=(i)) -#define __local_sub(i,v) ((v)->counter-=(i)) - -/* - * Use these for per-cpu local_t variables: on some archs they are - * much more efficient than these naive implementations. Note they take - * a variable, not an address. - */ -#define cpu_local_read(v) local_read(&__get_cpu_var(v)) -#define cpu_local_set(v, i) local_set(&__get_cpu_var(v), (i)) - -#define cpu_local_inc(v) local_inc(&__get_cpu_var(v)) -#define cpu_local_dec(v) local_dec(&__get_cpu_var(v)) -#define cpu_local_add(i, v) local_add((i), &__get_cpu_var(v)) -#define cpu_local_sub(i, v) local_sub((i), &__get_cpu_var(v)) - -#define __cpu_local_inc(v) __local_inc(&__get_cpu_var(v)) -#define __cpu_local_dec(v) __local_dec(&__get_cpu_var(v)) -#define __cpu_local_add(i, v) __local_add((i), &__get_cpu_var(v)) -#define __cpu_local_sub(i, v) __local_sub((i), &__get_cpu_var(v)) - -#endif /* _ASM_LOCAL_H */ +#include <asm-generic/local.h> --- a/include/asm-parisc/local.h +++ b/include/asm-parisc/local.h @@ -1,40 +1 @@ -#ifndef _ARCH_PARISC_LOCAL_H -#define _ARCH_PARISC_LOCAL_H - -#include <linux/percpu.h> -#include <asm/atomic.h> - -typedef atomic_long_t local_t; - -#define LOCAL_INIT(i) ATOMIC_LONG_INIT(i) -#define local_read(v) atomic_long_read(v) -#define local_set(v,i) atomic_long_set(v,i) - -#define local_inc(v) atomic_long_inc(v) -#define local_dec(v) atomic_long_dec(v) -#define local_add(i, v) atomic_long_add(i, v) -#define local_sub(i, v) atomic_long_sub(i, v) - -#define __local_inc(v) ((v)->counter++) -#define __local_dec(v) ((v)->counter--) -#define __local_add(i,v) ((v)->counter+=(i)) -#define __local_sub(i,v) ((v)->counter-=(i)) - -/* Use these for per-cpu local_t variables: on some archs they are - * much more efficient than these naive implementations. Note they take - * a variable, not an address. 
- */ -#define cpu_local_read(v) local_read(&__get_cpu_var(v)) -#define cpu_local_set(v, i) local_set(&__get_cpu_var(v), (i)) - -#define cpu_local_inc(v) local_inc(&__get_cpu_var(v)) -#define cpu_local_dec(v) local_dec(&__get_cpu_var(v)) -#define cpu_local_add(i, v) local_add((i), &__get_cpu_var(v)) -#define cpu_local_sub(i, v) local_sub((i), &__get_cpu_var(v)) - -#define __cpu_local_inc(v) __local_inc(&__get_cpu_var(v)) -#define __cpu_local_dec(v) __local_dec(&__get_cpu_var(v)) -#define __cpu_local_add(i, v) __local_add((i), &__get_cpu_var(v)) -#define __cpu_local_sub(i, v) __local_sub((i), &__get_cpu_var(v)) - -#endif /* _ARCH_PARISC_LOCAL_H */ +#include <asm-generic/local.h> --- a/include/asm-powerpc/local.h +++ b/include/asm-powerpc/local.h @@ -1 +1,345 @@ -#include <asm-generic/local.h> +#ifndef _ARCH_POWERPC_LOCAL_H +#define _ARCH_POWERPC_LOCAL_H + +#include <linux/percpu.h> +#include <asm/atomic.h> + +typedef struct +{ + atomic_long_t a; +} local_t; + +#define LOCAL_INIT(i) { ATOMIC_LONG_INIT(i) } + +#define local_read(l) atomic_long_read(&(l)->a) +#define local_set(l,i) atomic_long_set(&(l)->a, (i)) + +#define local_add(i,l) atomic_long_add((i),(&(l)->a)) +#define local_sub(i,l) atomic_long_sub((i),(&(l)->a)) +#define local_inc(l) atomic_long_inc(&(l)->a) +#define local_dec(l) atomic_long_dec(&(l)->a) + +#ifndef __powerpc64__ + +static __inline__ int local_add_return(int a, local_t *l) +{ + int t; + + __asm__ __volatile__( +"1: lwarx %0,0,%2 # local_add_return\n\ + add %0,%1,%0\n" + PPC405_ERR77(0,%2) +" stwcx. %0,0,%2 \n\ + bne- 1b" + : "=&r" (t) + : "r" (a), "r" (&(l->a.counter)) + : "cc", "memory"); + + return t; +} + +#define local_add_negative(a, l) (local_add_return((a), (l)) < 0) + +static __inline__ int local_sub_return(int a, local_t *l) +{ + int t; + + __asm__ __volatile__( +"1: lwarx %0,0,%2 # local_sub_return\n\ + subf %0,%1,%0\n" + PPC405_ERR77(0,%2) +" stwcx. %0,0,%2 \n\ + bne- 1b" + : "=&r" (t) + : "r" (a), "r" (&(l->a.counter)) + : "cc", "memory"); + + return t; +} + +static __inline__ int local_inc_return(local_t *l) +{ + int t; + + __asm__ __volatile__( +"1: lwarx %0,0,%1 # local_inc_return\n\ + addic %0,%0,1\n" + PPC405_ERR77(0,%1) +" stwcx. %0,0,%1 \n\ + bne- 1b" + : "=&r" (t) + : "r" (&(l->a.counter)) + : "cc", "memory"); + + return t; +} + +/* + * local_inc_and_test - increment and test + * @l: pointer of type local_t + * + * Atomically increments @l by 1 + * and returns true if the result is zero, or false for all + * other cases. + */ +#define local_inc_and_test(l) (local_inc_return(l) == 0) + +static __inline__ int local_dec_return(local_t *l) +{ + int t; + + __asm__ __volatile__( +"1: lwarx %0,0,%1 # local_dec_return\n\ + addic %0,%0,-1\n" + PPC405_ERR77(0,%1) +" stwcx. %0,0,%1\n\ + bne- 1b" + : "=&r" (t) + : "r" (&(l->a.counter)) + : "cc", "memory"); + + return t; +} + +#define local_cmpxchg(l, o, n) \ + ((long)cmpxchg(&((l)->a.counter), (o), (n))) +#define local_xchg(l, new) (xchg(&((l)->a.counter), new)) + +/** + * local_add_unless - add unless the number is a given value + * @l: pointer of type local_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @l, so long as it was not @u. + * Returns non-zero if @l was not @u, and zero otherwise. + */ +static __inline__ int local_add_unless(local_t *l, int a, int u) +{ + int t; + + __asm__ __volatile__ ( +"1: lwarx %0,0,%1 # local_add_unless\n\ + cmpw 0,%0,%3 \n\ + beq- 2f \n\ + add %0,%2,%0 \n" + PPC405_ERR77(0,%2) +" stwcx. 
%0,0,%1 \n\ + bne- 1b \n" +" subf %0,%2,%0 \n\ +2:" + : "=&r" (t) + : "r" (&(l->a.counter)), "r" (a), "r" (u) + : "cc", "memory"); + + return t != u; +} + +#define local_inc_not_zero(l) local_add_unless((l), 1, 0) + +#define local_sub_and_test(a, l) (local_sub_return((a), (l)) == 0) +#define local_dec_and_test(l) (local_dec_return((l)) == 0) + +/* + * Atomically test *l and decrement if it is greater than 0. + * The function returns the old value of *l minus 1. + */ +static __inline__ int local_dec_if_positive(local_t *l) +{ + int t; + + __asm__ __volatile__( +"1: lwarx %0,0,%1 # local_dec_if_positive\n\ + addic. %0,%0,-1\n\ + blt- 2f\n" + PPC405_ERR77(0,%1) +" stwcx. %0,0,%1\n\ + bne- 1b" + "\n\ +2:" : "=&r" (t) + : "r" (&(l->a.counter)) + : "cc", "memory"); + + return t; +} + +#else /* __powerpc64__ */ + +static __inline__ long local_add_return(long a, local_t *l) +{ + long t; + + __asm__ __volatile__( +"1: ldarx %0,0,%2 # local_add_return\n\ + add %0,%1,%0\n\ + stdcx. %0,0,%2 \n\ + bne- 1b" + : "=&r" (t) + : "r" (a), "r" (&(l->a.counter)) + : "cc", "memory"); + + return t; +} + +#define local_add_negative(a, l) (local_add_return((a), (l)) < 0) + +static __inline__ long local_sub_return(long a, local_t *l) +{ + long t; + + __asm__ __volatile__( +"1: ldarx %0,0,%2 # local_sub_return\n\ + subf %0,%1,%0\n\ + stdcx. %0,0,%2 \n\ + bne- 1b" + : "=&r" (t) + : "r" (a), "r" (&(l->a.counter)) + : "cc", "memory"); + + return t; +} + +static __inline__ long local_inc_return(local_t *l) +{ + long t; + + __asm__ __volatile__( +"1: ldarx %0,0,%1 # local_inc_return\n\ + addic %0,%0,1\n\ + stdcx. %0,0,%1 \n\ + bne- 1b" + : "=&r" (t) + : "r" (&(l->a.counter)) + : "cc", "memory"); + + return t; +} + +/* + * local_inc_and_test - increment and test + * @l: pointer of type local_t + * + * Atomically increments @l by 1 + * and returns true if the result is zero, or false for all + * other cases. + */ +#define local_inc_and_test(l) (local_inc_return(l) == 0) + +static __inline__ long local_dec_return(local_t *l) +{ + long t; + + __asm__ __volatile__( +"1: ldarx %0,0,%1 # local_dec_return\n\ + addic %0,%0,-1\n\ + stdcx. %0,0,%1\n\ + bne- 1b" + : "=&r" (t) + : "r" (&(l->a.counter)) + : "cc", "memory"); + + return t; +} + +#define local_sub_and_test(a, l) (local_sub_return((a), (l)) == 0) +#define local_dec_and_test(l) (local_dec_return((l)) == 0) + +/* + * Atomically test *l and decrement if it is greater than 0. + * The function returns the old value of *l minus 1. + */ +static __inline__ long local_dec_if_positive(local_t *l) +{ + long t; + + __asm__ __volatile__( +"1: ldarx %0,0,%1 # local_dec_if_positive\n\ + addic. %0,%0,-1\n\ + blt- 2f\n\ + stdcx. %0,0,%1\n\ + bne- 1b" + "\n\ +2:" : "=&r" (t) + : "r" (&(l->a.counter)) + : "cc", "memory"); + + return t; +} + +#define local_cmpxchg(l, o, n) \ + ((__typeof__((l)->a.counter))cmpxchg_local(&((l)->a.counter), (o), (n))) +#define local_xchg(l, new) (xchg_local(&((l)->a.counter), new)) + +/** + * local_add_unless - add unless the number is a given value + * @l: pointer of type local_t + * @a: the amount to add to l... + * @u: ...unless l is equal to u. + * + * Atomically adds @a to @l, so long as it was not @u. + * Returns non-zero if @l was not @u, and zero otherwise. + */ +static __inline__ int local_add_unless(local_t *l, long a, long u) +{ + long t; + + __asm__ __volatile__ ( +"1: ldarx %0,0,%1 # local_add_unless\n\ + cmpd 0,%0,%3 \n\ + beq- 2f \n\ + add %0,%2,%0 \n" + PPC405_ERR77(0,%2) +" stdcx. 
%0,0,%1 \n\ + bne- 1b \n" +" subf %0,%2,%0 \n\ +2:" + : "=&r" (t) + : "r" (&(l->a.counter)), "r" (a), "r" (u) + : "cc", "memory"); + + return t != u; +} + +#define local_inc_not_zero(l) local_add_unless((l), 1, 0) + +#endif /* !__powerpc64__ */ + +/* Use these for per-cpu local_t variables: on some archs they are + * much more efficient than these naive implementations. Note they take + * a variable, not an address. + * + * This could be done better if we moved the per cpu data directly + * after GS. + */ + +#define __local_inc(l) ((l)->a.counter++) +#define __local_dec(l) ((l)->a.counter++) +#define __local_add(i,l) ((l)->a.counter+=(i)) +#define __local_sub(i,l) ((l)->a.counter-=(i)) + +/* Need to disable preemption for the cpu local counters otherwise we could + still access a variable of a previous CPU in a non atomic way. */ +#define cpu_local_wrap_v(l) \ + ({ local_t res__; \ + preempt_disable(); \ + res__ = (l); \ + preempt_enable(); \ + res__; }) +#define cpu_local_wrap(l) \ + ({ preempt_disable(); \ + l; \ + preempt_enable(); }) \ + +#define cpu_local_read(l) cpu_local_wrap_v(local_read(&__get_cpu_var(l))) +#define cpu_local_set(l, i) cpu_local_wrap(local_set(&__get_cpu_var(l), (i))) +#define cpu_local_inc(l) cpu_local_wrap(local_inc(&__get_cpu_var(l))) +#define cpu_local_dec(l) cpu_local_wrap(local_dec(&__get_cpu_var(l))) +#define cpu_local_add(i, l) cpu_local_wrap(local_add((i), &__get_cpu_var(l))) +#define cpu_local_sub(i, l) cpu_local_wrap(local_sub((i), &__get_cpu_var(l))) + +#define __cpu_local_inc(l) cpu_local_inc(l) +#define __cpu_local_dec(l) cpu_local_dec(l) +#define __cpu_local_add(i, l) cpu_local_add((i), (l)) +#define __cpu_local_sub(i, l) cpu_local_sub((i), (l)) + +#endif /* _ARCH_POWERPC_LOCAL_H */ --- a/include/asm-s390/local.h +++ b/include/asm-s390/local.h @@ -1,58 +1 @@ -#ifndef _ASM_LOCAL_H -#define _ASM_LOCAL_H - -#include <linux/percpu.h> -#include <asm/atomic.h> - -#ifndef __s390x__ - -typedef atomic_t local_t; - -#define LOCAL_INIT(i) ATOMIC_INIT(i) -#define local_read(v) atomic_read(v) -#define local_set(v,i) atomic_set(v,i) - -#define local_inc(v) atomic_inc(v) -#define local_dec(v) atomic_dec(v) -#define local_add(i, v) atomic_add(i, v) -#define local_sub(i, v) atomic_sub(i, v) - -#else - -typedef atomic64_t local_t; - -#define LOCAL_INIT(i) ATOMIC64_INIT(i) -#define local_read(v) atomic64_read(v) -#define local_set(v,i) atomic64_set(v,i) - -#define local_inc(v) atomic64_inc(v) -#define local_dec(v) atomic64_dec(v) -#define local_add(i, v) atomic64_add(i, v) -#define local_sub(i, v) atomic64_sub(i, v) - -#endif - -#define __local_inc(v) ((v)->counter++) -#define __local_dec(v) ((v)->counter--) -#define __local_add(i,v) ((v)->counter+=(i)) -#define __local_sub(i,v) ((v)->counter-=(i)) - -/* - * Use these for per-cpu local_t variables: on some archs they are - * much more efficient than these naive implementations. Note they take - * a variable, not an address. 
- */ -#define cpu_local_read(v) local_read(&__get_cpu_var(v)) -#define cpu_local_set(v, i) local_set(&__get_cpu_var(v), (i)) - -#define cpu_local_inc(v) local_inc(&__get_cpu_var(v)) -#define cpu_local_dec(v) local_dec(&__get_cpu_var(v)) -#define cpu_local_add(i, v) local_add((i), &__get_cpu_var(v)) -#define cpu_local_sub(i, v) local_sub((i), &__get_cpu_var(v)) - -#define __cpu_local_inc(v) __local_inc(&__get_cpu_var(v)) -#define __cpu_local_dec(v) __local_dec(&__get_cpu_var(v)) -#define __cpu_local_add(i, v) __local_add((i), &__get_cpu_var(v)) -#define __cpu_local_sub(i, v) __local_sub((i), &__get_cpu_var(v)) - -#endif /* _ASM_LOCAL_H */ +#include <asm-generic/local.h> --- a/include/asm-sparc64/local.h +++ b/include/asm-sparc64/local.h @@ -1,40 +1 @@ -#ifndef _ARCH_SPARC64_LOCAL_H -#define _ARCH_SPARC64_LOCAL_H - -#include <linux/percpu.h> -#include <asm/atomic.h> - -typedef atomic64_t local_t; - -#define LOCAL_INIT(i) ATOMIC64_INIT(i) -#define local_read(v) atomic64_read(v) -#define local_set(v,i) atomic64_set(v,i) - -#define local_inc(v) atomic64_inc(v) -#define local_dec(v) atomic64_dec(v) -#define local_add(i, v) atomic64_add(i, v) -#define local_sub(i, v) atomic64_sub(i, v) - -#define __local_inc(v) ((v)->counter++) -#define __local_dec(v) ((v)->counter--) -#define __local_add(i,v) ((v)->counter+=(i)) -#define __local_sub(i,v) ((v)->counter-=(i)) - -/* Use these for per-cpu local_t variables: on some archs they are - * much more efficient than these naive implementations. Note they take - * a variable, not an address. - */ -#define cpu_local_read(v) local_read(&__get_cpu_var(v)) -#define cpu_local_set(v, i) local_set(&__get_cpu_var(v), (i)) - -#define cpu_local_inc(v) local_inc(&__get_cpu_var(v)) -#define cpu_local_dec(v) local_dec(&__get_cpu_var(v)) -#define cpu_local_add(i, v) local_add((i), &__get_cpu_var(v)) -#define cpu_local_sub(i, v) local_sub((i), &__get_cpu_var(v)) - -#define __cpu_local_inc(v) __local_inc(&__get_cpu_var(v)) -#define __cpu_local_dec(v) __local_dec(&__get_cpu_var(v)) -#define __cpu_local_add(i, v) __local_add((i), &__get_cpu_var(v)) -#define __cpu_local_sub(i, v) __local_sub((i), &__get_cpu_var(v)) - -#endif /* _ARCH_SPARC64_LOCAL_H */ +#include <asm-generic/local.h> --- a/include/asm-x86_64/local.h +++ b/include/asm-x86_64/local.h @@ -2,49 +2,183 @@ #ifndef _ARCH_X8664_LOCAL_H #define _ARCH_X8664_LOCAL_H #include <linux/percpu.h> +#include <asm/atomic.h> typedef struct { - volatile long counter; + atomic_long_t a; } local_t; -#define LOCAL_INIT(i) { (i) } +#define LOCAL_INIT(i) { ATOMIC_LONG_INIT(i) } -#define local_read(v) ((v)->counter) -#define local_set(v,i) (((v)->counter) = (i)) +#define local_read(l) atomic_long_read(&(l)->a) +#define local_set(l,i) atomic_long_set(&(l)->a, (i)) -static inline void local_inc(local_t *v) +static inline void local_inc(local_t *l) { __asm__ __volatile__( "incq %0" - :"=m" (v->counter) - :"m" (v->counter)); + :"=m" (l->a.counter) + :"m" (l->a.counter)); } -static inline void local_dec(local_t *v) +static inline void local_dec(local_t *l) { __asm__ __volatile__( "decq %0" - :"=m" (v->counter) - :"m" (v->counter)); + :"=m" (l->a.counter) + :"m" (l->a.counter)); } -static inline void local_add(long i, local_t *v) +static inline void local_add(long i, local_t *l) { __asm__ __volatile__( "addq %1,%0" - :"=m" (v->counter) - :"ir" (i), "m" (v->counter)); + :"=m" (l->a.counter) + :"ir" (i), "m" (l->a.counter)); } -static inline void local_sub(long i, local_t *v) +static inline void local_sub(long i, local_t *l) { __asm__ 
__volatile__( "subq %1,%0" - :"=m" (v->counter) - :"ir" (i), "m" (v->counter)); + :"=m" (l->a.counter) + :"ir" (i), "m" (l->a.counter)); } +/** + * local_sub_and_test - subtract value from variable and test result + * @i: integer value to subtract + * @l: pointer to type local_t + * + * Atomically subtracts @i from @l and returns + * true if the result is zero, or false for all + * other cases. + */ +static __inline__ int local_sub_and_test(long i, local_t *l) +{ + unsigned char c; + + __asm__ __volatile__( + "subq %2,%0; sete %1" + :"=m" (l->a.counter), "=qm" (c) + :"ir" (i), "m" (l->a.counter) : "memory"); + return c; +} + +/** + * local_dec_and_test - decrement and test + * @l: pointer to type local_t + * + * Atomically decrements @l by 1 and + * returns true if the result is 0, or false for all other + * cases. + */ +static __inline__ int local_dec_and_test(local_t *l) +{ + unsigned char c; + + __asm__ __volatile__( + "decq %0; sete %1" + :"=m" (l->a.counter), "=qm" (c) + :"m" (l->a.counter) : "memory"); + return c != 0; +} + +/** + * local_inc_and_test - increment and test + * @l: pointer to type local_t + * + * Atomically increments @l by 1 + * and returns true if the result is zero, or false for all + * other cases. + */ +static __inline__ int local_inc_and_test(local_t *l) +{ + unsigned char c; + + __asm__ __volatile__( + "incq %0; sete %1" + :"=m" (l->a.counter), "=qm" (c) + :"m" (l->a.counter) : "memory"); + return c != 0; +} + +/** + * local_add_negative - add and test if negative + * @i: integer value to add + * @l: pointer to type local_t + * + * Atomically adds @i to @l and returns true + * if the result is negative, or false when + * result is greater than or equal to zero. + */ +static __inline__ int local_add_negative(long i, local_t *l) +{ + unsigned char c; + + __asm__ __volatile__( + "addq %2,%0; sets %1" + :"=m" (l->a.counter), "=qm" (c) + :"ir" (i), "m" (l->a.counter) : "memory"); + return c; +} + +/** + * local_add_return - add and return + * @i: integer value to add + * @l: pointer to type local_t + * + * Atomically adds @i to @l and returns @i + @l + */ +static __inline__ long local_add_return(long i, local_t *l) +{ + long __i = i; + __asm__ __volatile__( + "xaddq %0, %1;" + :"=r"(i) + :"m"(l->a.counter), "0"(i)); + return i + __i; +} + +static __inline__ long local_sub_return(long i, local_t *l) +{ + return local_add_return(-i,l); +} + +#define local_inc_return(l) (local_add_return(1,l)) +#define local_dec_return(l) (local_sub_return(1,l)) + +#define local_cmpxchg(l, o, n) \ + ((long)cmpxchg_local(&((l)->a.counter), (o), (n))) +/* Always has a lock prefix anyway */ +#define local_xchg(l, new) (xchg(&((l)->a.counter), new)) + +/** + * atomic_up_add_unless - add unless the number is a given value + * @l: pointer of type local_t + * @a: the amount to add to l... + * @u: ...unless l is equal to u. + * + * Atomically adds @a to @l, so long as it was not @u. + * Returns non-zero if @l was not @u, and zero otherwise. + */ +#define local_add_unless(l, a, u) \ +({ \ + long c, old; \ + c = local_read(l); \ + for (;;) { \ + if (unlikely(c == (u))) \ + break; \ + old = local_cmpxchg((l), c, c + (a)); \ + if (likely(old == c)) \ + break; \ + c = old; \ + } \ + c != (u); \ +}) +#define local_inc_not_zero(l) local_add_unless((l), 1, 0) + /* On x86-64 these are better than the atomic variants on SMP kernels because they dont use a lock prefix. 
*/ #define __local_inc(l) local_inc(l) @@ -62,27 +196,27 @@ #define __local_sub(i,l) local_sub((i),( /* Need to disable preemption for the cpu local counters otherwise we could still access a variable of a previous CPU in a non atomic way. */ -#define cpu_local_wrap_v(v) \ +#define cpu_local_wrap_v(l) \ ({ local_t res__; \ preempt_disable(); \ - res__ = (v); \ + res__ = (l); \ preempt_enable(); \ res__; }) -#define cpu_local_wrap(v) \ +#define cpu_local_wrap(l) \ ({ preempt_disable(); \ - v; \ + l; \ preempt_enable(); }) \ -#define cpu_local_read(v) cpu_local_wrap_v(local_read(&__get_cpu_var(v))) -#define cpu_local_set(v, i) cpu_local_wrap(local_set(&__get_cpu_var(v), (i))) -#define cpu_local_inc(v) cpu_local_wrap(local_inc(&__get_cpu_var(v))) -#define cpu_local_dec(v) cpu_local_wrap(local_dec(&__get_cpu_var(v))) -#define cpu_local_add(i, v) cpu_local_wrap(local_add((i), &__get_cpu_var(v))) -#define cpu_local_sub(i, v) cpu_local_wrap(local_sub((i), &__get_cpu_var(v))) +#define cpu_local_read(l) cpu_local_wrap_v(local_read(&__get_cpu_var(l))) +#define cpu_local_set(l, i) cpu_local_wrap(local_set(&__get_cpu_var(l), (i))) +#define cpu_local_inc(l) cpu_local_wrap(local_inc(&__get_cpu_var(l))) +#define cpu_local_dec(l) cpu_local_wrap(local_dec(&__get_cpu_var(l))) +#define cpu_local_add(i, l) cpu_local_wrap(local_add((i), &__get_cpu_var(l))) +#define cpu_local_sub(i, l) cpu_local_wrap(local_sub((i), &__get_cpu_var(l))) -#define __cpu_local_inc(v) cpu_local_inc(v) -#define __cpu_local_dec(v) cpu_local_dec(v) -#define __cpu_local_add(i, v) cpu_local_add((i), (v)) -#define __cpu_local_sub(i, v) cpu_local_sub((i), (v)) +#define __cpu_local_inc(l) cpu_local_inc(l) +#define __cpu_local_dec(l) cpu_local_dec(l) +#define __cpu_local_add(i, l) cpu_local_add((i), (l)) +#define __cpu_local_sub(i, l) cpu_local_sub((i), (l)) -#endif /* _ARCH_I386_LOCAL_H */ +#endif /* _ARCH_X8664_LOCAL_H */ ---END--- OpenPGP public key: http://krystal.dyndns.org:8080/key/compudj.gpg Key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68 ^ permalink raw reply [flat|nested] 13+ messages in thread
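[ Illustrative usage sketch -- not part of the patch above; the counter
  name and call sites are made up. It only shows the intended pattern:
  a per-cpu local_t updated by code running on the local CPU (interrupt
  handler, tracer probe, ...) and summed approximately from any CPU. ]

#include <linux/percpu.h>
#include <asm/local.h>

static DEFINE_PER_CPU(local_t, trace_events) = LOCAL_INIT(0);

/* Called on the local CPU, e.g. from an interrupt handler
 * (preemption is already disabled there). */
static void trace_event_hit(void)
{
	local_inc(&__get_cpu_var(trace_events));
}

/* From process context, the cpu_local_* wrappers handle preemption. */
static void trace_event_hit_any_context(void)
{
	cpu_local_inc(trace_events);
}

/* Approximate global total; remote counters are read without ordering
 * guarantees, so callers that need ordering must add explicit barriers. */
static long trace_event_total(void)
{
	long sum = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		sum += local_read(&per_cpu(trace_events, cpu));
	return sum;
}

On i386/x86-64 the increment above compiles to a plain incl/incq with no
lock prefix, per the local_inc() implementations in the patch, which is
where the benefit over a shared atomic_t counter comes from.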
* Re: [PATCH 2/2] local.h modifications for 2.6.19 2006-12-01 22:21 ` Mathieu Desnoyers @ 2006-12-05 17:14 ` Mathieu Desnoyers 0 siblings, 0 replies; 13+ messages in thread From: Mathieu Desnoyers @ 2006-12-05 17:14 UTC (permalink / raw) To: linux-kernel, Andrew Morton Cc: Christoph Hellwig, Nick Piggin, Ingo Molnar, Greg Kroah-Hartman, Martin J. Bligh, Michel Dagenais, ltt-dev, systemtap Hi, Here is the complete support for all atomic operations in local.h for all architectures. The local_t type is now identical on each architectures : it contains an atomic_long_t field, just like the asm-generic implementation. This patch applies on 2.6.19. It is currently useful to my LTTng tracer, which is not in the mainline. I could foresee other users : any frequently used per-cpu atomic counter that is used by any code executing asynchronously on a processor. Therefore, I think it should be considered for inclusion for 2.6.20. Regards, Mathieu ---BEGIN--- --- a/include/asm-alpha/local.h +++ b/include/asm-alpha/local.h @@ -4,37 +4,115 @@ #define _ALPHA_LOCAL_H #include <linux/percpu.h> #include <asm/atomic.h> -typedef atomic64_t local_t; +typedef struct +{ + atomic_long_t a; +} local_t; -#define LOCAL_INIT(i) ATOMIC64_INIT(i) -#define local_read(v) atomic64_read(v) -#define local_set(v,i) atomic64_set(v,i) +#define LOCAL_INIT(i) { ATOMIC_LONG_INIT(i) } +#define local_read(l) atomic_long_read(&(l)->a) +#define local_set(l,i) atomic_long_set(&(l)->a, (i)) +#define local_inc(l) atomic_long_inc(&(l)->a) +#define local_dec(l) atomic_long_dec(&(l)->a) +#define local_add(i,l) atomic_long_add((i),(&(l)->a)) +#define local_sub(i,l) atomic_long_sub((i),(&(l)->a)) -#define local_inc(v) atomic64_inc(v) -#define local_dec(v) atomic64_dec(v) -#define local_add(i, v) atomic64_add(i, v) -#define local_sub(i, v) atomic64_sub(i, v) +static __inline__ long local_add_return(long i, local_t * l) +{ + long temp, result; + __asm__ __volatile__( + "1: ldq_l %0,%1\n" + " addq %0,%3,%2\n" + " addq %0,%3,%0\n" + " stq_c %0,%1\n" + " beq %0,2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + :"=&r" (temp), "=m" (l->a.counter), "=&r" (result) + :"Ir" (i), "m" (l->a.counter) : "memory"); + return result; +} -#define __local_inc(v) ((v)->counter++) -#define __local_dec(v) ((v)->counter++) -#define __local_add(i,v) ((v)->counter+=(i)) -#define __local_sub(i,v) ((v)->counter-=(i)) +static __inline__ long local_sub_return(long i, local_t * v) +{ + long temp, result; + __asm__ __volatile__( + "1: ldq_l %0,%1\n" + " subq %0,%3,%2\n" + " subq %0,%3,%0\n" + " stq_c %0,%1\n" + " beq %0,2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + :"=&r" (temp), "=m" (l->a.counter), "=&r" (result) + :"Ir" (i), "m" (l->a.counter) : "memory"); + return result; +} + +#define local_cmpxchg(l, old, new) \ + ((long)cmpxchg_local(&((l)->a.counter), old, new)) +#define local_xchg(l, new) (xchg_local(&((l)->a.counter), new)) + +/** + * local_add_unless - add unless the number is a given value + * @l: pointer of type local_t + * @a: the amount to add to l... + * @u: ...unless l is equal to u. + * + * Atomically adds @a to @l, so long as it was not @u. + * Returns non-zero if @l was not @u, and zero otherwise. 
+ */ +#define local_add_unless(l, a, u) \ +({ \ + long c, old; \ + c = local_read(l); \ + for (;;) { \ + if (unlikely(c == (u))) \ + break; \ + old = local_cmpxchg((l), c, c + (a)); \ + if (likely(old == c)) \ + break; \ + c = old; \ + } \ + c != (u); \ +}) +#define local_inc_not_zero(l) local_add_unless((l), 1, 0) + +#define local_add_negative(a, l) (local_add_return((a), (l)) < 0) + +#define local_dec_return(l) local_sub_return(1,(l)) + +#define local_inc_return(l) local_add_return(1,(l)) + +#define local_sub_and_test(i,l) (local_sub_return((i), (l)) == 0) + +#define local_inc_and_test(l) (local_add_return(1, (l)) == 0) + +#define local_dec_and_test(l) (local_sub_return(1, (l)) == 0) + +/* Verify if faster than atomic ops */ +#define __local_inc(l) ((l)->a.counter++) +#define __local_dec(l) ((l)->a.counter++) +#define __local_add(i,l) ((l)->a.counter+=(i)) +#define __local_sub(i,l) ((l)->a.counter-=(i)) /* Use these for per-cpu local_t variables: on some archs they are * much more efficient than these naive implementations. Note they take * a variable, not an address. */ -#define cpu_local_read(v) local_read(&__get_cpu_var(v)) -#define cpu_local_set(v, i) local_set(&__get_cpu_var(v), (i)) - -#define cpu_local_inc(v) local_inc(&__get_cpu_var(v)) -#define cpu_local_dec(v) local_dec(&__get_cpu_var(v)) -#define cpu_local_add(i, v) local_add((i), &__get_cpu_var(v)) -#define cpu_local_sub(i, v) local_sub((i), &__get_cpu_var(v)) - -#define __cpu_local_inc(v) __local_inc(&__get_cpu_var(v)) -#define __cpu_local_dec(v) __local_dec(&__get_cpu_var(v)) -#define __cpu_local_add(i, v) __local_add((i), &__get_cpu_var(v)) -#define __cpu_local_sub(i, v) __local_sub((i), &__get_cpu_var(v)) +#define cpu_local_read(l) local_read(&__get_cpu_var(l)) +#define cpu_local_set(l, i) local_set(&__get_cpu_var(l), (i)) + +#define cpu_local_inc(l) local_inc(&__get_cpu_var(l)) +#define cpu_local_dec(l) local_dec(&__get_cpu_var(l)) +#define cpu_local_add(i, l) local_add((i), &__get_cpu_var(l)) +#define cpu_local_sub(i, l) local_sub((i), &__get_cpu_var(l)) + +#define __cpu_local_inc(l) __local_inc(&__get_cpu_var(l)) +#define __cpu_local_dec(l) __local_dec(&__get_cpu_var(l)) +#define __cpu_local_add(i, l) __local_add((i), &__get_cpu_var(l)) +#define __cpu_local_sub(i, l) __local_sub((i), &__get_cpu_var(l)) #endif /* _ALPHA_LOCAL_H */ --- a/include/asm-generic/local.h +++ b/include/asm-generic/local.h @@ -33,6 +33,19 @@ #define local_dec(l) atomic_long_dec(&(l #define local_add(i,l) atomic_long_add((i),(&(l)->a)) #define local_sub(i,l) atomic_long_sub((i),(&(l)->a)) +#define local_sub_and_test(i, l) atomic_long_sub_and_test((i), (&(l)->a)) +#define local_dec_and_test(l) atomic_long_dec_and_test(&(l)->a) +#define local_inc_and_test(l) atomic_long_inc_and_test(&(l)->a) +#define local_add_negative(i, l) atomic_long_add_negative((i), (&(l)->a)) +#define local_add_return(i, l) atomic_long_add_return((i), (&(l)->a)) +#define local_sub_return(i, l) atomic_long_sub_return((i), (&(l)->a)) +#define local_inc_return(l) atomic_long_inc_return(&(l)->a) + +#define local_cmpxchg(l, old, new) atomic_long_cmpxchg((&(l)->a), (old), (new)) +#define local_xchg(l, new) atomic_long_xchg((&(l)->a), (new)) +#define local_add_unless(l, a, u) atomic_long_add_unless((&(l)->a), (a), (u)) +#define local_inc_not_zero(l) atomic_long_inc_not_zero(&(l)->a) + /* Non-atomic variants, ie. preemption disabled and won't be touched * in interrupt, etc. Some archs can optimize this case well. 
*/ #define __local_inc(l) local_set((l), local_read(l) + 1) @@ -44,19 +57,19 @@ #define __local_sub(i,l) local_set((l), * much more efficient than these naive implementations. Note they take * a variable (eg. mystruct.foo), not an address. */ -#define cpu_local_read(v) local_read(&__get_cpu_var(v)) -#define cpu_local_set(v, i) local_set(&__get_cpu_var(v), (i)) -#define cpu_local_inc(v) local_inc(&__get_cpu_var(v)) -#define cpu_local_dec(v) local_dec(&__get_cpu_var(v)) -#define cpu_local_add(i, v) local_add((i), &__get_cpu_var(v)) -#define cpu_local_sub(i, v) local_sub((i), &__get_cpu_var(v)) +#define cpu_local_read(l) local_read(&__get_cpu_var(l)) +#define cpu_local_set(l, i) local_set(&__get_cpu_var(l), (i)) +#define cpu_local_inc(l) local_inc(&__get_cpu_var(l)) +#define cpu_local_dec(l) local_dec(&__get_cpu_var(l)) +#define cpu_local_add(i, l) local_add((i), &__get_cpu_var(l)) +#define cpu_local_sub(i, l) local_sub((i), &__get_cpu_var(l)) /* Non-atomic increments, ie. preemption disabled and won't be touched * in interrupt, etc. Some archs can optimize this case well. */ -#define __cpu_local_inc(v) __local_inc(&__get_cpu_var(v)) -#define __cpu_local_dec(v) __local_dec(&__get_cpu_var(v)) -#define __cpu_local_add(i, v) __local_add((i), &__get_cpu_var(v)) -#define __cpu_local_sub(i, v) __local_sub((i), &__get_cpu_var(v)) +#define __cpu_local_inc(l) __local_inc(&__get_cpu_var(l)) +#define __cpu_local_dec(l) __local_dec(&__get_cpu_var(l)) +#define __cpu_local_add(i, l) __local_add((i), &__get_cpu_var(l)) +#define __cpu_local_sub(i, l) __local_sub((i), &__get_cpu_var(l)) #endif /* _ASM_GENERIC_LOCAL_H */ --- a/include/asm-i386/local.h +++ b/include/asm-i386/local.h @@ -2,47 +2,198 @@ #ifndef _ARCH_I386_LOCAL_H #define _ARCH_I386_LOCAL_H #include <linux/percpu.h> +#include <asm/system.h> +#include <asm/atomic.h> typedef struct { - volatile long counter; + atomic_long_t a; } local_t; -#define LOCAL_INIT(i) { (i) } +#define LOCAL_INIT(i) { ATOMIC_LONG_INIT(i) } -#define local_read(v) ((v)->counter) -#define local_set(v,i) (((v)->counter) = (i)) +#define local_read(l) atomic_long_read(&(l)->a) +#define local_set(l,i) atomic_long_set(&(l)->a, (i)) -static __inline__ void local_inc(local_t *v) +static __inline__ void local_inc(local_t *l) { __asm__ __volatile__( "incl %0" - :"+m" (v->counter)); + :"+m" (l->a.counter)); } -static __inline__ void local_dec(local_t *v) +static __inline__ void local_dec(local_t *l) { __asm__ __volatile__( "decl %0" - :"+m" (v->counter)); + :"+m" (l->a.counter)); } -static __inline__ void local_add(long i, local_t *v) +static __inline__ void local_add(long i, local_t *l) { __asm__ __volatile__( "addl %1,%0" - :"+m" (v->counter) + :"+m" (l->a.counter) :"ir" (i)); } -static __inline__ void local_sub(long i, local_t *v) +static __inline__ void local_sub(long i, local_t *l) { __asm__ __volatile__( "subl %1,%0" - :"+m" (v->counter) + :"+m" (l->a.counter) :"ir" (i)); } +/** + * local_sub_and_test - subtract value from variable and test result + * @i: integer value to subtract + * @l: pointer of type local_t + * + * Atomically subtracts @i from @l and returns + * true if the result is zero, or false for all + * other cases. 
+ */ +static __inline__ int local_sub_and_test(long i, local_t *l) +{ + unsigned char c; + + __asm__ __volatile__( + "subl %2,%0; sete %1" + :"+m" (l->a.counter), "=qm" (c) + :"ir" (i) : "memory"); + return c; +} + +/** + * local_dec_and_test - decrement and test + * @l: pointer of type local_t + * + * Atomically decrements @l by 1 and + * returns true if the result is 0, or false for all other + * cases. + */ +static __inline__ int local_dec_and_test(local_t *l) +{ + unsigned char c; + + __asm__ __volatile__( + "decl %0; sete %1" + :"+m" (l->a.counter), "=qm" (c) + : : "memory"); + return c != 0; +} + +/** + * local_inc_and_test - increment and test + * @l: pointer of type local_t + * + * Atomically increments @l by 1 + * and returns true if the result is zero, or false for all + * other cases. + */ +static __inline__ int local_inc_and_test(local_t *l) +{ + unsigned char c; + + __asm__ __volatile__( + "incl %0; sete %1" + :"+m" (l->a.counter), "=qm" (c) + : : "memory"); + return c != 0; +} + +/** + * local_add_negative - add and test if negative + * @l: pointer of type local_t + * @i: integer value to add + * + * Atomically adds @i to @l and returns true + * if the result is negative, or false when + * result is greater than or equal to zero. + */ +static __inline__ int local_add_negative(long i, local_t *l) +{ + unsigned char c; + + __asm__ __volatile__( + "addl %2,%0; sets %1" + :"+m" (l->a.counter), "=qm" (c) + :"ir" (i) : "memory"); + return c; +} + +/** + * local_add_return - add and return + * @l: pointer of type local_t + * @i: integer value to add + * + * Atomically adds @i to @l and returns @i + @l + */ +static __inline__ long local_add_return(long i, local_t *l) +{ + long __i; +#ifdef CONFIG_M386 + unsigned long flags; + if(unlikely(boot_cpu_data.x86==3)) + goto no_xadd; +#endif + /* Modern 486+ processor */ + __i = i; + __asm__ __volatile__( + "xaddl %0, %1;" + :"=r"(i) + :"m"(l->a.counter), "0"(i)); + return i + __i; + +#ifdef CONFIG_M386 +no_xadd: /* Legacy 386 processor */ + local_irq_save(flags); + __i = local_read(l); + local_set(l, i + __i); + local_irq_restore(flags); + return i + __i; +#endif +} + +static __inline__ long local_sub_return(long i, local_t *l) +{ + return local_add_return(-i,l); +} + +#define local_inc_return(l) (local_add_return(1,l)) +#define local_dec_return(l) (local_sub_return(1,l)) + +#define local_cmpxchg(l, o, n) \ + ((long)cmpxchg_local(&((l)->a.counter), (o), (n))) +/* Always has a lock prefix anyway */ +#define local_xchg(l, new) (xchg(&((l)->a.counter), new)) + +/** + * local_add_unless - add unless the number is a given value + * @l: pointer of type local_t + * @a: the amount to add to l... + * @u: ...unless l is equal to u. + * + * Atomically adds @a to @l, so long as it was not @u. + * Returns non-zero if @l was not @u, and zero otherwise. + */ +#define local_add_unless(l, a, u) \ +({ \ + long c, old; \ + c = local_read(l); \ + for (;;) { \ + if (unlikely(c == (u))) \ + break; \ + old = local_cmpxchg((l), c, c + (a)); \ + if (likely(old == c)) \ + break; \ + c = old; \ + } \ + c != (u); \ +}) +#define local_inc_not_zero(l) local_add_unless((l), 1, 0) + /* On x86, these are no better than the atomic variants. */ #define __local_inc(l) local_inc(l) #define __local_dec(l) local_dec(l) @@ -56,27 +207,27 @@ #define __local_sub(i,l) local_sub((i),( /* Need to disable preemption for the cpu local counters otherwise we could still access a variable of a previous CPU in a non atomic way. 
*/ -#define cpu_local_wrap_v(v) \ +#define cpu_local_wrap_v(l) \ ({ local_t res__; \ preempt_disable(); \ - res__ = (v); \ + res__ = (l); \ preempt_enable(); \ res__; }) -#define cpu_local_wrap(v) \ +#define cpu_local_wrap(l) \ ({ preempt_disable(); \ - v; \ + l; \ preempt_enable(); }) \ -#define cpu_local_read(v) cpu_local_wrap_v(local_read(&__get_cpu_var(v))) -#define cpu_local_set(v, i) cpu_local_wrap(local_set(&__get_cpu_var(v), (i))) -#define cpu_local_inc(v) cpu_local_wrap(local_inc(&__get_cpu_var(v))) -#define cpu_local_dec(v) cpu_local_wrap(local_dec(&__get_cpu_var(v))) -#define cpu_local_add(i, v) cpu_local_wrap(local_add((i), &__get_cpu_var(v))) -#define cpu_local_sub(i, v) cpu_local_wrap(local_sub((i), &__get_cpu_var(v))) - -#define __cpu_local_inc(v) cpu_local_inc(v) -#define __cpu_local_dec(v) cpu_local_dec(v) -#define __cpu_local_add(i, v) cpu_local_add((i), (v)) -#define __cpu_local_sub(i, v) cpu_local_sub((i), (v)) +#define cpu_local_read(l) cpu_local_wrap_v(local_read(&__get_cpu_var(l))) +#define cpu_local_set(l, i) cpu_local_wrap(local_set(&__get_cpu_var(l), (i))) +#define cpu_local_inc(l) cpu_local_wrap(local_inc(&__get_cpu_var(l))) +#define cpu_local_dec(l) cpu_local_wrap(local_dec(&__get_cpu_var(l))) +#define cpu_local_add(i, l) cpu_local_wrap(local_add((i), &__get_cpu_var(l))) +#define cpu_local_sub(i, l) cpu_local_wrap(local_sub((i), &__get_cpu_var(l))) + +#define __cpu_local_inc(l) cpu_local_inc(l) +#define __cpu_local_dec(l) cpu_local_dec(l) +#define __cpu_local_add(i, l) cpu_local_add((i), (l)) +#define __cpu_local_sub(i, l) cpu_local_sub((i), (l)) #endif /* _ARCH_I386_LOCAL_H */ --- a/include/asm-ia64/local.h +++ b/include/asm-ia64/local.h @@ -1,50 +1 @@ -#ifndef _ASM_IA64_LOCAL_H -#define _ASM_IA64_LOCAL_H - -/* - * Copyright (C) 2003 Hewlett-Packard Co - * David Mosberger-Tang <davidm@hpl.hp.com> - */ - -#include <linux/percpu.h> - -typedef struct { - atomic64_t val; -} local_t; - -#define LOCAL_INIT(i) ((local_t) { { (i) } }) -#define local_read(l) atomic64_read(&(l)->val) -#define local_set(l, i) atomic64_set(&(l)->val, i) -#define local_inc(l) atomic64_inc(&(l)->val) -#define local_dec(l) atomic64_dec(&(l)->val) -#define local_add(i, l) atomic64_add((i), &(l)->val) -#define local_sub(i, l) atomic64_sub((i), &(l)->val) - -/* Non-atomic variants, i.e., preemption disabled and won't be touched in interrupt, etc. */ - -#define __local_inc(l) (++(l)->val.counter) -#define __local_dec(l) (--(l)->val.counter) -#define __local_add(i,l) ((l)->val.counter += (i)) -#define __local_sub(i,l) ((l)->val.counter -= (i)) - -/* - * Use these for per-cpu local_t variables. Note they take a variable (eg. mystruct.foo), - * not an address. - */ -#define cpu_local_read(v) local_read(&__ia64_per_cpu_var(v)) -#define cpu_local_set(v, i) local_set(&__ia64_per_cpu_var(v), (i)) -#define cpu_local_inc(v) local_inc(&__ia64_per_cpu_var(v)) -#define cpu_local_dec(v) local_dec(&__ia64_per_cpu_var(v)) -#define cpu_local_add(i, v) local_add((i), &__ia64_per_cpu_var(v)) -#define cpu_local_sub(i, v) local_sub((i), &__ia64_per_cpu_var(v)) - -/* - * Non-atomic increments, i.e., preemption disabled and won't be touched in interrupt, - * etc. 
- */ -#define __cpu_local_inc(v) __local_inc(&__ia64_per_cpu_var(v)) -#define __cpu_local_dec(v) __local_dec(&__ia64_per_cpu_var(v)) -#define __cpu_local_add(i, v) __local_add((i), &__ia64_per_cpu_var(v)) -#define __cpu_local_sub(i, v) __local_sub((i), &__ia64_per_cpu_var(v)) - -#endif /* _ASM_IA64_LOCAL_H */ +#include <asm-generic/local.h> --- a/include/asm-mips/local.h +++ b/include/asm-mips/local.h @@ -1,60 +1 @@ -#ifndef _ASM_LOCAL_H -#define _ASM_LOCAL_H - -#include <linux/percpu.h> -#include <asm/atomic.h> - -#ifdef CONFIG_32BIT - -typedef atomic_t local_t; - -#define LOCAL_INIT(i) ATOMIC_INIT(i) -#define local_read(v) atomic_read(v) -#define local_set(v,i) atomic_set(v,i) - -#define local_inc(v) atomic_inc(v) -#define local_dec(v) atomic_dec(v) -#define local_add(i, v) atomic_add(i, v) -#define local_sub(i, v) atomic_sub(i, v) - -#endif - -#ifdef CONFIG_64BIT - -typedef atomic64_t local_t; - -#define LOCAL_INIT(i) ATOMIC64_INIT(i) -#define local_read(v) atomic64_read(v) -#define local_set(v,i) atomic64_set(v,i) - -#define local_inc(v) atomic64_inc(v) -#define local_dec(v) atomic64_dec(v) -#define local_add(i, v) atomic64_add(i, v) -#define local_sub(i, v) atomic64_sub(i, v) - -#endif - -#define __local_inc(v) ((v)->counter++) -#define __local_dec(v) ((v)->counter--) -#define __local_add(i,v) ((v)->counter+=(i)) -#define __local_sub(i,v) ((v)->counter-=(i)) - -/* - * Use these for per-cpu local_t variables: on some archs they are - * much more efficient than these naive implementations. Note they take - * a variable, not an address. - */ -#define cpu_local_read(v) local_read(&__get_cpu_var(v)) -#define cpu_local_set(v, i) local_set(&__get_cpu_var(v), (i)) - -#define cpu_local_inc(v) local_inc(&__get_cpu_var(v)) -#define cpu_local_dec(v) local_dec(&__get_cpu_var(v)) -#define cpu_local_add(i, v) local_add((i), &__get_cpu_var(v)) -#define cpu_local_sub(i, v) local_sub((i), &__get_cpu_var(v)) - -#define __cpu_local_inc(v) __local_inc(&__get_cpu_var(v)) -#define __cpu_local_dec(v) __local_dec(&__get_cpu_var(v)) -#define __cpu_local_add(i, v) __local_add((i), &__get_cpu_var(v)) -#define __cpu_local_sub(i, v) __local_sub((i), &__get_cpu_var(v)) - -#endif /* _ASM_LOCAL_H */ +#include <asm-generic/local.h> --- a/include/asm-parisc/local.h +++ b/include/asm-parisc/local.h @@ -1,40 +1 @@ -#ifndef _ARCH_PARISC_LOCAL_H -#define _ARCH_PARISC_LOCAL_H - -#include <linux/percpu.h> -#include <asm/atomic.h> - -typedef atomic_long_t local_t; - -#define LOCAL_INIT(i) ATOMIC_LONG_INIT(i) -#define local_read(v) atomic_long_read(v) -#define local_set(v,i) atomic_long_set(v,i) - -#define local_inc(v) atomic_long_inc(v) -#define local_dec(v) atomic_long_dec(v) -#define local_add(i, v) atomic_long_add(i, v) -#define local_sub(i, v) atomic_long_sub(i, v) - -#define __local_inc(v) ((v)->counter++) -#define __local_dec(v) ((v)->counter--) -#define __local_add(i,v) ((v)->counter+=(i)) -#define __local_sub(i,v) ((v)->counter-=(i)) - -/* Use these for per-cpu local_t variables: on some archs they are - * much more efficient than these naive implementations. Note they take - * a variable, not an address. 
- */ -#define cpu_local_read(v) local_read(&__get_cpu_var(v)) -#define cpu_local_set(v, i) local_set(&__get_cpu_var(v), (i)) - -#define cpu_local_inc(v) local_inc(&__get_cpu_var(v)) -#define cpu_local_dec(v) local_dec(&__get_cpu_var(v)) -#define cpu_local_add(i, v) local_add((i), &__get_cpu_var(v)) -#define cpu_local_sub(i, v) local_sub((i), &__get_cpu_var(v)) - -#define __cpu_local_inc(v) __local_inc(&__get_cpu_var(v)) -#define __cpu_local_dec(v) __local_dec(&__get_cpu_var(v)) -#define __cpu_local_add(i, v) __local_add((i), &__get_cpu_var(v)) -#define __cpu_local_sub(i, v) __local_sub((i), &__get_cpu_var(v)) - -#endif /* _ARCH_PARISC_LOCAL_H */ +#include <asm-generic/local.h> --- a/include/asm-powerpc/local.h +++ b/include/asm-powerpc/local.h @@ -1 +1,345 @@ -#include <asm-generic/local.h> +#ifndef _ARCH_POWERPC_LOCAL_H +#define _ARCH_POWERPC_LOCAL_H + +#include <linux/percpu.h> +#include <asm/atomic.h> + +typedef struct +{ + atomic_long_t a; +} local_t; + +#define LOCAL_INIT(i) { ATOMIC_LONG_INIT(i) } + +#define local_read(l) atomic_long_read(&(l)->a) +#define local_set(l,i) atomic_long_set(&(l)->a, (i)) + +#define local_add(i,l) atomic_long_add((i),(&(l)->a)) +#define local_sub(i,l) atomic_long_sub((i),(&(l)->a)) +#define local_inc(l) atomic_long_inc(&(l)->a) +#define local_dec(l) atomic_long_dec(&(l)->a) + +#ifndef __powerpc64__ + +static __inline__ int local_add_return(int a, local_t *l) +{ + int t; + + __asm__ __volatile__( +"1: lwarx %0,0,%2 # local_add_return\n\ + add %0,%1,%0\n" + PPC405_ERR77(0,%2) +" stwcx. %0,0,%2 \n\ + bne- 1b" + : "=&r" (t) + : "r" (a), "r" (&(l->a.counter)) + : "cc", "memory"); + + return t; +} + +#define local_add_negative(a, l) (local_add_return((a), (l)) < 0) + +static __inline__ int local_sub_return(int a, local_t *l) +{ + int t; + + __asm__ __volatile__( +"1: lwarx %0,0,%2 # local_sub_return\n\ + subf %0,%1,%0\n" + PPC405_ERR77(0,%2) +" stwcx. %0,0,%2 \n\ + bne- 1b" + : "=&r" (t) + : "r" (a), "r" (&(l->a.counter)) + : "cc", "memory"); + + return t; +} + +static __inline__ int local_inc_return(local_t *l) +{ + int t; + + __asm__ __volatile__( +"1: lwarx %0,0,%1 # local_inc_return\n\ + addic %0,%0,1\n" + PPC405_ERR77(0,%1) +" stwcx. %0,0,%1 \n\ + bne- 1b" + : "=&r" (t) + : "r" (&(l->a.counter)) + : "cc", "memory"); + + return t; +} + +/* + * local_inc_and_test - increment and test + * @l: pointer of type local_t + * + * Atomically increments @l by 1 + * and returns true if the result is zero, or false for all + * other cases. + */ +#define local_inc_and_test(l) (local_inc_return(l) == 0) + +static __inline__ int local_dec_return(local_t *l) +{ + int t; + + __asm__ __volatile__( +"1: lwarx %0,0,%1 # local_dec_return\n\ + addic %0,%0,-1\n" + PPC405_ERR77(0,%1) +" stwcx. %0,0,%1\n\ + bne- 1b" + : "=&r" (t) + : "r" (&(l->a.counter)) + : "cc", "memory"); + + return t; +} + +#define local_cmpxchg(l, o, n) \ + ((long)cmpxchg(&((l)->a.counter), (o), (n))) +#define local_xchg(l, new) (xchg(&((l)->a.counter), new)) + +/** + * local_add_unless - add unless the number is a given value + * @l: pointer of type local_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @l, so long as it was not @u. + * Returns non-zero if @l was not @u, and zero otherwise. + */ +static __inline__ int local_add_unless(local_t *l, int a, int u) +{ + int t; + + __asm__ __volatile__ ( +"1: lwarx %0,0,%1 # local_add_unless\n\ + cmpw 0,%0,%3 \n\ + beq- 2f \n\ + add %0,%2,%0 \n" + PPC405_ERR77(0,%2) +" stwcx. 
%0,0,%1 \n\ + bne- 1b \n" +" subf %0,%2,%0 \n\ +2:" + : "=&r" (t) + : "r" (&(l->a.counter)), "r" (a), "r" (u) + : "cc", "memory"); + + return t != u; +} + +#define local_inc_not_zero(l) local_add_unless((l), 1, 0) + +#define local_sub_and_test(a, l) (local_sub_return((a), (l)) == 0) +#define local_dec_and_test(l) (local_dec_return((l)) == 0) + +/* + * Atomically test *l and decrement if it is greater than 0. + * The function returns the old value of *l minus 1. + */ +static __inline__ int local_dec_if_positive(local_t *l) +{ + int t; + + __asm__ __volatile__( +"1: lwarx %0,0,%1 # local_dec_if_positive\n\ + addic. %0,%0,-1\n\ + blt- 2f\n" + PPC405_ERR77(0,%1) +" stwcx. %0,0,%1\n\ + bne- 1b" + "\n\ +2:" : "=&r" (t) + : "r" (&(l->a.counter)) + : "cc", "memory"); + + return t; +} + +#else /* __powerpc64__ */ + +static __inline__ long local_add_return(long a, local_t *l) +{ + long t; + + __asm__ __volatile__( +"1: ldarx %0,0,%2 # local_add_return\n\ + add %0,%1,%0\n\ + stdcx. %0,0,%2 \n\ + bne- 1b" + : "=&r" (t) + : "r" (a), "r" (&(l->a.counter)) + : "cc", "memory"); + + return t; +} + +#define local_add_negative(a, l) (local_add_return((a), (l)) < 0) + +static __inline__ long local_sub_return(long a, local_t *l) +{ + long t; + + __asm__ __volatile__( +"1: ldarx %0,0,%2 # local_sub_return\n\ + subf %0,%1,%0\n\ + stdcx. %0,0,%2 \n\ + bne- 1b" + : "=&r" (t) + : "r" (a), "r" (&(l->a.counter)) + : "cc", "memory"); + + return t; +} + +static __inline__ long local_inc_return(local_t *l) +{ + long t; + + __asm__ __volatile__( +"1: ldarx %0,0,%1 # local_inc_return\n\ + addic %0,%0,1\n\ + stdcx. %0,0,%1 \n\ + bne- 1b" + : "=&r" (t) + : "r" (&(l->a.counter)) + : "cc", "memory"); + + return t; +} + +/* + * local_inc_and_test - increment and test + * @l: pointer of type local_t + * + * Atomically increments @l by 1 + * and returns true if the result is zero, or false for all + * other cases. + */ +#define local_inc_and_test(l) (local_inc_return(l) == 0) + +static __inline__ long local_dec_return(local_t *l) +{ + long t; + + __asm__ __volatile__( +"1: ldarx %0,0,%1 # local_dec_return\n\ + addic %0,%0,-1\n\ + stdcx. %0,0,%1\n\ + bne- 1b" + : "=&r" (t) + : "r" (&(l->a.counter)) + : "cc", "memory"); + + return t; +} + +#define local_sub_and_test(a, l) (local_sub_return((a), (l)) == 0) +#define local_dec_and_test(l) (local_dec_return((l)) == 0) + +/* + * Atomically test *l and decrement if it is greater than 0. + * The function returns the old value of *l minus 1. + */ +static __inline__ long local_dec_if_positive(local_t *l) +{ + long t; + + __asm__ __volatile__( +"1: ldarx %0,0,%1 # local_dec_if_positive\n\ + addic. %0,%0,-1\n\ + blt- 2f\n\ + stdcx. %0,0,%1\n\ + bne- 1b" + "\n\ +2:" : "=&r" (t) + : "r" (&(l->a.counter)) + : "cc", "memory"); + + return t; +} + +#define local_cmpxchg(l, o, n) \ + ((__typeof__((l)->a.counter))cmpxchg_local(&((l)->a.counter), (o), (n))) +#define local_xchg(l, new) (xchg_local(&((l)->a.counter), new)) + +/** + * local_add_unless - add unless the number is a given value + * @l: pointer of type local_t + * @a: the amount to add to l... + * @u: ...unless l is equal to u. + * + * Atomically adds @a to @l, so long as it was not @u. + * Returns non-zero if @l was not @u, and zero otherwise. + */ +static __inline__ int local_add_unless(local_t *l, long a, long u) +{ + long t; + + __asm__ __volatile__ ( +"1: ldarx %0,0,%1 # local_add_unless\n\ + cmpd 0,%0,%3 \n\ + beq- 2f \n\ + add %0,%2,%0 \n" + PPC405_ERR77(0,%2) +" stdcx. 
%0,0,%1 \n\ + bne- 1b \n" +" subf %0,%2,%0 \n\ +2:" + : "=&r" (t) + : "r" (&(l->a.counter)), "r" (a), "r" (u) + : "cc", "memory"); + + return t != u; +} + +#define local_inc_not_zero(l) local_add_unless((l), 1, 0) + +#endif /* !__powerpc64__ */ + +/* Use these for per-cpu local_t variables: on some archs they are + * much more efficient than these naive implementations. Note they take + * a variable, not an address. + * + * This could be done better if we moved the per cpu data directly + * after GS. + */ + +#define __local_inc(l) ((l)->a.counter++) +#define __local_dec(l) ((l)->a.counter++) +#define __local_add(i,l) ((l)->a.counter+=(i)) +#define __local_sub(i,l) ((l)->a.counter-=(i)) + +/* Need to disable preemption for the cpu local counters otherwise we could + still access a variable of a previous CPU in a non atomic way. */ +#define cpu_local_wrap_v(l) \ + ({ local_t res__; \ + preempt_disable(); \ + res__ = (l); \ + preempt_enable(); \ + res__; }) +#define cpu_local_wrap(l) \ + ({ preempt_disable(); \ + l; \ + preempt_enable(); }) \ + +#define cpu_local_read(l) cpu_local_wrap_v(local_read(&__get_cpu_var(l))) +#define cpu_local_set(l, i) cpu_local_wrap(local_set(&__get_cpu_var(l), (i))) +#define cpu_local_inc(l) cpu_local_wrap(local_inc(&__get_cpu_var(l))) +#define cpu_local_dec(l) cpu_local_wrap(local_dec(&__get_cpu_var(l))) +#define cpu_local_add(i, l) cpu_local_wrap(local_add((i), &__get_cpu_var(l))) +#define cpu_local_sub(i, l) cpu_local_wrap(local_sub((i), &__get_cpu_var(l))) + +#define __cpu_local_inc(l) cpu_local_inc(l) +#define __cpu_local_dec(l) cpu_local_dec(l) +#define __cpu_local_add(i, l) cpu_local_add((i), (l)) +#define __cpu_local_sub(i, l) cpu_local_sub((i), (l)) + +#endif /* _ARCH_POWERPC_LOCAL_H */ --- a/include/asm-s390/local.h +++ b/include/asm-s390/local.h @@ -1,58 +1 @@ -#ifndef _ASM_LOCAL_H -#define _ASM_LOCAL_H - -#include <linux/percpu.h> -#include <asm/atomic.h> - -#ifndef __s390x__ - -typedef atomic_t local_t; - -#define LOCAL_INIT(i) ATOMIC_INIT(i) -#define local_read(v) atomic_read(v) -#define local_set(v,i) atomic_set(v,i) - -#define local_inc(v) atomic_inc(v) -#define local_dec(v) atomic_dec(v) -#define local_add(i, v) atomic_add(i, v) -#define local_sub(i, v) atomic_sub(i, v) - -#else - -typedef atomic64_t local_t; - -#define LOCAL_INIT(i) ATOMIC64_INIT(i) -#define local_read(v) atomic64_read(v) -#define local_set(v,i) atomic64_set(v,i) - -#define local_inc(v) atomic64_inc(v) -#define local_dec(v) atomic64_dec(v) -#define local_add(i, v) atomic64_add(i, v) -#define local_sub(i, v) atomic64_sub(i, v) - -#endif - -#define __local_inc(v) ((v)->counter++) -#define __local_dec(v) ((v)->counter--) -#define __local_add(i,v) ((v)->counter+=(i)) -#define __local_sub(i,v) ((v)->counter-=(i)) - -/* - * Use these for per-cpu local_t variables: on some archs they are - * much more efficient than these naive implementations. Note they take - * a variable, not an address. 
- */ -#define cpu_local_read(v) local_read(&__get_cpu_var(v)) -#define cpu_local_set(v, i) local_set(&__get_cpu_var(v), (i)) - -#define cpu_local_inc(v) local_inc(&__get_cpu_var(v)) -#define cpu_local_dec(v) local_dec(&__get_cpu_var(v)) -#define cpu_local_add(i, v) local_add((i), &__get_cpu_var(v)) -#define cpu_local_sub(i, v) local_sub((i), &__get_cpu_var(v)) - -#define __cpu_local_inc(v) __local_inc(&__get_cpu_var(v)) -#define __cpu_local_dec(v) __local_dec(&__get_cpu_var(v)) -#define __cpu_local_add(i, v) __local_add((i), &__get_cpu_var(v)) -#define __cpu_local_sub(i, v) __local_sub((i), &__get_cpu_var(v)) - -#endif /* _ASM_LOCAL_H */ +#include <asm-generic/local.h> --- a/include/asm-sparc64/local.h +++ b/include/asm-sparc64/local.h @@ -1,40 +1 @@ -#ifndef _ARCH_SPARC64_LOCAL_H -#define _ARCH_SPARC64_LOCAL_H - -#include <linux/percpu.h> -#include <asm/atomic.h> - -typedef atomic64_t local_t; - -#define LOCAL_INIT(i) ATOMIC64_INIT(i) -#define local_read(v) atomic64_read(v) -#define local_set(v,i) atomic64_set(v,i) - -#define local_inc(v) atomic64_inc(v) -#define local_dec(v) atomic64_dec(v) -#define local_add(i, v) atomic64_add(i, v) -#define local_sub(i, v) atomic64_sub(i, v) - -#define __local_inc(v) ((v)->counter++) -#define __local_dec(v) ((v)->counter--) -#define __local_add(i,v) ((v)->counter+=(i)) -#define __local_sub(i,v) ((v)->counter-=(i)) - -/* Use these for per-cpu local_t variables: on some archs they are - * much more efficient than these naive implementations. Note they take - * a variable, not an address. - */ -#define cpu_local_read(v) local_read(&__get_cpu_var(v)) -#define cpu_local_set(v, i) local_set(&__get_cpu_var(v), (i)) - -#define cpu_local_inc(v) local_inc(&__get_cpu_var(v)) -#define cpu_local_dec(v) local_dec(&__get_cpu_var(v)) -#define cpu_local_add(i, v) local_add((i), &__get_cpu_var(v)) -#define cpu_local_sub(i, v) local_sub((i), &__get_cpu_var(v)) - -#define __cpu_local_inc(v) __local_inc(&__get_cpu_var(v)) -#define __cpu_local_dec(v) __local_dec(&__get_cpu_var(v)) -#define __cpu_local_add(i, v) __local_add((i), &__get_cpu_var(v)) -#define __cpu_local_sub(i, v) __local_sub((i), &__get_cpu_var(v)) - -#endif /* _ARCH_SPARC64_LOCAL_H */ +#include <asm-generic/local.h> --- a/include/asm-x86_64/local.h +++ b/include/asm-x86_64/local.h @@ -2,49 +2,183 @@ #ifndef _ARCH_X8664_LOCAL_H #define _ARCH_X8664_LOCAL_H #include <linux/percpu.h> +#include <asm/atomic.h> typedef struct { - volatile long counter; + atomic_long_t a; } local_t; -#define LOCAL_INIT(i) { (i) } +#define LOCAL_INIT(i) { ATOMIC_LONG_INIT(i) } -#define local_read(v) ((v)->counter) -#define local_set(v,i) (((v)->counter) = (i)) +#define local_read(l) atomic_long_read(&(l)->a) +#define local_set(l,i) atomic_long_set(&(l)->a, (i)) -static inline void local_inc(local_t *v) +static inline void local_inc(local_t *l) { __asm__ __volatile__( "incq %0" - :"=m" (v->counter) - :"m" (v->counter)); + :"=m" (l->a.counter) + :"m" (l->a.counter)); } -static inline void local_dec(local_t *v) +static inline void local_dec(local_t *l) { __asm__ __volatile__( "decq %0" - :"=m" (v->counter) - :"m" (v->counter)); + :"=m" (l->a.counter) + :"m" (l->a.counter)); } -static inline void local_add(long i, local_t *v) +static inline void local_add(long i, local_t *l) { __asm__ __volatile__( "addq %1,%0" - :"=m" (v->counter) - :"ir" (i), "m" (v->counter)); + :"=m" (l->a.counter) + :"ir" (i), "m" (l->a.counter)); } -static inline void local_sub(long i, local_t *v) +static inline void local_sub(long i, local_t *l) { __asm__ 
__volatile__( "subq %1,%0" - :"=m" (v->counter) - :"ir" (i), "m" (v->counter)); + :"=m" (l->a.counter) + :"ir" (i), "m" (l->a.counter)); } +/** + * local_sub_and_test - subtract value from variable and test result + * @i: integer value to subtract + * @l: pointer to type local_t + * + * Atomically subtracts @i from @l and returns + * true if the result is zero, or false for all + * other cases. + */ +static __inline__ int local_sub_and_test(long i, local_t *l) +{ + unsigned char c; + + __asm__ __volatile__( + "subq %2,%0; sete %1" + :"=m" (l->a.counter), "=qm" (c) + :"ir" (i), "m" (l->a.counter) : "memory"); + return c; +} + +/** + * local_dec_and_test - decrement and test + * @l: pointer to type local_t + * + * Atomically decrements @l by 1 and + * returns true if the result is 0, or false for all other + * cases. + */ +static __inline__ int local_dec_and_test(local_t *l) +{ + unsigned char c; + + __asm__ __volatile__( + "decq %0; sete %1" + :"=m" (l->a.counter), "=qm" (c) + :"m" (l->a.counter) : "memory"); + return c != 0; +} + +/** + * local_inc_and_test - increment and test + * @l: pointer to type local_t + * + * Atomically increments @l by 1 + * and returns true if the result is zero, or false for all + * other cases. + */ +static __inline__ int local_inc_and_test(local_t *l) +{ + unsigned char c; + + __asm__ __volatile__( + "incq %0; sete %1" + :"=m" (l->a.counter), "=qm" (c) + :"m" (l->a.counter) : "memory"); + return c != 0; +} + +/** + * local_add_negative - add and test if negative + * @i: integer value to add + * @l: pointer to type local_t + * + * Atomically adds @i to @l and returns true + * if the result is negative, or false when + * result is greater than or equal to zero. + */ +static __inline__ int local_add_negative(long i, local_t *l) +{ + unsigned char c; + + __asm__ __volatile__( + "addq %2,%0; sets %1" + :"=m" (l->a.counter), "=qm" (c) + :"ir" (i), "m" (l->a.counter) : "memory"); + return c; +} + +/** + * local_add_return - add and return + * @i: integer value to add + * @l: pointer to type local_t + * + * Atomically adds @i to @l and returns @i + @l + */ +static __inline__ long local_add_return(long i, local_t *l) +{ + long __i = i; + __asm__ __volatile__( + "xaddq %0, %1;" + :"=r"(i) + :"m"(l->a.counter), "0"(i)); + return i + __i; +} + +static __inline__ long local_sub_return(long i, local_t *l) +{ + return local_add_return(-i,l); +} + +#define local_inc_return(l) (local_add_return(1,l)) +#define local_dec_return(l) (local_sub_return(1,l)) + +#define local_cmpxchg(l, o, n) \ + ((long)cmpxchg_local(&((l)->a.counter), (o), (n))) +/* Always has a lock prefix anyway */ +#define local_xchg(l, new) (xchg(&((l)->a.counter), new)) + +/** + * local_add_unless - add unless the number is a given value + * @l: pointer of type local_t + * @a: the amount to add to l... + * @u: ...unless l is equal to u. + * + * Atomically adds @a to @l, so long as it was not @u. + * Returns non-zero if @l was not @u, and zero otherwise. + */ +#define local_add_unless(l, a, u) \ +({ \ + long c, old; \ + c = local_read(l); \ + for (;;) { \ + if (unlikely(c == (u))) \ + break; \ + old = local_cmpxchg((l), c, c + (a)); \ + if (likely(old == c)) \ + break; \ + c = old; \ + } \ + c != (u); \ +}) +#define local_inc_not_zero(l) local_add_unless((l), 1, 0) + /* On x86-64 these are better than the atomic variants on SMP kernels because they don't use a lock prefix.
*/ #define __local_inc(l) local_inc(l) @@ -62,27 +196,27 @@ #define __local_sub(i,l) local_sub((i),( /* Need to disable preemption for the cpu local counters otherwise we could still access a variable of a previous CPU in a non atomic way. */ -#define cpu_local_wrap_v(v) \ +#define cpu_local_wrap_v(l) \ ({ local_t res__; \ preempt_disable(); \ - res__ = (v); \ + res__ = (l); \ preempt_enable(); \ res__; }) -#define cpu_local_wrap(v) \ +#define cpu_local_wrap(l) \ ({ preempt_disable(); \ - v; \ + l; \ preempt_enable(); }) \ -#define cpu_local_read(v) cpu_local_wrap_v(local_read(&__get_cpu_var(v))) -#define cpu_local_set(v, i) cpu_local_wrap(local_set(&__get_cpu_var(v), (i))) -#define cpu_local_inc(v) cpu_local_wrap(local_inc(&__get_cpu_var(v))) -#define cpu_local_dec(v) cpu_local_wrap(local_dec(&__get_cpu_var(v))) -#define cpu_local_add(i, v) cpu_local_wrap(local_add((i), &__get_cpu_var(v))) -#define cpu_local_sub(i, v) cpu_local_wrap(local_sub((i), &__get_cpu_var(v))) +#define cpu_local_read(l) cpu_local_wrap_v(local_read(&__get_cpu_var(l))) +#define cpu_local_set(l, i) cpu_local_wrap(local_set(&__get_cpu_var(l), (i))) +#define cpu_local_inc(l) cpu_local_wrap(local_inc(&__get_cpu_var(l))) +#define cpu_local_dec(l) cpu_local_wrap(local_dec(&__get_cpu_var(l))) +#define cpu_local_add(i, l) cpu_local_wrap(local_add((i), &__get_cpu_var(l))) +#define cpu_local_sub(i, l) cpu_local_wrap(local_sub((i), &__get_cpu_var(l))) -#define __cpu_local_inc(v) cpu_local_inc(v) -#define __cpu_local_dec(v) cpu_local_dec(v) -#define __cpu_local_add(i, v) cpu_local_add((i), (v)) -#define __cpu_local_sub(i, v) cpu_local_sub((i), (v)) +#define __cpu_local_inc(l) cpu_local_inc(l) +#define __cpu_local_dec(l) cpu_local_dec(l) +#define __cpu_local_add(i, l) cpu_local_add((i), (l)) +#define __cpu_local_sub(i, l) cpu_local_sub((i), (l)) -#endif /* _ARCH_I386_LOCAL_H */ +#endif /* _ARCH_X8664_LOCAL_H */ ---END--- OpenPGP public key: http://krystal.dyndns.org:8080/key/compudj.gpg Key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
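
To illustrate how the local_t API above is meant to be used, here is a minimal sketch of a per-cpu event counter. It is an illustration only, not part of the patch; the variable and function names are made up for the example, and it assumes a kernel with this local.h rework applied:

/*
 * Hypothetical example: a per-cpu event counter kept in a local_t.
 * The fast path runs on the owning CPU and never takes the bus lock.
 */
#include <linux/percpu.h>
#include <asm/local.h>

static DEFINE_PER_CPU(local_t, nr_events) = LOCAL_INIT(0);

static void count_event(void)
{
	/*
	 * cpu_local_inc() takes the variable name, not an address, and
	 * wraps the update in preempt_disable()/preempt_enable() so the
	 * increment always hits the counter of the CPU we are running on.
	 */
	cpu_local_inc(nr_events);
}

static long read_events(int cpu)
{
	/*
	 * A reader on another CPU only gets a snapshot; if it needs
	 * ordering against other per-cpu data it must pair explicit
	 * smp_rmb()/smp_wmb() itself, the local_t ops do not imply it.
	 */
	return local_read(&per_cpu(nr_events, cpu));
}

The update side stays as cheap as a plain increment on the local CPU; only cross-CPU readers pay for ordering, which is the rare case this series optimizes for.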
Thread overview: 13+ messages
2006-11-24 21:55 [PATCH 4/16] LTTng 0.6.36 for 2.6.18 : atomic UP operations on SMP Mathieu Desnoyers
2006-11-27 16:56 ` Christoph Hellwig
2006-12-01 3:11 ` [PATCH 1/2] atomic.h atomic64_t standardization Mathieu Desnoyers
2006-12-01 3:24 ` Mathieu Desnoyers
2006-12-01 22:19 ` Mathieu Desnoyers
2006-12-02 0:43 ` Nick Piggin
2006-12-05 17:08 ` [PATCH 1/2] atomic.h atomic64_t standardization for 2.6.19 Mathieu Desnoyers
2006-12-01 3:34 ` [PATCH 1/2] atomic.h atomic64_t standardization Paul Mundt
2006-12-01 3:41 ` Mathieu Desnoyers
2006-12-01 3:14 ` [PATCH 2/2] local.h modifications Mathieu Desnoyers
2006-12-01 3:24 ` Mathieu Desnoyers
2006-12-01 22:21 ` Mathieu Desnoyers
2006-12-05 17:14 ` [PATCH 2/2] local.h modifications for 2.6.19 Mathieu Desnoyers