LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [PATCH] add strncmp to PowerPC
@ 2008-02-29 16:04 Steven Rostedt
2008-03-01 3:04 ` Benjamin Herrenschmidt
0 siblings, 1 reply; 10+ messages in thread
From: Steven Rostedt @ 2008-02-29 16:04 UTC (permalink / raw)
To: paulus, linuxppc-dev; +Cc: LKML
strncmp is defined in assembly for bootup, but it is not defined in the
normal running kernel. This patch takes the strncmp code from the bootup
and copies it to the kernel proper.
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
---
arch/powerpc/kernel/ppc_ksyms.c | 1 +
arch/powerpc/lib/string.S | 12 ++++++++++++
include/asm-powerpc/string.h | 2 ++
3 files changed, 15 insertions(+)
Index: linux-sched-devel.git/arch/powerpc/kernel/ppc_ksyms.c
===================================================================
--- linux-sched-devel.git.orig/arch/powerpc/kernel/ppc_ksyms.c 2008-02-27 14:01:38.000000000 -0800
+++ linux-sched-devel.git/arch/powerpc/kernel/ppc_ksyms.c 2008-02-29 07:24:22.000000000 -0800
@@ -78,6 +78,7 @@ EXPORT_SYMBOL(strncpy);
EXPORT_SYMBOL(strcat);
EXPORT_SYMBOL(strlen);
EXPORT_SYMBOL(strcmp);
+EXPORT_SYMBOL(strncmp);
EXPORT_SYMBOL(csum_partial);
EXPORT_SYMBOL(csum_partial_copy_generic);
Index: linux-sched-devel.git/arch/powerpc/lib/string.S
===================================================================
--- linux-sched-devel.git.orig/arch/powerpc/lib/string.S 2008-02-27 14:01:38.000000000 -0800
+++ linux-sched-devel.git/arch/powerpc/lib/string.S 2008-02-29 07:24:22.000000000 -0800
@@ -75,6 +75,18 @@ _GLOBAL(strcmp)
beq 1b
blr
+_GLOBAL(strncmp)
+ mtctr r5
+ addi r5,r3,-1
+ addi r4,r4,-1
+1: lbzu r3,1(r5)
+ cmpwi 1,r3,0
+ lbzu r0,1(r4)
+ subf. r3,r0,r3
+ beqlr 1
+ bdnzt eq,1b
+ blr
+
_GLOBAL(strlen)
addi r4,r3,-1
1: lbzu r0,1(r4)
Index: linux-sched-devel.git/include/asm-powerpc/string.h
===================================================================
--- linux-sched-devel.git.orig/include/asm-powerpc/string.h 2008-02-27 14:01:58.000000000 -0800
+++ linux-sched-devel.git/include/asm-powerpc/string.h 2008-02-29 07:24:22.000000000 -0800
@@ -7,6 +7,7 @@
#define __HAVE_ARCH_STRNCPY
#define __HAVE_ARCH_STRLEN
#define __HAVE_ARCH_STRCMP
+#define __HAVE_ARCH_STRNCMP
#define __HAVE_ARCH_STRCAT
#define __HAVE_ARCH_MEMSET
#define __HAVE_ARCH_MEMCPY
@@ -18,6 +19,7 @@ extern char * strcpy(char *,const char *
extern char * strncpy(char *,const char *, __kernel_size_t);
extern __kernel_size_t strlen(const char *);
extern int strcmp(const char *,const char *);
+extern int strncmp(const char *,const char *,__kernel_size_t);
extern char * strcat(char *, const char *);
extern void * memset(void *,int,__kernel_size_t);
extern void * memcpy(void *,const void *,__kernel_size_t);
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] add strncmp to PowerPC
2008-02-29 16:04 [PATCH] add strncmp to PowerPC Steven Rostedt
@ 2008-03-01 3:04 ` Benjamin Herrenschmidt
2008-03-01 3:56 ` Steven Rostedt
2008-03-05 4:03 ` Paul Mackerras
0 siblings, 2 replies; 10+ messages in thread
From: Benjamin Herrenschmidt @ 2008-03-01 3:04 UTC (permalink / raw)
To: Steven Rostedt; +Cc: paulus, linuxppc-dev, LKML
On Fri, 2008-02-29 at 11:04 -0500, Steven Rostedt wrote:
> strncmp is defined in assembly for bootup, but it is not defined in the
> normal running kernel. This patch takes the strncmp code from the bootup
> and copies it to the kernel proper.
>
> Signed-off-by: Steven Rostedt <srostedt@redhat.com>
> ---
Do we have any indication that it performs better than the C one ?
Ben.
> arch/powerpc/kernel/ppc_ksyms.c | 1 +
> arch/powerpc/lib/string.S | 12 ++++++++++++
> include/asm-powerpc/string.h | 2 ++
> 3 files changed, 15 insertions(+)
>
> Index: linux-sched-devel.git/arch/powerpc/kernel/ppc_ksyms.c
> ===================================================================
> --- linux-sched-devel.git.orig/arch/powerpc/kernel/ppc_ksyms.c 2008-02-27 14:01:38.000000000 -0800
> +++ linux-sched-devel.git/arch/powerpc/kernel/ppc_ksyms.c 2008-02-29 07:24:22.000000000 -0800
> @@ -78,6 +78,7 @@ EXPORT_SYMBOL(strncpy);
> EXPORT_SYMBOL(strcat);
> EXPORT_SYMBOL(strlen);
> EXPORT_SYMBOL(strcmp);
> +EXPORT_SYMBOL(strncmp);
>
> EXPORT_SYMBOL(csum_partial);
> EXPORT_SYMBOL(csum_partial_copy_generic);
> Index: linux-sched-devel.git/arch/powerpc/lib/string.S
> ===================================================================
> --- linux-sched-devel.git.orig/arch/powerpc/lib/string.S 2008-02-27 14:01:38.000000000 -0800
> +++ linux-sched-devel.git/arch/powerpc/lib/string.S 2008-02-29 07:24:22.000000000 -0800
> @@ -75,6 +75,18 @@ _GLOBAL(strcmp)
> beq 1b
> blr
>
> +_GLOBAL(strncmp)
> + mtctr r5
> + addi r5,r3,-1
> + addi r4,r4,-1
> +1: lbzu r3,1(r5)
> + cmpwi 1,r3,0
> + lbzu r0,1(r4)
> + subf. r3,r0,r3
> + beqlr 1
> + bdnzt eq,1b
> + blr
> +
> _GLOBAL(strlen)
> addi r4,r3,-1
> 1: lbzu r0,1(r4)
> Index: linux-sched-devel.git/include/asm-powerpc/string.h
> ===================================================================
> --- linux-sched-devel.git.orig/include/asm-powerpc/string.h 2008-02-27 14:01:58.000000000 -0800
> +++ linux-sched-devel.git/include/asm-powerpc/string.h 2008-02-29 07:24:22.000000000 -0800
> @@ -7,6 +7,7 @@
> #define __HAVE_ARCH_STRNCPY
> #define __HAVE_ARCH_STRLEN
> #define __HAVE_ARCH_STRCMP
> +#define __HAVE_ARCH_STRNCMP
> #define __HAVE_ARCH_STRCAT
> #define __HAVE_ARCH_MEMSET
> #define __HAVE_ARCH_MEMCPY
> @@ -18,6 +19,7 @@ extern char * strcpy(char *,const char *
> extern char * strncpy(char *,const char *, __kernel_size_t);
> extern __kernel_size_t strlen(const char *);
> extern int strcmp(const char *,const char *);
> +extern int strncmp(const char *,const char *,__kernel_size_t);
> extern char * strcat(char *, const char *);
> extern void * memset(void *,int,__kernel_size_t);
> extern void * memcpy(void *,const void *,__kernel_size_t);
>
>
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@ozlabs.org
> https://ozlabs.org/mailman/listinfo/linuxppc-dev
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] add strncmp to PowerPC
2008-03-01 3:04 ` Benjamin Herrenschmidt
@ 2008-03-01 3:56 ` Steven Rostedt
2008-03-03 9:54 ` Gabriel Paubert
2008-03-05 4:03 ` Paul Mackerras
1 sibling, 1 reply; 10+ messages in thread
From: Steven Rostedt @ 2008-03-01 3:56 UTC (permalink / raw)
To: Benjamin Herrenschmidt; +Cc: paulus, linuxppc-dev, LKML
On Sat, 1 Mar 2008, Benjamin Herrenschmidt wrote:
>
> Do we have any indication that it performs better than the C one ?
See below.
>
> Ben.
>
> >
> > +_GLOBAL(strncmp)
> > + mtctr r5
> > + addi r5,r3,-1
> > + addi r4,r4,-1
> > +1: lbzu r3,1(r5)
> > + cmpwi 1,r3,0
> > + lbzu r0,1(r4)
> > + subf. r3,r0,r3
> > + beqlr 1
> > + bdnzt eq,1b
> > + blr
> > +
And here's the objdump of the C version:
0000000000000080 <.strncmp>:
80: fb e1 ff f0 std r31,-16(r1)
84: f8 21 ff c1 stdu r1,-64(r1)
88: 7c 69 1b 78 mr r9,r3
8c: 7c a0 2b 79 mr. r0,r5
90: 38 60 00 00 li r3,0
94: 7c 09 03 a6 mtctr r0
98: 7c 3f 0b 78 mr r31,r1
9c: 41 82 00 68 beq- 104 <.strncmp+0x84>
a0: 89 69 00 00 lbz r11,0(r9)
a4: 88 04 00 00 lbz r0,0(r4)
a8: 7c 00 58 50 subf r0,r0,r11
ac: 78 00 06 20 clrldi r0,r0,56
b0: 2f a0 00 00 cmpdi cr7,r0,0
b4: 7c 00 07 74 extsb r0,r0
b8: 7c 03 03 78 mr r3,r0
bc: 40 9e 00 48 bne- cr7,104 <.strncmp+0x84>
c0: 2f ab 00 00 cmpdi cr7,r11,0
c4: 41 9e 00 40 beq- cr7,104 <.strncmp+0x84>
c8: 38 84 00 01 addi r4,r4,1
cc: 38 69 00 01 addi r3,r9,1
d0: 42 40 00 30 bdz- 100 <.strncmp+0x80>
d4: 88 03 00 00 lbz r0,0(r3)
d8: 89 24 00 00 lbz r9,0(r4)
dc: 38 63 00 01 addi r3,r3,1
e0: 38 84 00 01 addi r4,r4,1
e4: 2f 20 00 00 cmpdi cr6,r0,0
e8: 7c 09 00 50 subf r0,r9,r0
ec: 78 00 06 20 clrldi r0,r0,56
f0: 2f a0 00 00 cmpdi cr7,r0,0
f4: 7c 00 07 74 extsb r0,r0
f8: 40 9e 00 08 bne- cr7,100 <.strncmp+0x80>
fc: 40 9a ff d4 bne+ cr6,d0 <.strncmp+0x50>
100: 7c 03 03 78 mr r3,r0
104: e8 21 00 00 ld r1,0(r1)
108: eb e1 ff f0 ld r31,-16(r1)
10c: 4e 80 00 20 blr
I'll let you decide ;-)
Even if it was logically faster (which I still doubt) it's a hell of a lot
of cache lines to waste.
-- Steve
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] add strncmp to PowerPC
2008-03-01 3:56 ` Steven Rostedt
@ 2008-03-03 9:54 ` Gabriel Paubert
2008-03-03 10:10 ` Andreas Schwab
2008-03-03 19:08 ` Segher Boessenkool
0 siblings, 2 replies; 10+ messages in thread
From: Gabriel Paubert @ 2008-03-03 9:54 UTC (permalink / raw)
To: Steven Rostedt; +Cc: Benjamin Herrenschmidt, linuxppc-dev, paulus, LKML
On Fri, Feb 29, 2008 at 10:56:45PM -0500, Steven Rostedt wrote:
>
> On Sat, 1 Mar 2008, Benjamin Herrenschmidt wrote:
> >
> > Do we have any indication that it performs better than the C one ?
>
> See below.
>
> >
> > Ben.
> >
>
> > >
> > > +_GLOBAL(strncmp)
> > > + mtctr r5
> > > + addi r5,r3,-1
> > > + addi r4,r4,-1
> > > +1: lbzu r3,1(r5)
> > > + cmpwi 1,r3,0
> > > + lbzu r0,1(r4)
> > > + subf. r3,r0,r3
> > > + beqlr 1
> > > + bdnzt eq,1b
> > > + blr
> > > +
>
>
> And here's the objdump of the C version:
>
> 0000000000000080 <.strncmp>:
> 80: fb e1 ff f0 std r31,-16(r1)
> 84: f8 21 ff c1 stdu r1,-64(r1)
> 88: 7c 69 1b 78 mr r9,r3
> 8c: 7c a0 2b 79 mr. r0,r5
> 90: 38 60 00 00 li r3,0
> 94: 7c 09 03 a6 mtctr r0
> 98: 7c 3f 0b 78 mr r31,r1
> 9c: 41 82 00 68 beq- 104 <.strncmp+0x84>
> a0: 89 69 00 00 lbz r11,0(r9)
> a4: 88 04 00 00 lbz r0,0(r4)
> a8: 7c 00 58 50 subf r0,r0,r11
> ac: 78 00 06 20 clrldi r0,r0,56
> b0: 2f a0 00 00 cmpdi cr7,r0,0
> b4: 7c 00 07 74 extsb r0,r0
> b8: 7c 03 03 78 mr r3,r0
> bc: 40 9e 00 48 bne- cr7,104 <.strncmp+0x84>
> c0: 2f ab 00 00 cmpdi cr7,r11,0
> c4: 41 9e 00 40 beq- cr7,104 <.strncmp+0x84>
> c8: 38 84 00 01 addi r4,r4,1
> cc: 38 69 00 01 addi r3,r9,1
> d0: 42 40 00 30 bdz- 100 <.strncmp+0x80>
> d4: 88 03 00 00 lbz r0,0(r3)
> d8: 89 24 00 00 lbz r9,0(r4)
> dc: 38 63 00 01 addi r3,r3,1
> e0: 38 84 00 01 addi r4,r4,1
> e4: 2f 20 00 00 cmpdi cr6,r0,0
> e8: 7c 09 00 50 subf r0,r9,r0
> ec: 78 00 06 20 clrldi r0,r0,56
> f0: 2f a0 00 00 cmpdi cr7,r0,0
> f4: 7c 00 07 74 extsb r0,r0
> f8: 40 9e 00 08 bne- cr7,100 <.strncmp+0x80>
> fc: 40 9a ff d4 bne+ cr6,d0 <.strncmp+0x50>
> 100: 7c 03 03 78 mr r3,r0
> 104: e8 21 00 00 ld r1,0(r1)
> 108: eb e1 ff f0 ld r31,-16(r1)
> 10c: 4e 80 00 20 blr
>
>
> I'll let you decide ;-)
>
> Even if it was logically faster (which I still doubt) it's a hell of a lot
> of cache lines to waste.
Indeed, but there are some corner cases that the C code handles, such as
a length of 0, which may lead to an infinite loop in the asm code.
OTOH, I'm a bit surprised by the extsb instructions in the compiler generated
code. We don't compile with -fsigned-char, do we? The clrldi
instructions are also extremely stupid.
Now that I think a bit more about it, I believe that the C version is
incorrect: the clrldi/extsb dance takes a value between -255 and +255
and collapses it into the -128 to 127 range, meaning that the return
value may be wrong if we rely on the sign of the result. So unless I
miss something, the problem is much more serious than just stupid code
(I had just a look at the libc version in C and characters are cast to
unsigned char before the comparison).
Regards,
Gabriel
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] add strncmp to PowerPC
2008-03-03 9:54 ` Gabriel Paubert
@ 2008-03-03 10:10 ` Andreas Schwab
2008-03-03 19:08 ` Segher Boessenkool
1 sibling, 0 replies; 10+ messages in thread
From: Andreas Schwab @ 2008-03-03 10:10 UTC (permalink / raw)
To: Gabriel Paubert; +Cc: Steven Rostedt, paulus, LKML, linuxppc-dev
Gabriel Paubert <paubert@iram.es> writes:
> Now that I think a bit more about it, I believe that the C version is
> incorrect: the clrldi/extsb dance takes a value between -255 and +255
> and collapses it into the -128 to 127 range, meaning that the return
> value may be wrong if we rely on the sign of the result. So unless I
> miss something, the problem is much more serious than just stupid code
> (I had just a look at the libc version in C and characters are cast to
> unsigned char before the comparison).
The latter is explicitly required by the C standard. I.e., even if your
characters are signed, they are always compared as unsigned by
strcmp/strncmp/memcmp.
Andreas.
--
Andreas Schwab, SuSE Labs, schwab@suse.de
SuSE Linux Products GmbH, Maxfeldstraße 5, 90409 Nürnberg, Germany
PGP key fingerprint = 58CA 54C7 6D53 942B 1756 01D3 44D5 214B 8276 4ED5
"And now for something completely different."
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] add strncmp to PowerPC
2008-03-03 9:54 ` Gabriel Paubert
2008-03-03 10:10 ` Andreas Schwab
@ 2008-03-03 19:08 ` Segher Boessenkool
1 sibling, 0 replies; 10+ messages in thread
From: Segher Boessenkool @ 2008-03-03 19:08 UTC (permalink / raw)
To: Gabriel Paubert; +Cc: paulus, LKML, linuxppc-dev, Steven Rostedt
>> Even if it was logically faster (which I still doubt) it's a hell of
>> a lot
>> of cache lines to waste.
Yeah, 1 on 64-bit and 3 on 32-bit, that's a terrible lot.</sarcasm>
> Indeed, but there are some corner cases that the C code handles. Like
> a length of 0 which may lead to infinite loop in the asm code.
>
> OTOH, I'm a bit surprised by the extsb instructions in the compiler
> generated
> code. We don't compile with -fsigned-char, do we? The clrldi
> instructions are also extremely stupid.
Those are both necessary to be equivalent to the C code, which uses
signed char explicitly. It is generally considered a Good Thing(tm)
for the compiler to generate assembler code equivalent to the C code,
even if the C code is wrong.
> Now that I think a bit more about it, I believe that the C version is
> incorrect
It is. It's a great entry for the IOCCC as well.
I just tested the following (can't guarantee it's correct, just a PoC):
/*
 * Compare at most len bytes of the strings s1 and s2.
 *
 * Each byte is stored into an unsigned char before the subtraction, so
 * the result is the difference of the byte values taken as 0..255
 * rather than as (possibly signed) plain char.
 *
 * Returns c1 - c2 for the first pair of bytes that differ, or 0 when no
 * difference is found before a NUL byte or before len bytes have been
 * examined (which includes the len == 0 case, where the loop body never
 * runs).
 */
int strncmp(const char *s1, const char *s2, unsigned long /*size_t*/
len)
{
/* The loop test runs before any byte is read, so len == 0 reads nothing. */
while (len--) {
unsigned char c1, c2;
c1 = *s1++;
c2 = *s2++;
/* Both operands promote to int, so cmp lies in -255..255. */
int cmp = c1 - c2;
if (cmp)
return cmp;
/*
 * cmp == 0 here, hence c1 == c2: the two tests below are equivalent
 * and either one alone detects the common terminating NUL.
 */
if (c1 == 0 || c2 == 0)
break;
}
return 0;
}
which generates (with GCC-4.2.3)
strncmp:
addi 5,5,1
mtctr 5
.L2:
bdz .L11
lbz 0,0(3)
addi 3,3,1
lbz 9,0(4)
addi 4,4,1
cmpwi 7,0,0
subf. 0,9,0
cmpwi 6,9,0
bne- 0,.L4
beq- 7,.L4
bne+ 6,.L2
.L4:
mr 3,0
blr
.L11:
li 0,0
mr 3,0
blr
which isn't horrid, although it does some weirdish things obviously.
Current GCC-4.4.0 generates
strncmp:
addi 5,5,1
mr 10,3
mtctr 5
li 11,0
bdz .L7
.p2align 4,,15
.L4:
lbzx 0,10,11
lbzx 9,4,11
addi 11,11,1
subf. 3,9,0
cmpwi 6,9,0
cmpwi 7,0,0
bnelr 0
beqlr 7
beqlr 6
bdnz .L4
.L7:
li 3,0
blr
which is about as good as it can get (well, it didn't realise you
only need to test one of c1, c2 for zero. Did I say this was just
proof-of-concept code?)
Segher
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] add strncmp to PowerPC
2008-03-01 3:04 ` Benjamin Herrenschmidt
2008-03-01 3:56 ` Steven Rostedt
@ 2008-03-05 4:03 ` Paul Mackerras
2008-03-05 5:26 ` Segher Boessenkool
1 sibling, 1 reply; 10+ messages in thread
From: Paul Mackerras @ 2008-03-05 4:03 UTC (permalink / raw)
To: benh; +Cc: Steven Rostedt, linuxppc-dev, LKML
Benjamin Herrenschmidt writes:
> Do we have any indication that it performs better than the C one ?
I would expect it to, given that the assembler one has two branches in
the per-byte loop compared to 3 in the C version.
Paul.
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] add strncmp to PowerPC
2008-03-05 4:03 ` Paul Mackerras
@ 2008-03-05 5:26 ` Segher Boessenkool
2008-03-05 5:39 ` Paul Mackerras
0 siblings, 1 reply; 10+ messages in thread
From: Segher Boessenkool @ 2008-03-05 5:26 UTC (permalink / raw)
To: Paul Mackerras; +Cc: benh, LKML, linuxppc-dev, Steven Rostedt
>> Do we have any indication that it performs better than the C one ?
>
> I would expect it to, given that the assembler one has two branches in
> the per-byte loop compared to 3 in the C version.
But really, does it matter for strncmp() in the kernel?
Anyway, this asm code has bugs, as do both the current C version in the
kernel, and the code I posted. We need to do better :-)
Segher
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] add strncmp to PowerPC
2008-03-05 5:26 ` Segher Boessenkool
@ 2008-03-05 5:39 ` Paul Mackerras
2008-03-05 7:01 ` Segher Boessenkool
0 siblings, 1 reply; 10+ messages in thread
From: Paul Mackerras @ 2008-03-05 5:39 UTC (permalink / raw)
To: Segher Boessenkool; +Cc: benh, LKML, linuxppc-dev, Steven Rostedt
Segher Boessenkool writes:
> Anyway, this asm code has bugs, as do both the current C version in the
> kernel, and the code I posted. We need to do better :-)
The only bug I know of in the asm code is the behaviour when the count
is zero. Do you know of any other?
Paul.
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] add strncmp to PowerPC
2008-03-05 5:39 ` Paul Mackerras
@ 2008-03-05 7:01 ` Segher Boessenkool
0 siblings, 0 replies; 10+ messages in thread
From: Segher Boessenkool @ 2008-03-05 7:01 UTC (permalink / raw)
To: Paul Mackerras; +Cc: benh, LKML, linuxppc-dev, Steven Rostedt
>> Anyway, this asm code has bugs, as do both the current C version in
>> the
>> kernel, and the code I posted. We need to do better :-)
>
> The only bug I know of in the asm code is the behaviour when the count
> is zero. Do you know of any other?
No, that's the bug I meant. Sorry for using such inexact language.
Segher
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2008-03-05 7:01 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-02-29 16:04 [PATCH] add strncmp to PowerPC Steven Rostedt
2008-03-01 3:04 ` Benjamin Herrenschmidt
2008-03-01 3:56 ` Steven Rostedt
2008-03-03 9:54 ` Gabriel Paubert
2008-03-03 10:10 ` Andreas Schwab
2008-03-03 19:08 ` Segher Boessenkool
2008-03-05 4:03 ` Paul Mackerras
2008-03-05 5:26 ` Segher Boessenkool
2008-03-05 5:39 ` Paul Mackerras
2008-03-05 7:01 ` Segher Boessenkool
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).