LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [PATCH -mm crypto] AES: remove crypto_fl_tab and replace crypto_il_tab with isb_tab
@ 2008-03-11  4:42 Huang, Ying
  2008-03-11  6:47 ` Huang, Ying
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Huang, Ying @ 2008-03-11  4:42 UTC (permalink / raw)
  To: Herbert Xu, Adam J. Richter, Alexander Kjeldaas
  Cc: linux-kernel, linux-crypto

Remove crypto_fl_tab from the AES implementation. Because mix_col(1,n) = n,
all information in crypto_fl_tab is in crypto_ft_tab too.
crypto_il_tab is replaced by isb_tab; the byte shift is done
during decryption.

These changes reduce the encryption cache footprint to 50% and
decryption cache footprint to 53.1%. The code size is increased
slightly. On my Intel CORE micro-architecture CPU, there is almost no
performance penalty.

This patch has been built and tested against 2.6.25-rc3-mm1.

This patch is only for the generic C implementation. If this approach
is desired, I will implement the ASM version for x86.

Signed-off-by: Huang Ying <ying.huang@intel.com>

---
 crypto/aes_generic.c |   47 ++++++++++++++++-------------------------------
 include/crypto/aes.h |    2 --
 2 files changed, 16 insertions(+), 33 deletions(-)

--- a/crypto/aes_generic.c
+++ b/crypto/aes_generic.c
@@ -63,18 +63,14 @@ static inline u8 byte(const u32 x, const
 static u8 pow_tab[256] __initdata;
 static u8 log_tab[256] __initdata;
 static u8 sbx_tab[256] __initdata;
-static u8 isb_tab[256] __initdata;
+static u8 isb_tab[256];
 static u32 rco_tab[10];
 
 u32 crypto_ft_tab[4][256];
-u32 crypto_fl_tab[4][256];
 u32 crypto_it_tab[4][256];
-u32 crypto_il_tab[4][256];
 
 EXPORT_SYMBOL_GPL(crypto_ft_tab);
-EXPORT_SYMBOL_GPL(crypto_fl_tab);
 EXPORT_SYMBOL_GPL(crypto_it_tab);
-EXPORT_SYMBOL_GPL(crypto_il_tab);
 
 static inline u8 __init f_mult(u8 a, u8 b)
 {
@@ -122,12 +118,6 @@ static void __init gen_tabs(void)
 	for (i = 0; i < 256; ++i) {
 		p = sbx_tab[i];
 
-		t = p;
-		crypto_fl_tab[0][i] = t;
-		crypto_fl_tab[1][i] = rol32(t, 8);
-		crypto_fl_tab[2][i] = rol32(t, 16);
-		crypto_fl_tab[3][i] = rol32(t, 24);
-
 		t = ((u32) ff_mult(2, p)) |
 		    ((u32) p << 8) |
 		    ((u32) p << 16) | ((u32) ff_mult(3, p) << 24);
@@ -139,12 +129,6 @@ static void __init gen_tabs(void)
 
 		p = isb_tab[i];
 
-		t = p;
-		crypto_il_tab[0][i] = t;
-		crypto_il_tab[1][i] = rol32(t, 8);
-		crypto_il_tab[2][i] = rol32(t, 16);
-		crypto_il_tab[3][i] = rol32(t, 24);
-
 		t = ((u32) ff_mult(14, p)) |
 		    ((u32) ff_mult(9, p) << 8) |
 		    ((u32) ff_mult(13, p) << 16) |
@@ -173,10 +157,10 @@ static void __init gen_tabs(void)
 } while (0)
 
 #define ls_box(x)		\
-	crypto_fl_tab[0][byte(x, 0)] ^	\
-	crypto_fl_tab[1][byte(x, 1)] ^	\
-	crypto_fl_tab[2][byte(x, 2)] ^	\
-	crypto_fl_tab[3][byte(x, 3)]
+	(crypto_ft_tab[2][byte(x, 0)] & 0x000000ff) ^	\
+	(crypto_ft_tab[3][byte(x, 1)] & 0x0000ff00) ^	\
+	(crypto_ft_tab[0][byte(x, 2)] & 0x00ff0000) ^	\
+	(crypto_ft_tab[1][byte(x, 3)] & 0xff000000)
 
 #define loop4(i)	do {		\
 	t = ror32(t, 8);		\
@@ -303,11 +287,12 @@ EXPORT_SYMBOL_GPL(crypto_aes_set_key);
 	k += 4;			\
 } while (0)
 
-#define f_rl(bo, bi, n, k)	do {				\
-	bo[n] = crypto_fl_tab[0][byte(bi[n], 0)] ^			\
-		crypto_fl_tab[1][byte(bi[(n + 1) & 3], 1)] ^		\
-		crypto_fl_tab[2][byte(bi[(n + 2) & 3], 2)] ^		\
-		crypto_fl_tab[3][byte(bi[(n + 3) & 3], 3)] ^ *(k + n);	\
+#define f_rl(bo, bi, n, k)	do {					    \
+	bo[n] = (crypto_ft_tab[2][byte(bi[n], 0)] & 0x000000ff) ^ 	    \
+		(crypto_ft_tab[3][byte(bi[(n + 1) & 3], 1)] & 0x0000ff00) ^ \
+		(crypto_ft_tab[0][byte(bi[(n + 2) & 3], 2)] & 0x00ff0000) ^ \
+		(crypto_ft_tab[1][byte(bi[(n + 3) & 3], 3)] & 0xff000000) ^ \
+		*(k + n);						    \
 } while (0)
 
 #define f_lround(bo, bi, k)	do {\
@@ -375,11 +360,11 @@ static void aes_encrypt(struct crypto_tf
 	k += 4;			\
 } while (0)
 
-#define i_rl(bo, bi, n, k)	do {			\
-	bo[n] = crypto_il_tab[0][byte(bi[n], 0)] ^		\
-	crypto_il_tab[1][byte(bi[(n + 3) & 3], 1)] ^		\
-	crypto_il_tab[2][byte(bi[(n + 2) & 3], 2)] ^		\
-	crypto_il_tab[3][byte(bi[(n + 1) & 3], 3)] ^ *(k + n);	\
+#define i_rl(bo, bi, n, k)	do {					\
+	bo[n] = (u32)isb_tab[byte(bi[n], 0)] ^				\
+	((u32)isb_tab[byte(bi[(n + 3) & 3], 1)] << 8) ^			\
+	((u32)isb_tab[byte(bi[(n + 2) & 3], 2)] << 16) ^		\
+	((u32)isb_tab[byte(bi[(n + 1) & 3], 3)] << 24) ^ *(k + n);	\
 } while (0)
 
 #define i_lround(bo, bi, k)	do {\
--- a/include/crypto/aes.h
+++ b/include/crypto/aes.h
@@ -22,9 +22,7 @@ struct crypto_aes_ctx {
 };
 
 extern u32 crypto_ft_tab[4][256];
-extern u32 crypto_fl_tab[4][256];
 extern u32 crypto_it_tab[4][256];
-extern u32 crypto_il_tab[4][256];
 
 int crypto_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 		unsigned int key_len);


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH -mm crypto] AES: remove crypto_fl_tab and replace crypto_il_tab with isb_tab
  2008-03-11  4:42 [PATCH -mm crypto] AES: remove crypto_fl_tab and replace crypto_il_tab with isb_tab Huang, Ying
@ 2008-03-11  6:47 ` Huang, Ying
  2008-03-12  9:22 ` Sebastian Siewior
  2008-03-25 23:33 ` Sebastian Siewior
  2 siblings, 0 replies; 5+ messages in thread
From: Huang, Ying @ 2008-03-11  6:47 UTC (permalink / raw)
  To: Herbert Xu
  Cc: Adam J. Richter, Alexander Kjeldaas, linux-kernel, linux-crypto

On Tue, 2008-03-11 at 12:42 +0800, Huang, Ying wrote:
> Remove crypto_fl_tab from aes implementation. Because mix_col(1,n) = n,
> all information in crypto_fl_tab is in crypto_ft_tab too.
> crypto_il_tab is replaced by isb_tab, the byte shift is done
> during decryption.
> 
> These changes reduce the encryption cache footprint to 50% and
> decryption cache footprint to 53.1%. The code size is increased
> slightly. On my Intel CORE micro-architecture CPU, there is almost no
> performance penalty.

A similar optimization is used by OpenSSL too, so I think this is
reasonable.

Best Regards,
Huang Ying



^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH -mm crypto] AES: remove crypto_fl_tab and replace crypto_il_tab with isb_tab
  2008-03-11  4:42 [PATCH -mm crypto] AES: remove crypto_fl_tab and replace crypto_il_tab with isb_tab Huang, Ying
  2008-03-11  6:47 ` Huang, Ying
@ 2008-03-12  9:22 ` Sebastian Siewior
  2008-03-14  9:30   ` Huang, Ying
  2008-03-25 23:33 ` Sebastian Siewior
  2 siblings, 1 reply; 5+ messages in thread
From: Sebastian Siewior @ 2008-03-12  9:22 UTC (permalink / raw)
  To: Huang, Ying
  Cc: Herbert Xu, Adam J. Richter, Alexander Kjeldaas, linux-kernel,
	linux-crypto

* Huang, Ying | 2008-03-11 12:42:56 [+0800]:

>Remove crypto_fl_tab from aes implementation. Because mix_col(1,n) = n,
>all information in crypto_fl_tab is in crypto_ft_tab too.
>crypto_il_tab is replaced by isb_tab, the byte shift is done
>during decryption.
nice.

>This patch is only for generic C implementation. If this kind of idea
>is desired, I will implement the ASM version for x86.
Could you please run 
| modprobe tcrypt mode=200
with and without the patch?

Sebastian

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH -mm crypto] AES: remove crypto_fl_tab and replace crypto_il_tab with isb_tab
  2008-03-12  9:22 ` Sebastian Siewior
@ 2008-03-14  9:30   ` Huang, Ying
  0 siblings, 0 replies; 5+ messages in thread
From: Huang, Ying @ 2008-03-14  9:30 UTC (permalink / raw)
  To: Sebastian Siewior
  Cc: Herbert Xu, Adam J. Richter, Alexander Kjeldaas, linux-kernel,
	linux-crypto

[-- Attachment #1: Type: text/plain, Size: 616 bytes --]

On Wed, 2008-03-12 at 10:22 +0100, Sebastian Siewior wrote:
> * Huang, Ying | 2008-03-11 12:42:56 [+0800]:
> 
> >Remove crypto_fl_tab from aes implementation. Because mix_col(1,n) = n,
> >all information in crypto_fl_tab is in crypto_ft_tab too.
> >crypto_il_tab is replaced by isb_tab, the byte shift is done
> >during decryption.
> nice.
> 
> >This patch is only for generic C implementation. If this kind of idea
> >is desired, I will implement the ASM version for x86.
> Could you please run 
> | modprobe tcrypt mode=200
> with and without the patch?

The attached files contain the results.

Best Regards,
Huang Ying

[-- Attachment #2: tcrypt.without_patch --]
[-- Type: text/plain, Size: 9888 bytes --]


testing speed of ecb(aes) encryption
test 0 (128 bit key, 16 byte blocks): 1 operation in 599 cycles (16 bytes)
test 1 (128 bit key, 64 byte blocks): 1 operation in 1191 cycles (64 bytes)
test 2 (128 bit key, 256 byte blocks): 1 operation in 3909 cycles (256 bytes)
test 3 (128 bit key, 1024 byte blocks): 1 operation in 14758 cycles (1024 bytes)
test 4 (128 bit key, 8192 byte blocks): 1 operation in 116484 cycles (8192 bytes)
test 5 (192 bit key, 16 byte blocks): 1 operation in 546 cycles (16 bytes)
test 6 (192 bit key, 64 byte blocks): 1 operation in 1349 cycles (64 bytes)
test 7 (192 bit key, 256 byte blocks): 1 operation in 4550 cycles (256 bytes)
test 8 (192 bit key, 1024 byte blocks): 1 operation in 17349 cycles (1024 bytes)
test 9 (192 bit key, 8192 byte blocks): 1 operation in 138983 cycles (8192 bytes)
test 10 (256 bit key, 16 byte blocks): 1 operation in 599 cycles (16 bytes)
test 11 (256 bit key, 64 byte blocks): 1 operation in 1515 cycles (64 bytes)
test 12 (256 bit key, 256 byte blocks): 1 operation in 5221 cycles (256 bytes)
test 13 (256 bit key, 1024 byte blocks): 1 operation in 20040 cycles (1024 bytes)
test 14 (256 bit key, 8192 byte blocks): 1 operation in 158428 cycles (8192 bytes)

testing speed of ecb(aes) decryption
test 0 (128 bit key, 16 byte blocks): 1 operation in 627 cycles (16 bytes)
test 1 (128 bit key, 64 byte blocks): 1 operation in 1175 cycles (64 bytes)
test 2 (128 bit key, 256 byte blocks): 1 operation in 3854 cycles (256 bytes)
test 3 (128 bit key, 1024 byte blocks): 1 operation in 14590 cycles (1024 bytes)
test 4 (128 bit key, 8192 byte blocks): 1 operation in 115228 cycles (8192 bytes)
test 5 (192 bit key, 16 byte blocks): 1 operation in 544 cycles (16 bytes)
test 6 (192 bit key, 64 byte blocks): 1 operation in 1334 cycles (64 bytes)
test 7 (192 bit key, 256 byte blocks): 1 operation in 4496 cycles (256 bytes)
test 8 (192 bit key, 1024 byte blocks): 1 operation in 17144 cycles (1024 bytes)
test 9 (192 bit key, 8192 byte blocks): 1 operation in 137100 cycles (8192 bytes)
test 10 (256 bit key, 16 byte blocks): 1 operation in 589 cycles (16 bytes)
test 11 (256 bit key, 64 byte blocks): 1 operation in 1496 cycles (64 bytes)
test 12 (256 bit key, 256 byte blocks): 1 operation in 5144 cycles (256 bytes)
test 13 (256 bit key, 1024 byte blocks): 1 operation in 19735 cycles (1024 bytes)
test 14 (256 bit key, 8192 byte blocks): 1 operation in 156450 cycles (8192 bytes)

testing speed of cbc(aes) encryption
test 0 (128 bit key, 16 byte blocks): 1 operation in 654 cycles (16 bytes)
test 1 (128 bit key, 64 byte blocks): 1 operation in 1381 cycles (64 bytes)
test 2 (128 bit key, 256 byte blocks): 1 operation in 4309 cycles (256 bytes)
test 3 (128 bit key, 1024 byte blocks): 1 operation in 16019 cycles (1024 bytes)
test 4 (128 bit key, 8192 byte blocks): 1 operation in 126050 cycles (8192 bytes)
test 5 (192 bit key, 16 byte blocks): 1 operation in 689 cycles (16 bytes)
test 6 (192 bit key, 64 byte blocks): 1 operation in 1535 cycles (64 bytes)
test 7 (192 bit key, 256 byte blocks): 1 operation in 4945 cycles (256 bytes)
test 8 (192 bit key, 1024 byte blocks): 1 operation in 18602 cycles (1024 bytes)
test 9 (192 bit key, 8192 byte blocks): 1 operation in 147854 cycles (8192 bytes)
test 10 (256 bit key, 16 byte blocks): 1 operation in 724 cycles (16 bytes)
test 11 (256 bit key, 64 byte blocks): 1 operation in 1696 cycles (64 bytes)
test 12 (256 bit key, 256 byte blocks): 1 operation in 5604 cycles (256 bytes)
test 13 (256 bit key, 1024 byte blocks): 1 operation in 21218 cycles (1024 bytes)
test 14 (256 bit key, 8192 byte blocks): 1 operation in 167954 cycles (8192 bytes)

testing speed of cbc(aes) decryption
test 0 (128 bit key, 16 byte blocks): 1 operation in 814 cycles (16 bytes)
test 1 (128 bit key, 64 byte blocks): 1 operation in 1466 cycles (64 bytes)
test 2 (128 bit key, 256 byte blocks): 1 operation in 4378 cycles (256 bytes)
test 3 (128 bit key, 1024 byte blocks): 1 operation in 16025 cycles (1024 bytes)
test 4 (128 bit key, 8192 byte blocks): 1 operation in 126127 cycles (8192 bytes)
test 5 (192 bit key, 16 byte blocks): 1 operation in 769 cycles (16 bytes)
test 6 (192 bit key, 64 byte blocks): 1 operation in 1616 cycles (64 bytes)
test 7 (192 bit key, 256 byte blocks): 1 operation in 5012 cycles (256 bytes)
test 8 (192 bit key, 1024 byte blocks): 1 operation in 18579 cycles (1024 bytes)
test 9 (192 bit key, 8192 byte blocks): 1 operation in 147793 cycles (8192 bytes)
test 10 (256 bit key, 16 byte blocks): 1 operation in 812 cycles (16 bytes)
test 11 (256 bit key, 64 byte blocks): 1 operation in 1783 cycles (64 bytes)
test 12 (256 bit key, 256 byte blocks): 1 operation in 5654 cycles (256 bytes)
test 13 (256 bit key, 1024 byte blocks): 1 operation in 21182 cycles (1024 bytes)
test 14 (256 bit key, 8192 byte blocks): 1 operation in 167341 cycles (8192 bytes)

testing speed of lrw(aes) encryption
test 0 (256 bit key, 16 byte blocks): 1 operation in 802 cycles (16 bytes)
test 1 (256 bit key, 64 byte blocks): 1 operation in 1515 cycles (64 bytes)
test 2 (256 bit key, 256 byte blocks): 1 operation in 4550 cycles (256 bytes)
test 3 (256 bit key, 1024 byte blocks): 1 operation in 16583 cycles (1024 bytes)
test 4 (256 bit key, 8192 byte blocks): 1 operation in 130258 cycles (8192 bytes)
test 5 (320 bit key, 16 byte blocks): 1 operation in 780 cycles (16 bytes)
test 6 (320 bit key, 64 byte blocks): 1 operation in 1678 cycles (64 bytes)
test 7 (320 bit key, 256 byte blocks): 1 operation in 5194 cycles (256 bytes)
test 8 (320 bit key, 1024 byte blocks): 1 operation in 19213 cycles (1024 bytes)
test 9 (320 bit key, 8192 byte blocks): 1 operation in 152362 cycles (8192 bytes)
test 10 (384 bit key, 16 byte blocks): 1 operation in 812 cycles (16 bytes)
test 11 (384 bit key, 64 byte blocks): 1 operation in 1843 cycles (64 bytes)
test 12 (384 bit key, 256 byte blocks): 1 operation in 5860 cycles (256 bytes)
test 13 (384 bit key, 1024 byte blocks): 1 operation in 21879 cycles (1024 bytes)
test 14 (384 bit key, 8192 byte blocks): 1 operation in 172419 cycles (8192 bytes)

testing speed of lrw(aes) decryption
test 0 (256 bit key, 16 byte blocks): 1 operation in 799 cycles (16 bytes)
test 1 (256 bit key, 64 byte blocks): 1 operation in 1495 cycles (64 bytes)
test 2 (256 bit key, 256 byte blocks): 1 operation in 4482 cycles (256 bytes)
test 3 (256 bit key, 1024 byte blocks): 1 operation in 16413 cycles (1024 bytes)
test 4 (256 bit key, 8192 byte blocks): 1 operation in 128887 cycles (8192 bytes)
test 5 (320 bit key, 16 byte blocks): 1 operation in 771 cycles (16 bytes)
test 6 (320 bit key, 64 byte blocks): 1 operation in 1658 cycles (64 bytes)
test 7 (320 bit key, 256 byte blocks): 1 operation in 5136 cycles (256 bytes)
test 8 (320 bit key, 1024 byte blocks): 1 operation in 19011 cycles (1024 bytes)
test 9 (320 bit key, 8192 byte blocks): 1 operation in 150736 cycles (8192 bytes)
test 10 (384 bit key, 16 byte blocks): 1 operation in 814 cycles (16 bytes)
test 11 (384 bit key, 64 byte blocks): 1 operation in 1821 cycles (64 bytes)
test 12 (384 bit key, 256 byte blocks): 1 operation in 5784 cycles (256 bytes)
test 13 (384 bit key, 1024 byte blocks): 1 operation in 21629 cycles (1024 bytes)
test 14 (384 bit key, 8192 byte blocks): 1 operation in 170319 cycles (8192 bytes)

testing speed of xts(aes) encryption
test 0 (256 bit key, 16 byte blocks): 1 operation in 833 cycles (16 bytes)
test 1 (256 bit key, 64 byte blocks): 1 operation in 1509 cycles (64 bytes)
test 2 (256 bit key, 256 byte blocks): 1 operation in 4458 cycles (256 bytes)
test 3 (256 bit key, 1024 byte blocks): 1 operation in 16232 cycles (1024 bytes)
test 4 (256 bit key, 8192 byte blocks): 1 operation in 126195 cycles (8192 bytes)
test 5 (384 bit key, 16 byte blocks): 1 operation in 852 cycles (16 bytes)
test 6 (384 bit key, 64 byte blocks): 1 operation in 1708 cycles (64 bytes)
test 7 (384 bit key, 256 byte blocks): 1 operation in 5135 cycles (256 bytes)
test 8 (384 bit key, 1024 byte blocks): 1 operation in 18802 cycles (1024 bytes)
test 9 (384 bit key, 8192 byte blocks): 1 operation in 147053 cycles (8192 bytes)
test 10 (512 bit key, 16 byte blocks): 1 operation in 938 cycles (16 bytes)
test 11 (512 bit key, 64 byte blocks): 1 operation in 1924 cycles (64 bytes)
test 12 (512 bit key, 256 byte blocks): 1 operation in 5850 cycles (256 bytes)
test 13 (512 bit key, 1024 byte blocks): 1 operation in 21524 cycles (1024 bytes)
test 14 (512 bit key, 8192 byte blocks): 1 operation in 168333 cycles (8192 bytes)

testing speed of xts(aes) decryption
test 0 (256 bit key, 16 byte blocks): 1 operation in 855 cycles (16 bytes)
test 1 (256 bit key, 64 byte blocks): 1 operation in 1490 cycles (64 bytes)
test 2 (256 bit key, 256 byte blocks): 1 operation in 4379 cycles (256 bytes)
test 3 (256 bit key, 1024 byte blocks): 1 operation in 15931 cycles (1024 bytes)
test 4 (256 bit key, 8192 byte blocks): 1 operation in 124614 cycles (8192 bytes)
test 5 (384 bit key, 16 byte blocks): 1 operation in 847 cycles (16 bytes)
test 6 (384 bit key, 64 byte blocks): 1 operation in 1684 cycles (64 bytes)
test 7 (384 bit key, 256 byte blocks): 1 operation in 5062 cycles (256 bytes)
test 8 (384 bit key, 1024 byte blocks): 1 operation in 18574 cycles (1024 bytes)
test 9 (384 bit key, 8192 byte blocks): 1 operation in 145296 cycles (8192 bytes)
test 10 (512 bit key, 16 byte blocks): 1 operation in 932 cycles (16 bytes)
test 11 (512 bit key, 64 byte blocks): 1 operation in 1892 cycles (64 bytes)
test 12 (512 bit key, 256 byte blocks): 1 operation in 5756 cycles (256 bytes)
test 13 (512 bit key, 1024 byte blocks): 1 operation in 21227 cycles (1024 bytes)
test 14 (512 bit key, 8192 byte blocks): 1 operation in 166316 cycles (8192 bytes)

[-- Attachment #3: tcrypt.with_patch --]
[-- Type: text/plain, Size: 9888 bytes --]


testing speed of ecb(aes) encryption
test 0 (128 bit key, 16 byte blocks): 1 operation in 528 cycles (16 bytes)
test 1 (128 bit key, 64 byte blocks): 1 operation in 1213 cycles (64 bytes)
test 2 (128 bit key, 256 byte blocks): 1 operation in 3984 cycles (256 bytes)
test 3 (128 bit key, 1024 byte blocks): 1 operation in 15060 cycles (1024 bytes)
test 4 (128 bit key, 8192 byte blocks): 1 operation in 118884 cycles (8192 bytes)
test 5 (192 bit key, 16 byte blocks): 1 operation in 558 cycles (16 bytes)
test 6 (192 bit key, 64 byte blocks): 1 operation in 1362 cycles (64 bytes)
test 7 (192 bit key, 256 byte blocks): 1 operation in 4618 cycles (256 bytes)
test 8 (192 bit key, 1024 byte blocks): 1 operation in 17634 cycles (1024 bytes)
test 9 (192 bit key, 8192 byte blocks): 1 operation in 141172 cycles (8192 bytes)
test 10 (256 bit key, 16 byte blocks): 1 operation in 601 cycles (16 bytes)
test 11 (256 bit key, 64 byte blocks): 1 operation in 1535 cycles (64 bytes)
test 12 (256 bit key, 256 byte blocks): 1 operation in 5263 cycles (256 bytes)
test 13 (256 bit key, 1024 byte blocks): 1 operation in 20236 cycles (1024 bytes)
test 14 (256 bit key, 8192 byte blocks): 1 operation in 160585 cycles (8192 bytes)

testing speed of ecb(aes) decryption
test 0 (128 bit key, 16 byte blocks): 1 operation in 521 cycles (16 bytes)
test 1 (128 bit key, 64 byte blocks): 1 operation in 1191 cycles (64 bytes)
test 2 (128 bit key, 256 byte blocks): 1 operation in 3895 cycles (256 bytes)
test 3 (128 bit key, 1024 byte blocks): 1 operation in 14735 cycles (1024 bytes)
test 4 (128 bit key, 8192 byte blocks): 1 operation in 116355 cycles (8192 bytes)
test 5 (192 bit key, 16 byte blocks): 1 operation in 554 cycles (16 bytes)
test 6 (192 bit key, 64 byte blocks): 1 operation in 1352 cycles (64 bytes)
test 7 (192 bit key, 256 byte blocks): 1 operation in 4542 cycles (256 bytes)
test 8 (192 bit key, 1024 byte blocks): 1 operation in 17295 cycles (1024 bytes)
test 9 (192 bit key, 8192 byte blocks): 1 operation in 138497 cycles (8192 bytes)
test 10 (256 bit key, 16 byte blocks): 1 operation in 594 cycles (16 bytes)
test 11 (256 bit key, 64 byte blocks): 1 operation in 1507 cycles (64 bytes)
test 12 (256 bit key, 256 byte blocks): 1 operation in 5178 cycles (256 bytes)
test 13 (256 bit key, 1024 byte blocks): 1 operation in 19855 cycles (1024 bytes)
test 14 (256 bit key, 8192 byte blocks): 1 operation in 157464 cycles (8192 bytes)

testing speed of cbc(aes) encryption
test 0 (128 bit key, 16 byte blocks): 1 operation in 657 cycles (16 bytes)
test 1 (128 bit key, 64 byte blocks): 1 operation in 1396 cycles (64 bytes)
test 2 (128 bit key, 256 byte blocks): 1 operation in 4406 cycles (256 bytes)
test 3 (128 bit key, 1024 byte blocks): 1 operation in 16393 cycles (1024 bytes)
test 4 (128 bit key, 8192 byte blocks): 1 operation in 129278 cycles (8192 bytes)
test 5 (192 bit key, 16 byte blocks): 1 operation in 691 cycles (16 bytes)
test 6 (192 bit key, 64 byte blocks): 1 operation in 1641 cycles (64 bytes)
test 7 (192 bit key, 256 byte blocks): 1 operation in 5187 cycles (256 bytes)
test 8 (192 bit key, 1024 byte blocks): 1 operation in 19155 cycles (1024 bytes)
test 9 (192 bit key, 8192 byte blocks): 1 operation in 151310 cycles (8192 bytes)
test 10 (256 bit key, 16 byte blocks): 1 operation in 736 cycles (16 bytes)
test 11 (256 bit key, 64 byte blocks): 1 operation in 1794 cycles (64 bytes)
test 12 (256 bit key, 256 byte blocks): 1 operation in 5819 cycles (256 bytes)
test 13 (256 bit key, 1024 byte blocks): 1 operation in 21769 cycles (1024 bytes)
test 14 (256 bit key, 8192 byte blocks): 1 operation in 170831 cycles (8192 bytes)

testing speed of cbc(aes) decryption
test 0 (128 bit key, 16 byte blocks): 1 operation in 738 cycles (16 bytes)
test 1 (128 bit key, 64 byte blocks): 1 operation in 1475 cycles (64 bytes)
test 2 (128 bit key, 256 byte blocks): 1 operation in 4421 cycles (256 bytes)
test 3 (128 bit key, 1024 byte blocks): 1 operation in 16220 cycles (1024 bytes)
test 4 (128 bit key, 8192 byte blocks): 1 operation in 127525 cycles (8192 bytes)
test 5 (192 bit key, 16 byte blocks): 1 operation in 771 cycles (16 bytes)
test 6 (192 bit key, 64 byte blocks): 1 operation in 1626 cycles (64 bytes)
test 7 (192 bit key, 256 byte blocks): 1 operation in 5045 cycles (256 bytes)
test 8 (192 bit key, 1024 byte blocks): 1 operation in 18753 cycles (1024 bytes)
test 9 (192 bit key, 8192 byte blocks): 1 operation in 149204 cycles (8192 bytes)
test 10 (256 bit key, 16 byte blocks): 1 operation in 809 cycles (16 bytes)
test 11 (256 bit key, 64 byte blocks): 1 operation in 1785 cycles (64 bytes)
test 12 (256 bit key, 256 byte blocks): 1 operation in 5691 cycles (256 bytes)
test 13 (256 bit key, 1024 byte blocks): 1 operation in 21275 cycles (1024 bytes)
test 14 (256 bit key, 8192 byte blocks): 1 operation in 168499 cycles (8192 bytes)

testing speed of lrw(aes) encryption
test 0 (256 bit key, 16 byte blocks): 1 operation in 745 cycles (16 bytes)
test 1 (256 bit key, 64 byte blocks): 1 operation in 1547 cycles (64 bytes)
test 2 (256 bit key, 256 byte blocks): 1 operation in 4614 cycles (256 bytes)
test 3 (256 bit key, 1024 byte blocks): 1 operation in 16885 cycles (1024 bytes)
test 4 (256 bit key, 8192 byte blocks): 1 operation in 132411 cycles (8192 bytes)
test 5 (320 bit key, 16 byte blocks): 1 operation in 778 cycles (16 bytes)
test 6 (320 bit key, 64 byte blocks): 1 operation in 1689 cycles (64 bytes)
test 7 (320 bit key, 256 byte blocks): 1 operation in 5260 cycles (256 bytes)
test 8 (320 bit key, 1024 byte blocks): 1 operation in 19462 cycles (1024 bytes)
test 9 (320 bit key, 8192 byte blocks): 1 operation in 154419 cycles (8192 bytes)
test 10 (384 bit key, 16 byte blocks): 1 operation in 816 cycles (16 bytes)
test 11 (384 bit key, 64 byte blocks): 1 operation in 1856 cycles (64 bytes)
test 12 (384 bit key, 256 byte blocks): 1 operation in 5916 cycles (256 bytes)
test 13 (384 bit key, 1024 byte blocks): 1 operation in 22159 cycles (1024 bytes)
test 14 (384 bit key, 8192 byte blocks): 1 operation in 174627 cycles (8192 bytes)

testing speed of lrw(aes) decryption
test 0 (256 bit key, 16 byte blocks): 1 operation in 740 cycles (16 bytes)
test 1 (256 bit key, 64 byte blocks): 1 operation in 1502 cycles (64 bytes)
test 2 (256 bit key, 256 byte blocks): 1 operation in 4516 cycles (256 bytes)
test 3 (256 bit key, 1024 byte blocks): 1 operation in 16549 cycles (1024 bytes)
test 4 (256 bit key, 8192 byte blocks): 1 operation in 129912 cycles (8192 bytes)
test 5 (320 bit key, 16 byte blocks): 1 operation in 774 cycles (16 bytes)
test 6 (320 bit key, 64 byte blocks): 1 operation in 1659 cycles (64 bytes)
test 7 (320 bit key, 256 byte blocks): 1 operation in 5175 cycles (256 bytes)
test 8 (320 bit key, 1024 byte blocks): 1 operation in 19139 cycles (1024 bytes)
test 9 (320 bit key, 8192 byte blocks): 1 operation in 151969 cycles (8192 bytes)
test 10 (384 bit key, 16 byte blocks): 1 operation in 809 cycles (16 bytes)
test 11 (384 bit key, 64 byte blocks): 1 operation in 1821 cycles (64 bytes)
test 12 (384 bit key, 256 byte blocks): 1 operation in 5817 cycles (256 bytes)
test 13 (384 bit key, 1024 byte blocks): 1 operation in 21775 cycles (1024 bytes)
test 14 (384 bit key, 8192 byte blocks): 1 operation in 171423 cycles (8192 bytes)

testing speed of xts(aes) encryption
test 0 (256 bit key, 16 byte blocks): 1 operation in 788 cycles (16 bytes)
test 1 (256 bit key, 64 byte blocks): 1 operation in 1537 cycles (64 bytes)
test 2 (256 bit key, 256 byte blocks): 1 operation in 4526 cycles (256 bytes)
test 3 (256 bit key, 1024 byte blocks): 1 operation in 16521 cycles (1024 bytes)
test 4 (256 bit key, 8192 byte blocks): 1 operation in 128545 cycles (8192 bytes)
test 5 (384 bit key, 16 byte blocks): 1 operation in 866 cycles (16 bytes)
test 6 (384 bit key, 64 byte blocks): 1 operation in 1740 cycles (64 bytes)
test 7 (384 bit key, 256 byte blocks): 1 operation in 5183 cycles (256 bytes)
test 8 (384 bit key, 1024 byte blocks): 1 operation in 19048 cycles (1024 bytes)
test 9 (384 bit key, 8192 byte blocks): 1 operation in 149163 cycles (8192 bytes)
test 10 (512 bit key, 16 byte blocks): 1 operation in 945 cycles (16 bytes)
test 11 (512 bit key, 64 byte blocks): 1 operation in 1931 cycles (64 bytes)
test 12 (512 bit key, 256 byte blocks): 1 operation in 5903 cycles (256 bytes)
test 13 (512 bit key, 1024 byte blocks): 1 operation in 21744 cycles (1024 bytes)
test 14 (512 bit key, 8192 byte blocks): 1 operation in 170528 cycles (8192 bytes)

testing speed of xts(aes) decryption
test 0 (256 bit key, 16 byte blocks): 1 operation in 783 cycles (16 bytes)
test 1 (256 bit key, 64 byte blocks): 1 operation in 1507 cycles (64 bytes)
test 2 (256 bit key, 256 byte blocks): 1 operation in 4436 cycles (256 bytes)
test 3 (256 bit key, 1024 byte blocks): 1 operation in 16192 cycles (1024 bytes)
test 4 (256 bit key, 8192 byte blocks): 1 operation in 125962 cycles (8192 bytes)
test 5 (384 bit key, 16 byte blocks): 1 operation in 853 cycles (16 bytes)
test 6 (384 bit key, 64 byte blocks): 1 operation in 1701 cycles (64 bytes)
test 7 (384 bit key, 256 byte blocks): 1 operation in 5120 cycles (256 bytes)
test 8 (384 bit key, 1024 byte blocks): 1 operation in 18779 cycles (1024 bytes)
test 9 (384 bit key, 8192 byte blocks): 1 operation in 146792 cycles (8192 bytes)
test 10 (512 bit key, 16 byte blocks): 1 operation in 937 cycles (16 bytes)
test 11 (512 bit key, 64 byte blocks): 1 operation in 1906 cycles (64 bytes)
test 12 (512 bit key, 256 byte blocks): 1 operation in 5804 cycles (256 bytes)
test 13 (512 bit key, 1024 byte blocks): 1 operation in 21413 cycles (1024 bytes)
test 14 (512 bit key, 8192 byte blocks): 1 operation in 167733 cycles (8192 bytes)

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH -mm crypto] AES: remove crypto_fl_tab and replace crypto_il_tab with isb_tab
  2008-03-11  4:42 [PATCH -mm crypto] AES: remove crypto_fl_tab and replace crypto_il_tab with isb_tab Huang, Ying
  2008-03-11  6:47 ` Huang, Ying
  2008-03-12  9:22 ` Sebastian Siewior
@ 2008-03-25 23:33 ` Sebastian Siewior
  2 siblings, 0 replies; 5+ messages in thread
From: Sebastian Siewior @ 2008-03-25 23:33 UTC (permalink / raw)
  To: Huang, Ying
  Cc: Herbert Xu, Adam J. Richter, Alexander Kjeldaas, linux-kernel,
	linux-crypto

* Huang, Ying | 2008-03-11 12:42:56 [+0800]:

>These changes reduce the encryption cache footprint to 50% and
>decryption cache footprint to 53.1%. The code size is increased
>slightly. On my Intel CORE micro-architecture CPU, there is almost no
>performance penalty.
According to the tcrypt numbers you've posted it is getting "slightly"
slower on encryption/decryption of 64+ bytes. How did you measure your
"almost no performance penalty"?

 Sebastian

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2008-03-25 23:34 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-03-11  4:42 [PATCH -mm crypto] AES: remove crypto_fl_tab and replace crypto_il_tab with isb_tab Huang, Ying
2008-03-11  6:47 ` Huang, Ying
2008-03-12  9:22 ` Sebastian Siewior
2008-03-14  9:30   ` Huang, Ying
2008-03-25 23:33 ` Sebastian Siewior

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).