summaryrefslogtreecommitdiff
path: root/arch/sparc
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2012-12-19 15:19:11 -0800
committerDavid S. Miller <davem@davemloft.net>2012-12-19 15:19:11 -0800
commit9f28ffc03e93343ac04874fda9edb7affea45165 (patch)
tree5d65df3dacd78570469a5c1183359595015fdd9b /arch/sparc
parent4a9d1946b0135b15d901d7e7c9796d36f352aaea (diff)
downloadlwn-9f28ffc03e93343ac04874fda9edb7affea45165.tar.gz
lwn-9f28ffc03e93343ac04874fda9edb7affea45165.zip
sparc64: Fix unrolled AES 256-bit key loops.
The basic scheme of the block mode assembler is that we start by enabling the FPU, loading the key into the floating point registers, then iterate calling the encrypt/decrypt routine for each block. For the 256-bit key cases, we run short on registers in the unrolled loops. So the {ENCRYPT,DECRYPT}_256_2() macros reload the key registers that get clobbered. The unrolled macros, {ENCRYPT,DECRYPT}_256(), are not mindful of this. So if we have a mix of multi-block and single-block calls, the single-block unrolled 256-bit encrypt/decrypt can run with some of the key registers clobbered. Handle this by always explicitly loading those registers before using the non-unrolled 256-bit macro. This was discovered thanks to all of the new test cases added by Jussi Kivilinna. Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'arch/sparc')
-rw-r--r--arch/sparc/crypto/aes_asm.S20
1 files changed, 14 insertions, 6 deletions
diff --git a/arch/sparc/crypto/aes_asm.S b/arch/sparc/crypto/aes_asm.S
index 23f6cbb910d3..1cda8aa7cb85 100644
--- a/arch/sparc/crypto/aes_asm.S
+++ b/arch/sparc/crypto/aes_asm.S
@@ -1024,7 +1024,11 @@ ENTRY(aes_sparc64_ecb_encrypt_256)
add %o2, 0x20, %o2
brlz,pt %o3, 11f
nop
-10: ldx [%o1 + 0x00], %g3
+10: ldd [%o0 + 0xd0], %f56
+ ldd [%o0 + 0xd8], %f58
+ ldd [%o0 + 0xe0], %f60
+ ldd [%o0 + 0xe8], %f62
+ ldx [%o1 + 0x00], %g3
ldx [%o1 + 0x08], %g7
xor %g1, %g3, %g3
xor %g2, %g7, %g7
@@ -1128,9 +1132,9 @@ ENTRY(aes_sparc64_ecb_decrypt_256)
/* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */
ldx [%o0 - 0x10], %g1
subcc %o3, 0x10, %o3
+ ldx [%o0 - 0x08], %g2
be 10f
- ldx [%o0 - 0x08], %g2
- sub %o0, 0xf0, %o0
+ sub %o0, 0xf0, %o0
1: ldx [%o1 + 0x00], %g3
ldx [%o1 + 0x08], %g7
ldx [%o1 + 0x10], %o4
@@ -1154,7 +1158,11 @@ ENTRY(aes_sparc64_ecb_decrypt_256)
add %o2, 0x20, %o2
brlz,pt %o3, 11f
nop
-10: ldx [%o1 + 0x00], %g3
+10: ldd [%o0 + 0x18], %f56
+ ldd [%o0 + 0x10], %f58
+ ldd [%o0 + 0x08], %f60
+ ldd [%o0 + 0x00], %f62
+ ldx [%o1 + 0x00], %g3
ldx [%o1 + 0x08], %g7
xor %g1, %g3, %g3
xor %g2, %g7, %g7
@@ -1511,11 +1519,11 @@ ENTRY(aes_sparc64_ctr_crypt_256)
add %o2, 0x20, %o2
brlz,pt %o3, 11f
nop
- ldd [%o0 + 0xd0], %f56
+10: ldd [%o0 + 0xd0], %f56
ldd [%o0 + 0xd8], %f58
ldd [%o0 + 0xe0], %f60
ldd [%o0 + 0xe8], %f62
-10: xor %g1, %g3, %o5
+ xor %g1, %g3, %o5
MOVXTOD_O5_F0
xor %g2, %g7, %o5
MOVXTOD_O5_F2