From f421258d5bf883b68b1cdaa299a8a1da3eb92e0f Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Sun, 21 Dec 2014 22:53:58 +0200 Subject: MIPS: OCTEON: add crypto helper functions Add crypto helper functions which are needed for kernel level usage. The code for these has been extracted from the EdgeRouter Pro GPL tarball. While at it, also delete duplicate definitions of the functions. Signed-off-by: Aaro Koskinen Signed-off-by: Herbert Xu --- arch/mips/cavium-octeon/Makefile | 1 + arch/mips/cavium-octeon/crypto/Makefile | 5 ++ arch/mips/cavium-octeon/crypto/octeon-crypto.c | 66 ++++++++++++++++++++++++++ arch/mips/cavium-octeon/crypto/octeon-crypto.h | 17 +++++++ arch/mips/include/asm/octeon/octeon.h | 5 -- 5 files changed, 89 insertions(+), 5 deletions(-) create mode 100644 arch/mips/cavium-octeon/crypto/Makefile create mode 100644 arch/mips/cavium-octeon/crypto/octeon-crypto.c create mode 100644 arch/mips/cavium-octeon/crypto/octeon-crypto.h (limited to 'arch') diff --git a/arch/mips/cavium-octeon/Makefile b/arch/mips/cavium-octeon/Makefile index 42f5f1a4b40a..69a8a8dabc2b 100644 --- a/arch/mips/cavium-octeon/Makefile +++ b/arch/mips/cavium-octeon/Makefile @@ -16,6 +16,7 @@ obj-y := cpu.o setup.o octeon-platform.o octeon-irq.o csrc-octeon.o obj-y += dma-octeon.o obj-y += octeon-memcpy.o obj-y += executive/ +obj-y += crypto/ obj-$(CONFIG_MTD) += flash_setup.o obj-$(CONFIG_SMP) += smp.o diff --git a/arch/mips/cavium-octeon/crypto/Makefile b/arch/mips/cavium-octeon/crypto/Makefile new file mode 100644 index 000000000000..739b80366c25 --- /dev/null +++ b/arch/mips/cavium-octeon/crypto/Makefile @@ -0,0 +1,5 @@ +# +# OCTEON-specific crypto modules. +# + +obj-y += octeon-crypto.o diff --git a/arch/mips/cavium-octeon/crypto/octeon-crypto.c b/arch/mips/cavium-octeon/crypto/octeon-crypto.c new file mode 100644 index 000000000000..7c82ff463b65 --- /dev/null +++ b/arch/mips/cavium-octeon/crypto/octeon-crypto.c @@ -0,0 +1,66 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2004-2012 Cavium Networks + */ + +#include +#include +#include + +#include "octeon-crypto.h" + +/** + * Enable access to Octeon's COP2 crypto hardware for kernel use. Wrap any + * crypto operations in calls to octeon_crypto_enable/disable in order to make + * sure the state of COP2 isn't corrupted if userspace is also performing + * hardware crypto operations. Allocate the state parameter on the stack. + * Preemption must be disabled to prevent context switches. + * + * @state: Pointer to state structure to store current COP2 state in. + * + * Returns: Flags to be passed to octeon_crypto_disable() + */ +unsigned long octeon_crypto_enable(struct octeon_cop2_state *state) +{ + int status; + unsigned long flags; + + local_irq_save(flags); + status = read_c0_status(); + write_c0_status(status | ST0_CU2); + if (KSTK_STATUS(current) & ST0_CU2) { + octeon_cop2_save(&(current->thread.cp2)); + KSTK_STATUS(current) &= ~ST0_CU2; + status &= ~ST0_CU2; + } else if (status & ST0_CU2) { + octeon_cop2_save(state); + } + local_irq_restore(flags); + return status & ST0_CU2; +} +EXPORT_SYMBOL_GPL(octeon_crypto_enable); + +/** + * Disable access to Octeon's COP2 crypto hardware in the kernel. This must be + * called after an octeon_crypto_enable() before any context switch or return to + * userspace. + * + * @state: Pointer to COP2 state to restore + * @flags: Return value from octeon_crypto_enable() + */ +void octeon_crypto_disable(struct octeon_cop2_state *state, + unsigned long crypto_flags) +{ + unsigned long flags; + + local_irq_save(flags); + if (crypto_flags & ST0_CU2) + octeon_cop2_restore(state); + else + write_c0_status(read_c0_status() & ~ST0_CU2); + local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(octeon_crypto_disable); diff --git a/arch/mips/cavium-octeon/crypto/octeon-crypto.h b/arch/mips/cavium-octeon/crypto/octeon-crypto.h new file mode 100644 index 000000000000..5ca86d4ead52 --- /dev/null +++ b/arch/mips/cavium-octeon/crypto/octeon-crypto.h @@ -0,0 +1,17 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2012-2013 Cavium Inc., All Rights Reserved. + */ +#ifndef __LINUX_OCTEON_CRYPTO_H +#define __LINUX_OCTEON_CRYPTO_H + +#include + +extern unsigned long octeon_crypto_enable(struct octeon_cop2_state *state); +extern void octeon_crypto_disable(struct octeon_cop2_state *state, + unsigned long flags); + +#endif /* __LINUX_OCTEON_CRYPTO_H */ diff --git a/arch/mips/include/asm/octeon/octeon.h b/arch/mips/include/asm/octeon/octeon.h index d781f9e66884..6dfefd2d5cdf 100644 --- a/arch/mips/include/asm/octeon/octeon.h +++ b/arch/mips/include/asm/octeon/octeon.h @@ -44,11 +44,6 @@ extern int octeon_get_boot_num_arguments(void); extern const char *octeon_get_boot_argument(int arg); extern void octeon_hal_setup_reserved32(void); extern void octeon_user_io_init(void); -struct octeon_cop2_state; -extern unsigned long octeon_crypto_enable(struct octeon_cop2_state *state); -extern void octeon_crypto_disable(struct octeon_cop2_state *state, - unsigned long flags); -extern asmlinkage void octeon_cop2_restore(struct octeon_cop2_state *task); extern void octeon_init_cvmcount(void); extern void octeon_setup_delays(void); -- cgit v1.2.3 From 1e585ef51c3fdcf296d2ce5cb8c9e74b1a36cfb4 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Sun, 21 Dec 2014 22:53:59 +0200 Subject: crypto: octeon - add instruction definitions for MD5 Add instruction definitions for MD5. Based on information extracted from EdgeRouter Pro GPL source tarball. Signed-off-by: Aaro Koskinen Signed-off-by: Herbert Xu --- arch/mips/cavium-octeon/crypto/octeon-crypto.h | 56 ++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) (limited to 'arch') diff --git a/arch/mips/cavium-octeon/crypto/octeon-crypto.h b/arch/mips/cavium-octeon/crypto/octeon-crypto.h index 5ca86d4ead52..3f65bc6ccb83 100644 --- a/arch/mips/cavium-octeon/crypto/octeon-crypto.h +++ b/arch/mips/cavium-octeon/crypto/octeon-crypto.h @@ -4,14 +4,70 @@ * for more details. * * Copyright (C) 2012-2013 Cavium Inc., All Rights Reserved. + * + * MD5 instruction definitions added by Aaro Koskinen . + * */ #ifndef __LINUX_OCTEON_CRYPTO_H #define __LINUX_OCTEON_CRYPTO_H #include +#include extern unsigned long octeon_crypto_enable(struct octeon_cop2_state *state); extern void octeon_crypto_disable(struct octeon_cop2_state *state, unsigned long flags); +/* + * Macros needed to implement MD5: + */ + +/* + * The index can be 0-1. + */ +#define write_octeon_64bit_hash_dword(value, index) \ +do { \ + __asm__ __volatile__ ( \ + "dmtc2 %[rt],0x0048+" STR(index) \ + : \ + : [rt] "d" (value)); \ +} while (0) + +/* + * The index can be 0-1. + */ +#define read_octeon_64bit_hash_dword(index) \ +({ \ + u64 __value; \ + \ + __asm__ __volatile__ ( \ + "dmfc2 %[rt],0x0048+" STR(index) \ + : [rt] "=d" (__value) \ + : ); \ + \ + __value; \ +}) + +/* + * The index can be 0-6. + */ +#define write_octeon_64bit_block_dword(value, index) \ +do { \ + __asm__ __volatile__ ( \ + "dmtc2 %[rt],0x0040+" STR(index) \ + : \ + : [rt] "d" (value)); \ +} while (0) + +/* + * The value is the final block dword (64-bit). + */ +#define octeon_md5_start(value) \ +do { \ + __asm__ __volatile__ ( \ + "dmtc2 %[rt],0x4047" \ + : \ + : [rt] "d" (value)); \ +} while (0) + #endif /* __LINUX_OCTEON_CRYPTO_H */ -- cgit v1.2.3 From 011f3c6cbb97859860e451f2a75767cd4c1ffc03 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Sun, 21 Dec 2014 22:54:00 +0200 Subject: MIPS: OCTEON: reintroduce crypto features check Reintroduce run-time check for crypto features. The old one was deleted because it was unreliable, now decide the crypto availability on early boot when the model string is constructed. Signed-off-by: Aaro Koskinen Signed-off-by: Herbert Xu --- arch/mips/cavium-octeon/executive/octeon-model.c | 6 ++++++ arch/mips/include/asm/octeon/octeon-feature.h | 17 +++++++++++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/mips/cavium-octeon/executive/octeon-model.c b/arch/mips/cavium-octeon/executive/octeon-model.c index e15b049b3bd7..b2104bd9ab3b 100644 --- a/arch/mips/cavium-octeon/executive/octeon-model.c +++ b/arch/mips/cavium-octeon/executive/octeon-model.c @@ -27,6 +27,9 @@ #include +enum octeon_feature_bits __octeon_feature_bits __read_mostly; +EXPORT_SYMBOL_GPL(__octeon_feature_bits); + /** * Read a byte of fuse data * @byte_addr: address to read @@ -103,6 +106,9 @@ static const char *__init octeon_model_get_string_buffer(uint32_t chip_id, else suffix = "NSP"; + if (!fus_dat2.s.nocrypto) + __octeon_feature_bits |= OCTEON_HAS_CRYPTO; + /* * Assume pass number is encoded using <5:3><2:0>. Exceptions * will be fixed later. diff --git a/arch/mips/include/asm/octeon/octeon-feature.h b/arch/mips/include/asm/octeon/octeon-feature.h index c4fe81f47f53..8ebd3f579b84 100644 --- a/arch/mips/include/asm/octeon/octeon-feature.h +++ b/arch/mips/include/asm/octeon/octeon-feature.h @@ -46,8 +46,6 @@ enum octeon_feature { OCTEON_FEATURE_SAAD, /* Does this Octeon support the ZIP offload engine? */ OCTEON_FEATURE_ZIP, - /* Does this Octeon support crypto acceleration using COP2? */ - OCTEON_FEATURE_CRYPTO, OCTEON_FEATURE_DORM_CRYPTO, /* Does this Octeon support PCI express? */ OCTEON_FEATURE_PCIE, @@ -86,6 +84,21 @@ enum octeon_feature { OCTEON_MAX_FEATURE }; +enum octeon_feature_bits { + OCTEON_HAS_CRYPTO = 0x0001, /* Crypto acceleration using COP2 */ +}; +extern enum octeon_feature_bits __octeon_feature_bits; + +/** + * octeon_has_crypto() - Check if this OCTEON has crypto acceleration support. + * + * Returns: Non-zero if the feature exists. Zero if the feature does not exist. + */ +static inline int octeon_has_crypto(void) +{ + return __octeon_feature_bits & OCTEON_HAS_CRYPTO; +} + /** * Determine if the current Octeon supports a specific feature. These * checks have been optimized to be fairly quick, but they should still -- cgit v1.2.3 From 1953c22f53747035b28c36dbb337ac3c10902644 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Sun, 21 Dec 2014 22:54:01 +0200 Subject: crypto: octeon - add MD5 module Add OCTEON MD5 module. Signed-off-by: Aaro Koskinen Signed-off-by: Herbert Xu --- arch/mips/cavium-octeon/crypto/Makefile | 2 + arch/mips/cavium-octeon/crypto/octeon-crypto.h | 2 + arch/mips/cavium-octeon/crypto/octeon-md5.c | 216 +++++++++++++++++++++++++ 3 files changed, 220 insertions(+) create mode 100644 arch/mips/cavium-octeon/crypto/octeon-md5.c (limited to 'arch') diff --git a/arch/mips/cavium-octeon/crypto/Makefile b/arch/mips/cavium-octeon/crypto/Makefile index 739b80366c25..a74f76d85a2f 100644 --- a/arch/mips/cavium-octeon/crypto/Makefile +++ b/arch/mips/cavium-octeon/crypto/Makefile @@ -3,3 +3,5 @@ # obj-y += octeon-crypto.o + +obj-$(CONFIG_CRYPTO_MD5_OCTEON) += octeon-md5.o diff --git a/arch/mips/cavium-octeon/crypto/octeon-crypto.h b/arch/mips/cavium-octeon/crypto/octeon-crypto.h index 3f65bc6ccb83..e2a4aece9c24 100644 --- a/arch/mips/cavium-octeon/crypto/octeon-crypto.h +++ b/arch/mips/cavium-octeon/crypto/octeon-crypto.h @@ -14,6 +14,8 @@ #include #include +#define OCTEON_CR_OPCODE_PRIORITY 300 + extern unsigned long octeon_crypto_enable(struct octeon_cop2_state *state); extern void octeon_crypto_disable(struct octeon_cop2_state *state, unsigned long flags); diff --git a/arch/mips/cavium-octeon/crypto/octeon-md5.c b/arch/mips/cavium-octeon/crypto/octeon-md5.c new file mode 100644 index 000000000000..b909881ba6c1 --- /dev/null +++ b/arch/mips/cavium-octeon/crypto/octeon-md5.c @@ -0,0 +1,216 @@ +/* + * Cryptographic API. + * + * MD5 Message Digest Algorithm (RFC1321). + * + * Adapted for OCTEON by Aaro Koskinen . + * + * Based on crypto/md5.c, which is: + * + * Derived from cryptoapi implementation, originally based on the + * public domain implementation written by Colin Plumb in 1993. + * + * Copyright (c) Cryptoapi developers. + * Copyright (c) 2002 James Morris + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "octeon-crypto.h" + +/* + * We pass everything as 64-bit. OCTEON can handle misaligned data. + */ + +static void octeon_md5_store_hash(struct md5_state *ctx) +{ + u64 *hash = (u64 *)ctx->hash; + + write_octeon_64bit_hash_dword(hash[0], 0); + write_octeon_64bit_hash_dword(hash[1], 1); +} + +static void octeon_md5_read_hash(struct md5_state *ctx) +{ + u64 *hash = (u64 *)ctx->hash; + + hash[0] = read_octeon_64bit_hash_dword(0); + hash[1] = read_octeon_64bit_hash_dword(1); +} + +static void octeon_md5_transform(const void *_block) +{ + const u64 *block = _block; + + write_octeon_64bit_block_dword(block[0], 0); + write_octeon_64bit_block_dword(block[1], 1); + write_octeon_64bit_block_dword(block[2], 2); + write_octeon_64bit_block_dword(block[3], 3); + write_octeon_64bit_block_dword(block[4], 4); + write_octeon_64bit_block_dword(block[5], 5); + write_octeon_64bit_block_dword(block[6], 6); + octeon_md5_start(block[7]); +} + +static int octeon_md5_init(struct shash_desc *desc) +{ + struct md5_state *mctx = shash_desc_ctx(desc); + + mctx->hash[0] = cpu_to_le32(0x67452301); + mctx->hash[1] = cpu_to_le32(0xefcdab89); + mctx->hash[2] = cpu_to_le32(0x98badcfe); + mctx->hash[3] = cpu_to_le32(0x10325476); + mctx->byte_count = 0; + + return 0; +} + +static int octeon_md5_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + struct md5_state *mctx = shash_desc_ctx(desc); + const u32 avail = sizeof(mctx->block) - (mctx->byte_count & 0x3f); + struct octeon_cop2_state state; + unsigned long flags; + + mctx->byte_count += len; + + if (avail > len) { + memcpy((char *)mctx->block + (sizeof(mctx->block) - avail), + data, len); + return 0; + } + + memcpy((char *)mctx->block + (sizeof(mctx->block) - avail), data, + avail); + + local_bh_disable(); + preempt_disable(); + flags = octeon_crypto_enable(&state); + octeon_md5_store_hash(mctx); + + octeon_md5_transform(mctx->block); + data += avail; + len -= avail; + + while (len >= sizeof(mctx->block)) { + octeon_md5_transform(data); + data += sizeof(mctx->block); + len -= sizeof(mctx->block); + } + + octeon_md5_read_hash(mctx); + octeon_crypto_disable(&state, flags); + preempt_enable(); + local_bh_enable(); + + memcpy(mctx->block, data, len); + + return 0; +} + +static int octeon_md5_final(struct shash_desc *desc, u8 *out) +{ + struct md5_state *mctx = shash_desc_ctx(desc); + const unsigned int offset = mctx->byte_count & 0x3f; + char *p = (char *)mctx->block + offset; + int padding = 56 - (offset + 1); + struct octeon_cop2_state state; + unsigned long flags; + + *p++ = 0x80; + + local_bh_disable(); + preempt_disable(); + flags = octeon_crypto_enable(&state); + octeon_md5_store_hash(mctx); + + if (padding < 0) { + memset(p, 0x00, padding + sizeof(u64)); + octeon_md5_transform(mctx->block); + p = (char *)mctx->block; + padding = 56; + } + + memset(p, 0, padding); + mctx->block[14] = cpu_to_le32(mctx->byte_count << 3); + mctx->block[15] = cpu_to_le32(mctx->byte_count >> 29); + octeon_md5_transform(mctx->block); + + octeon_md5_read_hash(mctx); + octeon_crypto_disable(&state, flags); + preempt_enable(); + local_bh_enable(); + + memcpy(out, mctx->hash, sizeof(mctx->hash)); + memset(mctx, 0, sizeof(*mctx)); + + return 0; +} + +static int octeon_md5_export(struct shash_desc *desc, void *out) +{ + struct md5_state *ctx = shash_desc_ctx(desc); + + memcpy(out, ctx, sizeof(*ctx)); + return 0; +} + +static int octeon_md5_import(struct shash_desc *desc, const void *in) +{ + struct md5_state *ctx = shash_desc_ctx(desc); + + memcpy(ctx, in, sizeof(*ctx)); + return 0; +} + +static struct shash_alg alg = { + .digestsize = MD5_DIGEST_SIZE, + .init = octeon_md5_init, + .update = octeon_md5_update, + .final = octeon_md5_final, + .export = octeon_md5_export, + .import = octeon_md5_import, + .descsize = sizeof(struct md5_state), + .statesize = sizeof(struct md5_state), + .base = { + .cra_name = "md5", + .cra_driver_name= "octeon-md5", + .cra_priority = OCTEON_CR_OPCODE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = MD5_HMAC_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static int __init md5_mod_init(void) +{ + if (!octeon_has_crypto()) + return -ENOTSUPP; + return crypto_register_shash(&alg); +} + +static void __exit md5_mod_fini(void) +{ + crypto_unregister_shash(&alg); +} + +module_init(md5_mod_init); +module_exit(md5_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("MD5 Message Digest Algorithm (OCTEON)"); +MODULE_AUTHOR("Aaro Koskinen "); -- cgit v1.2.3 From b01264170cef82f3521db90a0589be72dfc7f6eb Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Sun, 11 Jan 2015 18:17:43 +0100 Subject: crypto: sparc64/aes - fix module description AES is a block cipher, not a hash. Cc: David S. Miller Signed-off-by: Mathias Krause Acked-by: David S. Miller Signed-off-by: Herbert Xu --- arch/sparc/crypto/aes_glue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/sparc/crypto/aes_glue.c b/arch/sparc/crypto/aes_glue.c index 705408766ab0..2e48eb8813ff 100644 --- a/arch/sparc/crypto/aes_glue.c +++ b/arch/sparc/crypto/aes_glue.c @@ -497,7 +497,7 @@ module_init(aes_sparc64_mod_init); module_exit(aes_sparc64_mod_fini); MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("AES Secure Hash Algorithm, sparc64 aes opcode accelerated"); +MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, sparc64 aes opcode accelerated"); MODULE_ALIAS_CRYPTO("aes"); -- cgit v1.2.3 From 7d676fdbb42ddef809011e4cfcc4d11cf64989b5 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Sun, 11 Jan 2015 18:17:44 +0100 Subject: crypto: sparc64/camellia - fix module alias The module alias should be "camellia", not "aes". Cc: David S. Miller Signed-off-by: Mathias Krause Acked-by: David S. Miller Signed-off-by: Herbert Xu --- arch/sparc/crypto/camellia_glue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/sparc/crypto/camellia_glue.c b/arch/sparc/crypto/camellia_glue.c index 641f55cb61c3..6bf2479a12fb 100644 --- a/arch/sparc/crypto/camellia_glue.c +++ b/arch/sparc/crypto/camellia_glue.c @@ -322,6 +322,6 @@ module_exit(camellia_sparc64_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Camellia Cipher Algorithm, sparc64 camellia opcode accelerated"); -MODULE_ALIAS_CRYPTO("aes"); +MODULE_ALIAS_CRYPTO("camellia"); #include "crop_devid.c" -- cgit v1.2.3 From 5001323b701449459c06a926becb6cf27708dff4 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Sun, 11 Jan 2015 18:17:45 +0100 Subject: crypto: sparc64/des - add "des3_ede" module alias This module provides implementations for "des3_ede", too. Announce those via an appropriate crypto module alias so it can be used in favour to the generic C implementation. Cc: David S. Miller Signed-off-by: Mathias Krause Acked-by: David S. Miller Signed-off-by: Herbert Xu --- arch/sparc/crypto/des_glue.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/sparc/crypto/des_glue.c b/arch/sparc/crypto/des_glue.c index d11500972994..dd6a34fa6e19 100644 --- a/arch/sparc/crypto/des_glue.c +++ b/arch/sparc/crypto/des_glue.c @@ -533,5 +533,6 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("DES & Triple DES EDE Cipher Algorithms, sparc64 des opcode accelerated"); MODULE_ALIAS_CRYPTO("des"); +MODULE_ALIAS_CRYPTO("des3_ede"); #include "crop_devid.c" -- cgit v1.2.3 From 97ef8ef5a8fe022308c33c03d24424b579eecb36 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Sun, 11 Jan 2015 18:17:46 +0100 Subject: crypto: sparc64/md5 - fix module description MD5 is not SHA1. Cc: David S. Miller Signed-off-by: Mathias Krause Acked-by: David S. Miller Signed-off-by: Herbert Xu --- arch/sparc/crypto/md5_glue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/sparc/crypto/md5_glue.c b/arch/sparc/crypto/md5_glue.c index 64c7ff5f72a9..b688731d7ede 100644 --- a/arch/sparc/crypto/md5_glue.c +++ b/arch/sparc/crypto/md5_glue.c @@ -183,7 +183,7 @@ module_init(md5_sparc64_mod_init); module_exit(md5_sparc64_mod_fini); MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("MD5 Secure Hash Algorithm, sparc64 md5 opcode accelerated"); +MODULE_DESCRIPTION("MD5 Message Digest Algorithm, sparc64 md5 opcode accelerated"); MODULE_ALIAS_CRYPTO("md5"); -- cgit v1.2.3 From d8219f52a72033f84c15cde73294d46578fb2d68 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Sun, 11 Jan 2015 18:17:47 +0100 Subject: crypto: x86/des3_ede - drop bogus module aliases This module implements variations of "des3_ede" only. Drop the bogus module aliases for "des". Cc: Jussi Kivilinna Signed-off-by: Mathias Krause Signed-off-by: Herbert Xu --- arch/x86/crypto/des3_ede_glue.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/des3_ede_glue.c b/arch/x86/crypto/des3_ede_glue.c index 38a14f818ef1..d6fc59aaaadf 100644 --- a/arch/x86/crypto/des3_ede_glue.c +++ b/arch/x86/crypto/des3_ede_glue.c @@ -504,6 +504,4 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Triple DES EDE Cipher Algorithm, asm optimized"); MODULE_ALIAS_CRYPTO("des3_ede"); MODULE_ALIAS_CRYPTO("des3_ede-asm"); -MODULE_ALIAS_CRYPTO("des"); -MODULE_ALIAS_CRYPTO("des-asm"); MODULE_AUTHOR("Jussi Kivilinna "); -- cgit v1.2.3 From e31ac32d3bc27c33f002e0c9ffd6ae08b65474e6 Mon Sep 17 00:00:00 2001 From: Timothy McCaffrey Date: Tue, 13 Jan 2015 13:16:43 -0500 Subject: crypto: aesni - Add support for 192 & 256 bit keys to AESNI RFC4106 These patches fix the RFC4106 implementation in the aesni-intel module so it supports 192 & 256 bit keys. Since the AVX support that was added to this module also only supports 128 bit keys, and this patch only affects the SSE implementation, changes were also made to use the SSE version if key sizes other than 128 are specified. RFC4106 specifies that 192 & 256 bit keys must be supported (section 8.4). Also, this should fix Strongswan issue 341 where the aesni module needs to be unloaded if 256 bit keys are used: http://wiki.strongswan.org/issues/341 This patch has been tested with Sandy Bridge and Haswell processors. With 128 bit keys and input buffers > 512 bytes a slight performance degradation was noticed (~1%). For input buffers of less than 512 bytes there was no performance impact. Compared to 128 bit keys, 256 bit key size performance is approx. .5 cycles per byte slower on Sandy Bridge, and .37 cycles per byte slower on Haswell (vs. SSE code). This patch has also been tested with StrongSwan IPSec connections where it worked correctly. I created this diff from a git clone of crypto-2.6.git. Any questions, please feel free to contact me. Signed-off-by: Timothy McCaffrey Signed-off-by: Jarod Wilson Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_asm.S | 343 +++++++++++++++++++------------------ arch/x86/crypto/aesni-intel_glue.c | 34 +++- 2 files changed, 205 insertions(+), 172 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 477e9d75149b..6bd2c6c95373 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -32,12 +32,23 @@ #include #include +/* + * The following macros are used to move an (un)aligned 16 byte value to/from + * an XMM register. This can done for either FP or integer values, for FP use + * movaps (move aligned packed single) or integer use movdqa (move double quad + * aligned). It doesn't make a performance difference which instruction is used + * since Nehalem (original Core i7) was released. However, the movaps is a byte + * shorter, so that is the one we'll use for now. (same for unaligned). + */ +#define MOVADQ movaps +#define MOVUDQ movups + #ifdef __x86_64__ + .data .align 16 .Lgf128mul_x_ble_mask: .octa 0x00000000000000010000000000000087 - POLY: .octa 0xC2000000000000000000000000000001 TWOONE: .octa 0x00000001000000000000000000000001 @@ -89,6 +100,7 @@ enc: .octa 0x2 #define arg8 STACK_OFFSET+16(%r14) #define arg9 STACK_OFFSET+24(%r14) #define arg10 STACK_OFFSET+32(%r14) +#define keysize 2*15*16(%arg1) #endif @@ -213,10 +225,12 @@ enc: .octa 0x2 .macro INITIAL_BLOCKS_DEC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \ XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation + MOVADQ SHUF_MASK(%rip), %xmm14 mov arg7, %r10 # %r10 = AAD mov arg8, %r12 # %r12 = aadLen mov %r12, %r11 pxor %xmm\i, %xmm\i + _get_AAD_loop\num_initial_blocks\operation: movd (%r10), \TMP1 pslldq $12, \TMP1 @@ -225,16 +239,18 @@ _get_AAD_loop\num_initial_blocks\operation: add $4, %r10 sub $4, %r12 jne _get_AAD_loop\num_initial_blocks\operation + cmp $16, %r11 je _get_AAD_loop2_done\num_initial_blocks\operation + mov $16, %r12 _get_AAD_loop2\num_initial_blocks\operation: psrldq $4, %xmm\i sub $4, %r12 cmp %r11, %r12 jne _get_AAD_loop2\num_initial_blocks\operation + _get_AAD_loop2_done\num_initial_blocks\operation: - movdqa SHUF_MASK(%rip), %xmm14 PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data xor %r11, %r11 # initialise the data pointer offset as zero @@ -243,59 +259,34 @@ _get_AAD_loop2_done\num_initial_blocks\operation: mov %arg5, %rax # %rax = *Y0 movdqu (%rax), \XMM0 # XMM0 = Y0 - movdqa SHUF_MASK(%rip), %xmm14 PSHUFB_XMM %xmm14, \XMM0 .if (\i == 5) || (\i == 6) || (\i == 7) + MOVADQ ONE(%RIP),\TMP1 + MOVADQ (%arg1),\TMP2 .irpc index, \i_seq - paddd ONE(%rip), \XMM0 # INCR Y0 + paddd \TMP1, \XMM0 # INCR Y0 movdqa \XMM0, %xmm\index - movdqa SHUF_MASK(%rip), %xmm14 PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap - -.endr -.irpc index, \i_seq - pxor 16*0(%arg1), %xmm\index -.endr -.irpc index, \i_seq - movaps 0x10(%rdi), \TMP1 - AESENC \TMP1, %xmm\index # Round 1 -.endr -.irpc index, \i_seq - movaps 0x20(%arg1), \TMP1 - AESENC \TMP1, %xmm\index # Round 2 -.endr -.irpc index, \i_seq - movaps 0x30(%arg1), \TMP1 - AESENC \TMP1, %xmm\index # Round 2 -.endr -.irpc index, \i_seq - movaps 0x40(%arg1), \TMP1 - AESENC \TMP1, %xmm\index # Round 2 -.endr -.irpc index, \i_seq - movaps 0x50(%arg1), \TMP1 - AESENC \TMP1, %xmm\index # Round 2 -.endr -.irpc index, \i_seq - movaps 0x60(%arg1), \TMP1 - AESENC \TMP1, %xmm\index # Round 2 + pxor \TMP2, %xmm\index .endr -.irpc index, \i_seq - movaps 0x70(%arg1), \TMP1 - AESENC \TMP1, %xmm\index # Round 2 -.endr -.irpc index, \i_seq - movaps 0x80(%arg1), \TMP1 - AESENC \TMP1, %xmm\index # Round 2 -.endr -.irpc index, \i_seq - movaps 0x90(%arg1), \TMP1 - AESENC \TMP1, %xmm\index # Round 2 + lea 0x10(%arg1),%r10 + mov keysize,%eax + shr $2,%eax # 128->4, 192->6, 256->8 + add $5,%eax # 128->9, 192->11, 256->13 + +aes_loop_initial_dec\num_initial_blocks: + MOVADQ (%r10),\TMP1 +.irpc index, \i_seq + AESENC \TMP1, %xmm\index .endr + add $16,%r10 + sub $1,%eax + jnz aes_loop_initial_dec\num_initial_blocks + + MOVADQ (%r10), \TMP1 .irpc index, \i_seq - movaps 0xa0(%arg1), \TMP1 - AESENCLAST \TMP1, %xmm\index # Round 10 + AESENCLAST \TMP1, %xmm\index # Last Round .endr .irpc index, \i_seq movdqu (%arg3 , %r11, 1), \TMP1 @@ -305,10 +296,8 @@ _get_AAD_loop2_done\num_initial_blocks\operation: add $16, %r11 movdqa \TMP1, %xmm\index - movdqa SHUF_MASK(%rip), %xmm14 PSHUFB_XMM %xmm14, %xmm\index - - # prepare plaintext/ciphertext for GHASH computation + # prepare plaintext/ciphertext for GHASH computation .endr .endif GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 @@ -338,30 +327,28 @@ _get_AAD_loop2_done\num_initial_blocks\operation: * Precomputations for HashKey parallel with encryption of first 4 blocks. * Haskey_i_k holds XORed values of the low and high parts of the Haskey_i */ - paddd ONE(%rip), \XMM0 # INCR Y0 - movdqa \XMM0, \XMM1 - movdqa SHUF_MASK(%rip), %xmm14 + MOVADQ ONE(%rip), \TMP1 + paddd \TMP1, \XMM0 # INCR Y0 + MOVADQ \XMM0, \XMM1 PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap - paddd ONE(%rip), \XMM0 # INCR Y0 - movdqa \XMM0, \XMM2 - movdqa SHUF_MASK(%rip), %xmm14 + paddd \TMP1, \XMM0 # INCR Y0 + MOVADQ \XMM0, \XMM2 PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap - paddd ONE(%rip), \XMM0 # INCR Y0 - movdqa \XMM0, \XMM3 - movdqa SHUF_MASK(%rip), %xmm14 + paddd \TMP1, \XMM0 # INCR Y0 + MOVADQ \XMM0, \XMM3 PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap - paddd ONE(%rip), \XMM0 # INCR Y0 - movdqa \XMM0, \XMM4 - movdqa SHUF_MASK(%rip), %xmm14 + paddd \TMP1, \XMM0 # INCR Y0 + MOVADQ \XMM0, \XMM4 PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap - pxor 16*0(%arg1), \XMM1 - pxor 16*0(%arg1), \XMM2 - pxor 16*0(%arg1), \XMM3 - pxor 16*0(%arg1), \XMM4 + MOVADQ 0(%arg1),\TMP1 + pxor \TMP1, \XMM1 + pxor \TMP1, \XMM2 + pxor \TMP1, \XMM3 + pxor \TMP1, \XMM4 movdqa \TMP3, \TMP5 pshufd $78, \TMP3, \TMP1 pxor \TMP3, \TMP1 @@ -399,7 +386,23 @@ _get_AAD_loop2_done\num_initial_blocks\operation: pshufd $78, \TMP5, \TMP1 pxor \TMP5, \TMP1 movdqa \TMP1, HashKey_4_k(%rsp) - movaps 0xa0(%arg1), \TMP2 + lea 0xa0(%arg1),%r10 + mov keysize,%eax + shr $2,%eax # 128->4, 192->6, 256->8 + sub $4,%eax # 128->0, 192->2, 256->4 + jz aes_loop_pre_dec_done\num_initial_blocks + +aes_loop_pre_dec\num_initial_blocks: + MOVADQ (%r10),\TMP2 +.irpc index, 1234 + AESENC \TMP2, %xmm\index +.endr + add $16,%r10 + sub $1,%eax + jnz aes_loop_pre_dec\num_initial_blocks + +aes_loop_pre_dec_done\num_initial_blocks: + MOVADQ (%r10), \TMP2 AESENCLAST \TMP2, \XMM1 AESENCLAST \TMP2, \XMM2 AESENCLAST \TMP2, \XMM3 @@ -421,15 +424,11 @@ _get_AAD_loop2_done\num_initial_blocks\operation: movdqu \XMM4, 16*3(%arg2 , %r11 , 1) movdqa \TMP1, \XMM4 add $64, %r11 - movdqa SHUF_MASK(%rip), %xmm14 PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap pxor \XMMDst, \XMM1 # combine GHASHed value with the corresponding ciphertext - movdqa SHUF_MASK(%rip), %xmm14 PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap - movdqa SHUF_MASK(%rip), %xmm14 PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap - movdqa SHUF_MASK(%rip), %xmm14 PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap _initial_blocks_done\num_initial_blocks\operation: @@ -451,6 +450,7 @@ _initial_blocks_done\num_initial_blocks\operation: .macro INITIAL_BLOCKS_ENC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \ XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation + MOVADQ SHUF_MASK(%rip), %xmm14 mov arg7, %r10 # %r10 = AAD mov arg8, %r12 # %r12 = aadLen mov %r12, %r11 @@ -472,7 +472,6 @@ _get_AAD_loop2\num_initial_blocks\operation: cmp %r11, %r12 jne _get_AAD_loop2\num_initial_blocks\operation _get_AAD_loop2_done\num_initial_blocks\operation: - movdqa SHUF_MASK(%rip), %xmm14 PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data xor %r11, %r11 # initialise the data pointer offset as zero @@ -481,59 +480,35 @@ _get_AAD_loop2_done\num_initial_blocks\operation: mov %arg5, %rax # %rax = *Y0 movdqu (%rax), \XMM0 # XMM0 = Y0 - movdqa SHUF_MASK(%rip), %xmm14 PSHUFB_XMM %xmm14, \XMM0 .if (\i == 5) || (\i == 6) || (\i == 7) -.irpc index, \i_seq - paddd ONE(%rip), \XMM0 # INCR Y0 - movdqa \XMM0, %xmm\index - movdqa SHUF_MASK(%rip), %xmm14 - PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap -.endr -.irpc index, \i_seq - pxor 16*0(%arg1), %xmm\index -.endr -.irpc index, \i_seq - movaps 0x10(%rdi), \TMP1 - AESENC \TMP1, %xmm\index # Round 1 -.endr -.irpc index, \i_seq - movaps 0x20(%arg1), \TMP1 - AESENC \TMP1, %xmm\index # Round 2 -.endr + MOVADQ ONE(%RIP),\TMP1 + MOVADQ 0(%arg1),\TMP2 .irpc index, \i_seq - movaps 0x30(%arg1), \TMP1 - AESENC \TMP1, %xmm\index # Round 2 + paddd \TMP1, \XMM0 # INCR Y0 + MOVADQ \XMM0, %xmm\index + PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap + pxor \TMP2, %xmm\index .endr -.irpc index, \i_seq - movaps 0x40(%arg1), \TMP1 - AESENC \TMP1, %xmm\index # Round 2 -.endr -.irpc index, \i_seq - movaps 0x50(%arg1), \TMP1 - AESENC \TMP1, %xmm\index # Round 2 -.endr -.irpc index, \i_seq - movaps 0x60(%arg1), \TMP1 - AESENC \TMP1, %xmm\index # Round 2 -.endr -.irpc index, \i_seq - movaps 0x70(%arg1), \TMP1 - AESENC \TMP1, %xmm\index # Round 2 -.endr -.irpc index, \i_seq - movaps 0x80(%arg1), \TMP1 - AESENC \TMP1, %xmm\index # Round 2 -.endr -.irpc index, \i_seq - movaps 0x90(%arg1), \TMP1 - AESENC \TMP1, %xmm\index # Round 2 + lea 0x10(%arg1),%r10 + mov keysize,%eax + shr $2,%eax # 128->4, 192->6, 256->8 + add $5,%eax # 128->9, 192->11, 256->13 + +aes_loop_initial_enc\num_initial_blocks: + MOVADQ (%r10),\TMP1 +.irpc index, \i_seq + AESENC \TMP1, %xmm\index .endr + add $16,%r10 + sub $1,%eax + jnz aes_loop_initial_enc\num_initial_blocks + + MOVADQ (%r10), \TMP1 .irpc index, \i_seq - movaps 0xa0(%arg1), \TMP1 - AESENCLAST \TMP1, %xmm\index # Round 10 + AESENCLAST \TMP1, %xmm\index # Last Round .endr .irpc index, \i_seq movdqu (%arg3 , %r11, 1), \TMP1 @@ -541,8 +516,6 @@ _get_AAD_loop2_done\num_initial_blocks\operation: movdqu %xmm\index, (%arg2 , %r11, 1) # write back plaintext/ciphertext for num_initial_blocks add $16, %r11 - - movdqa SHUF_MASK(%rip), %xmm14 PSHUFB_XMM %xmm14, %xmm\index # prepare plaintext/ciphertext for GHASH computation @@ -575,30 +548,28 @@ _get_AAD_loop2_done\num_initial_blocks\operation: * Precomputations for HashKey parallel with encryption of first 4 blocks. * Haskey_i_k holds XORed values of the low and high parts of the Haskey_i */ - paddd ONE(%rip), \XMM0 # INCR Y0 - movdqa \XMM0, \XMM1 - movdqa SHUF_MASK(%rip), %xmm14 + MOVADQ ONE(%RIP),\TMP1 + paddd \TMP1, \XMM0 # INCR Y0 + MOVADQ \XMM0, \XMM1 PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap - paddd ONE(%rip), \XMM0 # INCR Y0 - movdqa \XMM0, \XMM2 - movdqa SHUF_MASK(%rip), %xmm14 + paddd \TMP1, \XMM0 # INCR Y0 + MOVADQ \XMM0, \XMM2 PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap - paddd ONE(%rip), \XMM0 # INCR Y0 - movdqa \XMM0, \XMM3 - movdqa SHUF_MASK(%rip), %xmm14 + paddd \TMP1, \XMM0 # INCR Y0 + MOVADQ \XMM0, \XMM3 PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap - paddd ONE(%rip), \XMM0 # INCR Y0 - movdqa \XMM0, \XMM4 - movdqa SHUF_MASK(%rip), %xmm14 + paddd \TMP1, \XMM0 # INCR Y0 + MOVADQ \XMM0, \XMM4 PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap - pxor 16*0(%arg1), \XMM1 - pxor 16*0(%arg1), \XMM2 - pxor 16*0(%arg1), \XMM3 - pxor 16*0(%arg1), \XMM4 + MOVADQ 0(%arg1),\TMP1 + pxor \TMP1, \XMM1 + pxor \TMP1, \XMM2 + pxor \TMP1, \XMM3 + pxor \TMP1, \XMM4 movdqa \TMP3, \TMP5 pshufd $78, \TMP3, \TMP1 pxor \TMP3, \TMP1 @@ -636,7 +607,23 @@ _get_AAD_loop2_done\num_initial_blocks\operation: pshufd $78, \TMP5, \TMP1 pxor \TMP5, \TMP1 movdqa \TMP1, HashKey_4_k(%rsp) - movaps 0xa0(%arg1), \TMP2 + lea 0xa0(%arg1),%r10 + mov keysize,%eax + shr $2,%eax # 128->4, 192->6, 256->8 + sub $4,%eax # 128->0, 192->2, 256->4 + jz aes_loop_pre_enc_done\num_initial_blocks + +aes_loop_pre_enc\num_initial_blocks: + MOVADQ (%r10),\TMP2 +.irpc index, 1234 + AESENC \TMP2, %xmm\index +.endr + add $16,%r10 + sub $1,%eax + jnz aes_loop_pre_enc\num_initial_blocks + +aes_loop_pre_enc_done\num_initial_blocks: + MOVADQ (%r10), \TMP2 AESENCLAST \TMP2, \XMM1 AESENCLAST \TMP2, \XMM2 AESENCLAST \TMP2, \XMM3 @@ -655,15 +642,11 @@ _get_AAD_loop2_done\num_initial_blocks\operation: movdqu \XMM4, 16*3(%arg2 , %r11 , 1) add $64, %r11 - movdqa SHUF_MASK(%rip), %xmm14 PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap pxor \XMMDst, \XMM1 # combine GHASHed value with the corresponding ciphertext - movdqa SHUF_MASK(%rip), %xmm14 PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap - movdqa SHUF_MASK(%rip), %xmm14 PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap - movdqa SHUF_MASK(%rip), %xmm14 PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap _initial_blocks_done\num_initial_blocks\operation: @@ -794,7 +777,23 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation AESENC \TMP3, \XMM3 AESENC \TMP3, \XMM4 PCLMULQDQ 0x00, \TMP5, \XMM8 # XMM8 = a0*b0 - movaps 0xa0(%arg1), \TMP3 + lea 0xa0(%arg1),%r10 + mov keysize,%eax + shr $2,%eax # 128->4, 192->6, 256->8 + sub $4,%eax # 128->0, 192->2, 256->4 + jz aes_loop_par_enc_done + +aes_loop_par_enc: + MOVADQ (%r10),\TMP3 +.irpc index, 1234 + AESENC \TMP3, %xmm\index +.endr + add $16,%r10 + sub $1,%eax + jnz aes_loop_par_enc + +aes_loop_par_enc_done: + MOVADQ (%r10), \TMP3 AESENCLAST \TMP3, \XMM1 # Round 10 AESENCLAST \TMP3, \XMM2 AESENCLAST \TMP3, \XMM3 @@ -986,8 +985,24 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation AESENC \TMP3, \XMM3 AESENC \TMP3, \XMM4 PCLMULQDQ 0x00, \TMP5, \XMM8 # XMM8 = a0*b0 - movaps 0xa0(%arg1), \TMP3 - AESENCLAST \TMP3, \XMM1 # Round 10 + lea 0xa0(%arg1),%r10 + mov keysize,%eax + shr $2,%eax # 128->4, 192->6, 256->8 + sub $4,%eax # 128->0, 192->2, 256->4 + jz aes_loop_par_dec_done + +aes_loop_par_dec: + MOVADQ (%r10),\TMP3 +.irpc index, 1234 + AESENC \TMP3, %xmm\index +.endr + add $16,%r10 + sub $1,%eax + jnz aes_loop_par_dec + +aes_loop_par_dec_done: + MOVADQ (%r10), \TMP3 + AESENCLAST \TMP3, \XMM1 # last round AESENCLAST \TMP3, \XMM2 AESENCLAST \TMP3, \XMM3 AESENCLAST \TMP3, \XMM4 @@ -1155,33 +1170,29 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst pxor \TMP6, \XMMDst # reduced result is in XMMDst .endm -/* Encryption of a single block done*/ -.macro ENCRYPT_SINGLE_BLOCK XMM0 TMP1 - pxor (%arg1), \XMM0 - movaps 16(%arg1), \TMP1 - AESENC \TMP1, \XMM0 - movaps 32(%arg1), \TMP1 - AESENC \TMP1, \XMM0 - movaps 48(%arg1), \TMP1 - AESENC \TMP1, \XMM0 - movaps 64(%arg1), \TMP1 - AESENC \TMP1, \XMM0 - movaps 80(%arg1), \TMP1 - AESENC \TMP1, \XMM0 - movaps 96(%arg1), \TMP1 - AESENC \TMP1, \XMM0 - movaps 112(%arg1), \TMP1 - AESENC \TMP1, \XMM0 - movaps 128(%arg1), \TMP1 - AESENC \TMP1, \XMM0 - movaps 144(%arg1), \TMP1 - AESENC \TMP1, \XMM0 - movaps 160(%arg1), \TMP1 - AESENCLAST \TMP1, \XMM0 -.endm +/* Encryption of a single block +* uses eax & r10 +*/ +.macro ENCRYPT_SINGLE_BLOCK XMM0 TMP1 + pxor (%arg1), \XMM0 + mov keysize,%eax + shr $2,%eax # 128->4, 192->6, 256->8 + add $5,%eax # 128->9, 192->11, 256->13 + lea 16(%arg1), %r10 # get first expanded key address + +_esb_loop_\@: + MOVADQ (%r10),\TMP1 + AESENC \TMP1,\XMM0 + add $16,%r10 + sub $1,%eax + jnz _esb_loop_\@ + + MOVADQ (%r10),\TMP1 + AESENCLAST \TMP1,\XMM0 +.endm /***************************************************************************** * void aesni_gcm_dec(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. * u8 *out, // Plaintext output. Encrypt in-place is allowed. diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index ae855f4f64b7..947c6bf52c33 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -43,6 +43,7 @@ #include #endif + /* This data is stored at the end of the crypto_tfm struct. * It's a type of per "session" data storage location. * This needs to be 16 byte aligned. @@ -182,7 +183,8 @@ static void aesni_gcm_enc_avx(void *ctx, u8 *out, u8 *hash_subkey, const u8 *aad, unsigned long aad_len, u8 *auth_tag, unsigned long auth_tag_len) { - if (plaintext_len < AVX_GEN2_OPTSIZE) { + struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx; + if ((plaintext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)){ aesni_gcm_enc(ctx, out, in, plaintext_len, iv, hash_subkey, aad, aad_len, auth_tag, auth_tag_len); } else { @@ -197,7 +199,8 @@ static void aesni_gcm_dec_avx(void *ctx, u8 *out, u8 *hash_subkey, const u8 *aad, unsigned long aad_len, u8 *auth_tag, unsigned long auth_tag_len) { - if (ciphertext_len < AVX_GEN2_OPTSIZE) { + struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx; + if ((ciphertext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)) { aesni_gcm_dec(ctx, out, in, ciphertext_len, iv, hash_subkey, aad, aad_len, auth_tag, auth_tag_len); } else { @@ -231,7 +234,8 @@ static void aesni_gcm_enc_avx2(void *ctx, u8 *out, u8 *hash_subkey, const u8 *aad, unsigned long aad_len, u8 *auth_tag, unsigned long auth_tag_len) { - if (plaintext_len < AVX_GEN2_OPTSIZE) { + struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx; + if ((plaintext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)) { aesni_gcm_enc(ctx, out, in, plaintext_len, iv, hash_subkey, aad, aad_len, auth_tag, auth_tag_len); } else if (plaintext_len < AVX_GEN4_OPTSIZE) { @@ -250,7 +254,8 @@ static void aesni_gcm_dec_avx2(void *ctx, u8 *out, u8 *hash_subkey, const u8 *aad, unsigned long aad_len, u8 *auth_tag, unsigned long auth_tag_len) { - if (ciphertext_len < AVX_GEN2_OPTSIZE) { + struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx; + if ((ciphertext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)) { aesni_gcm_dec(ctx, out, in, ciphertext_len, iv, hash_subkey, aad, aad_len, auth_tag, auth_tag_len); } else if (ciphertext_len < AVX_GEN4_OPTSIZE) { @@ -511,7 +516,7 @@ static int ctr_crypt(struct blkcipher_desc *desc, kernel_fpu_begin(); while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { aesni_ctr_enc_tfm(ctx, walk.dst.virt.addr, walk.src.virt.addr, - nbytes & AES_BLOCK_MASK, walk.iv); + nbytes & AES_BLOCK_MASK, walk.iv); nbytes &= AES_BLOCK_SIZE - 1; err = blkcipher_walk_done(desc, &walk, nbytes); } @@ -902,7 +907,8 @@ static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key, } /*Account for 4 byte nonce at the end.*/ key_len -= 4; - if (key_len != AES_KEYSIZE_128) { + if (key_len != AES_KEYSIZE_128 && key_len != AES_KEYSIZE_192 && + key_len != AES_KEYSIZE_256) { crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); return -EINVAL; } @@ -1013,6 +1019,7 @@ static int __driver_rfc4106_encrypt(struct aead_request *req) __be32 counter = cpu_to_be32(1); struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); + u32 key_len = ctx->aes_key_expanded.key_length; void *aes_ctx = &(ctx->aes_key_expanded); unsigned long auth_tag_len = crypto_aead_authsize(tfm); u8 iv_tab[16+AESNI_ALIGN]; @@ -1027,6 +1034,13 @@ static int __driver_rfc4106_encrypt(struct aead_request *req) /* to 8 or 12 bytes */ if (unlikely(req->assoclen != 8 && req->assoclen != 12)) return -EINVAL; + if (unlikely(auth_tag_len != 8 && auth_tag_len != 12 && auth_tag_len != 16)) + return -EINVAL; + if (unlikely(key_len != AES_KEYSIZE_128 && + key_len != AES_KEYSIZE_192 && + key_len != AES_KEYSIZE_256)) + return -EINVAL; + /* IV below built */ for (i = 0; i < 4; i++) *(iv+i) = ctx->nonce[i]; @@ -1091,6 +1105,7 @@ static int __driver_rfc4106_decrypt(struct aead_request *req) int retval = 0; struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); + u32 key_len = ctx->aes_key_expanded.key_length; void *aes_ctx = &(ctx->aes_key_expanded); unsigned long auth_tag_len = crypto_aead_authsize(tfm); u8 iv_and_authTag[32+AESNI_ALIGN]; @@ -1104,6 +1119,13 @@ static int __driver_rfc4106_decrypt(struct aead_request *req) if (unlikely((req->cryptlen < auth_tag_len) || (req->assoclen != 8 && req->assoclen != 12))) return -EINVAL; + if (unlikely(auth_tag_len != 8 && auth_tag_len != 12 && auth_tag_len != 16)) + return -EINVAL; + if (unlikely(key_len != AES_KEYSIZE_128 && + key_len != AES_KEYSIZE_192 && + key_len != AES_KEYSIZE_256)) + return -EINVAL; + /* Assuming we are supporting rfc4106 64-bit extended */ /* sequence numbers We need to have the AAD length */ /* equal to 8 or 12 bytes */ -- cgit v1.2.3