From d2825fa9365d0101571ed16534b16b7c8d261ab3 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sun, 13 Mar 2022 21:11:01 -0600 Subject: crypto: sm3,sm4 - move into crypto directory The lib/crypto libraries live in lib because they are used by various drivers of the kernel. In contrast, the various helper functions in crypto are there because they're used exclusively by the crypto API. The SM3 and SM4 helper functions were erroneously moved into lib/crypto/ instead of crypto/, even though there are no in-kernel users outside of the crypto API of those functions. This commit moves them into crypto/. Cc: Herbert Xu Cc: Tianjia Zhang Cc: Eric Biggers Signed-off-by: Jason A. Donenfeld Signed-off-by: Herbert Xu --- arch/arm64/crypto/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 2a965aa0188d..454621a20eaa 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -45,13 +45,13 @@ config CRYPTO_SM3_ARM64_CE tristate "SM3 digest algorithm (ARMv8.2 Crypto Extensions)" depends on KERNEL_MODE_NEON select CRYPTO_HASH - select CRYPTO_LIB_SM3 + select CRYPTO_SM3 config CRYPTO_SM4_ARM64_CE tristate "SM4 symmetric cipher (ARMv8.2 Crypto Extensions)" depends on KERNEL_MODE_NEON select CRYPTO_ALGAPI - select CRYPTO_LIB_SM4 + select CRYPTO_SM4 config CRYPTO_GHASH_ARM64_CE tristate "GHASH/AES-GCM using ARMv8 Crypto Extensions" -- cgit v1.2.3 From 02436762f5ff4b3f662bc196c70a563bcbc92b7d Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Tue, 15 Mar 2022 17:44:52 +0800 Subject: crypto: arm64/sm4-ce - rename to sm4-ce-cipher The subsequent patches of the series will have an implementation of SM4-ECB/CBC/CFB/CTR accelerated by the CE instruction set, which conflicts with the current module name. In order to keep the naming rules of the AES algorithm consistent, the sm4-ce algorithm is renamed to sm4-ce-cipher. 
In addition, the speed of sm4-ce-cipher is better than that of SM4 NEON. By the way, the priority of the algorithm is adjusted to 300, which is also to leave room for the priority of SM4 NEON. Signed-off-by: Tianjia Zhang Signed-off-by: Herbert Xu --- arch/arm64/crypto/Makefile | 4 +- arch/arm64/crypto/sm4-ce-cipher-core.S | 36 +++++++++++++++ arch/arm64/crypto/sm4-ce-cipher-glue.c | 82 ++++++++++++++++++++++++++++++++++ arch/arm64/crypto/sm4-ce-core.S | 36 --------------- arch/arm64/crypto/sm4-ce-glue.c | 82 ---------------------------------- 5 files changed, 120 insertions(+), 120 deletions(-) create mode 100644 arch/arm64/crypto/sm4-ce-cipher-core.S create mode 100644 arch/arm64/crypto/sm4-ce-cipher-glue.c delete mode 100644 arch/arm64/crypto/sm4-ce-core.S delete mode 100644 arch/arm64/crypto/sm4-ce-glue.c (limited to 'arch/arm64') diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index 09a805cc32d7..85863e610a2e 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -20,8 +20,8 @@ sha3-ce-y := sha3-ce-glue.o sha3-ce-core.o obj-$(CONFIG_CRYPTO_SM3_ARM64_CE) += sm3-ce.o sm3-ce-y := sm3-ce-glue.o sm3-ce-core.o -obj-$(CONFIG_CRYPTO_SM4_ARM64_CE) += sm4-ce.o -sm4-ce-y := sm4-ce-glue.o sm4-ce-core.o +obj-$(CONFIG_CRYPTO_SM4_ARM64_CE) += sm4-ce-cipher.o +sm4-ce-cipher-y := sm4-ce-cipher-glue.o sm4-ce-cipher-core.o obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o diff --git a/arch/arm64/crypto/sm4-ce-cipher-core.S b/arch/arm64/crypto/sm4-ce-cipher-core.S new file mode 100644 index 000000000000..4ac6cfbc5797 --- /dev/null +++ b/arch/arm64/crypto/sm4-ce-cipher-core.S @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include + + .irp b, 0, 1, 2, 3, 4, 5, 6, 7, 8 + .set .Lv\b\().4s, \b + .endr + + .macro sm4e, rd, rn + .inst 0xcec08400 | .L\rd | (.L\rn << 5) + .endm + + /* + * void sm4_ce_do_crypt(const u32 *rk, u32 *out, const u32 *in); + */ + .text 
+SYM_FUNC_START(sm4_ce_do_crypt) + ld1 {v8.4s}, [x2] + ld1 {v0.4s-v3.4s}, [x0], #64 +CPU_LE( rev32 v8.16b, v8.16b ) + ld1 {v4.4s-v7.4s}, [x0] + sm4e v8.4s, v0.4s + sm4e v8.4s, v1.4s + sm4e v8.4s, v2.4s + sm4e v8.4s, v3.4s + sm4e v8.4s, v4.4s + sm4e v8.4s, v5.4s + sm4e v8.4s, v6.4s + sm4e v8.4s, v7.4s + rev64 v8.4s, v8.4s + ext v8.16b, v8.16b, v8.16b, #8 +CPU_LE( rev32 v8.16b, v8.16b ) + st1 {v8.4s}, [x1] + ret +SYM_FUNC_END(sm4_ce_do_crypt) diff --git a/arch/arm64/crypto/sm4-ce-cipher-glue.c b/arch/arm64/crypto/sm4-ce-cipher-glue.c new file mode 100644 index 000000000000..76a34ef4abbb --- /dev/null +++ b/arch/arm64/crypto/sm4-ce-cipher-glue.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include +#include +#include +#include +#include + +MODULE_ALIAS_CRYPTO("sm4"); +MODULE_ALIAS_CRYPTO("sm4-ce"); +MODULE_DESCRIPTION("SM4 symmetric cipher using ARMv8 Crypto Extensions"); +MODULE_AUTHOR("Ard Biesheuvel "); +MODULE_LICENSE("GPL v2"); + +asmlinkage void sm4_ce_do_crypt(const u32 *rk, void *out, const void *in); + +static int sm4_ce_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int key_len) +{ + struct sm4_ctx *ctx = crypto_tfm_ctx(tfm); + + return sm4_expandkey(ctx, key, key_len); +} + +static void sm4_ce_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) +{ + const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm); + + if (!crypto_simd_usable()) { + sm4_crypt_block(ctx->rkey_enc, out, in); + } else { + kernel_neon_begin(); + sm4_ce_do_crypt(ctx->rkey_enc, out, in); + kernel_neon_end(); + } +} + +static void sm4_ce_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) +{ + const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm); + + if (!crypto_simd_usable()) { + sm4_crypt_block(ctx->rkey_dec, out, in); + } else { + kernel_neon_begin(); + sm4_ce_do_crypt(ctx->rkey_dec, out, in); + kernel_neon_end(); + } +} + +static struct crypto_alg sm4_ce_alg = { + .cra_name = "sm4", + .cra_driver_name = "sm4-ce", + .cra_priority = 300, + 
.cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = SM4_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + .cra_u.cipher = { + .cia_min_keysize = SM4_KEY_SIZE, + .cia_max_keysize = SM4_KEY_SIZE, + .cia_setkey = sm4_ce_setkey, + .cia_encrypt = sm4_ce_encrypt, + .cia_decrypt = sm4_ce_decrypt + } +}; + +static int __init sm4_ce_mod_init(void) +{ + return crypto_register_alg(&sm4_ce_alg); +} + +static void __exit sm4_ce_mod_fini(void) +{ + crypto_unregister_alg(&sm4_ce_alg); +} + +module_cpu_feature_match(SM4, sm4_ce_mod_init); +module_exit(sm4_ce_mod_fini); diff --git a/arch/arm64/crypto/sm4-ce-core.S b/arch/arm64/crypto/sm4-ce-core.S deleted file mode 100644 index 4ac6cfbc5797..000000000000 --- a/arch/arm64/crypto/sm4-ce-core.S +++ /dev/null @@ -1,36 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -#include -#include - - .irp b, 0, 1, 2, 3, 4, 5, 6, 7, 8 - .set .Lv\b\().4s, \b - .endr - - .macro sm4e, rd, rn - .inst 0xcec08400 | .L\rd | (.L\rn << 5) - .endm - - /* - * void sm4_ce_do_crypt(const u32 *rk, u32 *out, const u32 *in); - */ - .text -SYM_FUNC_START(sm4_ce_do_crypt) - ld1 {v8.4s}, [x2] - ld1 {v0.4s-v3.4s}, [x0], #64 -CPU_LE( rev32 v8.16b, v8.16b ) - ld1 {v4.4s-v7.4s}, [x0] - sm4e v8.4s, v0.4s - sm4e v8.4s, v1.4s - sm4e v8.4s, v2.4s - sm4e v8.4s, v3.4s - sm4e v8.4s, v4.4s - sm4e v8.4s, v5.4s - sm4e v8.4s, v6.4s - sm4e v8.4s, v7.4s - rev64 v8.4s, v8.4s - ext v8.16b, v8.16b, v8.16b, #8 -CPU_LE( rev32 v8.16b, v8.16b ) - st1 {v8.4s}, [x1] - ret -SYM_FUNC_END(sm4_ce_do_crypt) diff --git a/arch/arm64/crypto/sm4-ce-glue.c b/arch/arm64/crypto/sm4-ce-glue.c deleted file mode 100644 index 9c93cfc4841b..000000000000 --- a/arch/arm64/crypto/sm4-ce-glue.c +++ /dev/null @@ -1,82 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -#include -#include -#include -#include -#include -#include -#include -#include - -MODULE_ALIAS_CRYPTO("sm4"); -MODULE_ALIAS_CRYPTO("sm4-ce"); -MODULE_DESCRIPTION("SM4 symmetric cipher using ARMv8 Crypto Extensions"); 
-MODULE_AUTHOR("Ard Biesheuvel "); -MODULE_LICENSE("GPL v2"); - -asmlinkage void sm4_ce_do_crypt(const u32 *rk, void *out, const void *in); - -static int sm4_ce_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int key_len) -{ - struct sm4_ctx *ctx = crypto_tfm_ctx(tfm); - - return sm4_expandkey(ctx, key, key_len); -} - -static void sm4_ce_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) -{ - const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm); - - if (!crypto_simd_usable()) { - sm4_crypt_block(ctx->rkey_enc, out, in); - } else { - kernel_neon_begin(); - sm4_ce_do_crypt(ctx->rkey_enc, out, in); - kernel_neon_end(); - } -} - -static void sm4_ce_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) -{ - const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm); - - if (!crypto_simd_usable()) { - sm4_crypt_block(ctx->rkey_dec, out, in); - } else { - kernel_neon_begin(); - sm4_ce_do_crypt(ctx->rkey_dec, out, in); - kernel_neon_end(); - } -} - -static struct crypto_alg sm4_ce_alg = { - .cra_name = "sm4", - .cra_driver_name = "sm4-ce", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_CIPHER, - .cra_blocksize = SM4_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct sm4_ctx), - .cra_module = THIS_MODULE, - .cra_u.cipher = { - .cia_min_keysize = SM4_KEY_SIZE, - .cia_max_keysize = SM4_KEY_SIZE, - .cia_setkey = sm4_ce_setkey, - .cia_encrypt = sm4_ce_encrypt, - .cia_decrypt = sm4_ce_decrypt - } -}; - -static int __init sm4_ce_mod_init(void) -{ - return crypto_register_alg(&sm4_ce_alg); -} - -static void __exit sm4_ce_mod_fini(void) -{ - crypto_unregister_alg(&sm4_ce_alg); -} - -module_cpu_feature_match(SM4, sm4_ce_mod_init); -module_exit(sm4_ce_mod_fini); -- cgit v1.2.3 From 4f1aef9b806f58ef76fdac0b4d9cfab6e66aeef1 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Tue, 15 Mar 2022 17:44:53 +0800 Subject: crypto: arm64/sm4 - add ARMv8 NEON implementation This adds ARMv8 NEON implementations of SM4 in ECB, CBC, CFB and CTR modes. 
This implementation uses the plain NEON instruction set. All S-BOX substitutions use the tbl/tbx instructions of ARMv8; combined with the out-of-order execution of the CPU, this optimization supports encryption of up to 8 blocks at the same time. The performance of encrypting a single block is not as good as the software implementation, so the encryption operations of CBC and CFB still use the pure software algorithm. Benchmarked on a T-Head Yitian-710 at 2.75 GHz; the data comes from mode 218 of tcrypt. The columns correspond to different block lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 Signed-off-by: Tianjia Zhang Signed-off-by: Herbert Xu --- arch/arm64/crypto/Kconfig | 6 + arch/arm64/crypto/Makefile | 3 + arch/arm64/crypto/sm4-neon-core.S | 487 ++++++++++++++++++++++++++++++++++++++ arch/arm64/crypto/sm4-neon-glue.c | 442 ++++++++++++++++++++++++++++++++++ 4 files changed, 938 insertions(+) create mode 100644 arch/arm64/crypto/sm4-neon-core.S create mode 100644 arch/arm64/crypto/sm4-neon-glue.c (limited to 'arch/arm64') diff --git a/arch/arm64/crypto/Kconfig 
b/arch/arm64/crypto/Kconfig index 454621a20eaa..d62dd54d1800 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -53,6 +53,12 @@ config CRYPTO_SM4_ARM64_CE select CRYPTO_ALGAPI select CRYPTO_SM4 +config CRYPTO_SM4_ARM64_NEON_BLK + tristate "SM4 in ECB/CBC/CFB/CTR modes using NEON instructions" + depends on KERNEL_MODE_NEON + select CRYPTO_SKCIPHER + select CRYPTO_SM4 + config CRYPTO_GHASH_ARM64_CE tristate "GHASH/AES-GCM using ARMv8 Crypto Extensions" depends on KERNEL_MODE_NEON diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index 85863e610a2e..41aee6103e78 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -23,6 +23,9 @@ sm3-ce-y := sm3-ce-glue.o sm3-ce-core.o obj-$(CONFIG_CRYPTO_SM4_ARM64_CE) += sm4-ce-cipher.o sm4-ce-cipher-y := sm4-ce-cipher-glue.o sm4-ce-cipher-core.o +obj-$(CONFIG_CRYPTO_SM4_ARM64_NEON_BLK) += sm4-neon.o +sm4-neon-y := sm4-neon-glue.o sm4-neon-core.o + obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o diff --git a/arch/arm64/crypto/sm4-neon-core.S b/arch/arm64/crypto/sm4-neon-core.S new file mode 100644 index 000000000000..3d5256b354d2 --- /dev/null +++ b/arch/arm64/crypto/sm4-neon-core.S @@ -0,0 +1,487 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * SM4 Cipher Algorithm for ARMv8 NEON + * as specified in + * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html + * + * Copyright (C) 2022, Alibaba Group. + * Copyright (C) 2022 Tianjia Zhang + */ + +#include +#include + +/* Register macros */ + +#define RTMP0 v8 +#define RTMP1 v9 +#define RTMP2 v10 +#define RTMP3 v11 + +#define RX0 v12 +#define RX1 v13 +#define RKEY v14 +#define RIV v15 + +/* Helper macros. 
*/ + +#define PREPARE \ + adr_l x5, crypto_sm4_sbox; \ + ld1 {v16.16b-v19.16b}, [x5], #64; \ + ld1 {v20.16b-v23.16b}, [x5], #64; \ + ld1 {v24.16b-v27.16b}, [x5], #64; \ + ld1 {v28.16b-v31.16b}, [x5]; + +#define transpose_4x4(s0, s1, s2, s3) \ + zip1 RTMP0.4s, s0.4s, s1.4s; \ + zip1 RTMP1.4s, s2.4s, s3.4s; \ + zip2 RTMP2.4s, s0.4s, s1.4s; \ + zip2 RTMP3.4s, s2.4s, s3.4s; \ + zip1 s0.2d, RTMP0.2d, RTMP1.2d; \ + zip2 s1.2d, RTMP0.2d, RTMP1.2d; \ + zip1 s2.2d, RTMP2.2d, RTMP3.2d; \ + zip2 s3.2d, RTMP2.2d, RTMP3.2d; + +#define rotate_clockwise_90(s0, s1, s2, s3) \ + zip1 RTMP0.4s, s1.4s, s0.4s; \ + zip2 RTMP1.4s, s1.4s, s0.4s; \ + zip1 RTMP2.4s, s3.4s, s2.4s; \ + zip2 RTMP3.4s, s3.4s, s2.4s; \ + zip1 s0.2d, RTMP2.2d, RTMP0.2d; \ + zip2 s1.2d, RTMP2.2d, RTMP0.2d; \ + zip1 s2.2d, RTMP3.2d, RTMP1.2d; \ + zip2 s3.2d, RTMP3.2d, RTMP1.2d; + +#define ROUND4(round, s0, s1, s2, s3) \ + dup RX0.4s, RKEY.s[round]; \ + /* rk ^ s1 ^ s2 ^ s3 */ \ + eor RTMP1.16b, s2.16b, s3.16b; \ + eor RX0.16b, RX0.16b, s1.16b; \ + eor RX0.16b, RX0.16b, RTMP1.16b; \ + \ + /* sbox, non-linear part */ \ + movi RTMP3.16b, #64; /* sizeof(sbox) / 4 */ \ + tbl RTMP0.16b, {v16.16b-v19.16b}, RX0.16b; \ + sub RX0.16b, RX0.16b, RTMP3.16b; \ + tbx RTMP0.16b, {v20.16b-v23.16b}, RX0.16b; \ + sub RX0.16b, RX0.16b, RTMP3.16b; \ + tbx RTMP0.16b, {v24.16b-v27.16b}, RX0.16b; \ + sub RX0.16b, RX0.16b, RTMP3.16b; \ + tbx RTMP0.16b, {v28.16b-v31.16b}, RX0.16b; \ + \ + /* linear part */ \ + shl RTMP1.4s, RTMP0.4s, #8; \ + shl RTMP2.4s, RTMP0.4s, #16; \ + shl RTMP3.4s, RTMP0.4s, #24; \ + sri RTMP1.4s, RTMP0.4s, #(32-8); \ + sri RTMP2.4s, RTMP0.4s, #(32-16); \ + sri RTMP3.4s, RTMP0.4s, #(32-24); \ + /* RTMP1 = x ^ rol32(x, 8) ^ rol32(x, 16) */ \ + eor RTMP1.16b, RTMP1.16b, RTMP0.16b; \ + eor RTMP1.16b, RTMP1.16b, RTMP2.16b; \ + /* RTMP3 = x ^ rol32(x, 24) ^ rol32(RTMP1, 2) */ \ + eor RTMP3.16b, RTMP3.16b, RTMP0.16b; \ + shl RTMP2.4s, RTMP1.4s, 2; \ + sri RTMP2.4s, RTMP1.4s, #(32-2); \ + eor RTMP3.16b, RTMP3.16b, RTMP2.16b; 
\ + /* s0 ^= RTMP3 */ \ + eor s0.16b, s0.16b, RTMP3.16b; + +#define SM4_CRYPT_BLK4(b0, b1, b2, b3) \ + rev32 b0.16b, b0.16b; \ + rev32 b1.16b, b1.16b; \ + rev32 b2.16b, b2.16b; \ + rev32 b3.16b, b3.16b; \ + \ + transpose_4x4(b0, b1, b2, b3); \ + \ + mov x6, 8; \ +4: \ + ld1 {RKEY.4s}, [x0], #16; \ + subs x6, x6, #1; \ + \ + ROUND4(0, b0, b1, b2, b3); \ + ROUND4(1, b1, b2, b3, b0); \ + ROUND4(2, b2, b3, b0, b1); \ + ROUND4(3, b3, b0, b1, b2); \ + \ + bne 4b; \ + \ + rotate_clockwise_90(b0, b1, b2, b3); \ + rev32 b0.16b, b0.16b; \ + rev32 b1.16b, b1.16b; \ + rev32 b2.16b, b2.16b; \ + rev32 b3.16b, b3.16b; \ + \ + /* repoint to rkey */ \ + sub x0, x0, #128; + +#define ROUND8(round, s0, s1, s2, s3, t0, t1, t2, t3) \ + /* rk ^ s1 ^ s2 ^ s3 */ \ + dup RX0.4s, RKEY.s[round]; \ + eor RTMP0.16b, s2.16b, s3.16b; \ + mov RX1.16b, RX0.16b; \ + eor RTMP1.16b, t2.16b, t3.16b; \ + eor RX0.16b, RX0.16b, s1.16b; \ + eor RX1.16b, RX1.16b, t1.16b; \ + eor RX0.16b, RX0.16b, RTMP0.16b; \ + eor RX1.16b, RX1.16b, RTMP1.16b; \ + \ + /* sbox, non-linear part */ \ + movi RTMP3.16b, #64; /* sizeof(sbox) / 4 */ \ + tbl RTMP0.16b, {v16.16b-v19.16b}, RX0.16b; \ + tbl RTMP1.16b, {v16.16b-v19.16b}, RX1.16b; \ + sub RX0.16b, RX0.16b, RTMP3.16b; \ + sub RX1.16b, RX1.16b, RTMP3.16b; \ + tbx RTMP0.16b, {v20.16b-v23.16b}, RX0.16b; \ + tbx RTMP1.16b, {v20.16b-v23.16b}, RX1.16b; \ + sub RX0.16b, RX0.16b, RTMP3.16b; \ + sub RX1.16b, RX1.16b, RTMP3.16b; \ + tbx RTMP0.16b, {v24.16b-v27.16b}, RX0.16b; \ + tbx RTMP1.16b, {v24.16b-v27.16b}, RX1.16b; \ + sub RX0.16b, RX0.16b, RTMP3.16b; \ + sub RX1.16b, RX1.16b, RTMP3.16b; \ + tbx RTMP0.16b, {v28.16b-v31.16b}, RX0.16b; \ + tbx RTMP1.16b, {v28.16b-v31.16b}, RX1.16b; \ + \ + /* linear part */ \ + shl RX0.4s, RTMP0.4s, #8; \ + shl RX1.4s, RTMP1.4s, #8; \ + shl RTMP2.4s, RTMP0.4s, #16; \ + shl RTMP3.4s, RTMP1.4s, #16; \ + sri RX0.4s, RTMP0.4s, #(32 - 8); \ + sri RX1.4s, RTMP1.4s, #(32 - 8); \ + sri RTMP2.4s, RTMP0.4s, #(32 - 16); \ + sri RTMP3.4s, RTMP1.4s, #(32 - 
16); \ + /* RX = x ^ rol32(x, 8) ^ rol32(x, 16) */ \ + eor RX0.16b, RX0.16b, RTMP0.16b; \ + eor RX1.16b, RX1.16b, RTMP1.16b; \ + eor RX0.16b, RX0.16b, RTMP2.16b; \ + eor RX1.16b, RX1.16b, RTMP3.16b; \ + /* RTMP0/1 ^= x ^ rol32(x, 24) ^ rol32(RX, 2) */ \ + shl RTMP2.4s, RTMP0.4s, #24; \ + shl RTMP3.4s, RTMP1.4s, #24; \ + sri RTMP2.4s, RTMP0.4s, #(32 - 24); \ + sri RTMP3.4s, RTMP1.4s, #(32 - 24); \ + eor RTMP0.16b, RTMP0.16b, RTMP2.16b; \ + eor RTMP1.16b, RTMP1.16b, RTMP3.16b; \ + shl RTMP2.4s, RX0.4s, #2; \ + shl RTMP3.4s, RX1.4s, #2; \ + sri RTMP2.4s, RX0.4s, #(32 - 2); \ + sri RTMP3.4s, RX1.4s, #(32 - 2); \ + eor RTMP0.16b, RTMP0.16b, RTMP2.16b; \ + eor RTMP1.16b, RTMP1.16b, RTMP3.16b; \ + /* s0/t0 ^= RTMP0/1 */ \ + eor s0.16b, s0.16b, RTMP0.16b; \ + eor t0.16b, t0.16b, RTMP1.16b; + +#define SM4_CRYPT_BLK8(b0, b1, b2, b3, b4, b5, b6, b7) \ + rev32 b0.16b, b0.16b; \ + rev32 b1.16b, b1.16b; \ + rev32 b2.16b, b2.16b; \ + rev32 b3.16b, b3.16b; \ + rev32 b4.16b, b4.16b; \ + rev32 b5.16b, b5.16b; \ + rev32 b6.16b, b6.16b; \ + rev32 b7.16b, b7.16b; \ + \ + transpose_4x4(b0, b1, b2, b3); \ + transpose_4x4(b4, b5, b6, b7); \ + \ + mov x6, 8; \ +8: \ + ld1 {RKEY.4s}, [x0], #16; \ + subs x6, x6, #1; \ + \ + ROUND8(0, b0, b1, b2, b3, b4, b5, b6, b7); \ + ROUND8(1, b1, b2, b3, b0, b5, b6, b7, b4); \ + ROUND8(2, b2, b3, b0, b1, b6, b7, b4, b5); \ + ROUND8(3, b3, b0, b1, b2, b7, b4, b5, b6); \ + \ + bne 8b; \ + \ + rotate_clockwise_90(b0, b1, b2, b3); \ + rotate_clockwise_90(b4, b5, b6, b7); \ + rev32 b0.16b, b0.16b; \ + rev32 b1.16b, b1.16b; \ + rev32 b2.16b, b2.16b; \ + rev32 b3.16b, b3.16b; \ + rev32 b4.16b, b4.16b; \ + rev32 b5.16b, b5.16b; \ + rev32 b6.16b, b6.16b; \ + rev32 b7.16b, b7.16b; \ + \ + /* repoint to rkey */ \ + sub x0, x0, #128; + + +.align 3 +SYM_FUNC_START_LOCAL(__sm4_neon_crypt_blk1_4) + /* input: + * x0: round key array, CTX + * x1: dst + * x2: src + * w3: num blocks (1..4) + */ + PREPARE; + + ld1 {v0.16b}, [x2], #16; + mov v1.16b, v0.16b; + mov v2.16b, 
v0.16b; + mov v3.16b, v0.16b; + cmp w3, #2; + blt .Lblk4_load_input_done; + ld1 {v1.16b}, [x2], #16; + beq .Lblk4_load_input_done; + ld1 {v2.16b}, [x2], #16; + cmp w3, #3; + beq .Lblk4_load_input_done; + ld1 {v3.16b}, [x2]; + +.Lblk4_load_input_done: + SM4_CRYPT_BLK4(v0, v1, v2, v3); + + st1 {v0.16b}, [x1], #16; + cmp w3, #2; + blt .Lblk4_store_output_done; + st1 {v1.16b}, [x1], #16; + beq .Lblk4_store_output_done; + st1 {v2.16b}, [x1], #16; + cmp w3, #3; + beq .Lblk4_store_output_done; + st1 {v3.16b}, [x1]; + +.Lblk4_store_output_done: + ret; +SYM_FUNC_END(__sm4_neon_crypt_blk1_4) + +.align 3 +SYM_FUNC_START(sm4_neon_crypt_blk1_8) + /* input: + * x0: round key array, CTX + * x1: dst + * x2: src + * w3: num blocks (1..8) + */ + cmp w3, #5; + blt __sm4_neon_crypt_blk1_4; + + PREPARE; + + ld1 {v0.16b-v3.16b}, [x2], #64; + ld1 {v4.16b}, [x2], #16; + mov v5.16b, v4.16b; + mov v6.16b, v4.16b; + mov v7.16b, v4.16b; + beq .Lblk8_load_input_done; + ld1 {v5.16b}, [x2], #16; + cmp w3, #7; + blt .Lblk8_load_input_done; + ld1 {v6.16b}, [x2], #16; + beq .Lblk8_load_input_done; + ld1 {v7.16b}, [x2]; + +.Lblk8_load_input_done: + SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7); + + cmp w3, #6; + st1 {v0.16b-v3.16b}, [x1], #64; + st1 {v4.16b}, [x1], #16; + blt .Lblk8_store_output_done; + st1 {v5.16b}, [x1], #16; + beq .Lblk8_store_output_done; + st1 {v6.16b}, [x1], #16; + cmp w3, #7; + beq .Lblk8_store_output_done; + st1 {v7.16b}, [x1]; + +.Lblk8_store_output_done: + ret; +SYM_FUNC_END(sm4_neon_crypt_blk1_8) + +.align 3 +SYM_FUNC_START(sm4_neon_crypt_blk8) + /* input: + * x0: round key array, CTX + * x1: dst + * x2: src + * w3: nblocks (multiples of 8) + */ + PREPARE; + +.Lcrypt_loop_blk: + subs w3, w3, #8; + bmi .Lcrypt_end; + + ld1 {v0.16b-v3.16b}, [x2], #64; + ld1 {v4.16b-v7.16b}, [x2], #64; + + SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7); + + st1 {v0.16b-v3.16b}, [x1], #64; + st1 {v4.16b-v7.16b}, [x1], #64; + + b .Lcrypt_loop_blk; + +.Lcrypt_end: + ret; 
+SYM_FUNC_END(sm4_neon_crypt_blk8) + +.align 3 +SYM_FUNC_START(sm4_neon_cbc_dec_blk8) + /* input: + * x0: round key array, CTX + * x1: dst + * x2: src + * x3: iv (big endian, 128 bit) + * w4: nblocks (multiples of 8) + */ + PREPARE; + + ld1 {RIV.16b}, [x3]; + +.Lcbc_loop_blk: + subs w4, w4, #8; + bmi .Lcbc_end; + + ld1 {v0.16b-v3.16b}, [x2], #64; + ld1 {v4.16b-v7.16b}, [x2]; + + SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7); + + sub x2, x2, #64; + eor v0.16b, v0.16b, RIV.16b; + ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; + eor v1.16b, v1.16b, RTMP0.16b; + eor v2.16b, v2.16b, RTMP1.16b; + eor v3.16b, v3.16b, RTMP2.16b; + st1 {v0.16b-v3.16b}, [x1], #64; + + eor v4.16b, v4.16b, RTMP3.16b; + ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; + eor v5.16b, v5.16b, RTMP0.16b; + eor v6.16b, v6.16b, RTMP1.16b; + eor v7.16b, v7.16b, RTMP2.16b; + + mov RIV.16b, RTMP3.16b; + st1 {v4.16b-v7.16b}, [x1], #64; + + b .Lcbc_loop_blk; + +.Lcbc_end: + /* store new IV */ + st1 {RIV.16b}, [x3]; + + ret; +SYM_FUNC_END(sm4_neon_cbc_dec_blk8) + +.align 3 +SYM_FUNC_START(sm4_neon_cfb_dec_blk8) + /* input: + * x0: round key array, CTX + * x1: dst + * x2: src + * x3: iv (big endian, 128 bit) + * w4: nblocks (multiples of 8) + */ + PREPARE; + + ld1 {v0.16b}, [x3]; + +.Lcfb_loop_blk: + subs w4, w4, #8; + bmi .Lcfb_end; + + ld1 {v1.16b, v2.16b, v3.16b}, [x2], #48; + ld1 {v4.16b-v7.16b}, [x2]; + + SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7); + + sub x2, x2, #48; + ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; + eor v0.16b, v0.16b, RTMP0.16b; + eor v1.16b, v1.16b, RTMP1.16b; + eor v2.16b, v2.16b, RTMP2.16b; + eor v3.16b, v3.16b, RTMP3.16b; + st1 {v0.16b-v3.16b}, [x1], #64; + + ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; + eor v4.16b, v4.16b, RTMP0.16b; + eor v5.16b, v5.16b, RTMP1.16b; + eor v6.16b, v6.16b, RTMP2.16b; + eor v7.16b, v7.16b, RTMP3.16b; + st1 {v4.16b-v7.16b}, [x1], #64; + + mov v0.16b, RTMP3.16b; + + b .Lcfb_loop_blk; + +.Lcfb_end: + /* store new IV */ + st1 {v0.16b}, [x3]; + + ret; 
+SYM_FUNC_END(sm4_neon_cfb_dec_blk8) + +.align 3 +SYM_FUNC_START(sm4_neon_ctr_enc_blk8) + /* input: + * x0: round key array, CTX + * x1: dst + * x2: src + * x3: ctr (big endian, 128 bit) + * w4: nblocks (multiples of 8) + */ + PREPARE; + + ldp x7, x8, [x3]; + rev x7, x7; + rev x8, x8; + +.Lctr_loop_blk: + subs w4, w4, #8; + bmi .Lctr_end; + +#define inc_le128(vctr) \ + mov vctr.d[1], x8; \ + mov vctr.d[0], x7; \ + adds x8, x8, #1; \ + adc x7, x7, xzr; \ + rev64 vctr.16b, vctr.16b; + + /* construct CTRs */ + inc_le128(v0); /* +0 */ + inc_le128(v1); /* +1 */ + inc_le128(v2); /* +2 */ + inc_le128(v3); /* +3 */ + inc_le128(v4); /* +4 */ + inc_le128(v5); /* +5 */ + inc_le128(v6); /* +6 */ + inc_le128(v7); /* +7 */ + + SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7); + + ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; + eor v0.16b, v0.16b, RTMP0.16b; + eor v1.16b, v1.16b, RTMP1.16b; + eor v2.16b, v2.16b, RTMP2.16b; + eor v3.16b, v3.16b, RTMP3.16b; + st1 {v0.16b-v3.16b}, [x1], #64; + + ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; + eor v4.16b, v4.16b, RTMP0.16b; + eor v5.16b, v5.16b, RTMP1.16b; + eor v6.16b, v6.16b, RTMP2.16b; + eor v7.16b, v7.16b, RTMP3.16b; + st1 {v4.16b-v7.16b}, [x1], #64; + + b .Lctr_loop_blk; + +.Lctr_end: + /* store new CTR */ + rev x7, x7; + rev x8, x8; + stp x7, x8, [x3]; + + ret; +SYM_FUNC_END(sm4_neon_ctr_enc_blk8) diff --git a/arch/arm64/crypto/sm4-neon-glue.c b/arch/arm64/crypto/sm4-neon-glue.c new file mode 100644 index 000000000000..03a6a6866a31 --- /dev/null +++ b/arch/arm64/crypto/sm4-neon-glue.c @@ -0,0 +1,442 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * SM4 Cipher Algorithm, using ARMv8 NEON + * as specified in + * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html + * + * Copyright (C) 2022, Alibaba Group. 
+ * Copyright (C) 2022 Tianjia Zhang + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define BYTES2BLKS(nbytes) ((nbytes) >> 4) +#define BYTES2BLK8(nbytes) (((nbytes) >> 4) & ~(8 - 1)) + +asmlinkage void sm4_neon_crypt_blk1_8(const u32 *rkey, u8 *dst, const u8 *src, + unsigned int nblks); +asmlinkage void sm4_neon_crypt_blk8(const u32 *rkey, u8 *dst, const u8 *src, + unsigned int nblks); +asmlinkage void sm4_neon_cbc_dec_blk8(const u32 *rkey, u8 *dst, const u8 *src, + u8 *iv, unsigned int nblks); +asmlinkage void sm4_neon_cfb_dec_blk8(const u32 *rkey, u8 *dst, const u8 *src, + u8 *iv, unsigned int nblks); +asmlinkage void sm4_neon_ctr_enc_blk8(const u32 *rkey, u8 *dst, const u8 *src, + u8 *iv, unsigned int nblks); + +static int sm4_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int key_len) +{ + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + + return sm4_expandkey(ctx, key, key_len); +} + +static int sm4_ecb_do_crypt(struct skcipher_request *req, const u32 *rkey) +{ + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + unsigned int nblks; + + kernel_neon_begin(); + + nblks = BYTES2BLK8(nbytes); + if (nblks) { + sm4_neon_crypt_blk8(rkey, dst, src, nblks); + dst += nblks * SM4_BLOCK_SIZE; + src += nblks * SM4_BLOCK_SIZE; + nbytes -= nblks * SM4_BLOCK_SIZE; + } + + nblks = BYTES2BLKS(nbytes); + if (nblks) { + sm4_neon_crypt_blk1_8(rkey, dst, src, nblks); + nbytes -= nblks * SM4_BLOCK_SIZE; + } + + kernel_neon_end(); + + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +static int sm4_ecb_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + + return sm4_ecb_do_crypt(req, ctx->rkey_enc); +} + +static int 
sm4_ecb_decrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + + return sm4_ecb_do_crypt(req, ctx->rkey_dec); +} + +static int sm4_cbc_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + const u8 *iv = walk.iv; + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + + while (nbytes >= SM4_BLOCK_SIZE) { + crypto_xor_cpy(dst, src, iv, SM4_BLOCK_SIZE); + sm4_crypt_block(ctx->rkey_enc, dst, dst); + iv = dst; + src += SM4_BLOCK_SIZE; + dst += SM4_BLOCK_SIZE; + nbytes -= SM4_BLOCK_SIZE; + } + if (iv != walk.iv) + memcpy(walk.iv, iv, SM4_BLOCK_SIZE); + + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +static int sm4_cbc_decrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + unsigned int nblks; + + kernel_neon_begin(); + + nblks = BYTES2BLK8(nbytes); + if (nblks) { + sm4_neon_cbc_dec_blk8(ctx->rkey_dec, dst, src, + walk.iv, nblks); + dst += nblks * SM4_BLOCK_SIZE; + src += nblks * SM4_BLOCK_SIZE; + nbytes -= nblks * SM4_BLOCK_SIZE; + } + + nblks = BYTES2BLKS(nbytes); + if (nblks) { + u8 keystream[SM4_BLOCK_SIZE * 8]; + u8 iv[SM4_BLOCK_SIZE]; + int i; + + sm4_neon_crypt_blk1_8(ctx->rkey_dec, keystream, + src, nblks); + + src += ((int)nblks - 2) * SM4_BLOCK_SIZE; + dst += (nblks - 1) * SM4_BLOCK_SIZE; + memcpy(iv, src + SM4_BLOCK_SIZE, SM4_BLOCK_SIZE); + + for (i = nblks - 1; i 
> 0; i--) { + crypto_xor_cpy(dst, src, + &keystream[i * SM4_BLOCK_SIZE], + SM4_BLOCK_SIZE); + src -= SM4_BLOCK_SIZE; + dst -= SM4_BLOCK_SIZE; + } + crypto_xor_cpy(dst, walk.iv, + keystream, SM4_BLOCK_SIZE); + memcpy(walk.iv, iv, SM4_BLOCK_SIZE); + nbytes -= nblks * SM4_BLOCK_SIZE; + } + + kernel_neon_end(); + + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +static int sm4_cfb_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + u8 keystream[SM4_BLOCK_SIZE]; + const u8 *iv = walk.iv; + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + + while (nbytes >= SM4_BLOCK_SIZE) { + sm4_crypt_block(ctx->rkey_enc, keystream, iv); + crypto_xor_cpy(dst, src, keystream, SM4_BLOCK_SIZE); + iv = dst; + src += SM4_BLOCK_SIZE; + dst += SM4_BLOCK_SIZE; + nbytes -= SM4_BLOCK_SIZE; + } + if (iv != walk.iv) + memcpy(walk.iv, iv, SM4_BLOCK_SIZE); + + /* tail */ + if (walk.nbytes == walk.total && nbytes > 0) { + sm4_crypt_block(ctx->rkey_enc, keystream, walk.iv); + crypto_xor_cpy(dst, src, keystream, nbytes); + nbytes = 0; + } + + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +static int sm4_cfb_decrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + unsigned int nblks; + + kernel_neon_begin(); + + nblks = BYTES2BLK8(nbytes); + if (nblks) { + sm4_neon_cfb_dec_blk8(ctx->rkey_enc, dst, src, + walk.iv, nblks); + dst += nblks * SM4_BLOCK_SIZE; + src += nblks * 
SM4_BLOCK_SIZE; + nbytes -= nblks * SM4_BLOCK_SIZE; + } + + nblks = BYTES2BLKS(nbytes); + if (nblks) { + u8 keystream[SM4_BLOCK_SIZE * 8]; + + memcpy(keystream, walk.iv, SM4_BLOCK_SIZE); + if (nblks > 1) + memcpy(&keystream[SM4_BLOCK_SIZE], src, + (nblks - 1) * SM4_BLOCK_SIZE); + memcpy(walk.iv, src + (nblks - 1) * SM4_BLOCK_SIZE, + SM4_BLOCK_SIZE); + + sm4_neon_crypt_blk1_8(ctx->rkey_enc, keystream, + keystream, nblks); + + crypto_xor_cpy(dst, src, keystream, + nblks * SM4_BLOCK_SIZE); + dst += nblks * SM4_BLOCK_SIZE; + src += nblks * SM4_BLOCK_SIZE; + nbytes -= nblks * SM4_BLOCK_SIZE; + } + + kernel_neon_end(); + + /* tail */ + if (walk.nbytes == walk.total && nbytes > 0) { + u8 keystream[SM4_BLOCK_SIZE]; + + sm4_crypt_block(ctx->rkey_enc, keystream, walk.iv); + crypto_xor_cpy(dst, src, keystream, nbytes); + nbytes = 0; + } + + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +static int sm4_ctr_crypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + unsigned int nblks; + + kernel_neon_begin(); + + nblks = BYTES2BLK8(nbytes); + if (nblks) { + sm4_neon_ctr_enc_blk8(ctx->rkey_enc, dst, src, + walk.iv, nblks); + dst += nblks * SM4_BLOCK_SIZE; + src += nblks * SM4_BLOCK_SIZE; + nbytes -= nblks * SM4_BLOCK_SIZE; + } + + nblks = BYTES2BLKS(nbytes); + if (nblks) { + u8 keystream[SM4_BLOCK_SIZE * 8]; + int i; + + for (i = 0; i < nblks; i++) { + memcpy(&keystream[i * SM4_BLOCK_SIZE], + walk.iv, SM4_BLOCK_SIZE); + crypto_inc(walk.iv, SM4_BLOCK_SIZE); + } + sm4_neon_crypt_blk1_8(ctx->rkey_enc, keystream, + keystream, nblks); + + crypto_xor_cpy(dst, src, keystream, + nblks * SM4_BLOCK_SIZE); + dst += nblks * SM4_BLOCK_SIZE; + src += nblks * 
SM4_BLOCK_SIZE; + nbytes -= nblks * SM4_BLOCK_SIZE; + } + + kernel_neon_end(); + + /* tail */ + if (walk.nbytes == walk.total && nbytes > 0) { + u8 keystream[SM4_BLOCK_SIZE]; + + sm4_crypt_block(ctx->rkey_enc, keystream, walk.iv); + crypto_inc(walk.iv, SM4_BLOCK_SIZE); + crypto_xor_cpy(dst, src, keystream, nbytes); + nbytes = 0; + } + + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +static struct skcipher_alg sm4_algs[] = { + { + .base = { + .cra_name = "ecb(sm4)", + .cra_driver_name = "ecb-sm4-neon", + .cra_priority = 200, + .cra_blocksize = SM4_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .setkey = sm4_setkey, + .encrypt = sm4_ecb_encrypt, + .decrypt = sm4_ecb_decrypt, + }, { + .base = { + .cra_name = "cbc(sm4)", + .cra_driver_name = "cbc-sm4-neon", + .cra_priority = 200, + .cra_blocksize = SM4_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .ivsize = SM4_BLOCK_SIZE, + .setkey = sm4_setkey, + .encrypt = sm4_cbc_encrypt, + .decrypt = sm4_cbc_decrypt, + }, { + .base = { + .cra_name = "cfb(sm4)", + .cra_driver_name = "cfb-sm4-neon", + .cra_priority = 200, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .ivsize = SM4_BLOCK_SIZE, + .chunksize = SM4_BLOCK_SIZE, + .setkey = sm4_setkey, + .encrypt = sm4_cfb_encrypt, + .decrypt = sm4_cfb_decrypt, + }, { + .base = { + .cra_name = "ctr(sm4)", + .cra_driver_name = "ctr-sm4-neon", + .cra_priority = 200, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .ivsize = SM4_BLOCK_SIZE, + .chunksize = SM4_BLOCK_SIZE, + .setkey = sm4_setkey, + .encrypt = sm4_ctr_crypt, + .decrypt = 
sm4_ctr_crypt, + } +}; + +static int __init sm4_init(void) +{ + return crypto_register_skciphers(sm4_algs, ARRAY_SIZE(sm4_algs)); +} + +static void __exit sm4_exit(void) +{ + crypto_unregister_skciphers(sm4_algs, ARRAY_SIZE(sm4_algs)); +} + +module_init(sm4_init); +module_exit(sm4_exit); + +MODULE_DESCRIPTION("SM4 ECB/CBC/CFB/CTR using ARMv8 NEON"); +MODULE_ALIAS_CRYPTO("sm4-neon"); +MODULE_ALIAS_CRYPTO("sm4"); +MODULE_ALIAS_CRYPTO("ecb(sm4)"); +MODULE_ALIAS_CRYPTO("cbc(sm4)"); +MODULE_ALIAS_CRYPTO("cfb(sm4)"); +MODULE_ALIAS_CRYPTO("ctr(sm4)"); +MODULE_AUTHOR("Tianjia Zhang "); +MODULE_LICENSE("GPL v2"); -- cgit v1.2.3 From 5b33e0ec881c609d96c9cce63fe15e0d0af457db Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Tue, 15 Mar 2022 17:44:54 +0800 Subject: crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions. It also includes the key expansion operation, because the Crypto Extensions instructions are much faster than the software implementation. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz; the data comes from mode 218 of tcrypt. The column headings are the different block lengths.
The data is tabulated and the unit is Mb/s:

sm4-generic |      16       64      128      256     1024     1420     4096
    ECB enc |   80.05    91.42    93.66    94.77    95.69    95.77    95.86
    ECB dec |   79.98    91.41    93.64    94.76    95.66    95.77    95.85
    CBC enc |   78.55    86.50    88.02    88.77    89.36    89.42    89.48
    CBC dec |   76.82    89.06    91.52    92.77    93.75    93.83    93.96
    CFB enc |   77.64    86.13    87.62    88.42    89.08    88.83    89.18
    CFB dec |   77.57    88.34    90.36    91.45    92.34    92.00    92.44
    CTR enc |   77.80    88.28    90.23    91.22    92.11    91.81    92.25
    CTR dec |   77.83    88.22    90.22    91.22    92.04    91.82    92.28

sm4-neon
    ECB enc |   28.31   112.77   203.03   209.89   215.49   202.11   210.59
    ECB dec |   28.36   113.45   203.23   210.00   215.52   202.13   210.65
    CBC enc |   79.32    87.02    88.51    89.28    89.85    89.89    89.97
    CBC dec |   28.29   112.20   203.30   209.82   214.99   201.51   209.95
    CFB enc |   79.59    87.16    88.54    89.30    89.83    89.62    89.92
    CFB dec |   28.12   111.05   202.47   209.02   214.21   210.90   209.12
    CTR enc |   28.04   108.81   200.62   206.65   211.78   208.78   206.74
    CTR dec |   28.02   108.82   200.45   206.62   211.78   208.74   206.70

sm4-ce-cipher
    ECB enc |  336.79   587.13   682.70   747.37   803.75   811.52   818.06
    ECB dec |  339.18   584.52   679.72   743.68   798.82   803.83   811.54
    CBC enc |  316.63   521.47   597.00   647.14   690.82   695.21   700.55
    CBC dec |  291.80   503.79   585.66   640.82   689.86   695.16   701.72
    CFB enc |  294.79   482.31   552.13   594.71   631.60   628.91   638.92
    CFB dec |  293.09   466.44   526.56   563.17   594.41   592.26   601.97
    CTR enc |  309.61   506.13   576.86   620.47   656.38   654.51   665.10
    CTR dec |  306.69   505.57   576.84   620.18   657.09   654.52   665.32

sm4-ce
    ECB enc |  366.96  1329.81  2024.29  2755.50  3790.07  3861.91  4051.40
    ECB dec |  367.30  1323.93  2018.72  2747.43  3787.39  3862.55  4052.62
    CBC enc |  358.09   682.68   807.24   885.35   958.29   963.60   973.73
    CBC dec |  366.51  1303.63  1978.64  2667.93  3624.53  3683.41  3856.08
    CFB enc |  351.51   681.26   807.81   893.10   968.54   969.17   985.83
    CFB dec |  354.98  1266.61  1929.63  2634.81  3614.23  3611.59  3841.68
    CTR enc |  324.23  1121.25  1689.44  2256.70  2981.90  3007.79  3060.74
    CTR dec |  324.18  1120.44  1694.31  2258.32  2982.01  3010.09  3060.99

Signed-off-by: Tianjia Zhang
Signed-off-by: Herbert Xu --- arch/arm64/crypto/Kconfig | 6 + arch/arm64/crypto/Makefile | 3 + arch/arm64/crypto/sm4-ce-core.S | 660 ++++++++++++++++++++++++++++++++++++++++ arch/arm64/crypto/sm4-ce-glue.c | 372 ++++++++++++++++++++++ 4 files changed, 1041 insertions(+) create mode 100644 arch/arm64/crypto/sm4-ce-core.S create mode 100644 arch/arm64/crypto/sm4-ce-glue.c (limited to 'arch/arm64') diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index d62dd54d1800..4fe7037d2347 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -53,6 +53,12 @@ config CRYPTO_SM4_ARM64_CE select CRYPTO_ALGAPI select CRYPTO_SM4 +config CRYPTO_SM4_ARM64_CE_BLK + tristate "SM4 in ECB/CBC/CFB/CTR modes using ARMv8 Crypto Extensions" + depends on KERNEL_MODE_NEON + select CRYPTO_SKCIPHER + select CRYPTO_LIB_SM4 + config CRYPTO_SM4_ARM64_NEON_BLK tristate "SM4 in ECB/CBC/CFB/CTR modes using NEON instructions" depends on KERNEL_MODE_NEON diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index 41aee6103e78..bea8995133b1 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -23,6 +23,9 @@ sm3-ce-y := sm3-ce-glue.o sm3-ce-core.o obj-$(CONFIG_CRYPTO_SM4_ARM64_CE) += sm4-ce-cipher.o sm4-ce-cipher-y := sm4-ce-cipher-glue.o sm4-ce-cipher-core.o +obj-$(CONFIG_CRYPTO_SM4_ARM64_CE_BLK) += sm4-ce.o +sm4-ce-y := sm4-ce-glue.o sm4-ce-core.o + obj-$(CONFIG_CRYPTO_SM4_ARM64_NEON_BLK) += sm4-neon.o sm4-neon-y := sm4-neon-glue.o sm4-neon-core.o diff --git a/arch/arm64/crypto/sm4-ce-core.S b/arch/arm64/crypto/sm4-ce-core.S new file mode 100644 index 000000000000..934e0f093279 --- /dev/null +++ b/arch/arm64/crypto/sm4-ce-core.S @@ -0,0 +1,660 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * SM4 Cipher Algorithm for ARMv8 with Crypto Extensions + * as specified in + * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html + * + * Copyright (C) 2022, Alibaba Group. 
+ * Copyright (C) 2022 Tianjia Zhang + */ + +#include +#include + +.arch armv8-a+crypto + +.irp b, 0, 1, 2, 3, 4, 5, 6, 7, 16, 20, 24, 25, 26, 27, 28, 29, 30, 31 + .set .Lv\b\().4s, \b +.endr + +.macro sm4e, vd, vn + .inst 0xcec08400 | (.L\vn << 5) | .L\vd +.endm + +.macro sm4ekey, vd, vn, vm + .inst 0xce60c800 | (.L\vm << 16) | (.L\vn << 5) | .L\vd +.endm + +/* Register macros */ + +#define RTMP0 v16 +#define RTMP1 v17 +#define RTMP2 v18 +#define RTMP3 v19 + +#define RIV v20 + +/* Helper macros. */ + +#define PREPARE \ + ld1 {v24.16b-v27.16b}, [x0], #64; \ + ld1 {v28.16b-v31.16b}, [x0]; + +#define SM4_CRYPT_BLK(b0) \ + rev32 b0.16b, b0.16b; \ + sm4e b0.4s, v24.4s; \ + sm4e b0.4s, v25.4s; \ + sm4e b0.4s, v26.4s; \ + sm4e b0.4s, v27.4s; \ + sm4e b0.4s, v28.4s; \ + sm4e b0.4s, v29.4s; \ + sm4e b0.4s, v30.4s; \ + sm4e b0.4s, v31.4s; \ + rev64 b0.4s, b0.4s; \ + ext b0.16b, b0.16b, b0.16b, #8; \ + rev32 b0.16b, b0.16b; + +#define SM4_CRYPT_BLK4(b0, b1, b2, b3) \ + rev32 b0.16b, b0.16b; \ + rev32 b1.16b, b1.16b; \ + rev32 b2.16b, b2.16b; \ + rev32 b3.16b, b3.16b; \ + sm4e b0.4s, v24.4s; \ + sm4e b1.4s, v24.4s; \ + sm4e b2.4s, v24.4s; \ + sm4e b3.4s, v24.4s; \ + sm4e b0.4s, v25.4s; \ + sm4e b1.4s, v25.4s; \ + sm4e b2.4s, v25.4s; \ + sm4e b3.4s, v25.4s; \ + sm4e b0.4s, v26.4s; \ + sm4e b1.4s, v26.4s; \ + sm4e b2.4s, v26.4s; \ + sm4e b3.4s, v26.4s; \ + sm4e b0.4s, v27.4s; \ + sm4e b1.4s, v27.4s; \ + sm4e b2.4s, v27.4s; \ + sm4e b3.4s, v27.4s; \ + sm4e b0.4s, v28.4s; \ + sm4e b1.4s, v28.4s; \ + sm4e b2.4s, v28.4s; \ + sm4e b3.4s, v28.4s; \ + sm4e b0.4s, v29.4s; \ + sm4e b1.4s, v29.4s; \ + sm4e b2.4s, v29.4s; \ + sm4e b3.4s, v29.4s; \ + sm4e b0.4s, v30.4s; \ + sm4e b1.4s, v30.4s; \ + sm4e b2.4s, v30.4s; \ + sm4e b3.4s, v30.4s; \ + sm4e b0.4s, v31.4s; \ + sm4e b1.4s, v31.4s; \ + sm4e b2.4s, v31.4s; \ + sm4e b3.4s, v31.4s; \ + rev64 b0.4s, b0.4s; \ + rev64 b1.4s, b1.4s; \ + rev64 b2.4s, b2.4s; \ + rev64 b3.4s, b3.4s; \ + ext b0.16b, b0.16b, b0.16b, #8; \ + ext b1.16b, b1.16b, 
b1.16b, #8; \ + ext b2.16b, b2.16b, b2.16b, #8; \ + ext b3.16b, b3.16b, b3.16b, #8; \ + rev32 b0.16b, b0.16b; \ + rev32 b1.16b, b1.16b; \ + rev32 b2.16b, b2.16b; \ + rev32 b3.16b, b3.16b; + +#define SM4_CRYPT_BLK8(b0, b1, b2, b3, b4, b5, b6, b7) \ + rev32 b0.16b, b0.16b; \ + rev32 b1.16b, b1.16b; \ + rev32 b2.16b, b2.16b; \ + rev32 b3.16b, b3.16b; \ + rev32 b4.16b, b4.16b; \ + rev32 b5.16b, b5.16b; \ + rev32 b6.16b, b6.16b; \ + rev32 b7.16b, b7.16b; \ + sm4e b0.4s, v24.4s; \ + sm4e b1.4s, v24.4s; \ + sm4e b2.4s, v24.4s; \ + sm4e b3.4s, v24.4s; \ + sm4e b4.4s, v24.4s; \ + sm4e b5.4s, v24.4s; \ + sm4e b6.4s, v24.4s; \ + sm4e b7.4s, v24.4s; \ + sm4e b0.4s, v25.4s; \ + sm4e b1.4s, v25.4s; \ + sm4e b2.4s, v25.4s; \ + sm4e b3.4s, v25.4s; \ + sm4e b4.4s, v25.4s; \ + sm4e b5.4s, v25.4s; \ + sm4e b6.4s, v25.4s; \ + sm4e b7.4s, v25.4s; \ + sm4e b0.4s, v26.4s; \ + sm4e b1.4s, v26.4s; \ + sm4e b2.4s, v26.4s; \ + sm4e b3.4s, v26.4s; \ + sm4e b4.4s, v26.4s; \ + sm4e b5.4s, v26.4s; \ + sm4e b6.4s, v26.4s; \ + sm4e b7.4s, v26.4s; \ + sm4e b0.4s, v27.4s; \ + sm4e b1.4s, v27.4s; \ + sm4e b2.4s, v27.4s; \ + sm4e b3.4s, v27.4s; \ + sm4e b4.4s, v27.4s; \ + sm4e b5.4s, v27.4s; \ + sm4e b6.4s, v27.4s; \ + sm4e b7.4s, v27.4s; \ + sm4e b0.4s, v28.4s; \ + sm4e b1.4s, v28.4s; \ + sm4e b2.4s, v28.4s; \ + sm4e b3.4s, v28.4s; \ + sm4e b4.4s, v28.4s; \ + sm4e b5.4s, v28.4s; \ + sm4e b6.4s, v28.4s; \ + sm4e b7.4s, v28.4s; \ + sm4e b0.4s, v29.4s; \ + sm4e b1.4s, v29.4s; \ + sm4e b2.4s, v29.4s; \ + sm4e b3.4s, v29.4s; \ + sm4e b4.4s, v29.4s; \ + sm4e b5.4s, v29.4s; \ + sm4e b6.4s, v29.4s; \ + sm4e b7.4s, v29.4s; \ + sm4e b0.4s, v30.4s; \ + sm4e b1.4s, v30.4s; \ + sm4e b2.4s, v30.4s; \ + sm4e b3.4s, v30.4s; \ + sm4e b4.4s, v30.4s; \ + sm4e b5.4s, v30.4s; \ + sm4e b6.4s, v30.4s; \ + sm4e b7.4s, v30.4s; \ + sm4e b0.4s, v31.4s; \ + sm4e b1.4s, v31.4s; \ + sm4e b2.4s, v31.4s; \ + sm4e b3.4s, v31.4s; \ + sm4e b4.4s, v31.4s; \ + sm4e b5.4s, v31.4s; \ + sm4e b6.4s, v31.4s; \ + sm4e b7.4s, v31.4s; \ + rev64 
b0.4s, b0.4s; \ + rev64 b1.4s, b1.4s; \ + rev64 b2.4s, b2.4s; \ + rev64 b3.4s, b3.4s; \ + rev64 b4.4s, b4.4s; \ + rev64 b5.4s, b5.4s; \ + rev64 b6.4s, b6.4s; \ + rev64 b7.4s, b7.4s; \ + ext b0.16b, b0.16b, b0.16b, #8; \ + ext b1.16b, b1.16b, b1.16b, #8; \ + ext b2.16b, b2.16b, b2.16b, #8; \ + ext b3.16b, b3.16b, b3.16b, #8; \ + ext b4.16b, b4.16b, b4.16b, #8; \ + ext b5.16b, b5.16b, b5.16b, #8; \ + ext b6.16b, b6.16b, b6.16b, #8; \ + ext b7.16b, b7.16b, b7.16b, #8; \ + rev32 b0.16b, b0.16b; \ + rev32 b1.16b, b1.16b; \ + rev32 b2.16b, b2.16b; \ + rev32 b3.16b, b3.16b; \ + rev32 b4.16b, b4.16b; \ + rev32 b5.16b, b5.16b; \ + rev32 b6.16b, b6.16b; \ + rev32 b7.16b, b7.16b; + + +.align 3 +SYM_FUNC_START(sm4_ce_expand_key) + /* input: + * x0: 128-bit key + * x1: rkey_enc + * x2: rkey_dec + * x3: fk array + * x4: ck array + */ + ld1 {v0.16b}, [x0]; + rev32 v0.16b, v0.16b; + ld1 {v1.16b}, [x3]; + /* load ck */ + ld1 {v24.16b-v27.16b}, [x4], #64; + ld1 {v28.16b-v31.16b}, [x4]; + + /* input ^ fk */ + eor v0.16b, v0.16b, v1.16b; + + sm4ekey v0.4s, v0.4s, v24.4s; + sm4ekey v1.4s, v0.4s, v25.4s; + sm4ekey v2.4s, v1.4s, v26.4s; + sm4ekey v3.4s, v2.4s, v27.4s; + sm4ekey v4.4s, v3.4s, v28.4s; + sm4ekey v5.4s, v4.4s, v29.4s; + sm4ekey v6.4s, v5.4s, v30.4s; + sm4ekey v7.4s, v6.4s, v31.4s; + + st1 {v0.16b-v3.16b}, [x1], #64; + st1 {v4.16b-v7.16b}, [x1]; + rev64 v7.4s, v7.4s; + rev64 v6.4s, v6.4s; + rev64 v5.4s, v5.4s; + rev64 v4.4s, v4.4s; + rev64 v3.4s, v3.4s; + rev64 v2.4s, v2.4s; + rev64 v1.4s, v1.4s; + rev64 v0.4s, v0.4s; + ext v7.16b, v7.16b, v7.16b, #8; + ext v6.16b, v6.16b, v6.16b, #8; + ext v5.16b, v5.16b, v5.16b, #8; + ext v4.16b, v4.16b, v4.16b, #8; + ext v3.16b, v3.16b, v3.16b, #8; + ext v2.16b, v2.16b, v2.16b, #8; + ext v1.16b, v1.16b, v1.16b, #8; + ext v0.16b, v0.16b, v0.16b, #8; + st1 {v7.16b}, [x2], #16; + st1 {v6.16b}, [x2], #16; + st1 {v5.16b}, [x2], #16; + st1 {v4.16b}, [x2], #16; + st1 {v3.16b}, [x2], #16; + st1 {v2.16b}, [x2], #16; + st1 {v1.16b}, [x2], #16; + st1 
{v0.16b}, [x2]; + + ret; +SYM_FUNC_END(sm4_ce_expand_key) + +.align 3 +SYM_FUNC_START(sm4_ce_crypt_block) + /* input: + * x0: round key array, CTX + * x1: dst + * x2: src + */ + PREPARE; + + ld1 {v0.16b}, [x2]; + SM4_CRYPT_BLK(v0); + st1 {v0.16b}, [x1]; + + ret; +SYM_FUNC_END(sm4_ce_crypt_block) + +.align 3 +SYM_FUNC_START(sm4_ce_crypt) + /* input: + * x0: round key array, CTX + * x1: dst + * x2: src + * w3: nblocks + */ + PREPARE; + +.Lcrypt_loop_blk: + sub w3, w3, #8; + tbnz w3, #31, .Lcrypt_tail8; + + ld1 {v0.16b-v3.16b}, [x2], #64; + ld1 {v4.16b-v7.16b}, [x2], #64; + + SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7); + + st1 {v0.16b-v3.16b}, [x1], #64; + st1 {v4.16b-v7.16b}, [x1], #64; + + cbz w3, .Lcrypt_end; + b .Lcrypt_loop_blk; + +.Lcrypt_tail8: + add w3, w3, #8; + cmp w3, #4; + blt .Lcrypt_tail4; + + sub w3, w3, #4; + + ld1 {v0.16b-v3.16b}, [x2], #64; + SM4_CRYPT_BLK4(v0, v1, v2, v3); + st1 {v0.16b-v3.16b}, [x1], #64; + + cbz w3, .Lcrypt_end; + +.Lcrypt_tail4: + sub w3, w3, #1; + + ld1 {v0.16b}, [x2], #16; + SM4_CRYPT_BLK(v0); + st1 {v0.16b}, [x1], #16; + + cbnz w3, .Lcrypt_tail4; + +.Lcrypt_end: + ret; +SYM_FUNC_END(sm4_ce_crypt) + +.align 3 +SYM_FUNC_START(sm4_ce_cbc_enc) + /* input: + * x0: round key array, CTX + * x1: dst + * x2: src + * x3: iv (big endian, 128 bit) + * w4: nblocks + */ + PREPARE; + + ld1 {RIV.16b}, [x3]; + +.Lcbc_enc_loop: + sub w4, w4, #1; + + ld1 {RTMP0.16b}, [x2], #16; + eor RIV.16b, RIV.16b, RTMP0.16b; + + SM4_CRYPT_BLK(RIV); + + st1 {RIV.16b}, [x1], #16; + + cbnz w4, .Lcbc_enc_loop; + + /* store new IV */ + st1 {RIV.16b}, [x3]; + + ret; +SYM_FUNC_END(sm4_ce_cbc_enc) + +.align 3 +SYM_FUNC_START(sm4_ce_cbc_dec) + /* input: + * x0: round key array, CTX + * x1: dst + * x2: src + * x3: iv (big endian, 128 bit) + * w4: nblocks + */ + PREPARE; + + ld1 {RIV.16b}, [x3]; + +.Lcbc_loop_blk: + sub w4, w4, #8; + tbnz w4, #31, .Lcbc_tail8; + + ld1 {v0.16b-v3.16b}, [x2], #64; + ld1 {v4.16b-v7.16b}, [x2]; + + SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, 
v5, v6, v7); + + sub x2, x2, #64; + eor v0.16b, v0.16b, RIV.16b; + ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; + eor v1.16b, v1.16b, RTMP0.16b; + eor v2.16b, v2.16b, RTMP1.16b; + eor v3.16b, v3.16b, RTMP2.16b; + st1 {v0.16b-v3.16b}, [x1], #64; + + eor v4.16b, v4.16b, RTMP3.16b; + ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; + eor v5.16b, v5.16b, RTMP0.16b; + eor v6.16b, v6.16b, RTMP1.16b; + eor v7.16b, v7.16b, RTMP2.16b; + + mov RIV.16b, RTMP3.16b; + st1 {v4.16b-v7.16b}, [x1], #64; + + cbz w4, .Lcbc_end; + b .Lcbc_loop_blk; + +.Lcbc_tail8: + add w4, w4, #8; + cmp w4, #4; + blt .Lcbc_tail4; + + sub w4, w4, #4; + + ld1 {v0.16b-v3.16b}, [x2]; + + SM4_CRYPT_BLK4(v0, v1, v2, v3); + + eor v0.16b, v0.16b, RIV.16b; + ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; + eor v1.16b, v1.16b, RTMP0.16b; + eor v2.16b, v2.16b, RTMP1.16b; + eor v3.16b, v3.16b, RTMP2.16b; + + mov RIV.16b, RTMP3.16b; + st1 {v0.16b-v3.16b}, [x1], #64; + + cbz w4, .Lcbc_end; + +.Lcbc_tail4: + sub w4, w4, #1; + + ld1 {v0.16b}, [x2]; + + SM4_CRYPT_BLK(v0); + + eor v0.16b, v0.16b, RIV.16b; + ld1 {RIV.16b}, [x2], #16; + st1 {v0.16b}, [x1], #16; + + cbnz w4, .Lcbc_tail4; + +.Lcbc_end: + /* store new IV */ + st1 {RIV.16b}, [x3]; + + ret; +SYM_FUNC_END(sm4_ce_cbc_dec) + +.align 3 +SYM_FUNC_START(sm4_ce_cfb_enc) + /* input: + * x0: round key array, CTX + * x1: dst + * x2: src + * x3: iv (big endian, 128 bit) + * w4: nblocks + */ + PREPARE; + + ld1 {RIV.16b}, [x3]; + +.Lcfb_enc_loop: + sub w4, w4, #1; + + SM4_CRYPT_BLK(RIV); + + ld1 {RTMP0.16b}, [x2], #16; + eor RIV.16b, RIV.16b, RTMP0.16b; + st1 {RIV.16b}, [x1], #16; + + cbnz w4, .Lcfb_enc_loop; + + /* store new IV */ + st1 {RIV.16b}, [x3]; + + ret; +SYM_FUNC_END(sm4_ce_cfb_enc) + +.align 3 +SYM_FUNC_START(sm4_ce_cfb_dec) + /* input: + * x0: round key array, CTX + * x1: dst + * x2: src + * x3: iv (big endian, 128 bit) + * w4: nblocks + */ + PREPARE; + + ld1 {v0.16b}, [x3]; + +.Lcfb_loop_blk: + sub w4, w4, #8; + tbnz w4, #31, .Lcfb_tail8; + + ld1 {v1.16b, v2.16b, v3.16b}, [x2], #48; + 
ld1 {v4.16b-v7.16b}, [x2]; + + SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7); + + sub x2, x2, #48; + ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; + eor v0.16b, v0.16b, RTMP0.16b; + eor v1.16b, v1.16b, RTMP1.16b; + eor v2.16b, v2.16b, RTMP2.16b; + eor v3.16b, v3.16b, RTMP3.16b; + st1 {v0.16b-v3.16b}, [x1], #64; + + ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; + eor v4.16b, v4.16b, RTMP0.16b; + eor v5.16b, v5.16b, RTMP1.16b; + eor v6.16b, v6.16b, RTMP2.16b; + eor v7.16b, v7.16b, RTMP3.16b; + st1 {v4.16b-v7.16b}, [x1], #64; + + mov v0.16b, RTMP3.16b; + + cbz w4, .Lcfb_end; + b .Lcfb_loop_blk; + +.Lcfb_tail8: + add w4, w4, #8; + cmp w4, #4; + blt .Lcfb_tail4; + + sub w4, w4, #4; + + ld1 {v1.16b, v2.16b, v3.16b}, [x2]; + + SM4_CRYPT_BLK4(v0, v1, v2, v3); + + ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; + eor v0.16b, v0.16b, RTMP0.16b; + eor v1.16b, v1.16b, RTMP1.16b; + eor v2.16b, v2.16b, RTMP2.16b; + eor v3.16b, v3.16b, RTMP3.16b; + st1 {v0.16b-v3.16b}, [x1], #64; + + mov v0.16b, RTMP3.16b; + + cbz w4, .Lcfb_end; + +.Lcfb_tail4: + sub w4, w4, #1; + + SM4_CRYPT_BLK(v0); + + ld1 {RTMP0.16b}, [x2], #16; + eor v0.16b, v0.16b, RTMP0.16b; + st1 {v0.16b}, [x1], #16; + + mov v0.16b, RTMP0.16b; + + cbnz w4, .Lcfb_tail4; + +.Lcfb_end: + /* store new IV */ + st1 {v0.16b}, [x3]; + + ret; +SYM_FUNC_END(sm4_ce_cfb_dec) + +.align 3 +SYM_FUNC_START(sm4_ce_ctr_enc) + /* input: + * x0: round key array, CTX + * x1: dst + * x2: src + * x3: ctr (big endian, 128 bit) + * w4: nblocks + */ + PREPARE; + + ldp x7, x8, [x3]; + rev x7, x7; + rev x8, x8; + +.Lctr_loop_blk: + sub w4, w4, #8; + tbnz w4, #31, .Lctr_tail8; + +#define inc_le128(vctr) \ + mov vctr.d[1], x8; \ + mov vctr.d[0], x7; \ + adds x8, x8, #1; \ + adc x7, x7, xzr; \ + rev64 vctr.16b, vctr.16b; + + /* construct CTRs */ + inc_le128(v0); /* +0 */ + inc_le128(v1); /* +1 */ + inc_le128(v2); /* +2 */ + inc_le128(v3); /* +3 */ + inc_le128(v4); /* +4 */ + inc_le128(v5); /* +5 */ + inc_le128(v6); /* +6 */ + inc_le128(v7); /* +7 */ + + SM4_CRYPT_BLK8(v0, v1, 
v2, v3, v4, v5, v6, v7); + + ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; + eor v0.16b, v0.16b, RTMP0.16b; + eor v1.16b, v1.16b, RTMP1.16b; + eor v2.16b, v2.16b, RTMP2.16b; + eor v3.16b, v3.16b, RTMP3.16b; + st1 {v0.16b-v3.16b}, [x1], #64; + + ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; + eor v4.16b, v4.16b, RTMP0.16b; + eor v5.16b, v5.16b, RTMP1.16b; + eor v6.16b, v6.16b, RTMP2.16b; + eor v7.16b, v7.16b, RTMP3.16b; + st1 {v4.16b-v7.16b}, [x1], #64; + + cbz w4, .Lctr_end; + b .Lctr_loop_blk; + +.Lctr_tail8: + add w4, w4, #8; + cmp w4, #4; + blt .Lctr_tail4; + + sub w4, w4, #4; + + /* construct CTRs */ + inc_le128(v0); /* +0 */ + inc_le128(v1); /* +1 */ + inc_le128(v2); /* +2 */ + inc_le128(v3); /* +3 */ + + SM4_CRYPT_BLK4(v0, v1, v2, v3); + + ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; + eor v0.16b, v0.16b, RTMP0.16b; + eor v1.16b, v1.16b, RTMP1.16b; + eor v2.16b, v2.16b, RTMP2.16b; + eor v3.16b, v3.16b, RTMP3.16b; + st1 {v0.16b-v3.16b}, [x1], #64; + + cbz w4, .Lctr_end; + +.Lctr_tail4: + sub w4, w4, #1; + + /* construct CTRs */ + inc_le128(v0); + + SM4_CRYPT_BLK(v0); + + ld1 {RTMP0.16b}, [x2], #16; + eor v0.16b, v0.16b, RTMP0.16b; + st1 {v0.16b}, [x1], #16; + + cbnz w4, .Lctr_tail4; + +.Lctr_end: + /* store new CTR */ + rev x7, x7; + rev x8, x8; + stp x7, x8, [x3]; + + ret; +SYM_FUNC_END(sm4_ce_ctr_enc) diff --git a/arch/arm64/crypto/sm4-ce-glue.c b/arch/arm64/crypto/sm4-ce-glue.c new file mode 100644 index 000000000000..496d55c0d01a --- /dev/null +++ b/arch/arm64/crypto/sm4-ce-glue.c @@ -0,0 +1,372 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * SM4 Cipher Algorithm, using ARMv8 Crypto Extensions + * as specified in + * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html + * + * Copyright (C) 2022, Alibaba Group. 
+ * Copyright (C) 2022 Tianjia Zhang + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define BYTES2BLKS(nbytes) ((nbytes) >> 4) + +asmlinkage void sm4_ce_expand_key(const u8 *key, u32 *rkey_enc, u32 *rkey_dec, + const u32 *fk, const u32 *ck); +asmlinkage void sm4_ce_crypt_block(const u32 *rkey, u8 *dst, const u8 *src); +asmlinkage void sm4_ce_crypt(const u32 *rkey, u8 *dst, const u8 *src, + unsigned int nblks); +asmlinkage void sm4_ce_cbc_enc(const u32 *rkey, u8 *dst, const u8 *src, + u8 *iv, unsigned int nblks); +asmlinkage void sm4_ce_cbc_dec(const u32 *rkey, u8 *dst, const u8 *src, + u8 *iv, unsigned int nblks); +asmlinkage void sm4_ce_cfb_enc(const u32 *rkey, u8 *dst, const u8 *src, + u8 *iv, unsigned int nblks); +asmlinkage void sm4_ce_cfb_dec(const u32 *rkey, u8 *dst, const u8 *src, + u8 *iv, unsigned int nblks); +asmlinkage void sm4_ce_ctr_enc(const u32 *rkey, u8 *dst, const u8 *src, + u8 *iv, unsigned int nblks); + +static int sm4_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int key_len) +{ + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + + if (key_len != SM4_KEY_SIZE) + return -EINVAL; + + sm4_ce_expand_key(key, ctx->rkey_enc, ctx->rkey_dec, + crypto_sm4_fk, crypto_sm4_ck); + return 0; +} + +static int sm4_ecb_do_crypt(struct skcipher_request *req, const u32 *rkey) +{ + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + unsigned int nblks; + + kernel_neon_begin(); + + nblks = BYTES2BLKS(nbytes); + if (nblks) { + sm4_ce_crypt(rkey, dst, src, nblks); + nbytes -= nblks * SM4_BLOCK_SIZE; + } + + kernel_neon_end(); + + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +static int sm4_ecb_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx 
*ctx = crypto_skcipher_ctx(tfm); + + return sm4_ecb_do_crypt(req, ctx->rkey_enc); +} + +static int sm4_ecb_decrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + + return sm4_ecb_do_crypt(req, ctx->rkey_dec); +} + +static int sm4_cbc_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + unsigned int nblks; + + kernel_neon_begin(); + + nblks = BYTES2BLKS(nbytes); + if (nblks) { + sm4_ce_cbc_enc(ctx->rkey_enc, dst, src, walk.iv, nblks); + nbytes -= nblks * SM4_BLOCK_SIZE; + } + + kernel_neon_end(); + + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +static int sm4_cbc_decrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + unsigned int nblks; + + kernel_neon_begin(); + + nblks = BYTES2BLKS(nbytes); + if (nblks) { + sm4_ce_cbc_dec(ctx->rkey_dec, dst, src, walk.iv, nblks); + nbytes -= nblks * SM4_BLOCK_SIZE; + } + + kernel_neon_end(); + + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +static int sm4_cfb_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { 
+ const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + unsigned int nblks; + + kernel_neon_begin(); + + nblks = BYTES2BLKS(nbytes); + if (nblks) { + sm4_ce_cfb_enc(ctx->rkey_enc, dst, src, walk.iv, nblks); + dst += nblks * SM4_BLOCK_SIZE; + src += nblks * SM4_BLOCK_SIZE; + nbytes -= nblks * SM4_BLOCK_SIZE; + } + + /* tail */ + if (walk.nbytes == walk.total && nbytes > 0) { + u8 keystream[SM4_BLOCK_SIZE]; + + sm4_ce_crypt_block(ctx->rkey_enc, keystream, walk.iv); + crypto_xor_cpy(dst, src, keystream, nbytes); + nbytes = 0; + } + + kernel_neon_end(); + + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +static int sm4_cfb_decrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + unsigned int nblks; + + kernel_neon_begin(); + + nblks = BYTES2BLKS(nbytes); + if (nblks) { + sm4_ce_cfb_dec(ctx->rkey_enc, dst, src, walk.iv, nblks); + dst += nblks * SM4_BLOCK_SIZE; + src += nblks * SM4_BLOCK_SIZE; + nbytes -= nblks * SM4_BLOCK_SIZE; + } + + /* tail */ + if (walk.nbytes == walk.total && nbytes > 0) { + u8 keystream[SM4_BLOCK_SIZE]; + + sm4_ce_crypt_block(ctx->rkey_enc, keystream, walk.iv); + crypto_xor_cpy(dst, src, keystream, nbytes); + nbytes = 0; + } + + kernel_neon_end(); + + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +static int sm4_ctr_crypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + const u8 *src = walk.src.virt.addr; + u8 *dst = 
walk.dst.virt.addr; + unsigned int nblks; + + kernel_neon_begin(); + + nblks = BYTES2BLKS(nbytes); + if (nblks) { + sm4_ce_ctr_enc(ctx->rkey_enc, dst, src, walk.iv, nblks); + dst += nblks * SM4_BLOCK_SIZE; + src += nblks * SM4_BLOCK_SIZE; + nbytes -= nblks * SM4_BLOCK_SIZE; + } + + /* tail */ + if (walk.nbytes == walk.total && nbytes > 0) { + u8 keystream[SM4_BLOCK_SIZE]; + + sm4_ce_crypt_block(ctx->rkey_enc, keystream, walk.iv); + crypto_inc(walk.iv, SM4_BLOCK_SIZE); + crypto_xor_cpy(dst, src, keystream, nbytes); + nbytes = 0; + } + + kernel_neon_end(); + + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +static struct skcipher_alg sm4_algs[] = { + { + .base = { + .cra_name = "ecb(sm4)", + .cra_driver_name = "ecb-sm4-ce", + .cra_priority = 400, + .cra_blocksize = SM4_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .setkey = sm4_setkey, + .encrypt = sm4_ecb_encrypt, + .decrypt = sm4_ecb_decrypt, + }, { + .base = { + .cra_name = "cbc(sm4)", + .cra_driver_name = "cbc-sm4-ce", + .cra_priority = 400, + .cra_blocksize = SM4_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .ivsize = SM4_BLOCK_SIZE, + .setkey = sm4_setkey, + .encrypt = sm4_cbc_encrypt, + .decrypt = sm4_cbc_decrypt, + }, { + .base = { + .cra_name = "cfb(sm4)", + .cra_driver_name = "cfb-sm4-ce", + .cra_priority = 400, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .ivsize = SM4_BLOCK_SIZE, + .chunksize = SM4_BLOCK_SIZE, + .setkey = sm4_setkey, + .encrypt = sm4_cfb_encrypt, + .decrypt = sm4_cfb_decrypt, + }, { + .base = { + .cra_name = "ctr(sm4)", + .cra_driver_name = "ctr-sm4-ce", + .cra_priority = 400, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct sm4_ctx), + 
.cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .ivsize = SM4_BLOCK_SIZE, + .chunksize = SM4_BLOCK_SIZE, + .setkey = sm4_setkey, + .encrypt = sm4_ctr_crypt, + .decrypt = sm4_ctr_crypt, + } +}; + +static int __init sm4_init(void) +{ + return crypto_register_skciphers(sm4_algs, ARRAY_SIZE(sm4_algs)); +} + +static void __exit sm4_exit(void) +{ + crypto_unregister_skciphers(sm4_algs, ARRAY_SIZE(sm4_algs)); +} + +module_cpu_feature_match(SM4, sm4_init); +module_exit(sm4_exit); + +MODULE_DESCRIPTION("SM4 ECB/CBC/CFB/CTR using ARMv8 Crypto Extensions"); +MODULE_ALIAS_CRYPTO("sm4-ce"); +MODULE_ALIAS_CRYPTO("sm4"); +MODULE_ALIAS_CRYPTO("ecb(sm4)"); +MODULE_ALIAS_CRYPTO("cbc(sm4)"); +MODULE_ALIAS_CRYPTO("cfb(sm4)"); +MODULE_ALIAS_CRYPTO("ctr(sm4)"); +MODULE_AUTHOR("Tianjia Zhang "); +MODULE_LICENSE("GPL v2"); -- cgit v1.2.3 From b45b0a12200893732a0b0ec4a6df18521fd976ad Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Mon, 11 Apr 2022 11:13:13 +0800 Subject: crypto: arm64/sm4 - Fix wrong dependency of NEON/CE implementation Commit d2825fa9365d ("crypto: sm3,sm4 - move into crypto directory") moved the sm4 library implementation from the lib/crypto directory to the crypto directory and renamed its config symbol to CRYPTO_SM4. The arm64 SM4 NEON/CE implementations depend on this library and must be updated to select the new symbol.
Fixes: 4f1aef9b806f ("crypto: arm64/sm4 - add ARMv8 NEON implementation") Fixes: 5b33e0ec881c ("crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation") Signed-off-by: Tianjia Zhang Signed-off-by: Herbert Xu --- arch/arm64/crypto/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 4fe7037d2347..ac85682c013c 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -57,13 +57,13 @@ config CRYPTO_SM4_ARM64_CE_BLK tristate "SM4 in ECB/CBC/CFB/CTR modes using ARMv8 Crypto Extensions" depends on KERNEL_MODE_NEON select CRYPTO_SKCIPHER - select CRYPTO_LIB_SM4 + select CRYPTO_SM4 config CRYPTO_SM4_ARM64_NEON_BLK tristate "SM4 in ECB/CBC/CFB/CTR modes using NEON instructions" depends on KERNEL_MODE_NEON select CRYPTO_SKCIPHER - select CRYPTO_LIB_SM4 + select CRYPTO_SM4 config CRYPTO_GHASH_ARM64_CE tristate "GHASH/AES-GCM using ARMv8 Crypto Extensions" -- cgit v1.2.3