diff options
Diffstat (limited to 'arch/x86/crypto/aesni-intel_glue.c')
-rw-r--r-- | arch/x86/crypto/aesni-intel_glue.c | 493 |
1 files changed, 239 insertions, 254 deletions
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index fbf43482e1f5..bc655d794a95 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -23,7 +23,6 @@ #include <linux/err.h> #include <crypto/algapi.h> #include <crypto/aes.h> -#include <crypto/ctr.h> #include <crypto/b128ops.h> #include <crypto/gcm.h> #include <crypto/xts.h> @@ -82,30 +81,8 @@ asmlinkage void aesni_xts_dec(const struct crypto_aes_ctx *ctx, u8 *out, const u8 *in, unsigned int len, u8 *iv); #ifdef CONFIG_X86_64 - asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in, unsigned int len, u8 *iv); -DEFINE_STATIC_CALL(aesni_ctr_enc_tfm, aesni_ctr_enc); - -asmlinkage void aes_ctr_enc_128_avx_by8(const u8 *in, u8 *iv, - void *keys, u8 *out, unsigned int num_bytes); -asmlinkage void aes_ctr_enc_192_avx_by8(const u8 *in, u8 *iv, - void *keys, u8 *out, unsigned int num_bytes); -asmlinkage void aes_ctr_enc_256_avx_by8(const u8 *in, u8 *iv, - void *keys, u8 *out, unsigned int num_bytes); - - -asmlinkage void aes_xctr_enc_128_avx_by8(const u8 *in, const u8 *iv, - const void *keys, u8 *out, unsigned int num_bytes, - unsigned int byte_ctr); - -asmlinkage void aes_xctr_enc_192_avx_by8(const u8 *in, const u8 *iv, - const void *keys, u8 *out, unsigned int num_bytes, - unsigned int byte_ctr); - -asmlinkage void aes_xctr_enc_256_avx_by8(const u8 *in, const u8 *iv, - const void *keys, u8 *out, unsigned int num_bytes, - unsigned int byte_ctr); #endif static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx) @@ -376,24 +353,8 @@ static int cts_cbc_decrypt(struct skcipher_request *req) } #ifdef CONFIG_X86_64 -static void aesni_ctr_enc_avx_tfm(struct crypto_aes_ctx *ctx, u8 *out, - const u8 *in, unsigned int len, u8 *iv) -{ - /* - * based on key length, override with the by8 version - * of ctr mode encryption/decryption for improved performance - * aes_set_key_common() ensures that key length is one of - * {128,192,256} - */ - if (ctx->key_length == AES_KEYSIZE_128) - aes_ctr_enc_128_avx_by8(in, iv, (void *)ctx, out, len); - else if (ctx->key_length == AES_KEYSIZE_192) - aes_ctr_enc_192_avx_by8(in, iv, (void *)ctx, out, len); - else - aes_ctr_enc_256_avx_by8(in, iv, (void *)ctx, out, len); -} - -static int ctr_crypt(struct skcipher_request *req) +/* This is the non-AVX version. */ +static int ctr_crypt_aesni(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm)); @@ -407,10 +368,9 @@ static int ctr_crypt(struct skcipher_request *req) while ((nbytes = walk.nbytes) > 0) { kernel_fpu_begin(); if (nbytes & AES_BLOCK_MASK) - static_call(aesni_ctr_enc_tfm)(ctx, walk.dst.virt.addr, - walk.src.virt.addr, - nbytes & AES_BLOCK_MASK, - walk.iv); + aesni_ctr_enc(ctx, walk.dst.virt.addr, + walk.src.virt.addr, + nbytes & AES_BLOCK_MASK, walk.iv); nbytes &= ~AES_BLOCK_MASK; if (walk.nbytes == walk.total && nbytes > 0) { @@ -426,59 +386,6 @@ static int ctr_crypt(struct skcipher_request *req) } return err; } - -static void aesni_xctr_enc_avx_tfm(struct crypto_aes_ctx *ctx, u8 *out, - const u8 *in, unsigned int len, u8 *iv, - unsigned int byte_ctr) -{ - if (ctx->key_length == AES_KEYSIZE_128) - aes_xctr_enc_128_avx_by8(in, iv, (void *)ctx, out, len, - byte_ctr); - else if (ctx->key_length == AES_KEYSIZE_192) - aes_xctr_enc_192_avx_by8(in, iv, (void *)ctx, out, len, - byte_ctr); - else - aes_xctr_enc_256_avx_by8(in, iv, (void *)ctx, out, len, - byte_ctr); -} - -static int xctr_crypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm)); - u8 keystream[AES_BLOCK_SIZE]; - struct skcipher_walk walk; - unsigned int nbytes; - unsigned int byte_ctr = 0; - int err; - __le32 block[AES_BLOCK_SIZE / sizeof(__le32)]; - - err = skcipher_walk_virt(&walk, req, false); - - while ((nbytes = walk.nbytes) > 0) { - kernel_fpu_begin(); - if (nbytes & AES_BLOCK_MASK) - aesni_xctr_enc_avx_tfm(ctx, walk.dst.virt.addr, - walk.src.virt.addr, nbytes & AES_BLOCK_MASK, - walk.iv, byte_ctr); - nbytes &= ~AES_BLOCK_MASK; - byte_ctr += walk.nbytes - nbytes; - - if (walk.nbytes == walk.total && nbytes > 0) { - memcpy(block, walk.iv, AES_BLOCK_SIZE); - block[0] ^= cpu_to_le32(1 + byte_ctr / AES_BLOCK_SIZE); - aesni_enc(ctx, keystream, (u8 *)block); - crypto_xor_cpy(walk.dst.virt.addr + walk.nbytes - - nbytes, walk.src.virt.addr + walk.nbytes - - nbytes, keystream, nbytes); - byte_ctr += nbytes; - nbytes = 0; - } - kernel_fpu_end(); - err = skcipher_walk_done(&walk, nbytes); - } - return err; -} #endif static int xts_setkey_aesni(struct crypto_skcipher *tfm, const u8 *key, @@ -505,7 +412,7 @@ static int xts_setkey_aesni(struct crypto_skcipher *tfm, const u8 *key, typedef void (*xts_encrypt_iv_func)(const struct crypto_aes_ctx *tweak_key, u8 iv[AES_BLOCK_SIZE]); typedef void (*xts_crypt_func)(const struct crypto_aes_ctx *key, - const u8 *src, u8 *dst, unsigned int len, + const u8 *src, u8 *dst, int len, u8 tweak[AES_BLOCK_SIZE]); /* This handles cases where the source and/or destination span pages. */ @@ -581,11 +488,8 @@ xts_crypt(struct skcipher_request *req, xts_encrypt_iv_func encrypt_iv, { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); const struct aesni_xts_ctx *ctx = aes_xts_ctx(tfm); - const unsigned int cryptlen = req->cryptlen; - struct scatterlist *src = req->src; - struct scatterlist *dst = req->dst; - if (unlikely(cryptlen < AES_BLOCK_SIZE)) + if (unlikely(req->cryptlen < AES_BLOCK_SIZE)) return -EINVAL; kernel_fpu_begin(); @@ -593,23 +497,16 @@ xts_crypt(struct skcipher_request *req, xts_encrypt_iv_func encrypt_iv, /* * In practice, virtually all XTS plaintexts and ciphertexts are either - * 512 or 4096 bytes, aligned such that they don't span page boundaries. - * To optimize the performance of these cases, and also any other case - * where no page boundary is spanned, the below fast-path handles - * single-page sources and destinations as efficiently as possible. + * 512 or 4096 bytes and do not use multiple scatterlist elements. To + * optimize the performance of these cases, the below fast-path handles + * single-scatterlist-element messages as efficiently as possible. The + * code is 64-bit specific, as it assumes no page mapping is needed. */ - if (likely(src->length >= cryptlen && dst->length >= cryptlen && - src->offset + cryptlen <= PAGE_SIZE && - dst->offset + cryptlen <= PAGE_SIZE)) { - struct page *src_page = sg_page(src); - struct page *dst_page = sg_page(dst); - void *src_virt = kmap_local_page(src_page) + src->offset; - void *dst_virt = kmap_local_page(dst_page) + dst->offset; - - (*crypt_func)(&ctx->crypt_ctx, src_virt, dst_virt, cryptlen, - req->iv); - kunmap_local(dst_virt); - kunmap_local(src_virt); + if (IS_ENABLED(CONFIG_X86_64) && + likely(req->src->length >= req->cryptlen && + req->dst->length >= req->cryptlen)) { + (*crypt_func)(&ctx->crypt_ctx, sg_virt(req->src), + sg_virt(req->dst), req->cryptlen, req->iv); kernel_fpu_end(); return 0; } @@ -624,14 +521,14 @@ static void aesni_xts_encrypt_iv(const struct crypto_aes_ctx *tweak_key, } static void aesni_xts_encrypt(const struct crypto_aes_ctx *key, - const u8 *src, u8 *dst, unsigned int len, + const u8 *src, u8 *dst, int len, u8 tweak[AES_BLOCK_SIZE]) { aesni_xts_enc(key, dst, src, len, tweak); } static void aesni_xts_decrypt(const struct crypto_aes_ctx *key, - const u8 *src, u8 *dst, unsigned int len, + const u8 *src, u8 *dst, int len, u8 tweak[AES_BLOCK_SIZE]) { aesni_xts_dec(key, dst, src, len, tweak); @@ -731,8 +628,8 @@ static struct skcipher_alg aesni_skciphers[] = { .ivsize = AES_BLOCK_SIZE, .chunksize = AES_BLOCK_SIZE, .setkey = aesni_skcipher_setkey, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, + .encrypt = ctr_crypt_aesni, + .decrypt = ctr_crypt_aesni, #endif }, { .base = { @@ -758,42 +655,112 @@ static struct simd_skcipher_alg *aesni_simd_skciphers[ARRAY_SIZE(aesni_skciphers)]; #ifdef CONFIG_X86_64 -/* - * XCTR does not have a non-AVX implementation, so it must be enabled - * conditionally. - */ -static struct skcipher_alg aesni_xctr = { - .base = { - .cra_name = "__xctr(aes)", - .cra_driver_name = "__xctr-aes-aesni", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_INTERNAL, - .cra_blocksize = 1, - .cra_ctxsize = CRYPTO_AES_CTX_SIZE, - .cra_module = THIS_MODULE, - }, - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .chunksize = AES_BLOCK_SIZE, - .setkey = aesni_skcipher_setkey, - .encrypt = xctr_crypt, - .decrypt = xctr_crypt, -}; - -static struct simd_skcipher_alg *aesni_simd_xctr; - asmlinkage void aes_xts_encrypt_iv(const struct crypto_aes_ctx *tweak_key, u8 iv[AES_BLOCK_SIZE]); -#define DEFINE_XTS_ALG(suffix, driver_name, priority) \ +/* __always_inline to avoid indirect call */ +static __always_inline int +ctr_crypt(struct skcipher_request *req, + void (*ctr64_func)(const struct crypto_aes_ctx *key, + const u8 *src, u8 *dst, int len, + const u64 le_ctr[2])) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct crypto_aes_ctx *key = aes_ctx(crypto_skcipher_ctx(tfm)); + unsigned int nbytes, p1_nbytes, nblocks; + struct skcipher_walk walk; + u64 le_ctr[2]; + u64 ctr64; + int err; + + ctr64 = le_ctr[0] = get_unaligned_be64(&req->iv[8]); + le_ctr[1] = get_unaligned_be64(&req->iv[0]); + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) != 0) { + if (nbytes < walk.total) { + /* Not the end yet, so keep the length block-aligned. */ + nbytes = round_down(nbytes, AES_BLOCK_SIZE); + nblocks = nbytes / AES_BLOCK_SIZE; + } else { + /* It's the end, so include any final partial block. */ + nblocks = DIV_ROUND_UP(nbytes, AES_BLOCK_SIZE); + } + ctr64 += nblocks; + + kernel_fpu_begin(); + if (likely(ctr64 >= nblocks)) { + /* The low 64 bits of the counter won't overflow. */ + (*ctr64_func)(key, walk.src.virt.addr, + walk.dst.virt.addr, nbytes, le_ctr); + } else { + /* + * The low 64 bits of the counter will overflow. The + * assembly doesn't handle this case, so split the + * operation into two at the point where the overflow + * will occur. After the first part, add the carry bit. + */ + p1_nbytes = min_t(unsigned int, nbytes, + (nblocks - ctr64) * AES_BLOCK_SIZE); + (*ctr64_func)(key, walk.src.virt.addr, + walk.dst.virt.addr, p1_nbytes, le_ctr); + le_ctr[0] = 0; + le_ctr[1]++; + (*ctr64_func)(key, walk.src.virt.addr + p1_nbytes, + walk.dst.virt.addr + p1_nbytes, + nbytes - p1_nbytes, le_ctr); + } + kernel_fpu_end(); + le_ctr[0] = ctr64; + + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); + } + + put_unaligned_be64(ctr64, &req->iv[8]); + put_unaligned_be64(le_ctr[1], &req->iv[0]); + + return err; +} + +/* __always_inline to avoid indirect call */ +static __always_inline int +xctr_crypt(struct skcipher_request *req, + void (*xctr_func)(const struct crypto_aes_ctx *key, + const u8 *src, u8 *dst, int len, + const u8 iv[AES_BLOCK_SIZE], u64 ctr)) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct crypto_aes_ctx *key = aes_ctx(crypto_skcipher_ctx(tfm)); + struct skcipher_walk walk; + unsigned int nbytes; + u64 ctr = 1; + int err; + + err = skcipher_walk_virt(&walk, req, false); + while ((nbytes = walk.nbytes) != 0) { + if (nbytes < walk.total) + nbytes = round_down(nbytes, AES_BLOCK_SIZE); + + kernel_fpu_begin(); + (*xctr_func)(key, walk.src.virt.addr, walk.dst.virt.addr, + nbytes, req->iv, ctr); + kernel_fpu_end(); + + ctr += DIV_ROUND_UP(nbytes, AES_BLOCK_SIZE); + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); + } + return err; +} + +#define DEFINE_AVX_SKCIPHER_ALGS(suffix, driver_name_suffix, priority) \ \ asmlinkage void \ aes_xts_encrypt_##suffix(const struct crypto_aes_ctx *key, const u8 *src, \ - u8 *dst, unsigned int len, u8 tweak[AES_BLOCK_SIZE]); \ + u8 *dst, int len, u8 tweak[AES_BLOCK_SIZE]); \ asmlinkage void \ aes_xts_decrypt_##suffix(const struct crypto_aes_ctx *key, const u8 *src, \ - u8 *dst, unsigned int len, u8 tweak[AES_BLOCK_SIZE]); \ + u8 *dst, int len, u8 tweak[AES_BLOCK_SIZE]); \ \ static int xts_encrypt_##suffix(struct skcipher_request *req) \ { \ @@ -805,32 +772,80 @@ static int xts_decrypt_##suffix(struct skcipher_request *req) \ return xts_crypt(req, aes_xts_encrypt_iv, aes_xts_decrypt_##suffix); \ } \ \ -static struct skcipher_alg aes_xts_alg_##suffix = { \ - .base = { \ - .cra_name = "__xts(aes)", \ - .cra_driver_name = "__" driver_name, \ - .cra_priority = priority, \ - .cra_flags = CRYPTO_ALG_INTERNAL, \ - .cra_blocksize = AES_BLOCK_SIZE, \ - .cra_ctxsize = XTS_AES_CTX_SIZE, \ - .cra_module = THIS_MODULE, \ - }, \ - .min_keysize = 2 * AES_MIN_KEY_SIZE, \ - .max_keysize = 2 * AES_MAX_KEY_SIZE, \ - .ivsize = AES_BLOCK_SIZE, \ - .walksize = 2 * AES_BLOCK_SIZE, \ - .setkey = xts_setkey_aesni, \ - .encrypt = xts_encrypt_##suffix, \ - .decrypt = xts_decrypt_##suffix, \ -}; \ +asmlinkage void \ +aes_ctr64_crypt_##suffix(const struct crypto_aes_ctx *key, \ + const u8 *src, u8 *dst, int len, const u64 le_ctr[2]);\ + \ +static int ctr_crypt_##suffix(struct skcipher_request *req) \ +{ \ + return ctr_crypt(req, aes_ctr64_crypt_##suffix); \ +} \ + \ +asmlinkage void \ +aes_xctr_crypt_##suffix(const struct crypto_aes_ctx *key, \ + const u8 *src, u8 *dst, int len, \ + const u8 iv[AES_BLOCK_SIZE], u64 ctr); \ \ -static struct simd_skcipher_alg *aes_xts_simdalg_##suffix +static int xctr_crypt_##suffix(struct skcipher_request *req) \ +{ \ + return xctr_crypt(req, aes_xctr_crypt_##suffix); \ +} \ + \ +static struct skcipher_alg skcipher_algs_##suffix[] = {{ \ + .base.cra_name = "__xts(aes)", \ + .base.cra_driver_name = "__xts-aes-" driver_name_suffix, \ + .base.cra_priority = priority, \ + .base.cra_flags = CRYPTO_ALG_INTERNAL, \ + .base.cra_blocksize = AES_BLOCK_SIZE, \ + .base.cra_ctxsize = XTS_AES_CTX_SIZE, \ + .base.cra_module = THIS_MODULE, \ + .min_keysize = 2 * AES_MIN_KEY_SIZE, \ + .max_keysize = 2 * AES_MAX_KEY_SIZE, \ + .ivsize = AES_BLOCK_SIZE, \ + .walksize = 2 * AES_BLOCK_SIZE, \ + .setkey = xts_setkey_aesni, \ + .encrypt = xts_encrypt_##suffix, \ + .decrypt = xts_decrypt_##suffix, \ +}, { \ + .base.cra_name = "__ctr(aes)", \ + .base.cra_driver_name = "__ctr-aes-" driver_name_suffix, \ + .base.cra_priority = priority, \ + .base.cra_flags = CRYPTO_ALG_INTERNAL, \ + .base.cra_blocksize = 1, \ + .base.cra_ctxsize = CRYPTO_AES_CTX_SIZE, \ + .base.cra_module = THIS_MODULE, \ + .min_keysize = AES_MIN_KEY_SIZE, \ + .max_keysize = AES_MAX_KEY_SIZE, \ + .ivsize = AES_BLOCK_SIZE, \ + .chunksize = AES_BLOCK_SIZE, \ + .setkey = aesni_skcipher_setkey, \ + .encrypt = ctr_crypt_##suffix, \ + .decrypt = ctr_crypt_##suffix, \ +}, { \ + .base.cra_name = "__xctr(aes)", \ + .base.cra_driver_name = "__xctr-aes-" driver_name_suffix, \ + .base.cra_priority = priority, \ + .base.cra_flags = CRYPTO_ALG_INTERNAL, \ + .base.cra_blocksize = 1, \ + .base.cra_ctxsize = CRYPTO_AES_CTX_SIZE, \ + .base.cra_module = THIS_MODULE, \ + .min_keysize = AES_MIN_KEY_SIZE, \ + .max_keysize = AES_MAX_KEY_SIZE, \ + .ivsize = AES_BLOCK_SIZE, \ + .chunksize = AES_BLOCK_SIZE, \ + .setkey = aesni_skcipher_setkey, \ + .encrypt = xctr_crypt_##suffix, \ + .decrypt = xctr_crypt_##suffix, \ +}}; \ + \ +static struct simd_skcipher_alg * \ +simd_skcipher_algs_##suffix[ARRAY_SIZE(skcipher_algs_##suffix)] -DEFINE_XTS_ALG(aesni_avx, "xts-aes-aesni-avx", 500); +DEFINE_AVX_SKCIPHER_ALGS(aesni_avx, "aesni-avx", 500); #if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ) -DEFINE_XTS_ALG(vaes_avx2, "xts-aes-vaes-avx2", 600); -DEFINE_XTS_ALG(vaes_avx10_256, "xts-aes-vaes-avx10_256", 700); -DEFINE_XTS_ALG(vaes_avx10_512, "xts-aes-vaes-avx10_512", 800); +DEFINE_AVX_SKCIPHER_ALGS(vaes_avx2, "vaes-avx2", 600); +DEFINE_AVX_SKCIPHER_ALGS(vaes_avx10_256, "vaes-avx10_256", 700); +DEFINE_AVX_SKCIPHER_ALGS(vaes_avx10_512, "vaes-avx10_512", 800); #endif /* The common part of the x86_64 AES-GCM key struct */ @@ -1291,41 +1306,40 @@ static void gcm_process_assoc(const struct aes_gcm_key *key, u8 ghash_acc[16], scatterwalk_start(&walk, sg_src); while (assoclen) { - unsigned int len_this_page = scatterwalk_clamp(&walk, assoclen); - void *mapped = scatterwalk_map(&walk); - const void *src = mapped; + unsigned int orig_len_this_step = scatterwalk_next( + &walk, assoclen); + unsigned int len_this_step = orig_len_this_step; unsigned int len; + const u8 *src = walk.addr; - assoclen -= len_this_page; - scatterwalk_advance(&walk, len_this_page); if (unlikely(pos)) { - len = min(len_this_page, 16 - pos); + len = min(len_this_step, 16 - pos); memcpy(&buf[pos], src, len); pos += len; src += len; - len_this_page -= len; + len_this_step -= len; if (pos < 16) goto next; aes_gcm_aad_update(key, ghash_acc, buf, 16, flags); pos = 0; } - len = len_this_page; + len = len_this_step; if (unlikely(assoclen)) /* Not the last segment yet? */ len = round_down(len, 16); aes_gcm_aad_update(key, ghash_acc, src, len, flags); src += len; - len_this_page -= len; - if (unlikely(len_this_page)) { - memcpy(buf, src, len_this_page); - pos = len_this_page; + len_this_step -= len; + if (unlikely(len_this_step)) { + memcpy(buf, src, len_this_step); + pos = len_this_step; } next: - scatterwalk_unmap(mapped); - scatterwalk_pagedone(&walk, 0, assoclen); + scatterwalk_done_src(&walk, orig_len_this_step); if (need_resched()) { kernel_fpu_end(); kernel_fpu_begin(); } + assoclen -= orig_len_this_step; } if (unlikely(pos)) aes_gcm_aad_update(key, ghash_acc, buf, pos, flags); @@ -1536,34 +1550,15 @@ DEFINE_GCM_ALGS(vaes_avx10_512, FLAG_AVX10_512, AES_GCM_KEY_AVX10_SIZE, 800); #endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */ -/* - * This is a list of CPU models that are known to suffer from downclocking when - * zmm registers (512-bit vectors) are used. On these CPUs, the AES mode - * implementations with zmm registers won't be used by default. Implementations - * with ymm registers (256-bit vectors) will be used by default instead. - */ -static const struct x86_cpu_id zmm_exclusion_list[] = { - X86_MATCH_VFM(INTEL_SKYLAKE_X, 0), - X86_MATCH_VFM(INTEL_ICELAKE_X, 0), - X86_MATCH_VFM(INTEL_ICELAKE_D, 0), - X86_MATCH_VFM(INTEL_ICELAKE, 0), - X86_MATCH_VFM(INTEL_ICELAKE_L, 0), - X86_MATCH_VFM(INTEL_ICELAKE_NNPI, 0), - X86_MATCH_VFM(INTEL_TIGERLAKE_L, 0), - X86_MATCH_VFM(INTEL_TIGERLAKE, 0), - /* Allow Rocket Lake and later, and Sapphire Rapids and later. */ - /* Also allow AMD CPUs (starting with Zen 4, the first with AVX-512). */ - {}, -}; - static int __init register_avx_algs(void) { int err; if (!boot_cpu_has(X86_FEATURE_AVX)) return 0; - err = simd_register_skciphers_compat(&aes_xts_alg_aesni_avx, 1, - &aes_xts_simdalg_aesni_avx); + err = simd_register_skciphers_compat(skcipher_algs_aesni_avx, + ARRAY_SIZE(skcipher_algs_aesni_avx), + simd_skcipher_algs_aesni_avx); if (err) return err; err = simd_register_aeads_compat(aes_gcm_algs_aesni_avx, @@ -1571,6 +1566,12 @@ static int __init register_avx_algs(void) aes_gcm_simdalgs_aesni_avx); if (err) return err; + /* + * Note: not all the algorithms registered below actually require + * VPCLMULQDQ. But in practice every CPU with VAES also has VPCLMULQDQ. + * Similarly, the assembler support was added at about the same time. + * For simplicity, just always check for VAES and VPCLMULQDQ together. + */ #if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ) if (!boot_cpu_has(X86_FEATURE_AVX2) || !boot_cpu_has(X86_FEATURE_VAES) || @@ -1578,8 +1579,9 @@ static int __init register_avx_algs(void) !boot_cpu_has(X86_FEATURE_PCLMULQDQ) || !cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) return 0; - err = simd_register_skciphers_compat(&aes_xts_alg_vaes_avx2, 1, - &aes_xts_simdalg_vaes_avx2); + err = simd_register_skciphers_compat(skcipher_algs_vaes_avx2, + ARRAY_SIZE(skcipher_algs_vaes_avx2), + simd_skcipher_algs_vaes_avx2); if (err) return err; @@ -1590,8 +1592,9 @@ static int __init register_avx_algs(void) XFEATURE_MASK_AVX512, NULL)) return 0; - err = simd_register_skciphers_compat(&aes_xts_alg_vaes_avx10_256, 1, - &aes_xts_simdalg_vaes_avx10_256); + err = simd_register_skciphers_compat(skcipher_algs_vaes_avx10_256, + ARRAY_SIZE(skcipher_algs_vaes_avx10_256), + simd_skcipher_algs_vaes_avx10_256); if (err) return err; err = simd_register_aeads_compat(aes_gcm_algs_vaes_avx10_256, @@ -1600,16 +1603,18 @@ static int __init register_avx_algs(void) if (err) return err; - if (x86_match_cpu(zmm_exclusion_list)) { + if (boot_cpu_has(X86_FEATURE_PREFER_YMM)) { int i; - aes_xts_alg_vaes_avx10_512.base.cra_priority = 1; + for (i = 0; i < ARRAY_SIZE(skcipher_algs_vaes_avx10_512); i++) + skcipher_algs_vaes_avx10_512[i].base.cra_priority = 1; for (i = 0; i < ARRAY_SIZE(aes_gcm_algs_vaes_avx10_512); i++) aes_gcm_algs_vaes_avx10_512[i].base.cra_priority = 1; } - err = simd_register_skciphers_compat(&aes_xts_alg_vaes_avx10_512, 1, - &aes_xts_simdalg_vaes_avx10_512); + err = simd_register_skciphers_compat(skcipher_algs_vaes_avx10_512, + ARRAY_SIZE(skcipher_algs_vaes_avx10_512), + simd_skcipher_algs_vaes_avx10_512); if (err) return err; err = simd_register_aeads_compat(aes_gcm_algs_vaes_avx10_512, @@ -1623,27 +1628,31 @@ static int __init register_avx_algs(void) static void unregister_avx_algs(void) { - if (aes_xts_simdalg_aesni_avx) - simd_unregister_skciphers(&aes_xts_alg_aesni_avx, 1, - &aes_xts_simdalg_aesni_avx); + if (simd_skcipher_algs_aesni_avx[0]) + simd_unregister_skciphers(skcipher_algs_aesni_avx, + ARRAY_SIZE(skcipher_algs_aesni_avx), + simd_skcipher_algs_aesni_avx); if (aes_gcm_simdalgs_aesni_avx[0]) simd_unregister_aeads(aes_gcm_algs_aesni_avx, ARRAY_SIZE(aes_gcm_algs_aesni_avx), aes_gcm_simdalgs_aesni_avx); #if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ) - if (aes_xts_simdalg_vaes_avx2) - simd_unregister_skciphers(&aes_xts_alg_vaes_avx2, 1, - &aes_xts_simdalg_vaes_avx2); - if (aes_xts_simdalg_vaes_avx10_256) - simd_unregister_skciphers(&aes_xts_alg_vaes_avx10_256, 1, - &aes_xts_simdalg_vaes_avx10_256); + if (simd_skcipher_algs_vaes_avx2[0]) + simd_unregister_skciphers(skcipher_algs_vaes_avx2, + ARRAY_SIZE(skcipher_algs_vaes_avx2), + simd_skcipher_algs_vaes_avx2); + if (simd_skcipher_algs_vaes_avx10_256[0]) + simd_unregister_skciphers(skcipher_algs_vaes_avx10_256, + ARRAY_SIZE(skcipher_algs_vaes_avx10_256), + simd_skcipher_algs_vaes_avx10_256); if (aes_gcm_simdalgs_vaes_avx10_256[0]) simd_unregister_aeads(aes_gcm_algs_vaes_avx10_256, ARRAY_SIZE(aes_gcm_algs_vaes_avx10_256), aes_gcm_simdalgs_vaes_avx10_256); - if (aes_xts_simdalg_vaes_avx10_512) - simd_unregister_skciphers(&aes_xts_alg_vaes_avx10_512, 1, - &aes_xts_simdalg_vaes_avx10_512); + if (simd_skcipher_algs_vaes_avx10_512[0]) + simd_unregister_skciphers(skcipher_algs_vaes_avx10_512, + ARRAY_SIZE(skcipher_algs_vaes_avx10_512), + simd_skcipher_algs_vaes_avx10_512); if (aes_gcm_simdalgs_vaes_avx10_512[0]) simd_unregister_aeads(aes_gcm_algs_vaes_avx10_512, ARRAY_SIZE(aes_gcm_algs_vaes_avx10_512), @@ -1676,13 +1685,6 @@ static int __init aesni_init(void) if (!x86_match_cpu(aesni_cpu_id)) return -ENODEV; -#ifdef CONFIG_X86_64 - if (boot_cpu_has(X86_FEATURE_AVX)) { - /* optimize performance of ctr mode encryption transform */ - static_call_update(aesni_ctr_enc_tfm, aesni_ctr_enc_avx_tfm); - pr_info("AES CTR mode by8 optimization enabled\n"); - } -#endif /* CONFIG_X86_64 */ err = crypto_register_alg(&aesni_cipher_alg); if (err) @@ -1700,14 +1702,6 @@ static int __init aesni_init(void) if (err) goto unregister_skciphers; -#ifdef CONFIG_X86_64 - if (boot_cpu_has(X86_FEATURE_AVX)) - err = simd_register_skciphers_compat(&aesni_xctr, 1, - &aesni_simd_xctr); - if (err) - goto unregister_aeads; -#endif /* CONFIG_X86_64 */ - err = register_avx_algs(); if (err) goto unregister_avx; @@ -1716,11 +1710,6 @@ static int __init aesni_init(void) unregister_avx: unregister_avx_algs(); -#ifdef CONFIG_X86_64 - if (aesni_simd_xctr) - simd_unregister_skciphers(&aesni_xctr, 1, &aesni_simd_xctr); -unregister_aeads: -#endif /* CONFIG_X86_64 */ simd_unregister_aeads(aes_gcm_algs_aesni, ARRAY_SIZE(aes_gcm_algs_aesni), aes_gcm_simdalgs_aesni); @@ -1740,10 +1729,6 @@ static void __exit aesni_exit(void) simd_unregister_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers), aesni_simd_skciphers); crypto_unregister_alg(&aesni_cipher_alg); -#ifdef CONFIG_X86_64 - if (boot_cpu_has(X86_FEATURE_AVX)) - simd_unregister_skciphers(&aesni_xctr, 1, &aesni_simd_xctr); -#endif /* CONFIG_X86_64 */ unregister_avx_algs(); } |