diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-10-08 06:44:48 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-10-08 06:44:48 -0400 |
commit | 87d7bcee4f5973a593b0d50134364cfe5652ff33 (patch) | |
tree | 677125896b64de2f5acfa204955442f58e74cfa9 | |
parent | 0223f9aaef94a09ffc0b6abcba732e62a483b88c (diff) | |
parent | be34c4ef693ff5c10f55606dbd656ddf0b4a8340 (diff) | |
download | lwn-87d7bcee4f5973a593b0d50134364cfe5652ff33.tar.gz lwn-87d7bcee4f5973a593b0d50134364cfe5652ff33.zip |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto update from Herbert Xu:
- add multibuffer infrastructure (single_task_running scheduler helper,
OKed by Peter on lkml.
- add SHA1 multibuffer implementation for AVX2.
- reenable "by8" AVX CTR optimisation after fixing counter overflow.
- add APM X-Gene SoC RNG support.
- SHA256/SHA512 now handles unaligned input correctly.
- set lz4 decompressed length correctly.
- fix algif socket buffer allocation failure for 64K page machines.
- misc fixes
* git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (47 commits)
crypto: sha - Handle unaligned input data in generic sha256 and sha512.
Revert "crypto: aesni - disable "by8" AVX CTR optimization"
crypto: aesni - remove unused defines in "by8" variant
crypto: aesni - fix counter overflow handling in "by8" variant
hwrng: printk replacement
crypto: qat - Removed unneeded partial state
crypto: qat - Fix typo in name of tasklet_struct
crypto: caam - Dynamic allocation of addresses for various memory blocks in CAAM.
crypto: mcryptd - Fix typos in CRYPTO_MCRYPTD description
crypto: algif - avoid excessive use of socket buffer in skcipher
arm64: dts: add random number generator dts node to APM X-Gene platform.
Documentation: rng: Add X-Gene SoC RNG driver documentation
hwrng: xgene - add support for APM X-Gene SoC RNG support
crypto: mv_cesa - Add missing #define
crypto: testmgr - add test for lz4 and lz4hc
crypto: lz4,lz4hc - fix decompression
crypto: qat - Use pci_enable_msix_exact() instead of pci_enable_msix()
crypto: drbg - fix maximum value checks on 32 bit systems
crypto: drbg - fix sparse warning for cpu_to_be[32|64]
crypto: sha-mb - sha1_mb_alg_state can be static
...
50 files changed, 4706 insertions, 837 deletions
diff --git a/Documentation/devicetree/bindings/rng/apm,rng.txt b/Documentation/devicetree/bindings/rng/apm,rng.txt new file mode 100644 index 000000000000..4dde4b06cdd9 --- /dev/null +++ b/Documentation/devicetree/bindings/rng/apm,rng.txt @@ -0,0 +1,17 @@ +APM X-Gene SoC random number generator. + +Required properties: + +- compatible : should be "apm,xgene-rng" +- reg : specifies base physical address and size of the registers map +- clocks : phandle to clock-controller plus clock-specifier pair +- interrupts : specify the fault interrupt for the RNG device + +Example: + + rng: rng@10520000 { + compatible = "apm,xgene-rng"; + reg = <0x0 0x10520000 0x0 0x100>; + interrupts = <0x0 0x41 0x4>; + clocks = <&rngpkaclk 0>; + }; diff --git a/arch/arm64/boot/dts/apm-storm.dtsi b/arch/arm64/boot/dts/apm-storm.dtsi index c0aceef7f5b3..f391972ad135 100644 --- a/arch/arm64/boot/dts/apm-storm.dtsi +++ b/arch/arm64/boot/dts/apm-storm.dtsi @@ -269,6 +269,19 @@ enable-mask = <0x2>; clock-output-names = "rtcclk"; }; + + rngpkaclk: rngpkaclk@17000000 { + compatible = "apm,xgene-device-clock"; + #clock-cells = <1>; + clocks = <&socplldiv2 0>; + reg = <0x0 0x17000000 0x0 0x2000>; + reg-names = "csr-reg"; + csr-offset = <0xc>; + csr-mask = <0x10>; + enable-offset = <0x10>; + enable-mask = <0x10>; + clock-output-names = "rngpkaclk"; + }; }; serial0: serial@1c020000 { @@ -421,5 +434,13 @@ }; }; + + rng: rng@10520000 { + compatible = "apm,xgene-rng"; + reg = <0x0 0x10520000 0x0 0x100>; + interrupts = <0x0 0x41 0x4>; + clocks = <&rngpkaclk 0>; + }; + }; }; diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index d551165a3159..fd0f848938cc 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -26,6 +26,7 @@ obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o +obj-$(CONFIG_CRYPTO_SHA1_MB) += sha-mb/ obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o obj-$(CONFIG_CRYPTO_SHA256_SSSE3) += sha256-ssse3.o obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o diff --git a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S index f091f122ed24..2df2a0298f5a 100644 --- a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S +++ b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S @@ -79,9 +79,6 @@ #define xcounter %xmm8 #define xbyteswap %xmm9 #define xkey0 %xmm10 -#define xkey3 %xmm11 -#define xkey6 %xmm12 -#define xkey9 %xmm13 #define xkey4 %xmm11 #define xkey8 %xmm12 #define xkey12 %xmm13 @@ -108,6 +105,10 @@ byteswap_const: .octa 0x000102030405060708090A0B0C0D0E0F +ddq_low_msk: + .octa 0x0000000000000000FFFFFFFFFFFFFFFF +ddq_high_add_1: + .octa 0x00000000000000010000000000000000 ddq_add_1: .octa 0x00000000000000000000000000000001 ddq_add_2: @@ -169,7 +170,12 @@ ddq_add_8: .rept (by - 1) club DDQ_DATA, i club XDATA, i - vpaddd var_ddq_add(%rip), xcounter, var_xdata + vpaddq var_ddq_add(%rip), xcounter, var_xdata + vptest ddq_low_msk(%rip), var_xdata + jnz 1f + vpaddq ddq_high_add_1(%rip), var_xdata, var_xdata + vpaddq ddq_high_add_1(%rip), xcounter, xcounter + 1: vpshufb xbyteswap, var_xdata, var_xdata .set i, (i +1) .endr @@ -178,7 +184,11 @@ ddq_add_8: vpxor xkey0, xdata0, xdata0 club DDQ_DATA, by - vpaddd var_ddq_add(%rip), xcounter, xcounter + vpaddq var_ddq_add(%rip), xcounter, xcounter + vptest ddq_low_msk(%rip), xcounter + jnz 1f + vpaddq ddq_high_add_1(%rip), xcounter, xcounter + 1: .set i, 1 .rept (by - 1) diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index a7ccd57f19e4..888950f29fd9 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -481,7 +481,7 @@ static void ctr_crypt_final(struct crypto_aes_ctx *ctx, crypto_inc(ctrblk, AES_BLOCK_SIZE); } -#if 0 /* temporary disabled due to failing crypto tests */ +#ifdef CONFIG_AS_AVX static void aesni_ctr_enc_avx_tfm(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in, unsigned int len, u8 *iv) { @@ -1522,7 +1522,7 @@ static int __init aesni_init(void) aesni_gcm_dec_tfm = aesni_gcm_dec; } aesni_ctr_enc_tfm = aesni_ctr_enc; -#if 0 /* temporary disabled due to failing crypto tests */ +#ifdef CONFIG_AS_AVX if (cpu_has_avx) { /* optimize performance of ctr mode encryption transform */ aesni_ctr_enc_tfm = aesni_ctr_enc_avx_tfm; diff --git a/arch/x86/crypto/sha-mb/Makefile b/arch/x86/crypto/sha-mb/Makefile new file mode 100644 index 000000000000..2f8756375df5 --- /dev/null +++ b/arch/x86/crypto/sha-mb/Makefile @@ -0,0 +1,11 @@ +# +# Arch-specific CryptoAPI modules. +# + +avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\ + $(comma)4)$(comma)%ymm2,yes,no) +ifeq ($(avx2_supported),yes) + obj-$(CONFIG_CRYPTO_SHA1_MB) += sha1-mb.o + sha1-mb-y := sha1_mb.o sha1_mb_mgr_flush_avx2.o \ + sha1_mb_mgr_init_avx2.o sha1_mb_mgr_submit_avx2.o sha1_x8_avx2.o +endif diff --git a/arch/x86/crypto/sha-mb/sha1_mb.c b/arch/x86/crypto/sha-mb/sha1_mb.c new file mode 100644 index 000000000000..99eefd812958 --- /dev/null +++ b/arch/x86/crypto/sha-mb/sha1_mb.c @@ -0,0 +1,935 @@ +/* + * Multi buffer SHA1 algorithm Glue Code + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * Tim Chen <tim.c.chen@linux.intel.com> + * + * BSD LICENSE + * + * Copyright(c) 2014 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <crypto/internal/hash.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/cryptohash.h> +#include <linux/types.h> +#include <linux/list.h> +#include <crypto/scatterwalk.h> +#include <crypto/sha.h> +#include <crypto/mcryptd.h> +#include <crypto/crypto_wq.h> +#include <asm/byteorder.h> +#include <asm/i387.h> +#include <asm/xcr.h> +#include <asm/xsave.h> +#include <linux/hardirq.h> +#include <asm/fpu-internal.h> +#include "sha_mb_ctx.h" + +#define FLUSH_INTERVAL 1000 /* in usec */ + +static struct mcryptd_alg_state sha1_mb_alg_state; + +struct sha1_mb_ctx { + struct mcryptd_ahash *mcryptd_tfm; +}; + +static inline struct mcryptd_hash_request_ctx *cast_hash_to_mcryptd_ctx(struct sha1_hash_ctx *hash_ctx) +{ + struct shash_desc *desc; + + desc = container_of((void *) hash_ctx, struct shash_desc, __ctx); + return container_of(desc, struct mcryptd_hash_request_ctx, desc); +} + +static inline struct ahash_request *cast_mcryptd_ctx_to_req(struct mcryptd_hash_request_ctx *ctx) +{ + return container_of((void *) ctx, struct ahash_request, __ctx); +} + +static void req_ctx_init(struct mcryptd_hash_request_ctx *rctx, + struct shash_desc *desc) +{ + rctx->flag = HASH_UPDATE; +} + +static asmlinkage void (*sha1_job_mgr_init)(struct sha1_mb_mgr *state); +static asmlinkage struct job_sha1* (*sha1_job_mgr_submit)(struct sha1_mb_mgr *state, + struct job_sha1 *job); +static asmlinkage struct job_sha1* (*sha1_job_mgr_flush)(struct sha1_mb_mgr *state); +static asmlinkage struct job_sha1* (*sha1_job_mgr_get_comp_job)(struct sha1_mb_mgr *state); + +inline void sha1_init_digest(uint32_t *digest) +{ + static const uint32_t initial_digest[SHA1_DIGEST_LENGTH] = {SHA1_H0, + SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }; + memcpy(digest, initial_digest, sizeof(initial_digest)); +} + +inline uint32_t sha1_pad(uint8_t padblock[SHA1_BLOCK_SIZE * 2], + uint32_t total_len) +{ + uint32_t i = total_len & (SHA1_BLOCK_SIZE - 1); + + memset(&padblock[i], 0, SHA1_BLOCK_SIZE); + padblock[i] = 0x80; + + i += ((SHA1_BLOCK_SIZE - 1) & + (0 - (total_len + SHA1_PADLENGTHFIELD_SIZE + 1))) + + 1 + SHA1_PADLENGTHFIELD_SIZE; + +#if SHA1_PADLENGTHFIELD_SIZE == 16 + *((uint64_t *) &padblock[i - 16]) = 0; +#endif + + *((uint64_t *) &padblock[i - 8]) = cpu_to_be64(total_len << 3); + + /* Number of extra blocks to hash */ + return i >> SHA1_LOG2_BLOCK_SIZE; +} + +static struct sha1_hash_ctx *sha1_ctx_mgr_resubmit(struct sha1_ctx_mgr *mgr, struct sha1_hash_ctx *ctx) +{ + while (ctx) { + if (ctx->status & HASH_CTX_STS_COMPLETE) { + /* Clear PROCESSING bit */ + ctx->status = HASH_CTX_STS_COMPLETE; + return ctx; + } + + /* + * If the extra blocks are empty, begin hashing what remains + * in the user's buffer. + */ + if (ctx->partial_block_buffer_length == 0 && + ctx->incoming_buffer_length) { + + const void *buffer = ctx->incoming_buffer; + uint32_t len = ctx->incoming_buffer_length; + uint32_t copy_len; + + /* + * Only entire blocks can be hashed. + * Copy remainder to extra blocks buffer. + */ + copy_len = len & (SHA1_BLOCK_SIZE-1); + + if (copy_len) { + len -= copy_len; + memcpy(ctx->partial_block_buffer, + ((const char *) buffer + len), + copy_len); + ctx->partial_block_buffer_length = copy_len; + } + + ctx->incoming_buffer_length = 0; + + /* len should be a multiple of the block size now */ + assert((len % SHA1_BLOCK_SIZE) == 0); + + /* Set len to the number of blocks to be hashed */ + len >>= SHA1_LOG2_BLOCK_SIZE; + + if (len) { + + ctx->job.buffer = (uint8_t *) buffer; + ctx->job.len = len; + ctx = (struct sha1_hash_ctx *) sha1_job_mgr_submit(&mgr->mgr, + &ctx->job); + continue; + } + } + + /* + * If the extra blocks are not empty, then we are + * either on the last block(s) or we need more + * user input before continuing. + */ + if (ctx->status & HASH_CTX_STS_LAST) { + + uint8_t *buf = ctx->partial_block_buffer; + uint32_t n_extra_blocks = sha1_pad(buf, ctx->total_length); + + ctx->status = (HASH_CTX_STS_PROCESSING | + HASH_CTX_STS_COMPLETE); + ctx->job.buffer = buf; + ctx->job.len = (uint32_t) n_extra_blocks; + ctx = (struct sha1_hash_ctx *) sha1_job_mgr_submit(&mgr->mgr, &ctx->job); + continue; + } + + if (ctx) + ctx->status = HASH_CTX_STS_IDLE; + return ctx; + } + + return NULL; +} + +static struct sha1_hash_ctx *sha1_ctx_mgr_get_comp_ctx(struct sha1_ctx_mgr *mgr) +{ + /* + * If get_comp_job returns NULL, there are no jobs complete. + * If get_comp_job returns a job, verify that it is safe to return to the user. + * If it is not ready, resubmit the job to finish processing. + * If sha1_ctx_mgr_resubmit returned a job, it is ready to be returned. + * Otherwise, all jobs currently being managed by the hash_ctx_mgr still need processing. + */ + struct sha1_hash_ctx *ctx; + + ctx = (struct sha1_hash_ctx *) sha1_job_mgr_get_comp_job(&mgr->mgr); + return sha1_ctx_mgr_resubmit(mgr, ctx); +} + +static void sha1_ctx_mgr_init(struct sha1_ctx_mgr *mgr) +{ + sha1_job_mgr_init(&mgr->mgr); +} + +static struct sha1_hash_ctx *sha1_ctx_mgr_submit(struct sha1_ctx_mgr *mgr, + struct sha1_hash_ctx *ctx, + const void *buffer, + uint32_t len, + int flags) +{ + if (flags & (~HASH_ENTIRE)) { + /* User should not pass anything other than FIRST, UPDATE, or LAST */ + ctx->error = HASH_CTX_ERROR_INVALID_FLAGS; + return ctx; + } + + if (ctx->status & HASH_CTX_STS_PROCESSING) { + /* Cannot submit to a currently processing job. */ + ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING; + return ctx; + } + + if ((ctx->status & HASH_CTX_STS_COMPLETE) && !(flags & HASH_FIRST)) { + /* Cannot update a finished job. */ + ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED; + return ctx; + } + + + if (flags & HASH_FIRST) { + /* Init digest */ + sha1_init_digest(ctx->job.result_digest); + + /* Reset byte counter */ + ctx->total_length = 0; + + /* Clear extra blocks */ + ctx->partial_block_buffer_length = 0; + } + + /* If we made it here, there were no errors during this call to submit */ + ctx->error = HASH_CTX_ERROR_NONE; + + /* Store buffer ptr info from user */ + ctx->incoming_buffer = buffer; + ctx->incoming_buffer_length = len; + + /* Store the user's request flags and mark this ctx as currently being processed. */ + ctx->status = (flags & HASH_LAST) ? + (HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) : + HASH_CTX_STS_PROCESSING; + + /* Advance byte counter */ + ctx->total_length += len; + + /* + * If there is anything currently buffered in the extra blocks, + * append to it until it contains a whole block. + * Or if the user's buffer contains less than a whole block, + * append as much as possible to the extra block. + */ + if ((ctx->partial_block_buffer_length) | (len < SHA1_BLOCK_SIZE)) { + /* Compute how many bytes to copy from user buffer into extra block */ + uint32_t copy_len = SHA1_BLOCK_SIZE - ctx->partial_block_buffer_length; + if (len < copy_len) + copy_len = len; + + if (copy_len) { + /* Copy and update relevant pointers and counters */ + memcpy(&ctx->partial_block_buffer[ctx->partial_block_buffer_length], + buffer, copy_len); + + ctx->partial_block_buffer_length += copy_len; + ctx->incoming_buffer = (const void *)((const char *)buffer + copy_len); + ctx->incoming_buffer_length = len - copy_len; + } + + /* The extra block should never contain more than 1 block here */ + assert(ctx->partial_block_buffer_length <= SHA1_BLOCK_SIZE); + + /* If the extra block buffer contains exactly 1 block, it can be hashed. */ + if (ctx->partial_block_buffer_length >= SHA1_BLOCK_SIZE) { + ctx->partial_block_buffer_length = 0; + + ctx->job.buffer = ctx->partial_block_buffer; + ctx->job.len = 1; + ctx = (struct sha1_hash_ctx *) sha1_job_mgr_submit(&mgr->mgr, &ctx->job); + } + } + + return sha1_ctx_mgr_resubmit(mgr, ctx); +} + +static struct sha1_hash_ctx *sha1_ctx_mgr_flush(struct sha1_ctx_mgr *mgr) +{ + struct sha1_hash_ctx *ctx; + + while (1) { + ctx = (struct sha1_hash_ctx *) sha1_job_mgr_flush(&mgr->mgr); + + /* If flush returned 0, there are no more jobs in flight. */ + if (!ctx) + return NULL; + + /* + * If flush returned a job, resubmit the job to finish processing. + */ + ctx = sha1_ctx_mgr_resubmit(mgr, ctx); + + /* + * If sha1_ctx_mgr_resubmit returned a job, it is ready to be returned. + * Otherwise, all jobs currently being managed by the sha1_ctx_mgr + * still need processing. Loop. + */ + if (ctx) + return ctx; + } +} + +static int sha1_mb_init(struct shash_desc *desc) +{ + struct sha1_hash_ctx *sctx = shash_desc_ctx(desc); + + hash_ctx_init(sctx); + sctx->job.result_digest[0] = SHA1_H0; + sctx->job.result_digest[1] = SHA1_H1; + sctx->job.result_digest[2] = SHA1_H2; + sctx->job.result_digest[3] = SHA1_H3; + sctx->job.result_digest[4] = SHA1_H4; + sctx->total_length = 0; + sctx->partial_block_buffer_length = 0; + sctx->status = HASH_CTX_STS_IDLE; + + return 0; +} + +static int sha1_mb_set_results(struct mcryptd_hash_request_ctx *rctx) +{ + int i; + struct sha1_hash_ctx *sctx = shash_desc_ctx(&rctx->desc); + __be32 *dst = (__be32 *) rctx->out; + + for (i = 0; i < 5; ++i) + dst[i] = cpu_to_be32(sctx->job.result_digest[i]); + + return 0; +} + +static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx, + struct mcryptd_alg_cstate *cstate, bool flush) +{ + int flag = HASH_UPDATE; + int nbytes, err = 0; + struct mcryptd_hash_request_ctx *rctx = *ret_rctx; + struct sha1_hash_ctx *sha_ctx; + + /* more work ? */ + while (!(rctx->flag & HASH_DONE)) { + nbytes = crypto_ahash_walk_done(&rctx->walk, 0); + if (nbytes < 0) { + err = nbytes; + goto out; + } + /* check if the walk is done */ + if (crypto_ahash_walk_last(&rctx->walk)) { + rctx->flag |= HASH_DONE; + if (rctx->flag & HASH_FINAL) + flag |= HASH_LAST; + + } + sha_ctx = (struct sha1_hash_ctx *) shash_desc_ctx(&rctx->desc); + kernel_fpu_begin(); + sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data, nbytes, flag); + if (!sha_ctx) { + if (flush) + sha_ctx = sha1_ctx_mgr_flush(cstate->mgr); + } + kernel_fpu_end(); + if (sha_ctx) + rctx = cast_hash_to_mcryptd_ctx(sha_ctx); + else { + rctx = NULL; + goto out; + } + } + + /* copy the results */ + if (rctx->flag & HASH_FINAL) + sha1_mb_set_results(rctx); + +out: + *ret_rctx = rctx; + return err; +} + +static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx, + struct mcryptd_alg_cstate *cstate, + int err) +{ + struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); + struct sha1_hash_ctx *sha_ctx; + struct mcryptd_hash_request_ctx *req_ctx; + int ret; + + /* remove from work list */ + spin_lock(&cstate->work_lock); + list_del(&rctx->waiter); + spin_unlock(&cstate->work_lock); + + if (irqs_disabled()) + rctx->complete(&req->base, err); + else { + local_bh_disable(); + rctx->complete(&req->base, err); + local_bh_enable(); + } + + /* check to see if there are other jobs that are done */ + sha_ctx = sha1_ctx_mgr_get_comp_ctx(cstate->mgr); + while (sha_ctx) { + req_ctx = cast_hash_to_mcryptd_ctx(sha_ctx); + ret = sha_finish_walk(&req_ctx, cstate, false); + if (req_ctx) { + spin_lock(&cstate->work_lock); + list_del(&req_ctx->waiter); + spin_unlock(&cstate->work_lock); + + req = cast_mcryptd_ctx_to_req(req_ctx); + if (irqs_disabled()) + rctx->complete(&req->base, ret); + else { + local_bh_disable(); + rctx->complete(&req->base, ret); + local_bh_enable(); + } + } + sha_ctx = sha1_ctx_mgr_get_comp_ctx(cstate->mgr); + } + + return 0; +} + +static void sha1_mb_add_list(struct mcryptd_hash_request_ctx *rctx, + struct mcryptd_alg_cstate *cstate) +{ + unsigned long next_flush; + unsigned long delay = usecs_to_jiffies(FLUSH_INTERVAL); + + /* initialize tag */ + rctx->tag.arrival = jiffies; /* tag the arrival time */ + rctx->tag.seq_num = cstate->next_seq_num++; + next_flush = rctx->tag.arrival + delay; + rctx->tag.expire = next_flush; + + spin_lock(&cstate->work_lock); + list_add_tail(&rctx->waiter, &cstate->work_list); + spin_unlock(&cstate->work_lock); + + mcryptd_arm_flusher(cstate, delay); +} + +static int sha1_mb_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + struct mcryptd_hash_request_ctx *rctx = + container_of(desc, struct mcryptd_hash_request_ctx, desc); + struct mcryptd_alg_cstate *cstate = + this_cpu_ptr(sha1_mb_alg_state.alg_cstate); + + struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); + struct sha1_hash_ctx *sha_ctx; + int ret = 0, nbytes; + + + /* sanity check */ + if (rctx->tag.cpu != smp_processor_id()) { + pr_err("mcryptd error: cpu clash\n"); + goto done; + } + + /* need to init context */ + req_ctx_init(rctx, desc); + + nbytes = crypto_ahash_walk_first(req, &rctx->walk); + + if (nbytes < 0) { + ret = nbytes; + goto done; + } + + if (crypto_ahash_walk_last(&rctx->walk)) + rctx->flag |= HASH_DONE; + + /* submit */ + sha_ctx = (struct sha1_hash_ctx *) shash_desc_ctx(desc); + sha1_mb_add_list(rctx, cstate); + kernel_fpu_begin(); + sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data, nbytes, HASH_UPDATE); + kernel_fpu_end(); + + /* check if anything is returned */ + if (!sha_ctx) + return -EINPROGRESS; + + if (sha_ctx->error) { + ret = sha_ctx->error; + rctx = cast_hash_to_mcryptd_ctx(sha_ctx); + goto done; + } + + rctx = cast_hash_to_mcryptd_ctx(sha_ctx); + ret = sha_finish_walk(&rctx, cstate, false); + + if (!rctx) + return -EINPROGRESS; +done: + sha_complete_job(rctx, cstate, ret); + return ret; +} + +static int sha1_mb_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + struct mcryptd_hash_request_ctx *rctx = + container_of(desc, struct mcryptd_hash_request_ctx, desc); + struct mcryptd_alg_cstate *cstate = + this_cpu_ptr(sha1_mb_alg_state.alg_cstate); + + struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); + struct sha1_hash_ctx *sha_ctx; + int ret = 0, flag = HASH_UPDATE, nbytes; + + /* sanity check */ + if (rctx->tag.cpu != smp_processor_id()) { + pr_err("mcryptd error: cpu clash\n"); + goto done; + } + + /* need to init context */ + req_ctx_init(rctx, desc); + + nbytes = crypto_ahash_walk_first(req, &rctx->walk); + + if (nbytes < 0) { + ret = nbytes; + goto done; + } + + if (crypto_ahash_walk_last(&rctx->walk)) { + rctx->flag |= HASH_DONE; + flag = HASH_LAST; + } + rctx->out = out; + + /* submit */ + rctx->flag |= HASH_FINAL; + sha_ctx = (struct sha1_hash_ctx *) shash_desc_ctx(desc); + sha1_mb_add_list(rctx, cstate); + + kernel_fpu_begin(); + sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data, nbytes, flag); + kernel_fpu_end(); + + /* check if anything is returned */ + if (!sha_ctx) + return -EINPROGRESS; + + if (sha_ctx->error) { + ret = sha_ctx->error; + goto done; + } + + rctx = cast_hash_to_mcryptd_ctx(sha_ctx); + ret = sha_finish_walk(&rctx, cstate, false); + if (!rctx) + return -EINPROGRESS; +done: + sha_complete_job(rctx, cstate, ret); + return ret; +} + +static int sha1_mb_final(struct shash_desc *desc, u8 *out) +{ + struct mcryptd_hash_request_ctx *rctx = + container_of(desc, struct mcryptd_hash_request_ctx, desc); + struct mcryptd_alg_cstate *cstate = + this_cpu_ptr(sha1_mb_alg_state.alg_cstate); + + struct sha1_hash_ctx *sha_ctx; + int ret = 0; + u8 data; + + /* sanity check */ + if (rctx->tag.cpu != smp_processor_id()) { + pr_err("mcryptd error: cpu clash\n"); + goto done; + } + + /* need to init context */ + req_ctx_init(rctx, desc); + + rctx->out = out; + rctx->flag |= HASH_DONE | HASH_FINAL; + + sha_ctx = (struct sha1_hash_ctx *) shash_desc_ctx(desc); + /* flag HASH_FINAL and 0 data size */ + sha1_mb_add_list(rctx, cstate); + kernel_fpu_begin(); + sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, &data, 0, HASH_LAST); + kernel_fpu_end(); + + /* check if anything is returned */ + if (!sha_ctx) + return -EINPROGRESS; + + if (sha_ctx->error) { + ret = sha_ctx->error; + rctx = cast_hash_to_mcryptd_ctx(sha_ctx); + goto done; + } + + rctx = cast_hash_to_mcryptd_ctx(sha_ctx); + ret = sha_finish_walk(&rctx, cstate, false); + if (!rctx) + return -EINPROGRESS; +done: + sha_complete_job(rctx, cstate, ret); + return ret; +} + +static int sha1_mb_export(struct shash_desc *desc, void *out) +{ + struct sha1_hash_ctx *sctx = shash_desc_ctx(desc); + + memcpy(out, sctx, sizeof(*sctx)); + + return 0; +} + +static int sha1_mb_import(struct shash_desc *desc, const void *in) +{ + struct sha1_hash_ctx *sctx = shash_desc_ctx(desc); + + memcpy(sctx, in, sizeof(*sctx)); + + return 0; +} + + +static struct shash_alg sha1_mb_shash_alg = { + .digestsize = SHA1_DIGEST_SIZE, + .init = sha1_mb_init, + .update = sha1_mb_update, + .final = sha1_mb_final, + .finup = sha1_mb_finup, + .export = sha1_mb_export, + .import = sha1_mb_import, + .descsize = sizeof(struct sha1_hash_ctx), + .statesize = sizeof(struct sha1_hash_ctx), + .base = { + .cra_name = "__sha1-mb", + .cra_driver_name = "__intel_sha1-mb", + .cra_priority = 100, + /* + * use ASYNC flag as some buffers in multi-buffer + * algo may not have completed before hashing thread sleep + */ + .cra_flags = CRYPTO_ALG_TYPE_SHASH | CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(sha1_mb_shash_alg.base.cra_list), + } +}; + +static int sha1_mb_async_init(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm); + struct ahash_request *mcryptd_req = ahash_request_ctx(req); + struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; + + memcpy(mcryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); + return crypto_ahash_init(mcryptd_req); +} + +static int sha1_mb_async_update(struct ahash_request *req) +{ + struct ahash_request *mcryptd_req = ahash_request_ctx(req); + + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm); + struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; + + memcpy(mcryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); + return crypto_ahash_update(mcryptd_req); +} + +static int sha1_mb_async_finup(struct ahash_request *req) +{ + struct ahash_request *mcryptd_req = ahash_request_ctx(req); + + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm); + struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; + + memcpy(mcryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); + return crypto_ahash_finup(mcryptd_req); +} + +static int sha1_mb_async_final(struct ahash_request *req) +{ + struct ahash_request *mcryptd_req = ahash_request_ctx(req); + + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm); + struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; + + memcpy(mcryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); + return crypto_ahash_final(mcryptd_req); +} + +static int sha1_mb_async_digest(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm); + struct ahash_request *mcryptd_req = ahash_request_ctx(req); + struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; + + memcpy(mcryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); + return crypto_ahash_digest(mcryptd_req); +} + +static int sha1_mb_async_init_tfm(struct crypto_tfm *tfm) +{ + struct mcryptd_ahash *mcryptd_tfm; + struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm); + struct mcryptd_hash_ctx *mctx; + + mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha1-mb", 0, 0); + if (IS_ERR(mcryptd_tfm)) + return PTR_ERR(mcryptd_tfm); + mctx = crypto_ahash_ctx(&mcryptd_tfm->base); + mctx->alg_state = &sha1_mb_alg_state; + ctx->mcryptd_tfm = mcryptd_tfm; + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct ahash_request) + + crypto_ahash_reqsize(&mcryptd_tfm->base)); + + return 0; +} + +static void sha1_mb_async_exit_tfm(struct crypto_tfm *tfm) +{ + struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm); + + mcryptd_free_ahash(ctx->mcryptd_tfm); +} + +static struct ahash_alg sha1_mb_async_alg = { + .init = sha1_mb_async_init, + .update = sha1_mb_async_update, + .final = sha1_mb_async_final, + .finup = sha1_mb_async_finup, + .digest = sha1_mb_async_digest, + .halg = { + .digestsize = SHA1_DIGEST_SIZE, + .base = { + .cra_name = "sha1", + .cra_driver_name = "sha1_mb", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_type = &crypto_ahash_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(sha1_mb_async_alg.halg.base.cra_list), + .cra_init = sha1_mb_async_init_tfm, + .cra_exit = sha1_mb_async_exit_tfm, + .cra_ctxsize = sizeof(struct sha1_mb_ctx), + .cra_alignmask = 0, + }, + }, +}; + +static unsigned long sha1_mb_flusher(struct mcryptd_alg_cstate *cstate) +{ + struct mcryptd_hash_request_ctx *rctx; + unsigned long cur_time; + unsigned long next_flush = 0; + struct sha1_hash_ctx *sha_ctx; + + + cur_time = jiffies; + + while (!list_empty(&cstate->work_list)) { + rctx = list_entry(cstate->work_list.next, + struct mcryptd_hash_request_ctx, waiter); + if time_before(cur_time, rctx->tag.expire) + break; + kernel_fpu_begin(); + sha_ctx = (struct sha1_hash_ctx *) sha1_ctx_mgr_flush(cstate->mgr); + kernel_fpu_end(); + if (!sha_ctx) { + pr_err("sha1_mb error: nothing got flushed for non-empty list\n"); + break; + } + rctx = cast_hash_to_mcryptd_ctx(sha_ctx); + sha_finish_walk(&rctx, cstate, true); + sha_complete_job(rctx, cstate, 0); + } + + if (!list_empty(&cstate->work_list)) { + rctx = list_entry(cstate->work_list.next, + struct mcryptd_hash_request_ctx, waiter); + /* get the hash context and then flush time */ + next_flush = rctx->tag.expire; + mcryptd_arm_flusher(cstate, get_delay(next_flush)); + } + return next_flush; +} + +static int __init sha1_mb_mod_init(void) +{ + + int cpu; + int err; + struct mcryptd_alg_cstate *cpu_state; + + /* check for dependent cpu features */ + if (!boot_cpu_has(X86_FEATURE_AVX2) || + !boot_cpu_has(X86_FEATURE_BMI2)) + return -ENODEV; + + /* initialize multibuffer structures */ + sha1_mb_alg_state.alg_cstate = alloc_percpu(struct mcryptd_alg_cstate); + + sha1_job_mgr_init = sha1_mb_mgr_init_avx2; + sha1_job_mgr_submit = sha1_mb_mgr_submit_avx2; + sha1_job_mgr_flush = sha1_mb_mgr_flush_avx2; + sha1_job_mgr_get_comp_job = sha1_mb_mgr_get_comp_job_avx2; + + if (!sha1_mb_alg_state.alg_cstate) + return -ENOMEM; + for_each_possible_cpu(cpu) { + cpu_state = per_cpu_ptr(sha1_mb_alg_state.alg_cstate, cpu); + cpu_state->next_flush = 0; + cpu_state->next_seq_num = 0; + cpu_state->flusher_engaged = false; + INIT_DELAYED_WORK(&cpu_state->flush, mcryptd_flusher); + cpu_state->cpu = cpu; + cpu_state->alg_state = &sha1_mb_alg_state; + cpu_state->mgr = (struct sha1_ctx_mgr *) kzalloc(sizeof(struct sha1_ctx_mgr), GFP_KERNEL); + if (!cpu_state->mgr) + goto err2; + sha1_ctx_mgr_init(cpu_state->mgr); + INIT_LIST_HEAD(&cpu_state->work_list); + spin_lock_init(&cpu_state->work_lock); + } + sha1_mb_alg_state.flusher = &sha1_mb_flusher; + + err = crypto_register_shash(&sha1_mb_shash_alg); + if (err) + goto err2; + err = crypto_register_ahash(&sha1_mb_async_alg); + if (err) + goto err1; + + + return 0; +err1: + crypto_unregister_shash(&sha1_mb_shash_alg); +err2: + for_each_possible_cpu(cpu) { + cpu_state = per_cpu_ptr(sha1_mb_alg_state.alg_cstate, cpu); + kfree(cpu_state->mgr); + } + free_percpu(sha1_mb_alg_state.alg_cstate); + return -ENODEV; +} + +static void __exit sha1_mb_mod_fini(void) +{ + int cpu; + struct mcryptd_alg_cstate *cpu_state; + + crypto_unregister_ahash(&sha1_mb_async_alg); + crypto_unregister_shash(&sha1_mb_shash_alg); + for_each_possible_cpu(cpu) { + cpu_state = per_cpu_ptr(sha1_mb_alg_state.alg_cstate, cpu); + kfree(cpu_state->mgr); + } + free_percpu(sha1_mb_alg_state.alg_cstate); +} + +module_init(sha1_mb_mod_init); +module_exit(sha1_mb_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, multi buffer accelerated"); + +MODULE_ALIAS("sha1"); diff --git a/arch/x86/crypto/sha-mb/sha1_mb_mgr_datastruct.S b/arch/x86/crypto/sha-mb/sha1_mb_mgr_datastruct.S new file mode 100644 index 000000000000..86688c6e7a25 --- /dev/null +++ b/arch/x86/crypto/sha-mb/sha1_mb_mgr_datastruct.S @@ -0,0 +1,287 @@ +/* + * Header file for multi buffer SHA1 algorithm data structure + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * James Guilford <james.guilford@intel.com> + * Tim Chen <tim.c.chen@linux.intel.com> + * + * BSD LICENSE + * + * Copyright(c) 2014 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +# Macros for defining data structures + +# Usage example + +#START_FIELDS # JOB_AES +### name size align +#FIELD _plaintext, 8, 8 # pointer to plaintext +#FIELD _ciphertext, 8, 8 # pointer to ciphertext +#FIELD _IV, 16, 8 # IV +#FIELD _keys, 8, 8 # pointer to keys +#FIELD _len, 4, 4 # length in bytes +#FIELD _status, 4, 4 # status enumeration +#FIELD _user_data, 8, 8 # pointer to user data +#UNION _union, size1, align1, \ +# size2, align2, \ +# size3, align3, \ +# ... +#END_FIELDS +#%assign _JOB_AES_size _FIELD_OFFSET +#%assign _JOB_AES_align _STRUCT_ALIGN + +######################################################################### + +# Alternate "struc-like" syntax: +# STRUCT job_aes2 +# RES_Q .plaintext, 1 +# RES_Q .ciphertext, 1 +# RES_DQ .IV, 1 +# RES_B .nested, _JOB_AES_SIZE, _JOB_AES_ALIGN +# RES_U .union, size1, align1, \ +# size2, align2, \ +# ... +# ENDSTRUCT +# # Following only needed if nesting +# %assign job_aes2_size _FIELD_OFFSET +# %assign job_aes2_align _STRUCT_ALIGN +# +# RES_* macros take a name, a count and an optional alignment. +# The count in in terms of the base size of the macro, and the +# default alignment is the base size. +# The macros are: +# Macro Base size +# RES_B 1 +# RES_W 2 +# RES_D 4 +# RES_Q 8 +# RES_DQ 16 +# RES_Y 32 +# RES_Z 64 +# +# RES_U defines a union. It's arguments are a name and two or more +# pairs of "size, alignment" +# +# The two assigns are only needed if this structure is being nested +# within another. Even if the assigns are not done, one can still use +# STRUCT_NAME_size as the size of the structure. +# +# Note that for nesting, you still need to assign to STRUCT_NAME_size. +# +# The differences between this and using "struc" directly are that each +# type is implicitly aligned to its natural length (although this can be +# over-ridden with an explicit third parameter), and that the structure +# is padded at the end to its overall alignment. +# + +######################################################################### + +#ifndef _SHA1_MB_MGR_DATASTRUCT_ASM_ +#define _SHA1_MB_MGR_DATASTRUCT_ASM_ + +## START_FIELDS +.macro START_FIELDS + _FIELD_OFFSET = 0 + _STRUCT_ALIGN = 0 +.endm + +## FIELD name size align +.macro FIELD name size align + _FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1)) + \name = _FIELD_OFFSET + _FIELD_OFFSET = _FIELD_OFFSET + (\size) +.if (\align > _STRUCT_ALIGN) + _STRUCT_ALIGN = \align +.endif +.endm + +## END_FIELDS +.macro END_FIELDS + _FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1)) +.endm + +######################################################################## + +.macro STRUCT p1 +START_FIELDS +.struc \p1 +.endm + +.macro ENDSTRUCT + tmp = _FIELD_OFFSET + END_FIELDS + tmp = (_FIELD_OFFSET - %%tmp) +.if (tmp > 0) + .lcomm tmp +.endif +.endstruc +.endm + +## RES_int name size align +.macro RES_int p1 p2 p3 + name = \p1 + size = \p2 + align = .\p3 + + _FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1)) +.align align +.lcomm name size + _FIELD_OFFSET = _FIELD_OFFSET + (size) +.if (align > _STRUCT_ALIGN) + _STRUCT_ALIGN = align +.endif +.endm + + + +# macro RES_B name, size [, align] +.macro RES_B _name, _size, _align=1 +RES_int _name _size _align +.endm + +# macro RES_W name, size [, align] +.macro RES_W _name, _size, _align=2 +RES_int _name 2*(_size) _align +.endm + +# macro RES_D name, size [, align] +.macro RES_D _name, _size, _align=4 +RES_int _name 4*(_size) _align +.endm + +# macro RES_Q name, size [, align] +.macro RES_Q _name, _size, _align=8 +RES_int _name 8*(_size) _align +.endm + +# macro RES_DQ name, size [, align] +.macro RES_DQ _name, _size, _align=16 +RES_int _name 16*(_size) _align +.endm + +# macro RES_Y name, size [, align] +.macro RES_Y _name, _size, _align=32 +RES_int _name 32*(_size) _align +.endm + +# macro RES_Z name, size [, align] +.macro RES_Z _name, _size, _align=64 +RES_int _name 64*(_size) _align +.endm + + +#endif + +######################################################################## +#### Define constants +######################################################################## + +######################################################################## +#### Define SHA1 Out Of Order Data Structures +######################################################################## + +START_FIELDS # LANE_DATA +### name size align +FIELD _job_in_lane, 8, 8 # pointer to job object +END_FIELDS + +_LANE_DATA_size = _FIELD_OFFSET +_LANE_DATA_align = _STRUCT_ALIGN + +######################################################################## + +START_FIELDS # SHA1_ARGS_X8 +### name size align +FIELD _digest, 4*5*8, 16 # transposed digest +FIELD _data_ptr, 8*8, 8 # array of pointers to data +END_FIELDS + +_SHA1_ARGS_X4_size = _FIELD_OFFSET +_SHA1_ARGS_X4_align = _STRUCT_ALIGN +_SHA1_ARGS_X8_size = _FIELD_OFFSET +_SHA1_ARGS_X8_align = _STRUCT_ALIGN + +######################################################################## + +START_FIELDS # MB_MGR +### name size align +FIELD _args, _SHA1_ARGS_X4_size, _SHA1_ARGS_X4_align +FIELD _lens, 4*8, 8 +FIELD _unused_lanes, 8, 8 +FIELD _ldata, _LANE_DATA_size*8, _LANE_DATA_align +END_FIELDS + +_MB_MGR_size = _FIELD_OFFSET +_MB_MGR_align = _STRUCT_ALIGN + +_args_digest = _args + _digest +_args_data_ptr = _args + _data_ptr + + +######################################################################## +#### Define constants +######################################################################## + +#define STS_UNKNOWN 0 +#define STS_BEING_PROCESSED 1 +#define STS_COMPLETED 2 + +######################################################################## +#### Define JOB_SHA1 structure +######################################################################## + +START_FIELDS # JOB_SHA1 + +### name size align +FIELD _buffer, 8, 8 # pointer to buffer +FIELD _len, 4, 4 # length in bytes +FIELD _result_digest, 5*4, 32 # Digest (output) +FIELD _status, 4, 4 +FIELD _user_data, 8, 8 +END_FIELDS + +_JOB_SHA1_size = _FIELD_OFFSET +_JOB_SHA1_align = _STRUCT_ALIGN diff --git a/arch/x86/crypto/sha-mb/sha1_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha-mb/sha1_mb_mgr_flush_avx2.S new file mode 100644 index 000000000000..85c4e1cf7172 --- /dev/null +++ b/arch/x86/crypto/sha-mb/sha1_mb_mgr_flush_avx2.S @@ -0,0 +1,327 @@ +/* + * Flush routine for SHA1 multibuffer + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * James Guilford <james.guilford@intel.com> + * Tim Chen <tim.c.chen@linux.intel.com> + * + * BSD LICENSE + * + * Copyright(c) 2014 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include <linux/linkage.h> +#include "sha1_mb_mgr_datastruct.S" + + +.extern sha1_x8_avx2 + +# LINUX register definitions +#define arg1 %rdi +#define arg2 %rsi + +# Common definitions +#define state arg1 +#define job arg2 +#define len2 arg2 + +# idx must be a register not clobbered by sha1_x8_avx2 +#define idx %r8 +#define DWORD_idx %r8d + +#define unused_lanes %rbx +#define lane_data %rbx +#define tmp2 %rbx +#define tmp2_w %ebx + +#define job_rax %rax +#define tmp1 %rax +#define size_offset %rax +#define tmp %rax +#define start_offset %rax + +#define tmp3 %arg1 + +#define extra_blocks %arg2 +#define p %arg2 + + +# STACK_SPACE needs to be an odd multiple of 8 +_XMM_SAVE_SIZE = 10*16 +_GPR_SAVE_SIZE = 8*8 +_ALIGN_SIZE = 8 + +_XMM_SAVE = 0 +_GPR_SAVE = _XMM_SAVE + _XMM_SAVE_SIZE +STACK_SPACE = _GPR_SAVE + _GPR_SAVE_SIZE + _ALIGN_SIZE + +.macro LABEL prefix n +\prefix\n\(): +.endm + +.macro JNE_SKIP i +jne skip_\i +.endm + +.altmacro +.macro SET_OFFSET _offset +offset = \_offset +.endm +.noaltmacro + +# JOB* sha1_mb_mgr_flush_avx2(MB_MGR *state) +# arg 1 : rcx : state +ENTRY(sha1_mb_mgr_flush_avx2) + mov %rsp, %r10 + sub $STACK_SPACE, %rsp + and $~31, %rsp + mov %rbx, _GPR_SAVE(%rsp) + mov %r10, _GPR_SAVE+8*1(%rsp) #save rsp + mov %rbp, _GPR_SAVE+8*3(%rsp) + mov %r12, _GPR_SAVE+8*4(%rsp) + mov %r13, _GPR_SAVE+8*5(%rsp) + mov %r14, _GPR_SAVE+8*6(%rsp) + mov %r15, _GPR_SAVE+8*7(%rsp) + + # If bit (32+3) is set, then all lanes are empty + mov _unused_lanes(state), unused_lanes + bt $32+3, unused_lanes + jc return_null + + # find a lane with a non-null job + xor idx, idx + offset = (_ldata + 1 * _LANE_DATA_size + _job_in_lane) + cmpq $0, offset(state) + cmovne one(%rip), idx + offset = (_ldata + 2 * _LANE_DATA_size + _job_in_lane) + cmpq $0, offset(state) + cmovne two(%rip), idx + offset = (_ldata + 3 * _LANE_DATA_size + _job_in_lane) + cmpq $0, offset(state) + cmovne three(%rip), idx + offset = (_ldata + 4 * _LANE_DATA_size + _job_in_lane) + cmpq $0, offset(state) + cmovne four(%rip), idx + offset = (_ldata + 5 * _LANE_DATA_size + _job_in_lane) + cmpq $0, offset(state) + cmovne five(%rip), idx + offset = (_ldata + 6 * _LANE_DATA_size + _job_in_lane) + cmpq $0, offset(state) + cmovne six(%rip), idx + offset = (_ldata + 7 * _LANE_DATA_size + _job_in_lane) + cmpq $0, offset(state) + cmovne seven(%rip), idx + + # copy idx to empty lanes +copy_lane_data: + offset = (_args + _data_ptr) + mov offset(state,idx,8), tmp + + I = 0 +.rep 8 + offset = (_ldata + I * _LANE_DATA_size + _job_in_lane) + cmpq $0, offset(state) +.altmacro + JNE_SKIP %I + offset = (_args + _data_ptr + 8*I) + mov tmp, offset(state) + offset = (_lens + 4*I) + movl $0xFFFFFFFF, offset(state) +LABEL skip_ %I + I = (I+1) +.noaltmacro +.endr + + # Find min length + vmovdqa _lens+0*16(state), %xmm0 + vmovdqa _lens+1*16(state), %xmm1 + + vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A} + vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C} + vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F} + vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E} + vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min value in low dword + + vmovd %xmm2, DWORD_idx + mov idx, len2 + and $0xF, idx + shr $4, len2 + jz len_is_0 + + vpand clear_low_nibble(%rip), %xmm2, %xmm2 + vpshufd $0, %xmm2, %xmm2 + + vpsubd %xmm2, %xmm0, %xmm0 + vpsubd %xmm2, %xmm1, %xmm1 + + vmovdqa %xmm0, _lens+0*16(state) + vmovdqa %xmm1, _lens+1*16(state) + + # "state" and "args" are the same address, arg1 + # len is arg2 + call sha1_x8_avx2 + # state and idx are intact + + +len_is_0: + # process completed job "idx" + imul $_LANE_DATA_size, idx, lane_data + lea _ldata(state, lane_data), lane_data + + mov _job_in_lane(lane_data), job_rax + movq $0, _job_in_lane(lane_data) + movl $STS_COMPLETED, _status(job_rax) + mov _unused_lanes(state), unused_lanes + shl $4, unused_lanes + or idx, unused_lanes + mov unused_lanes, _unused_lanes(state) + + movl $0xFFFFFFFF, _lens(state, idx, 4) + + vmovd _args_digest(state , idx, 4) , %xmm0 + vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0 + vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0 + vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0 + movl _args_digest+4*32(state, idx, 4), tmp2_w + + vmovdqu %xmm0, _result_digest(job_rax) + offset = (_result_digest + 1*16) + mov tmp2_w, offset(job_rax) + +return: + + mov _GPR_SAVE(%rsp), %rbx + mov _GPR_SAVE+8*1(%rsp), %r10 #saved rsp + mov _GPR_SAVE+8*3(%rsp), %rbp + mov _GPR_SAVE+8*4(%rsp), %r12 + mov _GPR_SAVE+8*5(%rsp), %r13 + mov _GPR_SAVE+8*6(%rsp), %r14 + mov _GPR_SAVE+8*7(%rsp), %r15 + mov %r10, %rsp + + ret + +return_null: + xor job_rax, job_rax + jmp return +ENDPROC(sha1_mb_mgr_flush_avx2) + + +################################################################# + +.align 16 +ENTRY(sha1_mb_mgr_get_comp_job_avx2) + push %rbx + + ## if bit 32+3 is set, then all lanes are empty + mov _unused_lanes(state), unused_lanes + bt $(32+3), unused_lanes + jc .return_null + + # Find min length + vmovdqa _lens(state), %xmm0 + vmovdqa _lens+1*16(state), %xmm1 + + vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A} + vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C} + vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F} + vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E} + vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min value in low dword + + vmovd %xmm2, DWORD_idx + test $~0xF, idx + jnz .return_null + + # process completed job "idx" + imul $_LANE_DATA_size, idx, lane_data + lea _ldata(state, lane_data), lane_data + + mov _job_in_lane(lane_data), job_rax + movq $0, _job_in_lane(lane_data) + movl $STS_COMPLETED, _status(job_rax) + mov _unused_lanes(state), unused_lanes + shl $4, unused_lanes + or idx, unused_lanes + mov unused_lanes, _unused_lanes(state) + + movl $0xFFFFFFFF, _lens(state, idx, 4) + + vmovd _args_digest(state, idx, 4), %xmm0 + vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0 + vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0 + vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0 + movl _args_digest+4*32(state, idx, 4), tmp2_w + + vmovdqu %xmm0, _result_digest(job_rax) + movl tmp2_w, _result_digest+1*16(job_rax) + + pop %rbx + + ret + +.return_null: + xor job_rax, job_rax + pop %rbx + ret +ENDPROC(sha1_mb_mgr_get_comp_job_avx2) + +.data + +.align 16 +clear_low_nibble: +.octa 0x000000000000000000000000FFFFFFF0 +one: +.quad 1 +two: +.quad 2 +three: +.quad 3 +four: +.quad 4 +five: +.quad 5 +six: +.quad 6 +seven: +.quad 7 diff --git a/arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c b/arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c new file mode 100644 index 000000000000..4ca7e166a2aa --- /dev/null +++ b/arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c @@ -0,0 +1,64 @@ +/* + * Initialization code for multi buffer SHA1 algorithm for AVX2 + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * Tim Chen <tim.c.chen@linux.intel.com> + * + * BSD LICENSE + * + * Copyright(c) 2014 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "sha_mb_mgr.h" + +void sha1_mb_mgr_init_avx2(struct sha1_mb_mgr *state) +{ + unsigned int j; + state->unused_lanes = 0xF76543210; + for (j = 0; j < 8; j++) { + state->lens[j] = 0xFFFFFFFF; + state->ldata[j].job_in_lane = NULL; + } +} diff --git a/arch/x86/crypto/sha-mb/sha1_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha-mb/sha1_mb_mgr_submit_avx2.S new file mode 100644 index 000000000000..2ab9560b53c8 --- /dev/null +++ b/arch/x86/crypto/sha-mb/sha1_mb_mgr_submit_avx2.S @@ -0,0 +1,228 @@ +/* + * Buffer submit code for multi buffer SHA1 algorithm + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * James Guilford <james.guilford@intel.com> + * Tim Chen <tim.c.chen@linux.intel.com> + * + * BSD LICENSE + * + * Copyright(c) 2014 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/linkage.h> +#include "sha1_mb_mgr_datastruct.S" + + +.extern sha1_x8_avx + +# LINUX register definitions +arg1 = %rdi +arg2 = %rsi +size_offset = %rcx +tmp2 = %rcx +extra_blocks = %rdx + +# Common definitions +#define state arg1 +#define job %rsi +#define len2 arg2 +#define p2 arg2 + +# idx must be a register not clobberred by sha1_x8_avx2 +idx = %r8 +DWORD_idx = %r8d +last_len = %r8 + +p = %r11 +start_offset = %r11 + +unused_lanes = %rbx +BYTE_unused_lanes = %bl + +job_rax = %rax +len = %rax +DWORD_len = %eax + +lane = %rbp +tmp3 = %rbp + +tmp = %r9 +DWORD_tmp = %r9d + +lane_data = %r10 + +# STACK_SPACE needs to be an odd multiple of 8 +STACK_SPACE = 8*8 + 16*10 + 8 + +# JOB* submit_mb_mgr_submit_avx2(MB_MGR *state, job_sha1 *job) +# arg 1 : rcx : state +# arg 2 : rdx : job +ENTRY(sha1_mb_mgr_submit_avx2) + + mov %rsp, %r10 + sub $STACK_SPACE, %rsp + and $~31, %rsp + + mov %rbx, (%rsp) + mov %r10, 8*2(%rsp) #save old rsp + mov %rbp, 8*3(%rsp) + mov %r12, 8*4(%rsp) + mov %r13, 8*5(%rsp) + mov %r14, 8*6(%rsp) + mov %r15, 8*7(%rsp) + + mov _unused_lanes(state), unused_lanes + mov unused_lanes, lane + and $0xF, lane + shr $4, unused_lanes + imul $_LANE_DATA_size, lane, lane_data + movl $STS_BEING_PROCESSED, _status(job) + lea _ldata(state, lane_data), lane_data + mov unused_lanes, _unused_lanes(state) + movl _len(job), DWORD_len + + mov job, _job_in_lane(lane_data) + shl $4, len + or lane, len + + movl DWORD_len, _lens(state , lane, 4) + + # Load digest words from result_digest + vmovdqu _result_digest(job), %xmm0 + mov _result_digest+1*16(job), DWORD_tmp + vmovd %xmm0, _args_digest(state, lane, 4) + vpextrd $1, %xmm0, _args_digest+1*32(state , lane, 4) + vpextrd $2, %xmm0, _args_digest+2*32(state , lane, 4) + vpextrd $3, %xmm0, _args_digest+3*32(state , lane, 4) + movl DWORD_tmp, _args_digest+4*32(state , lane, 4) + + mov _buffer(job), p + mov p, _args_data_ptr(state, lane, 8) + + cmp $0xF, unused_lanes + jne return_null + +start_loop: + # Find min length + vmovdqa _lens(state), %xmm0 + vmovdqa _lens+1*16(state), %xmm1 + + vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A} + vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C} + vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F} + vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E} + vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min value in low dword + + vmovd %xmm2, DWORD_idx + mov idx, len2 + and $0xF, idx + shr $4, len2 + jz len_is_0 + + vpand clear_low_nibble(%rip), %xmm2, %xmm2 + vpshufd $0, %xmm2, %xmm2 + + vpsubd %xmm2, %xmm0, %xmm0 + vpsubd %xmm2, %xmm1, %xmm1 + + vmovdqa %xmm0, _lens + 0*16(state) + vmovdqa %xmm1, _lens + 1*16(state) + + + # "state" and "args" are the same address, arg1 + # len is arg2 + call sha1_x8_avx2 + + # state and idx are intact + +len_is_0: + # process completed job "idx" + imul $_LANE_DATA_size, idx, lane_data + lea _ldata(state, lane_data), lane_data + + mov _job_in_lane(lane_data), job_rax + mov _unused_lanes(state), unused_lanes + movq $0, _job_in_lane(lane_data) + movl $STS_COMPLETED, _status(job_rax) + shl $4, unused_lanes + or idx, unused_lanes + mov unused_lanes, _unused_lanes(state) + + movl $0xFFFFFFFF, _lens(state, idx, 4) + + vmovd _args_digest(state, idx, 4), %xmm0 + vpinsrd $1, _args_digest+1*32(state , idx, 4), %xmm0, %xmm0 + vpinsrd $2, _args_digest+2*32(state , idx, 4), %xmm0, %xmm0 + vpinsrd $3, _args_digest+3*32(state , idx, 4), %xmm0, %xmm0 + movl 4*32(state, idx, 4), DWORD_tmp + + vmovdqu %xmm0, _result_digest(job_rax) + movl DWORD_tmp, _result_digest+1*16(job_rax) + +return: + + mov (%rsp), %rbx + mov 8*2(%rsp), %r10 #save old rsp + mov 8*3(%rsp), %rbp + mov 8*4(%rsp), %r12 + mov 8*5(%rsp), %r13 + mov 8*6(%rsp), %r14 + mov 8*7(%rsp), %r15 + mov %r10, %rsp + + ret + +return_null: + xor job_rax, job_rax + jmp return + +ENDPROC(sha1_mb_mgr_submit_avx2) + +.data + +.align 16 +clear_low_nibble: + .octa 0x000000000000000000000000FFFFFFF0 diff --git a/arch/x86/crypto/sha-mb/sha1_x8_avx2.S b/arch/x86/crypto/sha-mb/sha1_x8_avx2.S new file mode 100644 index 000000000000..8e1b47792b31 --- /dev/null +++ b/arch/x86/crypto/sha-mb/sha1_x8_avx2.S @@ -0,0 +1,472 @@ +/* + * Multi-buffer SHA1 algorithm hash compute routine + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * James Guilford <james.guilford@intel.com> + * Tim Chen <tim.c.chen@linux.intel.com> + * + * BSD LICENSE + * + * Copyright(c) 2014 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/linkage.h> +#include "sha1_mb_mgr_datastruct.S" + +## code to compute oct SHA1 using SSE-256 +## outer calling routine takes care of save and restore of XMM registers + +## Function clobbers: rax, rcx, rdx, rbx, rsi, rdi, r9-r15# ymm0-15 +## +## Linux clobbers: rax rbx rcx rdx rsi r9 r10 r11 r12 r13 r14 r15 +## Linux preserves: rdi rbp r8 +## +## clobbers ymm0-15 + + +# TRANSPOSE8 r0, r1, r2, r3, r4, r5, r6, r7, t0, t1 +# "transpose" data in {r0...r7} using temps {t0...t1} +# Input looks like: {r0 r1 r2 r3 r4 r5 r6 r7} +# r0 = {a7 a6 a5 a4 a3 a2 a1 a0} +# r1 = {b7 b6 b5 b4 b3 b2 b1 b0} +# r2 = {c7 c6 c5 c4 c3 c2 c1 c0} +# r3 = {d7 d6 d5 d4 d3 d2 d1 d0} +# r4 = {e7 e6 e5 e4 e3 e2 e1 e0} +# r5 = {f7 f6 f5 f4 f3 f2 f1 f0} +# r6 = {g7 g6 g5 g4 g3 g2 g1 g0} +# r7 = {h7 h6 h5 h4 h3 h2 h1 h0} +# +# Output looks like: {r0 r1 r2 r3 r4 r5 r6 r7} +# r0 = {h0 g0 f0 e0 d0 c0 b0 a0} +# r1 = {h1 g1 f1 e1 d1 c1 b1 a1} +# r2 = {h2 g2 f2 e2 d2 c2 b2 a2} +# r3 = {h3 g3 f3 e3 d3 c3 b3 a3} +# r4 = {h4 g4 f4 e4 d4 c4 b4 a4} +# r5 = {h5 g5 f5 e5 d5 c5 b5 a5} +# r6 = {h6 g6 f6 e6 d6 c6 b6 a6} +# r7 = {h7 g7 f7 e7 d7 c7 b7 a7} +# + +.macro TRANSPOSE8 r0 r1 r2 r3 r4 r5 r6 r7 t0 t1 + # process top half (r0..r3) {a...d} + vshufps $0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4 b1 b0 a1 a0} + vshufps $0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6 b3 b2 a3 a2} + vshufps $0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4 d1 d0 c1 c0} + vshufps $0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6 d3 d2 c3 c2} + vshufps $0xDD, \t1, \t0, \r3 # r3 = {d5 c5 b5 a5 d1 c1 b1 a1} + vshufps $0x88, \r2, \r0, \r1 # r1 = {d6 c6 b6 a6 d2 c2 b2 a2} + vshufps $0xDD, \r2, \r0, \r0 # r0 = {d7 c7 b7 a7 d3 c3 b3 a3} + vshufps $0x88, \t1, \t0, \t0 # t0 = {d4 c4 b4 a4 d0 c0 b0 a0} + + # use r2 in place of t0 + # process bottom half (r4..r7) {e...h} + vshufps $0x44, \r5, \r4, \r2 # r2 = {f5 f4 e5 e4 f1 f0 e1 e0} + vshufps $0xEE, \r5, \r4, \r4 # r4 = {f7 f6 e7 e6 f3 f2 e3 e2} + vshufps $0x44, \r7, \r6, \t1 # t1 = {h5 h4 g5 g4 h1 h0 g1 g0} + vshufps $0xEE, \r7, \r6, \r6 # r6 = {h7 h6 g7 g6 h3 h2 g3 g2} + vshufps $0xDD, \t1, \r2, \r7 # r7 = {h5 g5 f5 e5 h1 g1 f1 e1} + vshufps $0x88, \r6, \r4, \r5 # r5 = {h6 g6 f6 e6 h2 g2 f2 e2} + vshufps $0xDD, \r6, \r4, \r4 # r4 = {h7 g7 f7 e7 h3 g3 f3 e3} + vshufps $0x88, \t1, \r2, \t1 # t1 = {h4 g4 f4 e4 h0 g0 f0 e0} + + vperm2f128 $0x13, \r1, \r5, \r6 # h6...a6 + vperm2f128 $0x02, \r1, \r5, \r2 # h2...a2 + vperm2f128 $0x13, \r3, \r7, \r5 # h5...a5 + vperm2f128 $0x02, \r3, \r7, \r1 # h1...a1 + vperm2f128 $0x13, \r0, \r4, \r7 # h7...a7 + vperm2f128 $0x02, \r0, \r4, \r3 # h3...a3 + vperm2f128 $0x13, \t0, \t1, \r4 # h4...a4 + vperm2f128 $0x02, \t0, \t1, \r0 # h0...a0 + +.endm +## +## Magic functions defined in FIPS 180-1 +## +# macro MAGIC_F0 F,B,C,D,T ## F = (D ^ (B & (C ^ D))) +.macro MAGIC_F0 regF regB regC regD regT + vpxor \regD, \regC, \regF + vpand \regB, \regF, \regF + vpxor \regD, \regF, \regF +.endm + +# macro MAGIC_F1 F,B,C,D,T ## F = (B ^ C ^ D) +.macro MAGIC_F1 regF regB regC regD regT + vpxor \regC, \regD, \regF + vpxor \regB, \regF, \regF +.endm + +# macro MAGIC_F2 F,B,C,D,T ## F = ((B & C) | (B & D) | (C & D)) +.macro MAGIC_F2 regF regB regC regD regT + vpor \regC, \regB, \regF + vpand \regC, \regB, \regT + vpand \regD, \regF, \regF + vpor \regT, \regF, \regF +.endm + +# macro MAGIC_F3 F,B,C,D,T ## F = (B ^ C ^ D) +.macro MAGIC_F3 regF regB regC regD regT + MAGIC_F1 \regF,\regB,\regC,\regD,\regT +.endm + +# PROLD reg, imm, tmp +.macro PROLD reg imm tmp + vpsrld $(32-\imm), \reg, \tmp + vpslld $\imm, \reg, \reg + vpor \tmp, \reg, \reg +.endm + +.macro PROLD_nd reg imm tmp src + vpsrld $(32-\imm), \src, \tmp + vpslld $\imm, \src, \reg + vpor \tmp, \reg, \reg +.endm + +.macro SHA1_STEP_00_15 regA regB regC regD regE regT regF memW immCNT MAGIC + vpaddd \immCNT, \regE, \regE + vpaddd \memW*32(%rsp), \regE, \regE + PROLD_nd \regT, 5, \regF, \regA + vpaddd \regT, \regE, \regE + \MAGIC \regF, \regB, \regC, \regD, \regT + PROLD \regB, 30, \regT + vpaddd \regF, \regE, \regE +.endm + +.macro SHA1_STEP_16_79 regA regB regC regD regE regT regF memW immCNT MAGIC + vpaddd \immCNT, \regE, \regE + offset = ((\memW - 14) & 15) * 32 + vmovdqu offset(%rsp), W14 + vpxor W14, W16, W16 + offset = ((\memW - 8) & 15) * 32 + vpxor offset(%rsp), W16, W16 + offset = ((\memW - 3) & 15) * 32 + vpxor offset(%rsp), W16, W16 + vpsrld $(32-1), W16, \regF + vpslld $1, W16, W16 + vpor W16, \regF, \regF + + ROTATE_W + + offset = ((\memW - 0) & 15) * 32 + vmovdqu \regF, offset(%rsp) + vpaddd \regF, \regE, \regE + PROLD_nd \regT, 5, \regF, \regA + vpaddd \regT, \regE, \regE + \MAGIC \regF,\regB,\regC,\regD,\regT ## FUN = MAGIC_Fi(B,C,D) + PROLD \regB,30, \regT + vpaddd \regF, \regE, \regE +.endm + +######################################################################## +######################################################################## +######################################################################## + +## FRAMESZ plus pushes must be an odd multiple of 8 +YMM_SAVE = (15-15)*32 +FRAMESZ = 32*16 + YMM_SAVE +_YMM = FRAMESZ - YMM_SAVE + +#define VMOVPS vmovups + +IDX = %rax +inp0 = %r9 +inp1 = %r10 +inp2 = %r11 +inp3 = %r12 +inp4 = %r13 +inp5 = %r14 +inp6 = %r15 +inp7 = %rcx +arg1 = %rdi +arg2 = %rsi +RSP_SAVE = %rdx + +# ymm0 A +# ymm1 B +# ymm2 C +# ymm3 D +# ymm4 E +# ymm5 F AA +# ymm6 T0 BB +# ymm7 T1 CC +# ymm8 T2 DD +# ymm9 T3 EE +# ymm10 T4 TMP +# ymm11 T5 FUN +# ymm12 T6 K +# ymm13 T7 W14 +# ymm14 T8 W15 +# ymm15 T9 W16 + + +A = %ymm0 +B = %ymm1 +C = %ymm2 +D = %ymm3 +E = %ymm4 +F = %ymm5 +T0 = %ymm6 +T1 = %ymm7 +T2 = %ymm8 +T3 = %ymm9 +T4 = %ymm10 +T5 = %ymm11 +T6 = %ymm12 +T7 = %ymm13 +T8 = %ymm14 +T9 = %ymm15 + +AA = %ymm5 +BB = %ymm6 +CC = %ymm7 +DD = %ymm8 +EE = %ymm9 +TMP = %ymm10 +FUN = %ymm11 +K = %ymm12 +W14 = %ymm13 +W15 = %ymm14 +W16 = %ymm15 + +.macro ROTATE_ARGS + TMP_ = E + E = D + D = C + C = B + B = A + A = TMP_ +.endm + +.macro ROTATE_W +TMP_ = W16 +W16 = W15 +W15 = W14 +W14 = TMP_ +.endm + +# 8 streams x 5 32bit words per digest x 4 bytes per word +#define DIGEST_SIZE (8*5*4) + +.align 32 + +# void sha1_x8_avx2(void **input_data, UINT128 *digest, UINT32 size) +# arg 1 : pointer to array[4] of pointer to input data +# arg 2 : size (in blocks) ;; assumed to be >= 1 +# +ENTRY(sha1_x8_avx2) + + push RSP_SAVE + + #save rsp + mov %rsp, RSP_SAVE + sub $FRAMESZ, %rsp + + #align rsp to 32 Bytes + and $~0x1F, %rsp + + ## Initialize digests + vmovdqu 0*32(arg1), A + vmovdqu 1*32(arg1), B + vmovdqu 2*32(arg1), C + vmovdqu 3*32(arg1), D + vmovdqu 4*32(arg1), E + + ## transpose input onto stack + mov _data_ptr+0*8(arg1),inp0 + mov _data_ptr+1*8(arg1),inp1 + mov _data_ptr+2*8(arg1),inp2 + mov _data_ptr+3*8(arg1),inp3 + mov _data_ptr+4*8(arg1),inp4 + mov _data_ptr+5*8(arg1),inp5 + mov _data_ptr+6*8(arg1),inp6 + mov _data_ptr+7*8(arg1),inp7 + + xor IDX, IDX +lloop: + vmovdqu PSHUFFLE_BYTE_FLIP_MASK(%rip), F + I=0 +.rep 2 + VMOVPS (inp0, IDX), T0 + VMOVPS (inp1, IDX), T1 + VMOVPS (inp2, IDX), T2 + VMOVPS (inp3, IDX), T3 + VMOVPS (inp4, IDX), T4 + VMOVPS (inp5, IDX), T5 + VMOVPS (inp6, IDX), T6 + VMOVPS (inp7, IDX), T7 + + TRANSPOSE8 T0, T1, T2, T3, T4, T5, T6, T7, T8, T9 + vpshufb F, T0, T0 + vmovdqu T0, (I*8)*32(%rsp) + vpshufb F, T1, T1 + vmovdqu T1, (I*8+1)*32(%rsp) + vpshufb F, T2, T2 + vmovdqu T2, (I*8+2)*32(%rsp) + vpshufb F, T3, T3 + vmovdqu T3, (I*8+3)*32(%rsp) + vpshufb F, T4, T4 + vmovdqu T4, (I*8+4)*32(%rsp) + vpshufb F, T5, T5 + vmovdqu T5, (I*8+5)*32(%rsp) + vpshufb F, T6, T6 + vmovdqu T6, (I*8+6)*32(%rsp) + vpshufb F, T7, T7 + vmovdqu T7, (I*8+7)*32(%rsp) + add $32, IDX + I = (I+1) +.endr + # save old digests + vmovdqu A,AA + vmovdqu B,BB + vmovdqu C,CC + vmovdqu D,DD + vmovdqu E,EE + +## +## perform 0-79 steps +## + vmovdqu K00_19(%rip), K +## do rounds 0...15 + I = 0 +.rep 16 + SHA1_STEP_00_15 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0 + ROTATE_ARGS + I = (I+1) +.endr + +## do rounds 16...19 + vmovdqu ((16 - 16) & 15) * 32 (%rsp), W16 + vmovdqu ((16 - 15) & 15) * 32 (%rsp), W15 +.rep 4 + SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0 + ROTATE_ARGS + I = (I+1) +.endr + +## do rounds 20...39 + vmovdqu K20_39(%rip), K +.rep 20 + SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F1 + ROTATE_ARGS + I = (I+1) +.endr + +## do rounds 40...59 + vmovdqu K40_59(%rip), K +.rep 20 + SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F2 + ROTATE_ARGS + I = (I+1) +.endr + +## do rounds 60...79 + vmovdqu K60_79(%rip), K +.rep 20 + SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F3 + ROTATE_ARGS + I = (I+1) +.endr + + vpaddd AA,A,A + vpaddd BB,B,B + vpaddd CC,C,C + vpaddd DD,D,D + vpaddd EE,E,E + + sub $1, arg2 + jne lloop + + # write out digests + vmovdqu A, 0*32(arg1) + vmovdqu B, 1*32(arg1) + vmovdqu C, 2*32(arg1) + vmovdqu D, 3*32(arg1) + vmovdqu E, 4*32(arg1) + + # update input pointers + add IDX, inp0 + add IDX, inp1 + add IDX, inp2 + add IDX, inp3 + add IDX, inp4 + add IDX, inp5 + add IDX, inp6 + add IDX, inp7 + mov inp0, _data_ptr (arg1) + mov inp1, _data_ptr + 1*8(arg1) + mov inp2, _data_ptr + 2*8(arg1) + mov inp3, _data_ptr + 3*8(arg1) + mov inp4, _data_ptr + 4*8(arg1) + mov inp5, _data_ptr + 5*8(arg1) + mov inp6, _data_ptr + 6*8(arg1) + mov inp7, _data_ptr + 7*8(arg1) + + ################ + ## Postamble + + mov RSP_SAVE, %rsp + pop RSP_SAVE + + ret +ENDPROC(sha1_x8_avx2) + + +.data + +.align 32 +K00_19: +.octa 0x5A8279995A8279995A8279995A827999 +.octa 0x5A8279995A8279995A8279995A827999 +K20_39: +.octa 0x6ED9EBA16ED9EBA16ED9EBA16ED9EBA1 +.octa 0x6ED9EBA16ED9EBA16ED9EBA16ED9EBA1 +K40_59: +.octa 0x8F1BBCDC8F1BBCDC8F1BBCDC8F1BBCDC +.octa 0x8F1BBCDC8F1BBCDC8F1BBCDC8F1BBCDC +K60_79: +.octa 0xCA62C1D6CA62C1D6CA62C1D6CA62C1D6 +.octa 0xCA62C1D6CA62C1D6CA62C1D6CA62C1D6 +PSHUFFLE_BYTE_FLIP_MASK: +.octa 0x0c0d0e0f08090a0b0405060700010203 +.octa 0x0c0d0e0f08090a0b0405060700010203 diff --git a/arch/x86/crypto/sha-mb/sha_mb_ctx.h b/arch/x86/crypto/sha-mb/sha_mb_ctx.h new file mode 100644 index 000000000000..e36069d0c1bd --- /dev/null +++ b/arch/x86/crypto/sha-mb/sha_mb_ctx.h @@ -0,0 +1,136 @@ +/* + * Header file for multi buffer SHA context + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * Tim Chen <tim.c.chen@linux.intel.com> + * + * BSD LICENSE + * + * Copyright(c) 2014 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _SHA_MB_CTX_INTERNAL_H +#define _SHA_MB_CTX_INTERNAL_H + +#include "sha_mb_mgr.h" + +#define HASH_UPDATE 0x00 +#define HASH_FIRST 0x01 +#define HASH_LAST 0x02 +#define HASH_ENTIRE 0x03 +#define HASH_DONE 0x04 +#define HASH_FINAL 0x08 + +#define HASH_CTX_STS_IDLE 0x00 +#define HASH_CTX_STS_PROCESSING 0x01 +#define HASH_CTX_STS_LAST 0x02 +#define HASH_CTX_STS_COMPLETE 0x04 + +enum hash_ctx_error { + HASH_CTX_ERROR_NONE = 0, + HASH_CTX_ERROR_INVALID_FLAGS = -1, + HASH_CTX_ERROR_ALREADY_PROCESSING = -2, + HASH_CTX_ERROR_ALREADY_COMPLETED = -3, + +#ifdef HASH_CTX_DEBUG + HASH_CTX_ERROR_DEBUG_DIGEST_MISMATCH = -4, +#endif +}; + + +#define hash_ctx_user_data(ctx) ((ctx)->user_data) +#define hash_ctx_digest(ctx) ((ctx)->job.result_digest) +#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING) +#define hash_ctx_complete(ctx) ((ctx)->status == HASH_CTX_STS_COMPLETE) +#define hash_ctx_status(ctx) ((ctx)->status) +#define hash_ctx_error(ctx) ((ctx)->error) +#define hash_ctx_init(ctx) \ + do { \ + (ctx)->error = HASH_CTX_ERROR_NONE; \ + (ctx)->status = HASH_CTX_STS_COMPLETE; \ + } while (0) + + +/* Hash Constants and Typedefs */ +#define SHA1_DIGEST_LENGTH 5 +#define SHA1_LOG2_BLOCK_SIZE 6 + +#define SHA1_PADLENGTHFIELD_SIZE 8 + +#ifdef SHA_MB_DEBUG +#define assert(expr) \ +do { \ + if (unlikely(!(expr))) { \ + printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \ + #expr, __FILE__, __func__, __LINE__); \ + } \ +} while (0) +#else +#define assert(expr) do {} while (0) +#endif + +struct sha1_ctx_mgr { + struct sha1_mb_mgr mgr; +}; + +/* typedef struct sha1_ctx_mgr sha1_ctx_mgr; */ + +struct sha1_hash_ctx { + /* Must be at struct offset 0 */ + struct job_sha1 job; + /* status flag */ + int status; + /* error flag */ + int error; + + uint32_t total_length; + const void *incoming_buffer; + uint32_t incoming_buffer_length; + uint8_t partial_block_buffer[SHA1_BLOCK_SIZE * 2]; + uint32_t partial_block_buffer_length; + void *user_data; +}; + +#endif diff --git a/arch/x86/crypto/sha-mb/sha_mb_mgr.h b/arch/x86/crypto/sha-mb/sha_mb_mgr.h new file mode 100644 index 000000000000..08ad1a9acfd7 --- /dev/null +++ b/arch/x86/crypto/sha-mb/sha_mb_mgr.h @@ -0,0 +1,110 @@ +/* + * Header file for multi buffer SHA1 algorithm manager + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * James Guilford <james.guilford@intel.com> + * Tim Chen <tim.c.chen@linux.intel.com> + * + * BSD LICENSE + * + * Copyright(c) 2014 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef __SHA_MB_MGR_H +#define __SHA_MB_MGR_H + + +#include <linux/types.h> + +#define NUM_SHA1_DIGEST_WORDS 5 + +enum job_sts { STS_UNKNOWN = 0, + STS_BEING_PROCESSED = 1, + STS_COMPLETED = 2, + STS_INTERNAL_ERROR = 3, + STS_ERROR = 4 +}; + +struct job_sha1 { + u8 *buffer; + u32 len; + u32 result_digest[NUM_SHA1_DIGEST_WORDS] __aligned(32); + enum job_sts status; + void *user_data; +}; + +/* SHA1 out-of-order scheduler */ + +/* typedef uint32_t sha1_digest_array[5][8]; */ + +struct sha1_args_x8 { + uint32_t digest[5][8]; + uint8_t *data_ptr[8]; +}; + +struct sha1_lane_data { + struct job_sha1 *job_in_lane; +}; + +struct sha1_mb_mgr { + struct sha1_args_x8 args; + + uint32_t lens[8]; + + /* each byte is index (0...7) of unused lanes */ + uint64_t unused_lanes; + /* byte 4 is set to FF as a flag */ + struct sha1_lane_data ldata[8]; +}; + + +#define SHA1_MB_MGR_NUM_LANES_AVX2 8 + +void sha1_mb_mgr_init_avx2(struct sha1_mb_mgr *state); +struct job_sha1 *sha1_mb_mgr_submit_avx2(struct sha1_mb_mgr *state, + struct job_sha1 *job); +struct job_sha1 *sha1_mb_mgr_flush_avx2(struct sha1_mb_mgr *state); +struct job_sha1 *sha1_mb_mgr_get_comp_job_avx2(struct sha1_mb_mgr *state); + +#endif diff --git a/crypto/Kconfig b/crypto/Kconfig index 77daef031db5..87bbc9c1e681 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -158,6 +158,20 @@ config CRYPTO_CRYPTD converts an arbitrary synchronous software crypto algorithm into an asynchronous algorithm that executes in a kernel thread. +config CRYPTO_MCRYPTD + tristate "Software async multi-buffer crypto daemon" + select CRYPTO_BLKCIPHER + select CRYPTO_HASH + select CRYPTO_MANAGER + select CRYPTO_WORKQUEUE + help + This is a generic software asynchronous crypto daemon that + provides the kernel thread to assist multi-buffer crypto + algorithms for submitting jobs and flushing jobs in multi-buffer + crypto algorithms. Multi-buffer crypto algorithms are executed + in the context of this kernel thread and drivers can post + their crypto request asynchronously to be processed by this daemon. + config CRYPTO_AUTHENC tristate "Authenc support" select CRYPTO_AEAD @@ -559,6 +573,22 @@ config CRYPTO_SHA1_PPC This is the powerpc hardware accelerated implementation of the SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2). +config CRYPTO_SHA1_MB + tristate "SHA1 digest algorithm (x86_64 Multi-Buffer, Experimental)" + depends on X86 && 64BIT + select CRYPTO_SHA1 + select CRYPTO_HASH + select CRYPTO_MCRYPTD + help + SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented + using multi-buffer technique. This algorithm computes on + multiple data lanes concurrently with SIMD instructions for + better throughput. It should not be enabled by default but + used when there is significant amount of work to keep the keep + the data lanes filled to get performance benefit. If the data + lanes remain unfilled, a flush operation will be initiated to + process the crypto jobs, adding a slight latency. + config CRYPTO_SHA256 tristate "SHA224 and SHA256 digest algorithm" select CRYPTO_HASH diff --git a/crypto/Makefile b/crypto/Makefile index cfa57b3f5a4d..1445b9100c05 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -60,6 +60,7 @@ obj-$(CONFIG_CRYPTO_GCM) += gcm.o obj-$(CONFIG_CRYPTO_CCM) += ccm.o obj-$(CONFIG_CRYPTO_PCRYPT) += pcrypt.o obj-$(CONFIG_CRYPTO_CRYPTD) += cryptd.o +obj-$(CONFIG_CRYPTO_MCRYPTD) += mcryptd.o obj-$(CONFIG_CRYPTO_DES) += des_generic.o obj-$(CONFIG_CRYPTO_FCRYPT) += fcrypt.o obj-$(CONFIG_CRYPTO_BLOWFISH) += blowfish_generic.o diff --git a/crypto/ahash.c b/crypto/ahash.c index f2a5d8f656ff..f6a36a52d738 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -131,8 +131,10 @@ int crypto_hash_walk_first(struct ahash_request *req, { walk->total = req->nbytes; - if (!walk->total) + if (!walk->total) { + walk->entrylen = 0; return 0; + } walk->alignmask = crypto_ahash_alignmask(crypto_ahash_reqtfm(req)); walk->sg = req->src; @@ -147,8 +149,10 @@ int crypto_ahash_walk_first(struct ahash_request *req, { walk->total = req->nbytes; - if (!walk->total) + if (!walk->total) { + walk->entrylen = 0; return 0; + } walk->alignmask = crypto_ahash_alignmask(crypto_ahash_reqtfm(req)); walk->sg = req->src; @@ -167,8 +171,10 @@ int crypto_hash_walk_first_compat(struct hash_desc *hdesc, { walk->total = len; - if (!walk->total) + if (!walk->total) { + walk->entrylen = 0; return 0; + } walk->alignmask = crypto_hash_alignmask(hdesc->tfm); walk->sg = sg; diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index a19c027b29bd..83187f497c7c 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -49,7 +49,7 @@ struct skcipher_ctx { struct ablkcipher_request req; }; -#define MAX_SGL_ENTS ((PAGE_SIZE - sizeof(struct skcipher_sg_list)) / \ +#define MAX_SGL_ENTS ((4096 - sizeof(struct skcipher_sg_list)) / \ sizeof(struct scatterlist) - 1) static inline int skcipher_sndbuf(struct sock *sk) diff --git a/crypto/drbg.c b/crypto/drbg.c index a53ee099e281..54cfd4820abc 100644 --- a/crypto/drbg.c +++ b/crypto/drbg.c @@ -117,27 +117,18 @@ static const struct drbg_core drbg_cores[] = { { .flags = DRBG_CTR | DRBG_STRENGTH128, .statelen = 32, /* 256 bits as defined in 10.2.1 */ - .max_addtllen = 35, - .max_bits = 19, - .max_req = 48, .blocklen_bytes = 16, .cra_name = "ctr_aes128", .backend_cra_name = "ecb(aes)", }, { .flags = DRBG_CTR | DRBG_STRENGTH192, .statelen = 40, /* 320 bits as defined in 10.2.1 */ - .max_addtllen = 35, - .max_bits = 19, - .max_req = 48, .blocklen_bytes = 16, .cra_name = "ctr_aes192", .backend_cra_name = "ecb(aes)", }, { .flags = DRBG_CTR | DRBG_STRENGTH256, .statelen = 48, /* 384 bits as defined in 10.2.1 */ - .max_addtllen = 35, - .max_bits = 19, - .max_req = 48, .blocklen_bytes = 16, .cra_name = "ctr_aes256", .backend_cra_name = "ecb(aes)", @@ -147,36 +138,24 @@ static const struct drbg_core drbg_cores[] = { { .flags = DRBG_HASH | DRBG_STRENGTH128, .statelen = 55, /* 440 bits */ - .max_addtllen = 35, - .max_bits = 19, - .max_req = 48, .blocklen_bytes = 20, .cra_name = "sha1", .backend_cra_name = "sha1", }, { .flags = DRBG_HASH | DRBG_STRENGTH256, .statelen = 111, /* 888 bits */ - .max_addtllen = 35, - .max_bits = 19, - .max_req = 48, .blocklen_bytes = 48, .cra_name = "sha384", .backend_cra_name = "sha384", }, { .flags = DRBG_HASH | DRBG_STRENGTH256, .statelen = 111, /* 888 bits */ - .max_addtllen = 35, - .max_bits = 19, - .max_req = 48, .blocklen_bytes = 64, .cra_name = "sha512", .backend_cra_name = "sha512", }, { .flags = DRBG_HASH | DRBG_STRENGTH256, .statelen = 55, /* 440 bits */ - .max_addtllen = 35, - .max_bits = 19, - .max_req = 48, .blocklen_bytes = 32, .cra_name = "sha256", .backend_cra_name = "sha256", @@ -186,36 +165,24 @@ static const struct drbg_core drbg_cores[] = { { .flags = DRBG_HMAC | DRBG_STRENGTH128, .statelen = 20, /* block length of cipher */ - .max_addtllen = 35, - .max_bits = 19, - .max_req = 48, .blocklen_bytes = 20, .cra_name = "hmac_sha1", .backend_cra_name = "hmac(sha1)", }, { .flags = DRBG_HMAC | DRBG_STRENGTH256, .statelen = 48, /* block length of cipher */ - .max_addtllen = 35, - .max_bits = 19, - .max_req = 48, .blocklen_bytes = 48, .cra_name = "hmac_sha384", .backend_cra_name = "hmac(sha384)", }, { .flags = DRBG_HMAC | DRBG_STRENGTH256, .statelen = 64, /* block length of cipher */ - .max_addtllen = 35, - .max_bits = 19, - .max_req = 48, .blocklen_bytes = 64, .cra_name = "hmac_sha512", .backend_cra_name = "hmac(sha512)", }, { .flags = DRBG_HMAC | DRBG_STRENGTH256, .statelen = 32, /* block length of cipher */ - .max_addtllen = 35, - .max_bits = 19, - .max_req = 48, .blocklen_bytes = 32, .cra_name = "hmac_sha256", .backend_cra_name = "hmac(sha256)", @@ -302,20 +269,19 @@ static bool drbg_fips_continuous_test(struct drbg_state *drbg, * Convert an integer into a byte representation of this integer. * The byte representation is big-endian * - * @buf buffer holding the converted integer * @val value to be converted - * @buflen length of buffer + * @buf buffer holding the converted integer -- caller must ensure that + * buffer size is at least 32 bit */ #if (defined(CONFIG_CRYPTO_DRBG_HASH) || defined(CONFIG_CRYPTO_DRBG_CTR)) -static inline void drbg_int2byte(unsigned char *buf, uint64_t val, - size_t buflen) +static inline void drbg_cpu_to_be32(__u32 val, unsigned char *buf) { - unsigned char *byte; - uint64_t i; + struct s { + __be32 conv; + }; + struct s *conversion = (struct s *) buf; - byte = buf + (buflen - 1); - for (i = 0; i < buflen; i++) - *(byte--) = val >> (i * 8) & 0xff; + conversion->conv = cpu_to_be32(val); } /* @@ -483,10 +449,10 @@ static int drbg_ctr_df(struct drbg_state *drbg, /* 10.4.2 step 2 -- calculate the entire length of all input data */ list_for_each_entry(seed, seedlist, list) inputlen += seed->len; - drbg_int2byte(&L_N[0], inputlen, 4); + drbg_cpu_to_be32(inputlen, &L_N[0]); /* 10.4.2 step 3 */ - drbg_int2byte(&L_N[4], bytes_to_return, 4); + drbg_cpu_to_be32(bytes_to_return, &L_N[4]); /* 10.4.2 step 5: length is L_N, input_string, one byte, padding */ padlen = (inputlen + sizeof(L_N) + 1) % (drbg_blocklen(drbg)); @@ -517,7 +483,7 @@ static int drbg_ctr_df(struct drbg_state *drbg, * holds zeros after allocation -- even the increment of i * is irrelevant as the increment remains within length of i */ - drbg_int2byte(iv, i, 4); + drbg_cpu_to_be32(i, iv); /* 10.4.2 step 9.2 -- BCC and concatenation with temp */ ret = drbg_ctr_bcc(drbg, temp + templen, K, &bcc_list); if (ret) @@ -729,11 +695,9 @@ static int drbg_hmac_update(struct drbg_state *drbg, struct list_head *seed, LIST_HEAD(seedlist); LIST_HEAD(vdatalist); - if (!reseed) { - /* 10.1.2.3 step 2 */ - memset(drbg->C, 0, drbg_statelen(drbg)); + if (!reseed) + /* 10.1.2.3 step 2 -- memset(0) of C is implicit with kzalloc */ memset(drbg->V, 1, drbg_statelen(drbg)); - } drbg_string_fill(&seed1, drbg->V, drbg_statelen(drbg)); list_add_tail(&seed1.list, &seedlist); @@ -862,7 +826,7 @@ static int drbg_hash_df(struct drbg_state *drbg, /* 10.4.1 step 3 */ input[0] = 1; - drbg_int2byte(&input[1], (outlen * 8), 4); + drbg_cpu_to_be32((outlen * 8), &input[1]); /* 10.4.1 step 4.1 -- concatenation of data for input into hash */ drbg_string_fill(&data, input, 5); @@ -1023,7 +987,10 @@ static int drbg_hash_generate(struct drbg_state *drbg, { int len = 0; int ret = 0; - unsigned char req[8]; + union { + unsigned char req[8]; + __be64 req_int; + } u; unsigned char prefix = DRBG_PREFIX3; struct drbg_string data1, data2; LIST_HEAD(datalist); @@ -1053,8 +1020,8 @@ static int drbg_hash_generate(struct drbg_state *drbg, drbg->scratchpad, drbg_blocklen(drbg)); drbg_add_buf(drbg->V, drbg_statelen(drbg), drbg->C, drbg_statelen(drbg)); - drbg_int2byte(req, drbg->reseed_ctr, sizeof(req)); - drbg_add_buf(drbg->V, drbg_statelen(drbg), req, 8); + u.req_int = cpu_to_be64(drbg->reseed_ctr); + drbg_add_buf(drbg->V, drbg_statelen(drbg), u.req, 8); out: memset(drbg->scratchpad, 0, drbg_blocklen(drbg)); @@ -1142,6 +1109,11 @@ static int drbg_seed(struct drbg_state *drbg, struct drbg_string *pers, pr_devel("DRBG: using personalization string\n"); } + if (!reseed) { + memset(drbg->V, 0, drbg_statelen(drbg)); + memset(drbg->C, 0, drbg_statelen(drbg)); + } + ret = drbg->d_ops->update(drbg, &seedlist, reseed); if (ret) goto out; @@ -1151,8 +1123,7 @@ static int drbg_seed(struct drbg_state *drbg, struct drbg_string *pers, drbg->reseed_ctr = 1; out: - if (entropy) - kzfree(entropy); + kzfree(entropy); return ret; } @@ -1161,19 +1132,15 @@ static inline void drbg_dealloc_state(struct drbg_state *drbg) { if (!drbg) return; - if (drbg->V) - kzfree(drbg->V); + kzfree(drbg->V); drbg->V = NULL; - if (drbg->C) - kzfree(drbg->C); + kzfree(drbg->C); drbg->C = NULL; - if (drbg->scratchpad) - kzfree(drbg->scratchpad); + kzfree(drbg->scratchpad); drbg->scratchpad = NULL; drbg->reseed_ctr = 0; #ifdef CONFIG_CRYPTO_FIPS - if (drbg->prev) - kzfree(drbg->prev); + kzfree(drbg->prev); drbg->prev = NULL; drbg->fips_primed = false; #endif @@ -1188,17 +1155,14 @@ static inline int drbg_alloc_state(struct drbg_state *drbg) int ret = -ENOMEM; unsigned int sb_size = 0; - if (!drbg) - return -EINVAL; - - drbg->V = kzalloc(drbg_statelen(drbg), GFP_KERNEL); + drbg->V = kmalloc(drbg_statelen(drbg), GFP_KERNEL); if (!drbg->V) goto err; - drbg->C = kzalloc(drbg_statelen(drbg), GFP_KERNEL); + drbg->C = kmalloc(drbg_statelen(drbg), GFP_KERNEL); if (!drbg->C) goto err; #ifdef CONFIG_CRYPTO_FIPS - drbg->prev = kzalloc(drbg_blocklen(drbg), GFP_KERNEL); + drbg->prev = kmalloc(drbg_blocklen(drbg), GFP_KERNEL); if (!drbg->prev) goto err; drbg->fips_primed = false; @@ -1263,15 +1227,6 @@ static int drbg_make_shadow(struct drbg_state *drbg, struct drbg_state **shadow) int ret = -ENOMEM; struct drbg_state *tmp = NULL; - if (!drbg || !drbg->core || !drbg->V || !drbg->C) { - pr_devel("DRBG: attempt to generate shadow copy for " - "uninitialized DRBG state rejected\n"); - return -EINVAL; - } - /* HMAC does not have a scratchpad */ - if (!(drbg->core->flags & DRBG_HMAC) && NULL == drbg->scratchpad) - return -EINVAL; - tmp = kzalloc(sizeof(struct drbg_state), GFP_KERNEL); if (!tmp) return -ENOMEM; @@ -1293,8 +1248,7 @@ static int drbg_make_shadow(struct drbg_state *drbg, struct drbg_state **shadow) return 0; err: - if (tmp) - kzfree(tmp); + kzfree(tmp); return ret; } @@ -1385,11 +1339,9 @@ static int drbg_generate(struct drbg_state *drbg, shadow->seeded = false; /* allocate cipher handle */ - if (shadow->d_ops->crypto_init) { - len = shadow->d_ops->crypto_init(shadow); - if (len) - goto err; - } + len = shadow->d_ops->crypto_init(shadow); + if (len) + goto err; if (shadow->pr || !shadow->seeded) { pr_devel("DRBG: reseeding before generation (prediction " @@ -1471,8 +1423,7 @@ static int drbg_generate(struct drbg_state *drbg, #endif err: - if (shadow->d_ops->crypto_fini) - shadow->d_ops->crypto_fini(shadow); + shadow->d_ops->crypto_fini(shadow); drbg_restore_shadow(drbg, &shadow); return len; } @@ -1566,11 +1517,10 @@ static int drbg_instantiate(struct drbg_state *drbg, struct drbg_string *pers, return ret; ret = -EFAULT; - if (drbg->d_ops->crypto_init && drbg->d_ops->crypto_init(drbg)) + if (drbg->d_ops->crypto_init(drbg)) goto err; ret = drbg_seed(drbg, pers, false); - if (drbg->d_ops->crypto_fini) - drbg->d_ops->crypto_fini(drbg); + drbg->d_ops->crypto_fini(drbg); if (ret) goto err; diff --git a/crypto/lz4.c b/crypto/lz4.c index 4586dd15b0d8..34d072b72a73 100644 --- a/crypto/lz4.c +++ b/crypto/lz4.c @@ -68,7 +68,7 @@ static int lz4_decompress_crypto(struct crypto_tfm *tfm, const u8 *src, size_t tmp_len = *dlen; size_t __slen = slen; - err = lz4_decompress(src, &__slen, dst, tmp_len); + err = lz4_decompress_unknownoutputsize(src, __slen, dst, &tmp_len); if (err < 0) return -EINVAL; diff --git a/crypto/lz4hc.c b/crypto/lz4hc.c index 151ba31d34e3..9218b3fed5e3 100644 --- a/crypto/lz4hc.c +++ b/crypto/lz4hc.c @@ -68,7 +68,7 @@ static int lz4hc_decompress_crypto(struct crypto_tfm *tfm, const u8 *src, size_t tmp_len = *dlen; size_t __slen = slen; - err = lz4_decompress(src, &__slen, dst, tmp_len); + err = lz4_decompress_unknownoutputsize(src, __slen, dst, &tmp_len); if (err < 0) return -EINVAL; diff --git a/crypto/mcryptd.c b/crypto/mcryptd.c new file mode 100644 index 000000000000..b39fbd530102 --- /dev/null +++ b/crypto/mcryptd.c @@ -0,0 +1,705 @@ +/* + * Software multibuffer async crypto daemon. + * + * Copyright (c) 2014 Tim Chen <tim.c.chen@linux.intel.com> + * + * Adapted from crypto daemon. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#include <crypto/algapi.h> +#include <crypto/internal/hash.h> +#include <crypto/internal/aead.h> +#include <crypto/mcryptd.h> +#include <crypto/crypto_wq.h> +#include <linux/err.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/module.h> +#include <linux/scatterlist.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/hardirq.h> + +#define MCRYPTD_MAX_CPU_QLEN 100 +#define MCRYPTD_BATCH 9 + +static void *mcryptd_alloc_instance(struct crypto_alg *alg, unsigned int head, + unsigned int tail); + +struct mcryptd_flush_list { + struct list_head list; + struct mutex lock; +}; + +static struct mcryptd_flush_list __percpu *mcryptd_flist; + +struct hashd_instance_ctx { + struct crypto_shash_spawn spawn; + struct mcryptd_queue *queue; +}; + +static void mcryptd_queue_worker(struct work_struct *work); + +void mcryptd_arm_flusher(struct mcryptd_alg_cstate *cstate, unsigned long delay) +{ + struct mcryptd_flush_list *flist; + + if (!cstate->flusher_engaged) { + /* put the flusher on the flush list */ + flist = per_cpu_ptr(mcryptd_flist, smp_processor_id()); + mutex_lock(&flist->lock); + list_add_tail(&cstate->flush_list, &flist->list); + cstate->flusher_engaged = true; + cstate->next_flush = jiffies + delay; + queue_delayed_work_on(smp_processor_id(), kcrypto_wq, + &cstate->flush, delay); + mutex_unlock(&flist->lock); + } +} +EXPORT_SYMBOL(mcryptd_arm_flusher); + +static int mcryptd_init_queue(struct mcryptd_queue *queue, + unsigned int max_cpu_qlen) +{ + int cpu; + struct mcryptd_cpu_queue *cpu_queue; + + queue->cpu_queue = alloc_percpu(struct mcryptd_cpu_queue); + pr_debug("mqueue:%p mcryptd_cpu_queue %p\n", queue, queue->cpu_queue); + if (!queue->cpu_queue) + return -ENOMEM; + for_each_possible_cpu(cpu) { + cpu_queue = per_cpu_ptr(queue->cpu_queue, cpu); + pr_debug("cpu_queue #%d %p\n", cpu, queue->cpu_queue); + crypto_init_queue(&cpu_queue->queue, max_cpu_qlen); + INIT_WORK(&cpu_queue->work, mcryptd_queue_worker); + } + return 0; +} + +static void mcryptd_fini_queue(struct mcryptd_queue *queue) +{ + int cpu; + struct mcryptd_cpu_queue *cpu_queue; + + for_each_possible_cpu(cpu) { + cpu_queue = per_cpu_ptr(queue->cpu_queue, cpu); + BUG_ON(cpu_queue->queue.qlen); + } + free_percpu(queue->cpu_queue); +} + +static int mcryptd_enqueue_request(struct mcryptd_queue *queue, + struct crypto_async_request *request, + struct mcryptd_hash_request_ctx *rctx) +{ + int cpu, err; + struct mcryptd_cpu_queue *cpu_queue; + + cpu = get_cpu(); + cpu_queue = this_cpu_ptr(queue->cpu_queue); + rctx->tag.cpu = cpu; + + err = crypto_enqueue_request(&cpu_queue->queue, request); + pr_debug("enqueue request: cpu %d cpu_queue %p request %p\n", + cpu, cpu_queue, request); + queue_work_on(cpu, kcrypto_wq, &cpu_queue->work); + put_cpu(); + + return err; +} + +/* + * Try to opportunisticlly flush the partially completed jobs if + * crypto daemon is the only task running. + */ +static void mcryptd_opportunistic_flush(void) +{ + struct mcryptd_flush_list *flist; + struct mcryptd_alg_cstate *cstate; + + flist = per_cpu_ptr(mcryptd_flist, smp_processor_id()); + while (single_task_running()) { + mutex_lock(&flist->lock); + if (list_empty(&flist->list)) { + mutex_unlock(&flist->lock); + return; + } + cstate = list_entry(flist->list.next, + struct mcryptd_alg_cstate, flush_list); + if (!cstate->flusher_engaged) { + mutex_unlock(&flist->lock); + return; + } + list_del(&cstate->flush_list); + cstate->flusher_engaged = false; + mutex_unlock(&flist->lock); + cstate->alg_state->flusher(cstate); + } +} + +/* + * Called in workqueue context, do one real cryption work (via + * req->complete) and reschedule itself if there are more work to + * do. + */ +static void mcryptd_queue_worker(struct work_struct *work) +{ + struct mcryptd_cpu_queue *cpu_queue; + struct crypto_async_request *req, *backlog; + int i; + + /* + * Need to loop through more than once for multi-buffer to + * be effective. + */ + + cpu_queue = container_of(work, struct mcryptd_cpu_queue, work); + i = 0; + while (i < MCRYPTD_BATCH || single_task_running()) { + /* + * preempt_disable/enable is used to prevent + * being preempted by mcryptd_enqueue_request() + */ + local_bh_disable(); + preempt_disable(); + backlog = crypto_get_backlog(&cpu_queue->queue); + req = crypto_dequeue_request(&cpu_queue->queue); + preempt_enable(); + local_bh_enable(); + + if (!req) { + mcryptd_opportunistic_flush(); + return; + } + + if (backlog) + backlog->complete(backlog, -EINPROGRESS); + req->complete(req, 0); + if (!cpu_queue->queue.qlen) + return; + ++i; + } + if (cpu_queue->queue.qlen) + queue_work(kcrypto_wq, &cpu_queue->work); +} + +void mcryptd_flusher(struct work_struct *__work) +{ + struct mcryptd_alg_cstate *alg_cpu_state; + struct mcryptd_alg_state *alg_state; + struct mcryptd_flush_list *flist; + int cpu; + + cpu = smp_processor_id(); + alg_cpu_state = container_of(to_delayed_work(__work), + struct mcryptd_alg_cstate, flush); + alg_state = alg_cpu_state->alg_state; + if (alg_cpu_state->cpu != cpu) + pr_debug("mcryptd error: work on cpu %d, should be cpu %d\n", + cpu, alg_cpu_state->cpu); + + if (alg_cpu_state->flusher_engaged) { + flist = per_cpu_ptr(mcryptd_flist, cpu); + mutex_lock(&flist->lock); + list_del(&alg_cpu_state->flush_list); + alg_cpu_state->flusher_engaged = false; + mutex_unlock(&flist->lock); + alg_state->flusher(alg_cpu_state); + } +} +EXPORT_SYMBOL_GPL(mcryptd_flusher); + +static inline struct mcryptd_queue *mcryptd_get_queue(struct crypto_tfm *tfm) +{ + struct crypto_instance *inst = crypto_tfm_alg_instance(tfm); + struct mcryptd_instance_ctx *ictx = crypto_instance_ctx(inst); + + return ictx->queue; +} + +static void *mcryptd_alloc_instance(struct crypto_alg *alg, unsigned int head, + unsigned int tail) +{ + char *p; + struct crypto_instance *inst; + int err; + + p = kzalloc(head + sizeof(*inst) + tail, GFP_KERNEL); + if (!p) + return ERR_PTR(-ENOMEM); + + inst = (void *)(p + head); + + err = -ENAMETOOLONG; + if (snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME, + "mcryptd(%s)", alg->cra_driver_name) >= CRYPTO_MAX_ALG_NAME) + goto out_free_inst; + + memcpy(inst->alg.cra_name, alg->cra_name, CRYPTO_MAX_ALG_NAME); + + inst->alg.cra_priority = alg->cra_priority + 50; + inst->alg.cra_blocksize = alg->cra_blocksize; + inst->alg.cra_alignmask = alg->cra_alignmask; + +out: + return p; + +out_free_inst: + kfree(p); + p = ERR_PTR(err); + goto out; +} + +static int mcryptd_hash_init_tfm(struct crypto_tfm *tfm) +{ + struct crypto_instance *inst = crypto_tfm_alg_instance(tfm); + struct hashd_instance_ctx *ictx = crypto_instance_ctx(inst); + struct crypto_shash_spawn *spawn = &ictx->spawn; + struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(tfm); + struct crypto_shash *hash; + + hash = crypto_spawn_shash(spawn); + if (IS_ERR(hash)) + return PTR_ERR(hash); + + ctx->child = hash; + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct mcryptd_hash_request_ctx) + + crypto_shash_descsize(hash)); + return 0; +} + +static void mcryptd_hash_exit_tfm(struct crypto_tfm *tfm) +{ + struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(tfm); + + crypto_free_shash(ctx->child); +} + +static int mcryptd_hash_setkey(struct crypto_ahash *parent, + const u8 *key, unsigned int keylen) +{ + struct mcryptd_hash_ctx *ctx = crypto_ahash_ctx(parent); + struct crypto_shash *child = ctx->child; + int err; + + crypto_shash_clear_flags(child, CRYPTO_TFM_REQ_MASK); + crypto_shash_set_flags(child, crypto_ahash_get_flags(parent) & + CRYPTO_TFM_REQ_MASK); + err = crypto_shash_setkey(child, key, keylen); + crypto_ahash_set_flags(parent, crypto_shash_get_flags(child) & + CRYPTO_TFM_RES_MASK); + return err; +} + +static int mcryptd_hash_enqueue(struct ahash_request *req, + crypto_completion_t complete) +{ + int ret; + + struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct mcryptd_queue *queue = + mcryptd_get_queue(crypto_ahash_tfm(tfm)); + + rctx->complete = req->base.complete; + req->base.complete = complete; + + ret = mcryptd_enqueue_request(queue, &req->base, rctx); + + return ret; +} + +static void mcryptd_hash_init(struct crypto_async_request *req_async, int err) +{ + struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(req_async->tfm); + struct crypto_shash *child = ctx->child; + struct ahash_request *req = ahash_request_cast(req_async); + struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); + struct shash_desc *desc = &rctx->desc; + + if (unlikely(err == -EINPROGRESS)) + goto out; + + desc->tfm = child; + desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; + + err = crypto_shash_init(desc); + + req->base.complete = rctx->complete; + +out: + local_bh_disable(); + rctx->complete(&req->base, err); + local_bh_enable(); +} + +static int mcryptd_hash_init_enqueue(struct ahash_request *req) +{ + return mcryptd_hash_enqueue(req, mcryptd_hash_init); +} + +static void mcryptd_hash_update(struct crypto_async_request *req_async, int err) +{ + struct ahash_request *req = ahash_request_cast(req_async); + struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); + + if (unlikely(err == -EINPROGRESS)) + goto out; + + err = shash_ahash_mcryptd_update(req, &rctx->desc); + if (err) { + req->base.complete = rctx->complete; + goto out; + } + + return; +out: + local_bh_disable(); + rctx->complete(&req->base, err); + local_bh_enable(); +} + +static int mcryptd_hash_update_enqueue(struct ahash_request *req) +{ + return mcryptd_hash_enqueue(req, mcryptd_hash_update); +} + +static void mcryptd_hash_final(struct crypto_async_request *req_async, int err) +{ + struct ahash_request *req = ahash_request_cast(req_async); + struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); + + if (unlikely(err == -EINPROGRESS)) + goto out; + + err = shash_ahash_mcryptd_final(req, &rctx->desc); + if (err) { + req->base.complete = rctx->complete; + goto out; + } + + return; +out: + local_bh_disable(); + rctx->complete(&req->base, err); + local_bh_enable(); +} + +static int mcryptd_hash_final_enqueue(struct ahash_request *req) +{ + return mcryptd_hash_enqueue(req, mcryptd_hash_final); +} + +static void mcryptd_hash_finup(struct crypto_async_request *req_async, int err) +{ + struct ahash_request *req = ahash_request_cast(req_async); + struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); + + if (unlikely(err == -EINPROGRESS)) + goto out; + + err = shash_ahash_mcryptd_finup(req, &rctx->desc); + + if (err) { + req->base.complete = rctx->complete; + goto out; + } + + return; +out: + local_bh_disable(); + rctx->complete(&req->base, err); + local_bh_enable(); +} + +static int mcryptd_hash_finup_enqueue(struct ahash_request *req) +{ + return mcryptd_hash_enqueue(req, mcryptd_hash_finup); +} + +static void mcryptd_hash_digest(struct crypto_async_request *req_async, int err) +{ + struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(req_async->tfm); + struct crypto_shash *child = ctx->child; + struct ahash_request *req = ahash_request_cast(req_async); + struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); + struct shash_desc *desc = &rctx->desc; + + if (unlikely(err == -EINPROGRESS)) + goto out; + + desc->tfm = child; + desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; /* check this again */ + + err = shash_ahash_mcryptd_digest(req, desc); + + if (err) { + req->base.complete = rctx->complete; + goto out; + } + + return; +out: + local_bh_disable(); + rctx->complete(&req->base, err); + local_bh_enable(); +} + +static int mcryptd_hash_digest_enqueue(struct ahash_request *req) +{ + return mcryptd_hash_enqueue(req, mcryptd_hash_digest); +} + +static int mcryptd_hash_export(struct ahash_request *req, void *out) +{ + struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); + + return crypto_shash_export(&rctx->desc, out); +} + +static int mcryptd_hash_import(struct ahash_request *req, const void *in) +{ + struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); + + return crypto_shash_import(&rctx->desc, in); +} + +static int mcryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb, + struct mcryptd_queue *queue) +{ + struct hashd_instance_ctx *ctx; + struct ahash_instance *inst; + struct shash_alg *salg; + struct crypto_alg *alg; + int err; + + salg = shash_attr_alg(tb[1], 0, 0); + if (IS_ERR(salg)) + return PTR_ERR(salg); + + alg = &salg->base; + pr_debug("crypto: mcryptd hash alg: %s\n", alg->cra_name); + inst = mcryptd_alloc_instance(alg, ahash_instance_headroom(), + sizeof(*ctx)); + err = PTR_ERR(inst); + if (IS_ERR(inst)) + goto out_put_alg; + + ctx = ahash_instance_ctx(inst); + ctx->queue = queue; + + err = crypto_init_shash_spawn(&ctx->spawn, salg, + ahash_crypto_instance(inst)); + if (err) + goto out_free_inst; + + inst->alg.halg.base.cra_flags = CRYPTO_ALG_ASYNC; + + inst->alg.halg.digestsize = salg->digestsize; + inst->alg.halg.base.cra_ctxsize = sizeof(struct mcryptd_hash_ctx); + + inst->alg.halg.base.cra_init = mcryptd_hash_init_tfm; + inst->alg.halg.base.cra_exit = mcryptd_hash_exit_tfm; + + inst->alg.init = mcryptd_hash_init_enqueue; + inst->alg.update = mcryptd_hash_update_enqueue; + inst->alg.final = mcryptd_hash_final_enqueue; + inst->alg.finup = mcryptd_hash_finup_enqueue; + inst->alg.export = mcryptd_hash_export; + inst->alg.import = mcryptd_hash_import; + inst->alg.setkey = mcryptd_hash_setkey; + inst->alg.digest = mcryptd_hash_digest_enqueue; + + err = ahash_register_instance(tmpl, inst); + if (err) { + crypto_drop_shash(&ctx->spawn); +out_free_inst: + kfree(inst); + } + +out_put_alg: + crypto_mod_put(alg); + return err; +} + +static struct mcryptd_queue mqueue; + +static int mcryptd_create(struct crypto_template *tmpl, struct rtattr **tb) +{ + struct crypto_attr_type *algt; + + algt = crypto_get_attr_type(tb); + if (IS_ERR(algt)) + return PTR_ERR(algt); + + switch (algt->type & algt->mask & CRYPTO_ALG_TYPE_MASK) { + case CRYPTO_ALG_TYPE_DIGEST: + return mcryptd_create_hash(tmpl, tb, &mqueue); + break; + } + + return -EINVAL; +} + +static void mcryptd_free(struct crypto_instance *inst) +{ + struct mcryptd_instance_ctx *ctx = crypto_instance_ctx(inst); + struct hashd_instance_ctx *hctx = crypto_instance_ctx(inst); + + switch (inst->alg.cra_flags & CRYPTO_ALG_TYPE_MASK) { + case CRYPTO_ALG_TYPE_AHASH: + crypto_drop_shash(&hctx->spawn); + kfree(ahash_instance(inst)); + return; + default: + crypto_drop_spawn(&ctx->spawn); + kfree(inst); + } +} + +static struct crypto_template mcryptd_tmpl = { + .name = "mcryptd", + .create = mcryptd_create, + .free = mcryptd_free, + .module = THIS_MODULE, +}; + +struct mcryptd_ahash *mcryptd_alloc_ahash(const char *alg_name, + u32 type, u32 mask) +{ + char mcryptd_alg_name[CRYPTO_MAX_ALG_NAME]; + struct crypto_ahash *tfm; + + if (snprintf(mcryptd_alg_name, CRYPTO_MAX_ALG_NAME, + "mcryptd(%s)", alg_name) >= CRYPTO_MAX_ALG_NAME) + return ERR_PTR(-EINVAL); + tfm = crypto_alloc_ahash(mcryptd_alg_name, type, mask); + if (IS_ERR(tfm)) + return ERR_CAST(tfm); + if (tfm->base.__crt_alg->cra_module != THIS_MODULE) { + crypto_free_ahash(tfm); + return ERR_PTR(-EINVAL); + } + + return __mcryptd_ahash_cast(tfm); +} +EXPORT_SYMBOL_GPL(mcryptd_alloc_ahash); + +int shash_ahash_mcryptd_digest(struct ahash_request *req, + struct shash_desc *desc) +{ + int err; + + err = crypto_shash_init(desc) ?: + shash_ahash_mcryptd_finup(req, desc); + + return err; +} +EXPORT_SYMBOL_GPL(shash_ahash_mcryptd_digest); + +int shash_ahash_mcryptd_update(struct ahash_request *req, + struct shash_desc *desc) +{ + struct crypto_shash *tfm = desc->tfm; + struct shash_alg *shash = crypto_shash_alg(tfm); + + /* alignment is to be done by multi-buffer crypto algorithm if needed */ + + return shash->update(desc, NULL, 0); +} +EXPORT_SYMBOL_GPL(shash_ahash_mcryptd_update); + +int shash_ahash_mcryptd_finup(struct ahash_request *req, + struct shash_desc *desc) +{ + struct crypto_shash *tfm = desc->tfm; + struct shash_alg *shash = crypto_shash_alg(tfm); + + /* alignment is to be done by multi-buffer crypto algorithm if needed */ + + return shash->finup(desc, NULL, 0, req->result); +} +EXPORT_SYMBOL_GPL(shash_ahash_mcryptd_finup); + +int shash_ahash_mcryptd_final(struct ahash_request *req, + struct shash_desc *desc) +{ + struct crypto_shash *tfm = desc->tfm; + struct shash_alg *shash = crypto_shash_alg(tfm); + + /* alignment is to be done by multi-buffer crypto algorithm if needed */ + + return shash->final(desc, req->result); +} +EXPORT_SYMBOL_GPL(shash_ahash_mcryptd_final); + +struct crypto_shash *mcryptd_ahash_child(struct mcryptd_ahash *tfm) +{ + struct mcryptd_hash_ctx *ctx = crypto_ahash_ctx(&tfm->base); + + return ctx->child; +} +EXPORT_SYMBOL_GPL(mcryptd_ahash_child); + +struct shash_desc *mcryptd_shash_desc(struct ahash_request *req) +{ + struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); + return &rctx->desc; +} +EXPORT_SYMBOL_GPL(mcryptd_shash_desc); + +void mcryptd_free_ahash(struct mcryptd_ahash *tfm) +{ + crypto_free_ahash(&tfm->base); +} +EXPORT_SYMBOL_GPL(mcryptd_free_ahash); + + +static int __init mcryptd_init(void) +{ + int err, cpu; + struct mcryptd_flush_list *flist; + + mcryptd_flist = alloc_percpu(struct mcryptd_flush_list); + for_each_possible_cpu(cpu) { + flist = per_cpu_ptr(mcryptd_flist, cpu); + INIT_LIST_HEAD(&flist->list); + mutex_init(&flist->lock); + } + + err = mcryptd_init_queue(&mqueue, MCRYPTD_MAX_CPU_QLEN); + if (err) { + free_percpu(mcryptd_flist); + return err; + } + + err = crypto_register_template(&mcryptd_tmpl); + if (err) { + mcryptd_fini_queue(&mqueue); + free_percpu(mcryptd_flist); + } + + return err; +} + +static void __exit mcryptd_exit(void) +{ + mcryptd_fini_queue(&mqueue); + crypto_unregister_template(&mcryptd_tmpl); + free_percpu(mcryptd_flist); +} + +subsys_initcall(mcryptd_init); +module_exit(mcryptd_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Software async multibuffer crypto daemon"); diff --git a/crypto/sha256_generic.c b/crypto/sha256_generic.c index 543366779524..0bb558344699 100644 --- a/crypto/sha256_generic.c +++ b/crypto/sha256_generic.c @@ -24,6 +24,7 @@ #include <linux/types.h> #include <crypto/sha.h> #include <asm/byteorder.h> +#include <asm/unaligned.h> static inline u32 Ch(u32 x, u32 y, u32 z) { @@ -42,7 +43,7 @@ static inline u32 Maj(u32 x, u32 y, u32 z) static inline void LOAD_OP(int I, u32 *W, const u8 *input) { - W[I] = __be32_to_cpu( ((__be32*)(input))[I] ); + W[I] = get_unaligned_be32((__u32 *)input + I); } static inline void BLEND_OP(int I, u32 *W) diff --git a/crypto/sha512_generic.c b/crypto/sha512_generic.c index 6ed124f3ea0f..6dde57dc511b 100644 --- a/crypto/sha512_generic.c +++ b/crypto/sha512_generic.c @@ -20,6 +20,7 @@ #include <crypto/sha.h> #include <linux/percpu.h> #include <asm/byteorder.h> +#include <asm/unaligned.h> static inline u64 Ch(u64 x, u64 y, u64 z) { @@ -68,7 +69,7 @@ static const u64 sha512_K[80] = { static inline void LOAD_OP(int I, u64 *W, const u8 *input) { - W[I] = __be64_to_cpu( ((__be64*)(input))[I] ); + W[I] = get_unaligned_be64((__u64 *)input + I); } static inline void BLEND_OP(int I, u64 *W) diff --git a/crypto/testmgr.c b/crypto/testmgr.c index ac2b63105afc..9459dfd7357f 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -178,9 +178,7 @@ static void testmgr_free_buf(char *buf[XBUFSIZE]) free_page((unsigned long)buf[i]); } -static int do_one_async_hash_op(struct ahash_request *req, - struct tcrypt_result *tr, - int ret) +static int wait_async_op(struct tcrypt_result *tr, int ret) { if (ret == -EINPROGRESS || ret == -EBUSY) { ret = wait_for_completion_interruptible(&tr->completion); @@ -264,30 +262,26 @@ static int __test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, ahash_request_set_crypt(req, sg, result, template[i].psize); if (use_digest) { - ret = do_one_async_hash_op(req, &tresult, - crypto_ahash_digest(req)); + ret = wait_async_op(&tresult, crypto_ahash_digest(req)); if (ret) { pr_err("alg: hash: digest failed on test %d " "for %s: ret=%d\n", j, algo, -ret); goto out; } } else { - ret = do_one_async_hash_op(req, &tresult, - crypto_ahash_init(req)); + ret = wait_async_op(&tresult, crypto_ahash_init(req)); if (ret) { pr_err("alt: hash: init failed on test %d " "for %s: ret=%d\n", j, algo, -ret); goto out; } - ret = do_one_async_hash_op(req, &tresult, - crypto_ahash_update(req)); + ret = wait_async_op(&tresult, crypto_ahash_update(req)); if (ret) { pr_err("alt: hash: update failed on test %d " "for %s: ret=%d\n", j, algo, -ret); goto out; } - ret = do_one_async_hash_op(req, &tresult, - crypto_ahash_final(req)); + ret = wait_async_op(&tresult, crypto_ahash_final(req)); if (ret) { pr_err("alt: hash: final failed on test %d " "for %s: ret=%d\n", j, algo, -ret); @@ -311,78 +305,75 @@ static int __test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, if (align_offset != 0) break; - if (template[i].np) { - j++; - memset(result, 0, MAX_DIGEST_SIZE); + if (!template[i].np) + continue; - temp = 0; - sg_init_table(sg, template[i].np); - ret = -EINVAL; - for (k = 0; k < template[i].np; k++) { - if (WARN_ON(offset_in_page(IDX[k]) + - template[i].tap[k] > PAGE_SIZE)) - goto out; - sg_set_buf(&sg[k], - memcpy(xbuf[IDX[k] >> PAGE_SHIFT] + - offset_in_page(IDX[k]), - template[i].plaintext + temp, - template[i].tap[k]), - template[i].tap[k]); - temp += template[i].tap[k]; - } - - if (template[i].ksize) { - if (template[i].ksize > MAX_KEYLEN) { - pr_err("alg: hash: setkey failed on test %d for %s: key size %d > %d\n", - j, algo, template[i].ksize, - MAX_KEYLEN); - ret = -EINVAL; - goto out; - } - crypto_ahash_clear_flags(tfm, ~0); - memcpy(key, template[i].key, template[i].ksize); - ret = crypto_ahash_setkey(tfm, key, - template[i].ksize); - - if (ret) { - printk(KERN_ERR "alg: hash: setkey " - "failed on chunking test %d " - "for %s: ret=%d\n", j, algo, - -ret); - goto out; - } - } - - ahash_request_set_crypt(req, sg, result, - template[i].psize); - ret = crypto_ahash_digest(req); - switch (ret) { - case 0: - break; - case -EINPROGRESS: - case -EBUSY: - ret = wait_for_completion_interruptible( - &tresult.completion); - if (!ret && !(ret = tresult.err)) { - reinit_completion(&tresult.completion); - break; - } - /* fall through */ - default: - printk(KERN_ERR "alg: hash: digest failed " - "on chunking test %d for %s: " - "ret=%d\n", j, algo, -ret); + j++; + memset(result, 0, MAX_DIGEST_SIZE); + + temp = 0; + sg_init_table(sg, template[i].np); + ret = -EINVAL; + for (k = 0; k < template[i].np; k++) { + if (WARN_ON(offset_in_page(IDX[k]) + + template[i].tap[k] > PAGE_SIZE)) goto out; - } + sg_set_buf(&sg[k], + memcpy(xbuf[IDX[k] >> PAGE_SHIFT] + + offset_in_page(IDX[k]), + template[i].plaintext + temp, + template[i].tap[k]), + template[i].tap[k]); + temp += template[i].tap[k]; + } - if (memcmp(result, template[i].digest, - crypto_ahash_digestsize(tfm))) { - printk(KERN_ERR "alg: hash: Chunking test %d " - "failed for %s\n", j, algo); - hexdump(result, crypto_ahash_digestsize(tfm)); + if (template[i].ksize) { + if (template[i].ksize > MAX_KEYLEN) { + pr_err("alg: hash: setkey failed on test %d for %s: key size %d > %d\n", + j, algo, template[i].ksize, MAX_KEYLEN); ret = -EINVAL; goto out; } + crypto_ahash_clear_flags(tfm, ~0); + memcpy(key, template[i].key, template[i].ksize); + ret = crypto_ahash_setkey(tfm, key, template[i].ksize); + + if (ret) { + printk(KERN_ERR "alg: hash: setkey " + "failed on chunking test %d " + "for %s: ret=%d\n", j, algo, -ret); + goto out; + } + } + + ahash_request_set_crypt(req, sg, result, template[i].psize); + ret = crypto_ahash_digest(req); + switch (ret) { + case 0: + break; + case -EINPROGRESS: + case -EBUSY: + ret = wait_for_completion_interruptible( + &tresult.completion); + if (!ret && !(ret = tresult.err)) { + reinit_completion(&tresult.completion); + break; + } + /* fall through */ + default: + printk(KERN_ERR "alg: hash: digest failed " + "on chunking test %d for %s: " + "ret=%d\n", j, algo, -ret); + goto out; + } + + if (memcmp(result, template[i].digest, + crypto_ahash_digestsize(tfm))) { + printk(KERN_ERR "alg: hash: Chunking test %d " + "failed for %s\n", j, algo); + hexdump(result, crypto_ahash_digestsize(tfm)); + ret = -EINVAL; + goto out; } } @@ -492,121 +483,116 @@ static int __test_aead(struct crypto_aead *tfm, int enc, tcrypt_complete, &result); for (i = 0, j = 0; i < tcount; i++) { - if (!template[i].np) { - j++; + if (template[i].np) + continue; - /* some templates have no input data but they will - * touch input - */ - input = xbuf[0]; - input += align_offset; - assoc = axbuf[0]; + j++; - ret = -EINVAL; - if (WARN_ON(align_offset + template[i].ilen > - PAGE_SIZE || template[i].alen > PAGE_SIZE)) - goto out; + /* some templates have no input data but they will + * touch input + */ + input = xbuf[0]; + input += align_offset; + assoc = axbuf[0]; - memcpy(input, template[i].input, template[i].ilen); - memcpy(assoc, template[i].assoc, template[i].alen); - if (template[i].iv) - memcpy(iv, template[i].iv, MAX_IVLEN); - else - memset(iv, 0, MAX_IVLEN); + ret = -EINVAL; + if (WARN_ON(align_offset + template[i].ilen > + PAGE_SIZE || template[i].alen > PAGE_SIZE)) + goto out; - crypto_aead_clear_flags(tfm, ~0); - if (template[i].wk) - crypto_aead_set_flags( - tfm, CRYPTO_TFM_REQ_WEAK_KEY); + memcpy(input, template[i].input, template[i].ilen); + memcpy(assoc, template[i].assoc, template[i].alen); + if (template[i].iv) + memcpy(iv, template[i].iv, MAX_IVLEN); + else + memset(iv, 0, MAX_IVLEN); - if (template[i].klen > MAX_KEYLEN) { - pr_err("alg: aead%s: setkey failed on test %d for %s: key size %d > %d\n", - d, j, algo, template[i].klen, - MAX_KEYLEN); - ret = -EINVAL; - goto out; - } - memcpy(key, template[i].key, template[i].klen); + crypto_aead_clear_flags(tfm, ~0); + if (template[i].wk) + crypto_aead_set_flags(tfm, CRYPTO_TFM_REQ_WEAK_KEY); - ret = crypto_aead_setkey(tfm, key, - template[i].klen); - if (!ret == template[i].fail) { - pr_err("alg: aead%s: setkey failed on test %d for %s: flags=%x\n", - d, j, algo, crypto_aead_get_flags(tfm)); - goto out; - } else if (ret) - continue; + if (template[i].klen > MAX_KEYLEN) { + pr_err("alg: aead%s: setkey failed on test %d for %s: key size %d > %d\n", + d, j, algo, template[i].klen, + MAX_KEYLEN); + ret = -EINVAL; + goto out; + } + memcpy(key, template[i].key, template[i].klen); - authsize = abs(template[i].rlen - template[i].ilen); - ret = crypto_aead_setauthsize(tfm, authsize); - if (ret) { - pr_err("alg: aead%s: Failed to set authsize to %u on test %d for %s\n", - d, authsize, j, algo); - goto out; - } + ret = crypto_aead_setkey(tfm, key, template[i].klen); + if (!ret == template[i].fail) { + pr_err("alg: aead%s: setkey failed on test %d for %s: flags=%x\n", + d, j, algo, crypto_aead_get_flags(tfm)); + goto out; + } else if (ret) + continue; - if (diff_dst) { - output = xoutbuf[0]; - output += align_offset; - sg_init_one(&sg[0], input, template[i].ilen); - sg_init_one(&sgout[0], output, - template[i].rlen); - } else { - sg_init_one(&sg[0], input, - template[i].ilen + - (enc ? authsize : 0)); - output = input; - } + authsize = abs(template[i].rlen - template[i].ilen); + ret = crypto_aead_setauthsize(tfm, authsize); + if (ret) { + pr_err("alg: aead%s: Failed to set authsize to %u on test %d for %s\n", + d, authsize, j, algo); + goto out; + } - sg_init_one(&asg[0], assoc, template[i].alen); + if (diff_dst) { + output = xoutbuf[0]; + output += align_offset; + sg_init_one(&sg[0], input, template[i].ilen); + sg_init_one(&sgout[0], output, template[i].rlen); + } else { + sg_init_one(&sg[0], input, + template[i].ilen + (enc ? authsize : 0)); + output = input; + } - aead_request_set_crypt(req, sg, (diff_dst) ? sgout : sg, - template[i].ilen, iv); + sg_init_one(&asg[0], assoc, template[i].alen); - aead_request_set_assoc(req, asg, template[i].alen); + aead_request_set_crypt(req, sg, (diff_dst) ? sgout : sg, + template[i].ilen, iv); - ret = enc ? - crypto_aead_encrypt(req) : - crypto_aead_decrypt(req); + aead_request_set_assoc(req, asg, template[i].alen); - switch (ret) { - case 0: - if (template[i].novrfy) { - /* verification was supposed to fail */ - pr_err("alg: aead%s: %s failed on test %d for %s: ret was 0, expected -EBADMSG\n", - d, e, j, algo); - /* so really, we got a bad message */ - ret = -EBADMSG; - goto out; - } - break; - case -EINPROGRESS: - case -EBUSY: - ret = wait_for_completion_interruptible( - &result.completion); - if (!ret && !(ret = result.err)) { - reinit_completion(&result.completion); - break; - } - case -EBADMSG: - if (template[i].novrfy) - /* verification failure was expected */ - continue; - /* fall through */ - default: - pr_err("alg: aead%s: %s failed on test %d for %s: ret=%d\n", - d, e, j, algo, -ret); - goto out; - } + ret = enc ? crypto_aead_encrypt(req) : crypto_aead_decrypt(req); - q = output; - if (memcmp(q, template[i].result, template[i].rlen)) { - pr_err("alg: aead%s: Test %d failed on %s for %s\n", - d, j, e, algo); - hexdump(q, template[i].rlen); - ret = -EINVAL; + switch (ret) { + case 0: + if (template[i].novrfy) { + /* verification was supposed to fail */ + pr_err("alg: aead%s: %s failed on test %d for %s: ret was 0, expected -EBADMSG\n", + d, e, j, algo); + /* so really, we got a bad message */ + ret = -EBADMSG; goto out; } + break; + case -EINPROGRESS: + case -EBUSY: + ret = wait_for_completion_interruptible( + &result.completion); + if (!ret && !(ret = result.err)) { + reinit_completion(&result.completion); + break; + } + case -EBADMSG: + if (template[i].novrfy) + /* verification failure was expected */ + continue; + /* fall through */ + default: + pr_err("alg: aead%s: %s failed on test %d for %s: ret=%d\n", + d, e, j, algo, -ret); + goto out; + } + + q = output; + if (memcmp(q, template[i].result, template[i].rlen)) { + pr_err("alg: aead%s: Test %d failed on %s for %s\n", + d, j, e, algo); + hexdump(q, template[i].rlen); + ret = -EINVAL; + goto out; } } @@ -615,191 +601,182 @@ static int __test_aead(struct crypto_aead *tfm, int enc, if (align_offset != 0) break; - if (template[i].np) { - j++; - - if (template[i].iv) - memcpy(iv, template[i].iv, MAX_IVLEN); - else - memset(iv, 0, MAX_IVLEN); - - crypto_aead_clear_flags(tfm, ~0); - if (template[i].wk) - crypto_aead_set_flags( - tfm, CRYPTO_TFM_REQ_WEAK_KEY); - if (template[i].klen > MAX_KEYLEN) { - pr_err("alg: aead%s: setkey failed on test %d for %s: key size %d > %d\n", - d, j, algo, template[i].klen, - MAX_KEYLEN); - ret = -EINVAL; - goto out; - } - memcpy(key, template[i].key, template[i].klen); + if (!template[i].np) + continue; - ret = crypto_aead_setkey(tfm, key, template[i].klen); - if (!ret == template[i].fail) { - pr_err("alg: aead%s: setkey failed on chunk test %d for %s: flags=%x\n", - d, j, algo, crypto_aead_get_flags(tfm)); - goto out; - } else if (ret) - continue; + j++; - authsize = abs(template[i].rlen - template[i].ilen); + if (template[i].iv) + memcpy(iv, template[i].iv, MAX_IVLEN); + else + memset(iv, 0, MAX_IVLEN); + crypto_aead_clear_flags(tfm, ~0); + if (template[i].wk) + crypto_aead_set_flags(tfm, CRYPTO_TFM_REQ_WEAK_KEY); + if (template[i].klen > MAX_KEYLEN) { + pr_err("alg: aead%s: setkey failed on test %d for %s: key size %d > %d\n", + d, j, algo, template[i].klen, MAX_KEYLEN); ret = -EINVAL; - sg_init_table(sg, template[i].np); - if (diff_dst) - sg_init_table(sgout, template[i].np); - for (k = 0, temp = 0; k < template[i].np; k++) { - if (WARN_ON(offset_in_page(IDX[k]) + - template[i].tap[k] > PAGE_SIZE)) - goto out; - - q = xbuf[IDX[k] >> PAGE_SHIFT] + - offset_in_page(IDX[k]); + goto out; + } + memcpy(key, template[i].key, template[i].klen); - memcpy(q, template[i].input + temp, - template[i].tap[k]); + ret = crypto_aead_setkey(tfm, key, template[i].klen); + if (!ret == template[i].fail) { + pr_err("alg: aead%s: setkey failed on chunk test %d for %s: flags=%x\n", + d, j, algo, crypto_aead_get_flags(tfm)); + goto out; + } else if (ret) + continue; - sg_set_buf(&sg[k], q, template[i].tap[k]); + authsize = abs(template[i].rlen - template[i].ilen); - if (diff_dst) { - q = xoutbuf[IDX[k] >> PAGE_SHIFT] + - offset_in_page(IDX[k]); + ret = -EINVAL; + sg_init_table(sg, template[i].np); + if (diff_dst) + sg_init_table(sgout, template[i].np); + for (k = 0, temp = 0; k < template[i].np; k++) { + if (WARN_ON(offset_in_page(IDX[k]) + + template[i].tap[k] > PAGE_SIZE)) + goto out; - memset(q, 0, template[i].tap[k]); + q = xbuf[IDX[k] >> PAGE_SHIFT] + offset_in_page(IDX[k]); + memcpy(q, template[i].input + temp, template[i].tap[k]); + sg_set_buf(&sg[k], q, template[i].tap[k]); - sg_set_buf(&sgout[k], q, - template[i].tap[k]); - } + if (diff_dst) { + q = xoutbuf[IDX[k] >> PAGE_SHIFT] + + offset_in_page(IDX[k]); - n = template[i].tap[k]; - if (k == template[i].np - 1 && enc) - n += authsize; - if (offset_in_page(q) + n < PAGE_SIZE) - q[n] = 0; + memset(q, 0, template[i].tap[k]); - temp += template[i].tap[k]; + sg_set_buf(&sgout[k], q, template[i].tap[k]); } - ret = crypto_aead_setauthsize(tfm, authsize); - if (ret) { - pr_err("alg: aead%s: Failed to set authsize to %u on chunk test %d for %s\n", - d, authsize, j, algo); + n = template[i].tap[k]; + if (k == template[i].np - 1 && enc) + n += authsize; + if (offset_in_page(q) + n < PAGE_SIZE) + q[n] = 0; + + temp += template[i].tap[k]; + } + + ret = crypto_aead_setauthsize(tfm, authsize); + if (ret) { + pr_err("alg: aead%s: Failed to set authsize to %u on chunk test %d for %s\n", + d, authsize, j, algo); + goto out; + } + + if (enc) { + if (WARN_ON(sg[k - 1].offset + + sg[k - 1].length + authsize > + PAGE_SIZE)) { + ret = -EINVAL; goto out; } - if (enc) { - if (WARN_ON(sg[k - 1].offset + - sg[k - 1].length + authsize > - PAGE_SIZE)) { - ret = -EINVAL; - goto out; - } + if (diff_dst) + sgout[k - 1].length += authsize; + else + sg[k - 1].length += authsize; + } - if (diff_dst) - sgout[k - 1].length += authsize; - else - sg[k - 1].length += authsize; + sg_init_table(asg, template[i].anp); + ret = -EINVAL; + for (k = 0, temp = 0; k < template[i].anp; k++) { + if (WARN_ON(offset_in_page(IDX[k]) + + template[i].atap[k] > PAGE_SIZE)) + goto out; + sg_set_buf(&asg[k], + memcpy(axbuf[IDX[k] >> PAGE_SHIFT] + + offset_in_page(IDX[k]), + template[i].assoc + temp, + template[i].atap[k]), + template[i].atap[k]); + temp += template[i].atap[k]; + } + + aead_request_set_crypt(req, sg, (diff_dst) ? sgout : sg, + template[i].ilen, + iv); + + aead_request_set_assoc(req, asg, template[i].alen); + + ret = enc ? crypto_aead_encrypt(req) : crypto_aead_decrypt(req); + + switch (ret) { + case 0: + if (template[i].novrfy) { + /* verification was supposed to fail */ + pr_err("alg: aead%s: %s failed on chunk test %d for %s: ret was 0, expected -EBADMSG\n", + d, e, j, algo); + /* so really, we got a bad message */ + ret = -EBADMSG; + goto out; } - - sg_init_table(asg, template[i].anp); - ret = -EINVAL; - for (k = 0, temp = 0; k < template[i].anp; k++) { - if (WARN_ON(offset_in_page(IDX[k]) + - template[i].atap[k] > PAGE_SIZE)) - goto out; - sg_set_buf(&asg[k], - memcpy(axbuf[IDX[k] >> PAGE_SHIFT] + - offset_in_page(IDX[k]), - template[i].assoc + temp, - template[i].atap[k]), - template[i].atap[k]); - temp += template[i].atap[k]; - } - - aead_request_set_crypt(req, sg, (diff_dst) ? sgout : sg, - template[i].ilen, - iv); - - aead_request_set_assoc(req, asg, template[i].alen); - - ret = enc ? - crypto_aead_encrypt(req) : - crypto_aead_decrypt(req); - - switch (ret) { - case 0: - if (template[i].novrfy) { - /* verification was supposed to fail */ - pr_err("alg: aead%s: %s failed on chunk test %d for %s: ret was 0, expected -EBADMSG\n", - d, e, j, algo); - /* so really, we got a bad message */ - ret = -EBADMSG; - goto out; - } + break; + case -EINPROGRESS: + case -EBUSY: + ret = wait_for_completion_interruptible( + &result.completion); + if (!ret && !(ret = result.err)) { + reinit_completion(&result.completion); break; - case -EINPROGRESS: - case -EBUSY: - ret = wait_for_completion_interruptible( - &result.completion); - if (!ret && !(ret = result.err)) { - reinit_completion(&result.completion); - break; - } - case -EBADMSG: - if (template[i].novrfy) - /* verification failure was expected */ - continue; - /* fall through */ - default: - pr_err("alg: aead%s: %s failed on chunk test %d for %s: ret=%d\n", - d, e, j, algo, -ret); - goto out; } + case -EBADMSG: + if (template[i].novrfy) + /* verification failure was expected */ + continue; + /* fall through */ + default: + pr_err("alg: aead%s: %s failed on chunk test %d for %s: ret=%d\n", + d, e, j, algo, -ret); + goto out; + } - ret = -EINVAL; - for (k = 0, temp = 0; k < template[i].np; k++) { - if (diff_dst) - q = xoutbuf[IDX[k] >> PAGE_SHIFT] + - offset_in_page(IDX[k]); - else - q = xbuf[IDX[k] >> PAGE_SHIFT] + - offset_in_page(IDX[k]); - - n = template[i].tap[k]; - if (k == template[i].np - 1) - n += enc ? authsize : -authsize; + ret = -EINVAL; + for (k = 0, temp = 0; k < template[i].np; k++) { + if (diff_dst) + q = xoutbuf[IDX[k] >> PAGE_SHIFT] + + offset_in_page(IDX[k]); + else + q = xbuf[IDX[k] >> PAGE_SHIFT] + + offset_in_page(IDX[k]); - if (memcmp(q, template[i].result + temp, n)) { - pr_err("alg: aead%s: Chunk test %d failed on %s at page %u for %s\n", - d, j, e, k, algo); - hexdump(q, n); - goto out; - } + n = template[i].tap[k]; + if (k == template[i].np - 1) + n += enc ? authsize : -authsize; - q += n; - if (k == template[i].np - 1 && !enc) { - if (!diff_dst && - memcmp(q, template[i].input + - temp + n, authsize)) - n = authsize; - else - n = 0; - } else { - for (n = 0; offset_in_page(q + n) && - q[n]; n++) - ; - } - if (n) { - pr_err("alg: aead%s: Result buffer corruption in chunk test %d on %s at page %u for %s: %u bytes:\n", - d, j, e, k, algo, n); - hexdump(q, n); - goto out; - } + if (memcmp(q, template[i].result + temp, n)) { + pr_err("alg: aead%s: Chunk test %d failed on %s at page %u for %s\n", + d, j, e, k, algo); + hexdump(q, n); + goto out; + } - temp += template[i].tap[k]; + q += n; + if (k == template[i].np - 1 && !enc) { + if (!diff_dst && + memcmp(q, template[i].input + + temp + n, authsize)) + n = authsize; + else + n = 0; + } else { + for (n = 0; offset_in_page(q + n) && q[n]; n++) + ; + } + if (n) { + pr_err("alg: aead%s: Result buffer corruption in chunk test %d on %s at page %u for %s: %u bytes:\n", + d, j, e, k, algo, n); + hexdump(q, n); + goto out; } + + temp += template[i].tap[k]; } } @@ -978,78 +955,73 @@ static int __test_skcipher(struct crypto_ablkcipher *tfm, int enc, j = 0; for (i = 0; i < tcount; i++) { + if (template[i].np && !template[i].also_non_np) + continue; + if (template[i].iv) memcpy(iv, template[i].iv, MAX_IVLEN); else memset(iv, 0, MAX_IVLEN); - if (!(template[i].np) || (template[i].also_non_np)) { - j++; + j++; + ret = -EINVAL; + if (WARN_ON(align_offset + template[i].ilen > PAGE_SIZE)) + goto out; - ret = -EINVAL; - if (WARN_ON(align_offset + template[i].ilen > - PAGE_SIZE)) - goto out; + data = xbuf[0]; + data += align_offset; + memcpy(data, template[i].input, template[i].ilen); - data = xbuf[0]; - data += align_offset; - memcpy(data, template[i].input, template[i].ilen); - - crypto_ablkcipher_clear_flags(tfm, ~0); - if (template[i].wk) - crypto_ablkcipher_set_flags( - tfm, CRYPTO_TFM_REQ_WEAK_KEY); - - ret = crypto_ablkcipher_setkey(tfm, template[i].key, - template[i].klen); - if (!ret == template[i].fail) { - pr_err("alg: skcipher%s: setkey failed on test %d for %s: flags=%x\n", - d, j, algo, - crypto_ablkcipher_get_flags(tfm)); - goto out; - } else if (ret) - continue; + crypto_ablkcipher_clear_flags(tfm, ~0); + if (template[i].wk) + crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_REQ_WEAK_KEY); - sg_init_one(&sg[0], data, template[i].ilen); - if (diff_dst) { - data = xoutbuf[0]; - data += align_offset; - sg_init_one(&sgout[0], data, template[i].ilen); - } + ret = crypto_ablkcipher_setkey(tfm, template[i].key, + template[i].klen); + if (!ret == template[i].fail) { + pr_err("alg: skcipher%s: setkey failed on test %d for %s: flags=%x\n", + d, j, algo, crypto_ablkcipher_get_flags(tfm)); + goto out; + } else if (ret) + continue; + + sg_init_one(&sg[0], data, template[i].ilen); + if (diff_dst) { + data = xoutbuf[0]; + data += align_offset; + sg_init_one(&sgout[0], data, template[i].ilen); + } - ablkcipher_request_set_crypt(req, sg, - (diff_dst) ? sgout : sg, - template[i].ilen, iv); - ret = enc ? - crypto_ablkcipher_encrypt(req) : - crypto_ablkcipher_decrypt(req); + ablkcipher_request_set_crypt(req, sg, (diff_dst) ? sgout : sg, + template[i].ilen, iv); + ret = enc ? crypto_ablkcipher_encrypt(req) : + crypto_ablkcipher_decrypt(req); - switch (ret) { - case 0: + switch (ret) { + case 0: + break; + case -EINPROGRESS: + case -EBUSY: + ret = wait_for_completion_interruptible( + &result.completion); + if (!ret && !((ret = result.err))) { + reinit_completion(&result.completion); break; - case -EINPROGRESS: - case -EBUSY: - ret = wait_for_completion_interruptible( - &result.completion); - if (!ret && !((ret = result.err))) { - reinit_completion(&result.completion); - break; - } - /* fall through */ - default: - pr_err("alg: skcipher%s: %s failed on test %d for %s: ret=%d\n", - d, e, j, algo, -ret); - goto out; } + /* fall through */ + default: + pr_err("alg: skcipher%s: %s failed on test %d for %s: ret=%d\n", + d, e, j, algo, -ret); + goto out; + } - q = data; - if (memcmp(q, template[i].result, template[i].rlen)) { - pr_err("alg: skcipher%s: Test %d failed on %s for %s\n", - d, j, e, algo); - hexdump(q, template[i].rlen); - ret = -EINVAL; - goto out; - } + q = data; + if (memcmp(q, template[i].result, template[i].rlen)) { + pr_err("alg: skcipher%s: Test %d failed on %s for %s\n", + d, j, e, algo); + hexdump(q, template[i].rlen); + ret = -EINVAL; + goto out; } } @@ -1059,121 +1031,113 @@ static int __test_skcipher(struct crypto_ablkcipher *tfm, int enc, if (align_offset != 0) break; + if (!template[i].np) + continue; + if (template[i].iv) memcpy(iv, template[i].iv, MAX_IVLEN); else memset(iv, 0, MAX_IVLEN); - if (template[i].np) { - j++; + j++; + crypto_ablkcipher_clear_flags(tfm, ~0); + if (template[i].wk) + crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_REQ_WEAK_KEY); - crypto_ablkcipher_clear_flags(tfm, ~0); - if (template[i].wk) - crypto_ablkcipher_set_flags( - tfm, CRYPTO_TFM_REQ_WEAK_KEY); + ret = crypto_ablkcipher_setkey(tfm, template[i].key, + template[i].klen); + if (!ret == template[i].fail) { + pr_err("alg: skcipher%s: setkey failed on chunk test %d for %s: flags=%x\n", + d, j, algo, crypto_ablkcipher_get_flags(tfm)); + goto out; + } else if (ret) + continue; - ret = crypto_ablkcipher_setkey(tfm, template[i].key, - template[i].klen); - if (!ret == template[i].fail) { - pr_err("alg: skcipher%s: setkey failed on chunk test %d for %s: flags=%x\n", - d, j, algo, - crypto_ablkcipher_get_flags(tfm)); + temp = 0; + ret = -EINVAL; + sg_init_table(sg, template[i].np); + if (diff_dst) + sg_init_table(sgout, template[i].np); + for (k = 0; k < template[i].np; k++) { + if (WARN_ON(offset_in_page(IDX[k]) + + template[i].tap[k] > PAGE_SIZE)) goto out; - } else if (ret) - continue; - temp = 0; - ret = -EINVAL; - sg_init_table(sg, template[i].np); - if (diff_dst) - sg_init_table(sgout, template[i].np); - for (k = 0; k < template[i].np; k++) { - if (WARN_ON(offset_in_page(IDX[k]) + - template[i].tap[k] > PAGE_SIZE)) - goto out; + q = xbuf[IDX[k] >> PAGE_SHIFT] + offset_in_page(IDX[k]); - q = xbuf[IDX[k] >> PAGE_SHIFT] + + memcpy(q, template[i].input + temp, template[i].tap[k]); + + if (offset_in_page(q) + template[i].tap[k] < PAGE_SIZE) + q[template[i].tap[k]] = 0; + + sg_set_buf(&sg[k], q, template[i].tap[k]); + if (diff_dst) { + q = xoutbuf[IDX[k] >> PAGE_SHIFT] + offset_in_page(IDX[k]); - memcpy(q, template[i].input + temp, - template[i].tap[k]); + sg_set_buf(&sgout[k], q, template[i].tap[k]); - if (offset_in_page(q) + template[i].tap[k] < - PAGE_SIZE) + memset(q, 0, template[i].tap[k]); + if (offset_in_page(q) + + template[i].tap[k] < PAGE_SIZE) q[template[i].tap[k]] = 0; + } - sg_set_buf(&sg[k], q, template[i].tap[k]); - if (diff_dst) { - q = xoutbuf[IDX[k] >> PAGE_SHIFT] + - offset_in_page(IDX[k]); + temp += template[i].tap[k]; + } - sg_set_buf(&sgout[k], q, - template[i].tap[k]); + ablkcipher_request_set_crypt(req, sg, (diff_dst) ? sgout : sg, + template[i].ilen, iv); - memset(q, 0, template[i].tap[k]); - if (offset_in_page(q) + - template[i].tap[k] < PAGE_SIZE) - q[template[i].tap[k]] = 0; - } + ret = enc ? crypto_ablkcipher_encrypt(req) : + crypto_ablkcipher_decrypt(req); - temp += template[i].tap[k]; + switch (ret) { + case 0: + break; + case -EINPROGRESS: + case -EBUSY: + ret = wait_for_completion_interruptible( + &result.completion); + if (!ret && !((ret = result.err))) { + reinit_completion(&result.completion); + break; } + /* fall through */ + default: + pr_err("alg: skcipher%s: %s failed on chunk test %d for %s: ret=%d\n", + d, e, j, algo, -ret); + goto out; + } - ablkcipher_request_set_crypt(req, sg, - (diff_dst) ? sgout : sg, - template[i].ilen, iv); - - ret = enc ? - crypto_ablkcipher_encrypt(req) : - crypto_ablkcipher_decrypt(req); + temp = 0; + ret = -EINVAL; + for (k = 0; k < template[i].np; k++) { + if (diff_dst) + q = xoutbuf[IDX[k] >> PAGE_SHIFT] + + offset_in_page(IDX[k]); + else + q = xbuf[IDX[k] >> PAGE_SHIFT] + + offset_in_page(IDX[k]); - switch (ret) { - case 0: - break; - case -EINPROGRESS: - case -EBUSY: - ret = wait_for_completion_interruptible( - &result.completion); - if (!ret && !((ret = result.err))) { - reinit_completion(&result.completion); - break; - } - /* fall through */ - default: - pr_err("alg: skcipher%s: %s failed on chunk test %d for %s: ret=%d\n", - d, e, j, algo, -ret); + if (memcmp(q, template[i].result + temp, + template[i].tap[k])) { + pr_err("alg: skcipher%s: Chunk test %d failed on %s at page %u for %s\n", + d, j, e, k, algo); + hexdump(q, template[i].tap[k]); goto out; } - temp = 0; - ret = -EINVAL; - for (k = 0; k < template[i].np; k++) { - if (diff_dst) - q = xoutbuf[IDX[k] >> PAGE_SHIFT] + - offset_in_page(IDX[k]); - else - q = xbuf[IDX[k] >> PAGE_SHIFT] + - offset_in_page(IDX[k]); - - if (memcmp(q, template[i].result + temp, - template[i].tap[k])) { - pr_err("alg: skcipher%s: Chunk test %d failed on %s at page %u for %s\n", - d, j, e, k, algo); - hexdump(q, template[i].tap[k]); - goto out; - } - - q += template[i].tap[k]; - for (n = 0; offset_in_page(q + n) && q[n]; n++) - ; - if (n) { - pr_err("alg: skcipher%s: Result buffer corruption in chunk test %d on %s at page %u for %s: %u bytes:\n", - d, j, e, k, algo, n); - hexdump(q, n); - goto out; - } - temp += template[i].tap[k]; + q += template[i].tap[k]; + for (n = 0; offset_in_page(q + n) && q[n]; n++) + ; + if (n) { + pr_err("alg: skcipher%s: Result buffer corruption in chunk test %d on %s at page %u for %s: %u bytes:\n", + d, j, e, k, algo, n); + hexdump(q, n); + goto out; } + temp += template[i].tap[k]; } } @@ -3213,6 +3177,38 @@ static const struct alg_test_desc alg_test_descs[] = { } } }, { + .alg = "lz4", + .test = alg_test_comp, + .fips_allowed = 1, + .suite = { + .comp = { + .comp = { + .vecs = lz4_comp_tv_template, + .count = LZ4_COMP_TEST_VECTORS + }, + .decomp = { + .vecs = lz4_decomp_tv_template, + .count = LZ4_DECOMP_TEST_VECTORS + } + } + } + }, { + .alg = "lz4hc", + .test = alg_test_comp, + .fips_allowed = 1, + .suite = { + .comp = { + .comp = { + .vecs = lz4hc_comp_tv_template, + .count = LZ4HC_COMP_TEST_VECTORS + }, + .decomp = { + .vecs = lz4hc_decomp_tv_template, + .count = LZ4HC_DECOMP_TEST_VECTORS + } + } + } + }, { .alg = "lzo", .test = alg_test_comp, .fips_allowed = 1, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 6597203eccfa..62e2485bb428 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -29473,4 +29473,70 @@ static struct hash_testvec bfin_crc_tv_template[] = { }; +#define LZ4_COMP_TEST_VECTORS 1 +#define LZ4_DECOMP_TEST_VECTORS 1 + +static struct comp_testvec lz4_comp_tv_template[] = { + { + .inlen = 70, + .outlen = 45, + .input = "Join us now and share the software " + "Join us now and share the software ", + .output = "\xf0\x10\x4a\x6f\x69\x6e\x20\x75" + "\x73\x20\x6e\x6f\x77\x20\x61\x6e" + "\x64\x20\x73\x68\x61\x72\x65\x20" + "\x74\x68\x65\x20\x73\x6f\x66\x74" + "\x77\x0d\x00\x0f\x23\x00\x0b\x50" + "\x77\x61\x72\x65\x20", + }, +}; + +static struct comp_testvec lz4_decomp_tv_template[] = { + { + .inlen = 45, + .outlen = 70, + .input = "\xf0\x10\x4a\x6f\x69\x6e\x20\x75" + "\x73\x20\x6e\x6f\x77\x20\x61\x6e" + "\x64\x20\x73\x68\x61\x72\x65\x20" + "\x74\x68\x65\x20\x73\x6f\x66\x74" + "\x77\x0d\x00\x0f\x23\x00\x0b\x50" + "\x77\x61\x72\x65\x20", + .output = "Join us now and share the software " + "Join us now and share the software ", + }, +}; + +#define LZ4HC_COMP_TEST_VECTORS 1 +#define LZ4HC_DECOMP_TEST_VECTORS 1 + +static struct comp_testvec lz4hc_comp_tv_template[] = { + { + .inlen = 70, + .outlen = 45, + .input = "Join us now and share the software " + "Join us now and share the software ", + .output = "\xf0\x10\x4a\x6f\x69\x6e\x20\x75" + "\x73\x20\x6e\x6f\x77\x20\x61\x6e" + "\x64\x20\x73\x68\x61\x72\x65\x20" + "\x74\x68\x65\x20\x73\x6f\x66\x74" + "\x77\x0d\x00\x0f\x23\x00\x0b\x50" + "\x77\x61\x72\x65\x20", + }, +}; + +static struct comp_testvec lz4hc_decomp_tv_template[] = { + { + .inlen = 45, + .outlen = 70, + .input = "\xf0\x10\x4a\x6f\x69\x6e\x20\x75" + "\x73\x20\x6e\x6f\x77\x20\x61\x6e" + "\x64\x20\x73\x68\x61\x72\x65\x20" + "\x74\x68\x65\x20\x73\x6f\x66\x74" + "\x77\x0d\x00\x0f\x23\x00\x0b\x50" + "\x77\x61\x72\x65\x20", + .output = "Join us now and share the software " + "Join us now and share the software ", + }, +}; + #endif /* _CRYPTO_TESTMGR_H */ diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig index 836b061ced35..91a04ae8003c 100644 --- a/drivers/char/hw_random/Kconfig +++ b/drivers/char/hw_random/Kconfig @@ -333,6 +333,19 @@ config HW_RANDOM_MSM If unsure, say Y. +config HW_RANDOM_XGENE + tristate "APM X-Gene True Random Number Generator (TRNG) support" + depends on HW_RANDOM && ARCH_XGENE + default HW_RANDOM + ---help--- + This driver provides kernel-side support for the Random Number + Generator hardware found on APM X-Gene SoC. + + To compile this driver as a module, choose M here: the + module will be called xgene_rng. + + If unsure, say Y. + endif # HW_RANDOM config UML_RANDOM diff --git a/drivers/char/hw_random/Makefile b/drivers/char/hw_random/Makefile index 199ed283e149..0b4cd57f4e24 100644 --- a/drivers/char/hw_random/Makefile +++ b/drivers/char/hw_random/Makefile @@ -29,3 +29,4 @@ obj-$(CONFIG_HW_RANDOM_EXYNOS) += exynos-rng.o obj-$(CONFIG_HW_RANDOM_TPM) += tpm-rng.o obj-$(CONFIG_HW_RANDOM_BCM2835) += bcm2835-rng.o obj-$(CONFIG_HW_RANDOM_MSM) += msm-rng.o +obj-$(CONFIG_HW_RANDOM_XGENE) += xgene-rng.o diff --git a/drivers/char/hw_random/amd-rng.c b/drivers/char/hw_random/amd-rng.c index c6af038682f1..48f6a83cdd61 100644 --- a/drivers/char/hw_random/amd-rng.c +++ b/drivers/char/hw_random/amd-rng.c @@ -142,10 +142,10 @@ found: amd_rng.priv = (unsigned long)pmbase; amd_pdev = pdev; - printk(KERN_INFO "AMD768 RNG detected\n"); + pr_info("AMD768 RNG detected\n"); err = hwrng_register(&amd_rng); if (err) { - printk(KERN_ERR PFX "RNG registering failed (%d)\n", + pr_err(PFX "RNG registering failed (%d)\n", err); release_region(pmbase + 0xF0, 8); goto out; diff --git a/drivers/char/hw_random/geode-rng.c b/drivers/char/hw_random/geode-rng.c index 4c4d4e140f98..0d0579fe465e 100644 --- a/drivers/char/hw_random/geode-rng.c +++ b/drivers/char/hw_random/geode-rng.c @@ -109,10 +109,10 @@ found: goto out; geode_rng.priv = (unsigned long)mem; - printk(KERN_INFO "AMD Geode RNG detected\n"); + pr_info("AMD Geode RNG detected\n"); err = hwrng_register(&geode_rng); if (err) { - printk(KERN_ERR PFX "RNG registering failed (%d)\n", + pr_err(PFX "RNG registering failed (%d)\n", err); goto err_unmap; } diff --git a/drivers/char/hw_random/intel-rng.c b/drivers/char/hw_random/intel-rng.c index 86fe45c19968..290c880266bf 100644 --- a/drivers/char/hw_random/intel-rng.c +++ b/drivers/char/hw_random/intel-rng.c @@ -199,7 +199,7 @@ static int intel_rng_init(struct hwrng *rng) if ((hw_status & INTEL_RNG_ENABLED) == 0) hw_status = hwstatus_set(mem, hw_status | INTEL_RNG_ENABLED); if ((hw_status & INTEL_RNG_ENABLED) == 0) { - printk(KERN_ERR PFX "cannot enable RNG, aborting\n"); + pr_err(PFX "cannot enable RNG, aborting\n"); goto out; } err = 0; @@ -216,7 +216,7 @@ static void intel_rng_cleanup(struct hwrng *rng) if (hw_status & INTEL_RNG_ENABLED) hwstatus_set(mem, hw_status & ~INTEL_RNG_ENABLED); else - printk(KERN_WARNING PFX "unusual: RNG already disabled\n"); + pr_warn(PFX "unusual: RNG already disabled\n"); } @@ -274,7 +274,7 @@ static int __init intel_rng_hw_init(void *_intel_rng_hw) if (mfc != INTEL_FWH_MANUFACTURER_CODE || (dvc != INTEL_FWH_DEVICE_CODE_8M && dvc != INTEL_FWH_DEVICE_CODE_4M)) { - printk(KERN_NOTICE PFX "FWH not detected\n"); + pr_notice(PFX "FWH not detected\n"); return -ENODEV; } @@ -306,7 +306,6 @@ static int __init intel_init_hw_struct(struct intel_rng_hw *intel_rng_hw, (BIOS_CNTL_LOCK_ENABLE_MASK|BIOS_CNTL_WRITE_ENABLE_MASK)) == BIOS_CNTL_LOCK_ENABLE_MASK) { static __initdata /*const*/ char warning[] = - KERN_WARNING PFX "Firmware space is locked read-only. If you can't or\n" PFX "don't want to disable this in firmware setup, and if\n" PFX "you are certain that your system has a functional\n" @@ -314,7 +313,7 @@ PFX "RNG, try using the 'no_fwh_detect' option.\n"; if (no_fwh_detect) return -ENODEV; - printk(warning); + pr_warn("%s", warning); return -EBUSY; } @@ -392,10 +391,10 @@ fwh_done: goto out; } - printk(KERN_INFO "Intel 82802 RNG detected\n"); + pr_info("Intel 82802 RNG detected\n"); err = hwrng_register(&intel_rng); if (err) { - printk(KERN_ERR PFX "RNG registering failed (%d)\n", + pr_err(PFX "RNG registering failed (%d)\n", err); iounmap(mem); } diff --git a/drivers/char/hw_random/pasemi-rng.c b/drivers/char/hw_random/pasemi-rng.c index c66279bb6ef3..c0347d1dded0 100644 --- a/drivers/char/hw_random/pasemi-rng.c +++ b/drivers/char/hw_random/pasemi-rng.c @@ -113,7 +113,7 @@ static int rng_probe(struct platform_device *ofdev) pasemi_rng.priv = (unsigned long)rng_regs; - printk(KERN_INFO "Registering PA Semi RNG\n"); + pr_info("Registering PA Semi RNG\n"); err = hwrng_register(&pasemi_rng); diff --git a/drivers/char/hw_random/pseries-rng.c b/drivers/char/hw_random/pseries-rng.c index ab7ffdec0ec3..6226aa08c36a 100644 --- a/drivers/char/hw_random/pseries-rng.c +++ b/drivers/char/hw_random/pseries-rng.c @@ -86,7 +86,7 @@ static struct vio_driver pseries_rng_driver = { static int __init rng_init(void) { - printk(KERN_INFO "Registering IBM pSeries RNG driver\n"); + pr_info("Registering IBM pSeries RNG driver\n"); return vio_register_driver(&pseries_rng_driver); } diff --git a/drivers/char/hw_random/via-rng.c b/drivers/char/hw_random/via-rng.c index de5a6dcfb3e2..a3bebef255ad 100644 --- a/drivers/char/hw_random/via-rng.c +++ b/drivers/char/hw_random/via-rng.c @@ -141,7 +141,7 @@ static int via_rng_init(struct hwrng *rng) * register */ if ((c->x86 == 6) && (c->x86_model >= 0x0f)) { if (!cpu_has_xstore_enabled) { - printk(KERN_ERR PFX "can't enable hardware RNG " + pr_err(PFX "can't enable hardware RNG " "if XSTORE is not enabled\n"); return -ENODEV; } @@ -180,7 +180,7 @@ static int via_rng_init(struct hwrng *rng) unneeded */ rdmsr(MSR_VIA_RNG, lo, hi); if ((lo & VIA_RNG_ENABLE) == 0) { - printk(KERN_ERR PFX "cannot enable VIA C3 RNG, aborting\n"); + pr_err(PFX "cannot enable VIA C3 RNG, aborting\n"); return -ENODEV; } @@ -202,10 +202,10 @@ static int __init mod_init(void) if (!cpu_has_xstore) return -ENODEV; - printk(KERN_INFO "VIA RNG detected\n"); + pr_info("VIA RNG detected\n"); err = hwrng_register(&via_rng); if (err) { - printk(KERN_ERR PFX "RNG registering failed (%d)\n", + pr_err(PFX "RNG registering failed (%d)\n", err); goto out; } diff --git a/drivers/char/hw_random/xgene-rng.c b/drivers/char/hw_random/xgene-rng.c new file mode 100644 index 000000000000..23caa05380a8 --- /dev/null +++ b/drivers/char/hw_random/xgene-rng.c @@ -0,0 +1,423 @@ +/* + * APM X-Gene SoC RNG Driver + * + * Copyright (c) 2014, Applied Micro Circuits Corporation + * Author: Rameshwar Prasad Sahu <rsahu@apm.com> + * Shamal Winchurkar <swinchurkar@apm.com> + * Feng Kan <fkan@apm.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + */ + +#include <linux/clk.h> +#include <linux/delay.h> +#include <linux/hw_random.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/of_platform.h> +#include <linux/of_irq.h> +#include <linux/of_address.h> +#include <linux/timer.h> + +#define RNG_MAX_DATUM 4 +#define MAX_TRY 100 +#define XGENE_RNG_RETRY_COUNT 20 +#define XGENE_RNG_RETRY_INTERVAL 10 + +/* RNG Registers */ +#define RNG_INOUT_0 0x00 +#define RNG_INTR_STS_ACK 0x10 +#define RNG_CONTROL 0x14 +#define RNG_CONFIG 0x18 +#define RNG_ALARMCNT 0x1c +#define RNG_FROENABLE 0x20 +#define RNG_FRODETUNE 0x24 +#define RNG_ALARMMASK 0x28 +#define RNG_ALARMSTOP 0x2c +#define RNG_OPTIONS 0x78 +#define RNG_EIP_REV 0x7c + +#define MONOBIT_FAIL_MASK BIT(7) +#define POKER_FAIL_MASK BIT(6) +#define LONG_RUN_FAIL_MASK BIT(5) +#define RUN_FAIL_MASK BIT(4) +#define NOISE_FAIL_MASK BIT(3) +#define STUCK_OUT_MASK BIT(2) +#define SHUTDOWN_OFLO_MASK BIT(1) +#define READY_MASK BIT(0) + +#define MAJOR_HW_REV_RD(src) (((src) & 0x0f000000) >> 24) +#define MINOR_HW_REV_RD(src) (((src) & 0x00f00000) >> 20) +#define HW_PATCH_LEVEL_RD(src) (((src) & 0x000f0000) >> 16) +#define MAX_REFILL_CYCLES_SET(dst, src) \ + ((dst & ~0xffff0000) | (((u32)src << 16) & 0xffff0000)) +#define MIN_REFILL_CYCLES_SET(dst, src) \ + ((dst & ~0x000000ff) | (((u32)src) & 0x000000ff)) +#define ALARM_THRESHOLD_SET(dst, src) \ + ((dst & ~0x000000ff) | (((u32)src) & 0x000000ff)) +#define ENABLE_RNG_SET(dst, src) \ + ((dst & ~BIT(10)) | (((u32)src << 10) & BIT(10))) +#define REGSPEC_TEST_MODE_SET(dst, src) \ + ((dst & ~BIT(8)) | (((u32)src << 8) & BIT(8))) +#define MONOBIT_FAIL_MASK_SET(dst, src) \ + ((dst & ~BIT(7)) | (((u32)src << 7) & BIT(7))) +#define POKER_FAIL_MASK_SET(dst, src) \ + ((dst & ~BIT(6)) | (((u32)src << 6) & BIT(6))) +#define LONG_RUN_FAIL_MASK_SET(dst, src) \ + ((dst & ~BIT(5)) | (((u32)src << 5) & BIT(5))) +#define RUN_FAIL_MASK_SET(dst, src) \ + ((dst & ~BIT(4)) | (((u32)src << 4) & BIT(4))) +#define NOISE_FAIL_MASK_SET(dst, src) \ + ((dst & ~BIT(3)) | (((u32)src << 3) & BIT(3))) +#define STUCK_OUT_MASK_SET(dst, src) \ + ((dst & ~BIT(2)) | (((u32)src << 2) & BIT(2))) +#define SHUTDOWN_OFLO_MASK_SET(dst, src) \ + ((dst & ~BIT(1)) | (((u32)src << 1) & BIT(1))) + +struct xgene_rng_dev { + u32 irq; + void __iomem *csr_base; + u32 revision; + u32 datum_size; + u32 failure_cnt; /* Failure count last minute */ + unsigned long failure_ts;/* First failure timestamp */ + struct timer_list failure_timer; + struct device *dev; + struct clk *clk; +}; + +static void xgene_rng_expired_timer(unsigned long arg) +{ + struct xgene_rng_dev *ctx = (struct xgene_rng_dev *) arg; + + /* Clear failure counter as timer expired */ + disable_irq(ctx->irq); + ctx->failure_cnt = 0; + del_timer(&ctx->failure_timer); + enable_irq(ctx->irq); +} + +static void xgene_rng_start_timer(struct xgene_rng_dev *ctx) +{ + ctx->failure_timer.data = (unsigned long) ctx; + ctx->failure_timer.function = xgene_rng_expired_timer; + ctx->failure_timer.expires = jiffies + 120 * HZ; + add_timer(&ctx->failure_timer); +} + +/* + * Initialize or reinit free running oscillators (FROs) + */ +static void xgene_rng_init_fro(struct xgene_rng_dev *ctx, u32 fro_val) +{ + writel(fro_val, ctx->csr_base + RNG_FRODETUNE); + writel(0x00000000, ctx->csr_base + RNG_ALARMMASK); + writel(0x00000000, ctx->csr_base + RNG_ALARMSTOP); + writel(0xFFFFFFFF, ctx->csr_base + RNG_FROENABLE); +} + +static void xgene_rng_chk_overflow(struct xgene_rng_dev *ctx) +{ + u32 val; + + val = readl(ctx->csr_base + RNG_INTR_STS_ACK); + if (val & MONOBIT_FAIL_MASK) + /* + * LFSR detected an out-of-bounds number of 1s after + * checking 20,000 bits (test T1 as specified in the + * AIS-31 standard) + */ + dev_err(ctx->dev, "test monobit failure error 0x%08X\n", val); + if (val & POKER_FAIL_MASK) + /* + * LFSR detected an out-of-bounds value in at least one + * of the 16 poker_count_X counters or an out of bounds sum + * of squares value after checking 20,000 bits (test T2 as + * specified in the AIS-31 standard) + */ + dev_err(ctx->dev, "test poker failure error 0x%08X\n", val); + if (val & LONG_RUN_FAIL_MASK) + /* + * LFSR detected a sequence of 34 identical bits + * (test T4 as specified in the AIS-31 standard) + */ + dev_err(ctx->dev, "test long run failure error 0x%08X\n", val); + if (val & RUN_FAIL_MASK) + /* + * LFSR detected an outof-bounds value for at least one + * of the running counters after checking 20,000 bits + * (test T3 as specified in the AIS-31 standard) + */ + dev_err(ctx->dev, "test run failure error 0x%08X\n", val); + if (val & NOISE_FAIL_MASK) + /* LFSR detected a sequence of 48 identical bits */ + dev_err(ctx->dev, "noise failure error 0x%08X\n", val); + if (val & STUCK_OUT_MASK) + /* + * Detected output data registers generated same value twice + * in a row + */ + dev_err(ctx->dev, "stuck out failure error 0x%08X\n", val); + + if (val & SHUTDOWN_OFLO_MASK) { + u32 frostopped; + + /* FROs shut down after a second error event. Try recover. */ + if (++ctx->failure_cnt == 1) { + /* 1st time, just recover */ + ctx->failure_ts = jiffies; + frostopped = readl(ctx->csr_base + RNG_ALARMSTOP); + xgene_rng_init_fro(ctx, frostopped); + + /* + * We must start a timer to clear out this error + * in case the system timer wrap around + */ + xgene_rng_start_timer(ctx); + } else { + /* 2nd time failure in lesser than 1 minute? */ + if (time_after(ctx->failure_ts + 60 * HZ, jiffies)) { + dev_err(ctx->dev, + "FRO shutdown failure error 0x%08X\n", + val); + } else { + /* 2nd time failure after 1 minutes, recover */ + ctx->failure_ts = jiffies; + ctx->failure_cnt = 1; + /* + * We must start a timer to clear out this + * error in case the system timer wrap + * around + */ + xgene_rng_start_timer(ctx); + } + frostopped = readl(ctx->csr_base + RNG_ALARMSTOP); + xgene_rng_init_fro(ctx, frostopped); + } + } + /* Clear them all */ + writel(val, ctx->csr_base + RNG_INTR_STS_ACK); +} + +static irqreturn_t xgene_rng_irq_handler(int irq, void *id) +{ + struct xgene_rng_dev *ctx = (struct xgene_rng_dev *) id; + + /* RNG Alarm Counter overflow */ + xgene_rng_chk_overflow(ctx); + + return IRQ_HANDLED; +} + +static int xgene_rng_data_present(struct hwrng *rng, int wait) +{ + struct xgene_rng_dev *ctx = (struct xgene_rng_dev *) rng->priv; + u32 i, val = 0; + + for (i = 0; i < XGENE_RNG_RETRY_COUNT; i++) { + val = readl(ctx->csr_base + RNG_INTR_STS_ACK); + if ((val & READY_MASK) || !wait) + break; + udelay(XGENE_RNG_RETRY_INTERVAL); + } + + return (val & READY_MASK); +} + +static int xgene_rng_data_read(struct hwrng *rng, u32 *data) +{ + struct xgene_rng_dev *ctx = (struct xgene_rng_dev *) rng->priv; + int i; + + for (i = 0; i < ctx->datum_size; i++) + data[i] = readl(ctx->csr_base + RNG_INOUT_0 + i * 4); + + /* Clear ready bit to start next transaction */ + writel(READY_MASK, ctx->csr_base + RNG_INTR_STS_ACK); + + return ctx->datum_size << 2; +} + +static void xgene_rng_init_internal(struct xgene_rng_dev *ctx) +{ + u32 val; + + writel(0x00000000, ctx->csr_base + RNG_CONTROL); + + val = MAX_REFILL_CYCLES_SET(0, 10); + val = MIN_REFILL_CYCLES_SET(val, 10); + writel(val, ctx->csr_base + RNG_CONFIG); + + val = ALARM_THRESHOLD_SET(0, 0xFF); + writel(val, ctx->csr_base + RNG_ALARMCNT); + + xgene_rng_init_fro(ctx, 0); + + writel(MONOBIT_FAIL_MASK | + POKER_FAIL_MASK | + LONG_RUN_FAIL_MASK | + RUN_FAIL_MASK | + NOISE_FAIL_MASK | + STUCK_OUT_MASK | + SHUTDOWN_OFLO_MASK | + READY_MASK, ctx->csr_base + RNG_INTR_STS_ACK); + + val = ENABLE_RNG_SET(0, 1); + val = MONOBIT_FAIL_MASK_SET(val, 1); + val = POKER_FAIL_MASK_SET(val, 1); + val = LONG_RUN_FAIL_MASK_SET(val, 1); + val = RUN_FAIL_MASK_SET(val, 1); + val = NOISE_FAIL_MASK_SET(val, 1); + val = STUCK_OUT_MASK_SET(val, 1); + val = SHUTDOWN_OFLO_MASK_SET(val, 1); + writel(val, ctx->csr_base + RNG_CONTROL); +} + +static int xgene_rng_init(struct hwrng *rng) +{ + struct xgene_rng_dev *ctx = (struct xgene_rng_dev *) rng->priv; + + ctx->failure_cnt = 0; + init_timer(&ctx->failure_timer); + + ctx->revision = readl(ctx->csr_base + RNG_EIP_REV); + + dev_dbg(ctx->dev, "Rev %d.%d.%d\n", + MAJOR_HW_REV_RD(ctx->revision), + MINOR_HW_REV_RD(ctx->revision), + HW_PATCH_LEVEL_RD(ctx->revision)); + + dev_dbg(ctx->dev, "Options 0x%08X", + readl(ctx->csr_base + RNG_OPTIONS)); + + xgene_rng_init_internal(ctx); + + ctx->datum_size = RNG_MAX_DATUM; + + return 0; +} + +static struct hwrng xgene_rng_func = { + .name = "xgene-rng", + .init = xgene_rng_init, + .data_present = xgene_rng_data_present, + .data_read = xgene_rng_data_read, +}; + +static int xgene_rng_probe(struct platform_device *pdev) +{ + struct resource *res; + struct xgene_rng_dev *ctx; + int rc = 0; + + ctx = devm_kzalloc(&pdev->dev, sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->dev = &pdev->dev; + platform_set_drvdata(pdev, ctx); + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + ctx->csr_base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(ctx->csr_base)) + return PTR_ERR(ctx->csr_base); + + ctx->irq = platform_get_irq(pdev, 0); + if (ctx->irq < 0) { + dev_err(&pdev->dev, "No IRQ resource\n"); + return ctx->irq; + } + + dev_dbg(&pdev->dev, "APM X-Gene RNG BASE %p ALARM IRQ %d", + ctx->csr_base, ctx->irq); + + rc = devm_request_irq(&pdev->dev, ctx->irq, xgene_rng_irq_handler, 0, + dev_name(&pdev->dev), ctx); + if (rc) { + dev_err(&pdev->dev, "Could not request RNG alarm IRQ\n"); + return rc; + } + + /* Enable IP clock */ + ctx->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(ctx->clk)) { + dev_warn(&pdev->dev, "Couldn't get the clock for RNG\n"); + } else { + rc = clk_prepare_enable(ctx->clk); + if (rc) { + dev_warn(&pdev->dev, + "clock prepare enable failed for RNG"); + return rc; + } + } + + xgene_rng_func.priv = (unsigned long) ctx; + + rc = hwrng_register(&xgene_rng_func); + if (rc) { + dev_err(&pdev->dev, "RNG registering failed error %d\n", rc); + if (!IS_ERR(ctx->clk)) + clk_disable_unprepare(ctx->clk); + return rc; + } + + rc = device_init_wakeup(&pdev->dev, 1); + if (rc) { + dev_err(&pdev->dev, "RNG device_init_wakeup failed error %d\n", + rc); + if (!IS_ERR(ctx->clk)) + clk_disable_unprepare(ctx->clk); + hwrng_unregister(&xgene_rng_func); + return rc; + } + + return 0; +} + +static int xgene_rng_remove(struct platform_device *pdev) +{ + struct xgene_rng_dev *ctx = platform_get_drvdata(pdev); + int rc; + + rc = device_init_wakeup(&pdev->dev, 0); + if (rc) + dev_err(&pdev->dev, "RNG init wakeup failed error %d\n", rc); + if (!IS_ERR(ctx->clk)) + clk_disable_unprepare(ctx->clk); + hwrng_unregister(&xgene_rng_func); + + return rc; +} + +static const struct of_device_id xgene_rng_of_match[] = { + { .compatible = "apm,xgene-rng" }, + { } +}; + +MODULE_DEVICE_TABLE(of, xgene_rng_of_match); + +static struct platform_driver xgene_rng_driver = { + .probe = xgene_rng_probe, + .remove = xgene_rng_remove, + .driver = { + .name = "xgene-rng", + .of_match_table = xgene_rng_of_match, + }, +}; + +module_platform_driver(xgene_rng_driver); +MODULE_DESCRIPTION("APM X-Gene RNG driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index b464d03ebf40..f347ab7eea95 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -836,8 +836,9 @@ static int ahash_update_ctx(struct ahash_request *req) edesc->sec4_sg + sec4_sg_src_index, chained); if (*next_buflen) { - sg_copy_part(next_buf, req->src, to_hash - - *buflen, req->nbytes); + scatterwalk_map_and_copy(next_buf, req->src, + to_hash - *buflen, + *next_buflen, 0); state->current_buf = !state->current_buf; } } else { @@ -878,7 +879,8 @@ static int ahash_update_ctx(struct ahash_request *req) kfree(edesc); } } else if (*next_buflen) { - sg_copy(buf + *buflen, req->src, req->nbytes); + scatterwalk_map_and_copy(buf + *buflen, req->src, 0, + req->nbytes, 0); *buflen = *next_buflen; *next_buflen = last_buflen; } @@ -1262,8 +1264,9 @@ static int ahash_update_no_ctx(struct ahash_request *req) src_map_to_sec4_sg(jrdev, req->src, src_nents, edesc->sec4_sg + 1, chained); if (*next_buflen) { - sg_copy_part(next_buf, req->src, to_hash - *buflen, - req->nbytes); + scatterwalk_map_and_copy(next_buf, req->src, + to_hash - *buflen, + *next_buflen, 0); state->current_buf = !state->current_buf; } @@ -1304,7 +1307,8 @@ static int ahash_update_no_ctx(struct ahash_request *req) kfree(edesc); } } else if (*next_buflen) { - sg_copy(buf + *buflen, req->src, req->nbytes); + scatterwalk_map_and_copy(buf + *buflen, req->src, 0, + req->nbytes, 0); *buflen = *next_buflen; *next_buflen = 0; } @@ -1413,9 +1417,9 @@ static int ahash_update_first(struct ahash_request *req) struct device *jrdev = ctx->jrdev; gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC; - u8 *next_buf = state->buf_0 + state->current_buf * - CAAM_MAX_HASH_BLOCK_SIZE; - int *next_buflen = &state->buflen_0 + state->current_buf; + u8 *next_buf = state->current_buf ? state->buf_1 : state->buf_0; + int *next_buflen = state->current_buf ? + &state->buflen_1 : &state->buflen_0; int to_hash; u32 *sh_desc = ctx->sh_desc_update_first, *desc; dma_addr_t ptr = ctx->sh_desc_update_first_dma; @@ -1476,7 +1480,8 @@ static int ahash_update_first(struct ahash_request *req) } if (*next_buflen) - sg_copy_part(next_buf, req->src, to_hash, req->nbytes); + scatterwalk_map_and_copy(next_buf, req->src, to_hash, + *next_buflen, 0); sh_len = desc_len(sh_desc); desc = edesc->hw_desc; @@ -1511,7 +1516,8 @@ static int ahash_update_first(struct ahash_request *req) state->update = ahash_update_no_ctx; state->finup = ahash_finup_no_ctx; state->final = ahash_final_no_ctx; - sg_copy(next_buf, req->src, req->nbytes); + scatterwalk_map_and_copy(next_buf, req->src, 0, + req->nbytes, 0); } #ifdef DEBUG print_hex_dump(KERN_ERR, "next buf@"__stringify(__LINE__)": ", diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index 3cade79ea41e..31000c8c4a90 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -1,5 +1,4 @@ -/* - * CAAM control-plane driver backend +/* * CAAM control-plane driver backend * Controller-level driver, kernel property detection, initialization * * Copyright 2008-2012 Freescale Semiconductor, Inc. @@ -81,38 +80,37 @@ static inline int run_descriptor_deco0(struct device *ctrldev, u32 *desc, u32 *status) { struct caam_drv_private *ctrlpriv = dev_get_drvdata(ctrldev); - struct caam_full __iomem *topregs; + struct caam_ctrl __iomem *ctrl = ctrlpriv->ctrl; + struct caam_deco __iomem *deco = ctrlpriv->deco; unsigned int timeout = 100000; u32 deco_dbg_reg, flags; int i; - /* Set the bit to request direct access to DECO0 */ - topregs = (struct caam_full __iomem *)ctrlpriv->ctrl; if (ctrlpriv->virt_en == 1) { - setbits32(&topregs->ctrl.deco_rsr, DECORSR_JR0); + setbits32(&ctrl->deco_rsr, DECORSR_JR0); - while (!(rd_reg32(&topregs->ctrl.deco_rsr) & DECORSR_VALID) && + while (!(rd_reg32(&ctrl->deco_rsr) & DECORSR_VALID) && --timeout) cpu_relax(); timeout = 100000; } - setbits32(&topregs->ctrl.deco_rq, DECORR_RQD0ENABLE); + setbits32(&ctrl->deco_rq, DECORR_RQD0ENABLE); - while (!(rd_reg32(&topregs->ctrl.deco_rq) & DECORR_DEN0) && + while (!(rd_reg32(&ctrl->deco_rq) & DECORR_DEN0) && --timeout) cpu_relax(); if (!timeout) { dev_err(ctrldev, "failed to acquire DECO 0\n"); - clrbits32(&topregs->ctrl.deco_rq, DECORR_RQD0ENABLE); + clrbits32(&ctrl->deco_rq, DECORR_RQD0ENABLE); return -ENODEV; } for (i = 0; i < desc_len(desc); i++) - wr_reg32(&topregs->deco.descbuf[i], *(desc + i)); + wr_reg32(&deco->descbuf[i], *(desc + i)); flags = DECO_JQCR_WHL; /* @@ -123,11 +121,11 @@ static inline int run_descriptor_deco0(struct device *ctrldev, u32 *desc, flags |= DECO_JQCR_FOUR; /* Instruct the DECO to execute it */ - wr_reg32(&topregs->deco.jr_ctl_hi, flags); + wr_reg32(&deco->jr_ctl_hi, flags); timeout = 10000000; do { - deco_dbg_reg = rd_reg32(&topregs->deco.desc_dbg); + deco_dbg_reg = rd_reg32(&deco->desc_dbg); /* * If an error occured in the descriptor, then * the DECO status field will be set to 0x0D @@ -138,14 +136,14 @@ static inline int run_descriptor_deco0(struct device *ctrldev, u32 *desc, cpu_relax(); } while ((deco_dbg_reg & DESC_DBG_DECO_STAT_VALID) && --timeout); - *status = rd_reg32(&topregs->deco.op_status_hi) & + *status = rd_reg32(&deco->op_status_hi) & DECO_OP_STATUS_HI_ERR_MASK; if (ctrlpriv->virt_en == 1) - clrbits32(&topregs->ctrl.deco_rsr, DECORSR_JR0); + clrbits32(&ctrl->deco_rsr, DECORSR_JR0); /* Mark the DECO as free */ - clrbits32(&topregs->ctrl.deco_rq, DECORR_RQD0ENABLE); + clrbits32(&ctrl->deco_rq, DECORR_RQD0ENABLE); if (!timeout) return -EAGAIN; @@ -176,13 +174,13 @@ static int instantiate_rng(struct device *ctrldev, int state_handle_mask, int gen_sk) { struct caam_drv_private *ctrlpriv = dev_get_drvdata(ctrldev); - struct caam_full __iomem *topregs; + struct caam_ctrl __iomem *ctrl; struct rng4tst __iomem *r4tst; u32 *desc, status, rdsta_val; int ret = 0, sh_idx; - topregs = (struct caam_full __iomem *)ctrlpriv->ctrl; - r4tst = &topregs->ctrl.r4tst[0]; + ctrl = (struct caam_ctrl __iomem *)ctrlpriv->ctrl; + r4tst = &ctrl->r4tst[0]; desc = kmalloc(CAAM_CMD_SZ * 7, GFP_KERNEL); if (!desc) @@ -212,12 +210,11 @@ static int instantiate_rng(struct device *ctrldev, int state_handle_mask, * CAAM eras), then try again. */ rdsta_val = - rd_reg32(&topregs->ctrl.r4tst[0].rdsta) & RDSTA_IFMASK; + rd_reg32(&ctrl->r4tst[0].rdsta) & RDSTA_IFMASK; if (status || !(rdsta_val & (1 << sh_idx))) ret = -EAGAIN; if (ret) break; - dev_info(ctrldev, "Instantiated RNG4 SH%d\n", sh_idx); /* Clear the contents before recreating the descriptor */ memset(desc, 0x00, CAAM_CMD_SZ * 7); @@ -285,12 +282,12 @@ static int caam_remove(struct platform_device *pdev) { struct device *ctrldev; struct caam_drv_private *ctrlpriv; - struct caam_full __iomem *topregs; + struct caam_ctrl __iomem *ctrl; int ring, ret = 0; ctrldev = &pdev->dev; ctrlpriv = dev_get_drvdata(ctrldev); - topregs = (struct caam_full __iomem *)ctrlpriv->ctrl; + ctrl = (struct caam_ctrl __iomem *)ctrlpriv->ctrl; /* Remove platform devices for JobRs */ for (ring = 0; ring < ctrlpriv->total_jobrs; ring++) { @@ -308,7 +305,7 @@ static int caam_remove(struct platform_device *pdev) #endif /* Unmap controller region */ - iounmap(&topregs->ctrl); + iounmap(&ctrl); return ret; } @@ -323,12 +320,12 @@ static void kick_trng(struct platform_device *pdev, int ent_delay) { struct device *ctrldev = &pdev->dev; struct caam_drv_private *ctrlpriv = dev_get_drvdata(ctrldev); - struct caam_full __iomem *topregs; + struct caam_ctrl __iomem *ctrl; struct rng4tst __iomem *r4tst; u32 val; - topregs = (struct caam_full __iomem *)ctrlpriv->ctrl; - r4tst = &topregs->ctrl.r4tst[0]; + ctrl = (struct caam_ctrl __iomem *)ctrlpriv->ctrl; + r4tst = &ctrl->r4tst[0]; /* put RNG4 into program mode */ setbits32(&r4tst->rtmctl, RTMCTL_PRGM); @@ -355,10 +352,19 @@ static void kick_trng(struct platform_device *pdev, int ent_delay) wr_reg32(&r4tst->rtsdctl, val); /* min. freq. count, equal to 1/4 of the entropy sample length */ wr_reg32(&r4tst->rtfrqmin, ent_delay >> 2); - /* max. freq. count, equal to 8 times the entropy sample length */ - wr_reg32(&r4tst->rtfrqmax, ent_delay << 3); + /* disable maximum frequency count */ + wr_reg32(&r4tst->rtfrqmax, RTFRQMAX_DISABLE); + /* read the control register */ + val = rd_reg32(&r4tst->rtmctl); + /* + * select raw sampling in both entropy shifter + * and statistical checker + */ + setbits32(&val, RTMCTL_SAMP_MODE_RAW_ES_SC); /* put RNG4 into run mode */ - clrbits32(&r4tst->rtmctl, RTMCTL_PRGM); + clrbits32(&val, RTMCTL_PRGM); + /* write back the control register */ + wr_reg32(&r4tst->rtmctl, val); } /** @@ -387,13 +393,14 @@ static int caam_probe(struct platform_device *pdev) struct device *dev; struct device_node *nprop, *np; struct caam_ctrl __iomem *ctrl; - struct caam_full __iomem *topregs; struct caam_drv_private *ctrlpriv; #ifdef CONFIG_DEBUG_FS struct caam_perfmon *perfmon; #endif u32 scfgr, comp_params; u32 cha_vid_ls; + int pg_size; + int BLOCK_OFFSET = 0; ctrlpriv = devm_kzalloc(&pdev->dev, sizeof(struct caam_drv_private), GFP_KERNEL); @@ -412,10 +419,27 @@ static int caam_probe(struct platform_device *pdev) dev_err(dev, "caam: of_iomap() failed\n"); return -ENOMEM; } - ctrlpriv->ctrl = (struct caam_ctrl __force *)ctrl; + /* Finding the page size for using the CTPR_MS register */ + comp_params = rd_reg32(&ctrl->perfmon.comp_parms_ms); + pg_size = (comp_params & CTPR_MS_PG_SZ_MASK) >> CTPR_MS_PG_SZ_SHIFT; - /* topregs used to derive pointers to CAAM sub-blocks only */ - topregs = (struct caam_full __iomem *)ctrl; + /* Allocating the BLOCK_OFFSET based on the supported page size on + * the platform + */ + if (pg_size == 0) + BLOCK_OFFSET = PG_SIZE_4K; + else + BLOCK_OFFSET = PG_SIZE_64K; + + ctrlpriv->ctrl = (struct caam_ctrl __force *)ctrl; + ctrlpriv->assure = (struct caam_assurance __force *) + ((uint8_t *)ctrl + + BLOCK_OFFSET * ASSURE_BLOCK_NUMBER + ); + ctrlpriv->deco = (struct caam_deco __force *) + ((uint8_t *)ctrl + + BLOCK_OFFSET * DECO_BLOCK_NUMBER + ); /* Get the IRQ of the controller (for security violations only) */ ctrlpriv->secvio_irq = irq_of_parse_and_map(nprop, 0); @@ -424,15 +448,14 @@ static int caam_probe(struct platform_device *pdev) * Enable DECO watchdogs and, if this is a PHYS_ADDR_T_64BIT kernel, * long pointers in master configuration register */ - setbits32(&topregs->ctrl.mcr, MCFGR_WDENABLE | + setbits32(&ctrl->mcr, MCFGR_WDENABLE | (sizeof(dma_addr_t) == sizeof(u64) ? MCFGR_LONG_PTR : 0)); /* * Read the Compile Time paramters and SCFGR to determine * if Virtualization is enabled for this platform */ - comp_params = rd_reg32(&topregs->ctrl.perfmon.comp_parms_ms); - scfgr = rd_reg32(&topregs->ctrl.scfgr); + scfgr = rd_reg32(&ctrl->scfgr); ctrlpriv->virt_en = 0; if (comp_params & CTPR_MS_VIRT_EN_INCL) { @@ -450,7 +473,7 @@ static int caam_probe(struct platform_device *pdev) } if (ctrlpriv->virt_en == 1) - setbits32(&topregs->ctrl.jrstart, JRSTART_JR0_START | + setbits32(&ctrl->jrstart, JRSTART_JR0_START | JRSTART_JR1_START | JRSTART_JR2_START | JRSTART_JR3_START); @@ -477,7 +500,7 @@ static int caam_probe(struct platform_device *pdev) sizeof(struct platform_device *) * rspec, GFP_KERNEL); if (ctrlpriv->jrpdev == NULL) { - iounmap(&topregs->ctrl); + iounmap(&ctrl); return -ENOMEM; } @@ -493,18 +516,26 @@ static int caam_probe(struct platform_device *pdev) ring); continue; } + ctrlpriv->jr[ring] = (struct caam_job_ring __force *) + ((uint8_t *)ctrl + + (ring + JR_BLOCK_NUMBER) * + BLOCK_OFFSET + ); ctrlpriv->total_jobrs++; ring++; - } + } /* Check to see if QI present. If so, enable */ ctrlpriv->qi_present = - !!(rd_reg32(&topregs->ctrl.perfmon.comp_parms_ms) & + !!(rd_reg32(&ctrl->perfmon.comp_parms_ms) & CTPR_MS_QI_MASK); if (ctrlpriv->qi_present) { - ctrlpriv->qi = (struct caam_queue_if __force *)&topregs->qi; + ctrlpriv->qi = (struct caam_queue_if __force *) + ((uint8_t *)ctrl + + BLOCK_OFFSET * QI_BLOCK_NUMBER + ); /* This is all that's required to physically enable QI */ - wr_reg32(&topregs->qi.qi_control_lo, QICTL_DQEN); + wr_reg32(&ctrlpriv->qi->qi_control_lo, QICTL_DQEN); } /* If no QI and no rings specified, quit and go home */ @@ -514,7 +545,7 @@ static int caam_probe(struct platform_device *pdev) return -ENOMEM; } - cha_vid_ls = rd_reg32(&topregs->ctrl.perfmon.cha_id_ls); + cha_vid_ls = rd_reg32(&ctrl->perfmon.cha_id_ls); /* * If SEC has RNG version >= 4 and RNG state handle has not been @@ -522,7 +553,7 @@ static int caam_probe(struct platform_device *pdev) */ if ((cha_vid_ls & CHA_ID_LS_RNG_MASK) >> CHA_ID_LS_RNG_SHIFT >= 4) { ctrlpriv->rng4_sh_init = - rd_reg32(&topregs->ctrl.r4tst[0].rdsta); + rd_reg32(&ctrl->r4tst[0].rdsta); /* * If the secure keys (TDKEK, JDKEK, TDSK), were already * generated, signal this to the function that is instantiating @@ -533,7 +564,7 @@ static int caam_probe(struct platform_device *pdev) ctrlpriv->rng4_sh_init &= RDSTA_IFMASK; do { int inst_handles = - rd_reg32(&topregs->ctrl.r4tst[0].rdsta) & + rd_reg32(&ctrl->r4tst[0].rdsta) & RDSTA_IFMASK; /* * If either SH were instantiated by somebody else @@ -544,6 +575,9 @@ static int caam_probe(struct platform_device *pdev) * the TRNG parameters. */ if (!(ctrlpriv->rng4_sh_init || inst_handles)) { + dev_info(dev, + "Entropy delay = %u\n", + ent_delay); kick_trng(pdev, ent_delay); ent_delay += 400; } @@ -556,6 +590,12 @@ static int caam_probe(struct platform_device *pdev) */ ret = instantiate_rng(dev, inst_handles, gen_sk); + if (ret == -EAGAIN) + /* + * if here, the loop will rerun, + * so don't hog the CPU + */ + cpu_relax(); } while ((ret == -EAGAIN) && (ent_delay < RTSDCTL_ENT_DLY_MAX)); if (ret) { dev_err(dev, "failed to instantiate RNG"); @@ -569,13 +609,13 @@ static int caam_probe(struct platform_device *pdev) ctrlpriv->rng4_sh_init = ~ctrlpriv->rng4_sh_init & RDSTA_IFMASK; /* Enable RDB bit so that RNG works faster */ - setbits32(&topregs->ctrl.scfgr, SCFGR_RDBENABLE); + setbits32(&ctrl->scfgr, SCFGR_RDBENABLE); } /* NOTE: RTIC detection ought to go here, around Si time */ - caam_id = (u64)rd_reg32(&topregs->ctrl.perfmon.caam_id_ms) << 32 | - (u64)rd_reg32(&topregs->ctrl.perfmon.caam_id_ls); + caam_id = (u64)rd_reg32(&ctrl->perfmon.caam_id_ms) << 32 | + (u64)rd_reg32(&ctrl->perfmon.caam_id_ls); /* Report "alive" for developer to see */ dev_info(dev, "device ID = 0x%016llx (Era %d)\n", caam_id, diff --git a/drivers/crypto/caam/intern.h b/drivers/crypto/caam/intern.h index 97363db4e56e..89b94cc9e7a2 100644 --- a/drivers/crypto/caam/intern.h +++ b/drivers/crypto/caam/intern.h @@ -70,10 +70,11 @@ struct caam_drv_private { struct platform_device *pdev; /* Physical-presence section */ - struct caam_ctrl *ctrl; /* controller region */ - struct caam_deco **deco; /* DECO/CCB views */ - struct caam_assurance *ac; - struct caam_queue_if *qi; /* QI control region */ + struct caam_ctrl __iomem *ctrl; /* controller region */ + struct caam_deco __iomem *deco; /* DECO/CCB views */ + struct caam_assurance __iomem *assure; + struct caam_queue_if __iomem *qi; /* QI control region */ + struct caam_job_ring __iomem *jr[4]; /* JobR's register space */ /* * Detected geometry block. Filled in from device tree if powerpc, diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h index f48e344ffc39..378ddc17f60e 100644 --- a/drivers/crypto/caam/regs.h +++ b/drivers/crypto/caam/regs.h @@ -194,6 +194,8 @@ struct caam_perfmon { #define CTPR_MS_QI_MASK (0x1ull << CTPR_MS_QI_SHIFT) #define CTPR_MS_VIRT_EN_INCL 0x00000001 #define CTPR_MS_VIRT_EN_POR 0x00000002 +#define CTPR_MS_PG_SZ_MASK 0x10 +#define CTPR_MS_PG_SZ_SHIFT 4 u32 comp_parms_ms; /* CTPR - Compile Parameters Register */ u32 comp_parms_ls; /* CTPR - Compile Parameters Register */ u64 rsvd1[2]; @@ -269,6 +271,16 @@ struct rngtst { /* RNG4 TRNG test registers */ struct rng4tst { #define RTMCTL_PRGM 0x00010000 /* 1 -> program mode, 0 -> run mode */ +#define RTMCTL_SAMP_MODE_VON_NEUMANN_ES_SC 0 /* use von Neumann data in + both entropy shifter and + statistical checker */ +#define RTMCTL_SAMP_MODE_RAW_ES_SC 1 /* use raw data in both + entropy shifter and + statistical checker */ +#define RTMCTL_SAMP_MODE_VON_NEUMANN_ES_RAW_SC 2 /* use von Neumann data in + entropy shifter, raw data + in statistical checker */ +#define RTMCTL_SAMP_MODE_INVALID 3 /* invalid combination */ u32 rtmctl; /* misc. control register */ u32 rtscmisc; /* statistical check misc. register */ u32 rtpkrrng; /* poker range register */ @@ -278,7 +290,7 @@ struct rng4tst { }; #define RTSDCTL_ENT_DLY_SHIFT 16 #define RTSDCTL_ENT_DLY_MASK (0xffff << RTSDCTL_ENT_DLY_SHIFT) -#define RTSDCTL_ENT_DLY_MIN 1200 +#define RTSDCTL_ENT_DLY_MIN 3200 #define RTSDCTL_ENT_DLY_MAX 12800 u32 rtsdctl; /* seed control register */ union { @@ -286,6 +298,7 @@ struct rng4tst { u32 rttotsam; /* PRGM=0: total samples register */ }; u32 rtfrqmin; /* frequency count min. limit register */ +#define RTFRQMAX_DISABLE (1 << 20) union { u32 rtfrqmax; /* PRGM=1: freq. count max. limit register */ u32 rtfrqcnt; /* PRGM=0: freq. count register */ @@ -758,34 +771,10 @@ struct caam_deco { #define DECO_JQCR_WHL 0x20000000 #define DECO_JQCR_FOUR 0x10000000 -/* - * Current top-level view of memory map is: - * - * 0x0000 - 0x0fff - CAAM Top-Level Control - * 0x1000 - 0x1fff - Job Ring 0 - * 0x2000 - 0x2fff - Job Ring 1 - * 0x3000 - 0x3fff - Job Ring 2 - * 0x4000 - 0x4fff - Job Ring 3 - * 0x5000 - 0x5fff - (unused) - * 0x6000 - 0x6fff - Assurance Controller - * 0x7000 - 0x7fff - Queue Interface - * 0x8000 - 0x8fff - DECO-CCB 0 - * 0x9000 - 0x9fff - DECO-CCB 1 - * 0xa000 - 0xafff - DECO-CCB 2 - * 0xb000 - 0xbfff - DECO-CCB 3 - * 0xc000 - 0xcfff - DECO-CCB 4 - * - * caam_full describes the full register view of CAAM if useful, - * although many configurations may choose to implement parts of - * the register map separately, in differing privilege regions - */ -struct caam_full { - struct caam_ctrl __iomem ctrl; - struct caam_job_ring jr[4]; - u64 rsvd[512]; - struct caam_assurance assure; - struct caam_queue_if qi; - struct caam_deco deco; -}; - +#define JR_BLOCK_NUMBER 1 +#define ASSURE_BLOCK_NUMBER 6 +#define QI_BLOCK_NUMBER 7 +#define DECO_BLOCK_NUMBER 8 +#define PG_SIZE_4K 0x1000 +#define PG_SIZE_64K 0x10000 #endif /* REGS_H */ diff --git a/drivers/crypto/caam/sg_sw_sec4.h b/drivers/crypto/caam/sg_sw_sec4.h index b12ff85f4241..ce28a563effc 100644 --- a/drivers/crypto/caam/sg_sw_sec4.h +++ b/drivers/crypto/caam/sg_sw_sec4.h @@ -116,57 +116,3 @@ static int dma_unmap_sg_chained(struct device *dev, struct scatterlist *sg, } return nents; } - -/* Map SG page in kernel virtual address space and copy */ -static inline void sg_map_copy(u8 *dest, struct scatterlist *sg, - int len, int offset) -{ - u8 *mapped_addr; - - /* - * Page here can be user-space pinned using get_user_pages - * Same must be kmapped before use and kunmapped subsequently - */ - mapped_addr = kmap_atomic(sg_page(sg)); - memcpy(dest, mapped_addr + offset, len); - kunmap_atomic(mapped_addr); -} - -/* Copy from len bytes of sg to dest, starting from beginning */ -static inline void sg_copy(u8 *dest, struct scatterlist *sg, unsigned int len) -{ - struct scatterlist *current_sg = sg; - int cpy_index = 0, next_cpy_index = current_sg->length; - - while (next_cpy_index < len) { - sg_map_copy(dest + cpy_index, current_sg, current_sg->length, - current_sg->offset); - current_sg = scatterwalk_sg_next(current_sg); - cpy_index = next_cpy_index; - next_cpy_index += current_sg->length; - } - if (cpy_index < len) - sg_map_copy(dest + cpy_index, current_sg, len-cpy_index, - current_sg->offset); -} - -/* Copy sg data, from to_skip to end, to dest */ -static inline void sg_copy_part(u8 *dest, struct scatterlist *sg, - int to_skip, unsigned int end) -{ - struct scatterlist *current_sg = sg; - int sg_index, cpy_index, offset; - - sg_index = current_sg->length; - while (sg_index <= to_skip) { - current_sg = scatterwalk_sg_next(current_sg); - sg_index += current_sg->length; - } - cpy_index = sg_index - to_skip; - offset = current_sg->offset + current_sg->length - cpy_index; - sg_map_copy(dest, current_sg, cpy_index, offset); - if (end - sg_index) { - current_sg = scatterwalk_sg_next(current_sg); - sg_copy(dest + cpy_index, current_sg, end - sg_index); - } -} diff --git a/drivers/crypto/mv_cesa.h b/drivers/crypto/mv_cesa.h index 08fcb1116d90..9249d3ed184b 100644 --- a/drivers/crypto/mv_cesa.h +++ b/drivers/crypto/mv_cesa.h @@ -1,4 +1,5 @@ #ifndef __MV_CRYPTO_H__ +#define __MV_CRYPTO_H__ #define DIGEST_INITIAL_VAL_A 0xdd00 #define DIGEST_INITIAL_VAL_B 0xdd04 diff --git a/drivers/crypto/qat/qat_common/adf_ctl_drv.c b/drivers/crypto/qat/qat_common/adf_ctl_drv.c index 6a92284a86b2..244d73378f0e 100644 --- a/drivers/crypto/qat/qat_common/adf_ctl_drv.c +++ b/drivers/crypto/qat/qat_common/adf_ctl_drv.c @@ -111,7 +111,7 @@ static int adf_chr_drv_create(void) drv_device = device_create(adt_ctl_drv.drv_class, NULL, MKDEV(adt_ctl_drv.major, 0), NULL, DEVICE_NAME); - if (!drv_device) { + if (IS_ERR(drv_device)) { pr_err("QAT: failed to create device\n"); goto err_cdev_del; } diff --git a/drivers/crypto/qat/qat_common/adf_transport_internal.h b/drivers/crypto/qat/qat_common/adf_transport_internal.h index f854bac276b0..c40546079981 100644 --- a/drivers/crypto/qat/qat_common/adf_transport_internal.h +++ b/drivers/crypto/qat/qat_common/adf_transport_internal.h @@ -75,7 +75,7 @@ struct adf_etr_ring_data { struct adf_etr_bank_data { struct adf_etr_ring_data rings[ADF_ETR_MAX_RINGS_PER_BANK]; - struct tasklet_struct resp_hanlder; + struct tasklet_struct resp_handler; void __iomem *csr_addr; struct adf_accel_dev *accel_dev; uint32_t irq_coalesc_timer; diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c index 59df48872955..3e26fa2b293f 100644 --- a/drivers/crypto/qat/qat_common/qat_algs.c +++ b/drivers/crypto/qat/qat_common/qat_algs.c @@ -105,7 +105,7 @@ struct qat_alg_cd { #define MAX_AUTH_STATE_SIZE sizeof(struct icp_qat_hw_auth_algo_blk) struct qat_auth_state { - uint8_t data[MAX_AUTH_STATE_SIZE]; + uint8_t data[MAX_AUTH_STATE_SIZE + 64]; } __aligned(64); struct qat_alg_session_ctx { @@ -113,10 +113,6 @@ struct qat_alg_session_ctx { dma_addr_t enc_cd_paddr; struct qat_alg_cd *dec_cd; dma_addr_t dec_cd_paddr; - struct qat_auth_state *auth_hw_state_enc; - dma_addr_t auth_state_enc_paddr; - struct qat_auth_state *auth_hw_state_dec; - dma_addr_t auth_state_dec_paddr; struct icp_qat_fw_la_bulk_req enc_fw_req_tmpl; struct icp_qat_fw_la_bulk_req dec_fw_req_tmpl; struct qat_crypto_instance *inst; @@ -150,8 +146,9 @@ static int qat_get_inter_state_size(enum icp_qat_hw_auth_algo qat_hash_alg) static int qat_alg_do_precomputes(struct icp_qat_hw_auth_algo_blk *hash, struct qat_alg_session_ctx *ctx, const uint8_t *auth_key, - unsigned int auth_keylen, uint8_t *auth_state) + unsigned int auth_keylen) { + struct qat_auth_state auth_state; struct { struct shash_desc shash; char ctx[crypto_shash_descsize(ctx->hash_tfm)]; @@ -161,12 +158,13 @@ static int qat_alg_do_precomputes(struct icp_qat_hw_auth_algo_blk *hash, struct sha512_state sha512; int block_size = crypto_shash_blocksize(ctx->hash_tfm); int digest_size = crypto_shash_digestsize(ctx->hash_tfm); - uint8_t *ipad = auth_state; + uint8_t *ipad = auth_state.data; uint8_t *opad = ipad + block_size; __be32 *hash_state_out; __be64 *hash512_state_out; int i, offset; + memset(auth_state.data, '\0', MAX_AUTH_STATE_SIZE + 64); desc.shash.tfm = ctx->hash_tfm; desc.shash.flags = 0x0; @@ -298,10 +296,6 @@ static int qat_alg_init_enc_session(struct qat_alg_session_ctx *ctx, void *ptr = &req_tmpl->cd_ctrl; struct icp_qat_fw_cipher_cd_ctrl_hdr *cipher_cd_ctrl = ptr; struct icp_qat_fw_auth_cd_ctrl_hdr *hash_cd_ctrl = ptr; - struct icp_qat_fw_la_auth_req_params *auth_param = - (struct icp_qat_fw_la_auth_req_params *) - ((char *)&req_tmpl->serv_specif_rqpars + - sizeof(struct icp_qat_fw_la_cipher_req_params)); /* CD setup */ cipher->aes.cipher_config.val = QAT_AES_HW_CONFIG_ENC(alg); @@ -312,8 +306,7 @@ static int qat_alg_init_enc_session(struct qat_alg_session_ctx *ctx, hash->sha.inner_setup.auth_counter.counter = cpu_to_be32(crypto_shash_blocksize(ctx->hash_tfm)); - if (qat_alg_do_precomputes(hash, ctx, keys->authkey, keys->authkeylen, - (uint8_t *)ctx->auth_hw_state_enc)) + if (qat_alg_do_precomputes(hash, ctx, keys->authkey, keys->authkeylen)) return -EFAULT; /* Request setup */ @@ -359,9 +352,6 @@ static int qat_alg_init_enc_session(struct qat_alg_session_ctx *ctx, hash_cd_ctrl->inner_state2_offset = hash_cd_ctrl->hash_cfg_offset + ((sizeof(struct icp_qat_hw_auth_setup) + round_up(hash_cd_ctrl->inner_state1_sz, 8)) >> 3); - auth_param->u1.auth_partial_st_prefix = ctx->auth_state_enc_paddr + - sizeof(struct icp_qat_hw_auth_counter) + - round_up(hash_cd_ctrl->inner_state1_sz, 8); ICP_QAT_FW_COMN_CURR_ID_SET(hash_cd_ctrl, ICP_QAT_FW_SLICE_AUTH); ICP_QAT_FW_COMN_NEXT_ID_SET(hash_cd_ctrl, ICP_QAT_FW_SLICE_DRAM_WR); return 0; @@ -399,8 +389,7 @@ static int qat_alg_init_dec_session(struct qat_alg_session_ctx *ctx, hash->sha.inner_setup.auth_counter.counter = cpu_to_be32(crypto_shash_blocksize(ctx->hash_tfm)); - if (qat_alg_do_precomputes(hash, ctx, keys->authkey, keys->authkeylen, - (uint8_t *)ctx->auth_hw_state_dec)) + if (qat_alg_do_precomputes(hash, ctx, keys->authkey, keys->authkeylen)) return -EFAULT; /* Request setup */ @@ -450,9 +439,6 @@ static int qat_alg_init_dec_session(struct qat_alg_session_ctx *ctx, hash_cd_ctrl->inner_state2_offset = hash_cd_ctrl->hash_cfg_offset + ((sizeof(struct icp_qat_hw_auth_setup) + round_up(hash_cd_ctrl->inner_state1_sz, 8)) >> 3); - auth_param->u1.auth_partial_st_prefix = ctx->auth_state_enc_paddr + - sizeof(struct icp_qat_hw_auth_counter) + - round_up(hash_cd_ctrl->inner_state1_sz, 8); auth_param->auth_res_sz = digestsize; ICP_QAT_FW_COMN_CURR_ID_SET(hash_cd_ctrl, ICP_QAT_FW_SLICE_AUTH); ICP_QAT_FW_COMN_NEXT_ID_SET(hash_cd_ctrl, ICP_QAT_FW_SLICE_CIPHER); @@ -512,10 +498,6 @@ static int qat_alg_setkey(struct crypto_aead *tfm, const uint8_t *key, dev = &GET_DEV(ctx->inst->accel_dev); memset(ctx->enc_cd, 0, sizeof(struct qat_alg_cd)); memset(ctx->dec_cd, 0, sizeof(struct qat_alg_cd)); - memset(ctx->auth_hw_state_enc, 0, - sizeof(struct qat_auth_state)); - memset(ctx->auth_hw_state_dec, 0, - sizeof(struct qat_auth_state)); memset(&ctx->enc_fw_req_tmpl, 0, sizeof(struct icp_qat_fw_la_bulk_req)); memset(&ctx->dec_fw_req_tmpl, 0, @@ -548,22 +530,6 @@ static int qat_alg_setkey(struct crypto_aead *tfm, const uint8_t *key, spin_unlock(&ctx->lock); goto out_free_enc; } - ctx->auth_hw_state_enc = - dma_zalloc_coherent(dev, sizeof(struct qat_auth_state), - &ctx->auth_state_enc_paddr, - GFP_ATOMIC); - if (!ctx->auth_hw_state_enc) { - spin_unlock(&ctx->lock); - goto out_free_dec; - } - ctx->auth_hw_state_dec = - dma_zalloc_coherent(dev, sizeof(struct qat_auth_state), - &ctx->auth_state_dec_paddr, - GFP_ATOMIC); - if (!ctx->auth_hw_state_dec) { - spin_unlock(&ctx->lock); - goto out_free_auth_enc; - } } spin_unlock(&ctx->lock); if (qat_alg_init_sessions(ctx, key, keylen)) @@ -572,14 +538,6 @@ static int qat_alg_setkey(struct crypto_aead *tfm, const uint8_t *key, return 0; out_free_all: - dma_free_coherent(dev, sizeof(struct qat_auth_state), - ctx->auth_hw_state_dec, ctx->auth_state_dec_paddr); - ctx->auth_hw_state_dec = NULL; -out_free_auth_enc: - dma_free_coherent(dev, sizeof(struct qat_auth_state), - ctx->auth_hw_state_enc, ctx->auth_state_enc_paddr); - ctx->auth_hw_state_enc = NULL; -out_free_dec: dma_free_coherent(dev, sizeof(struct qat_alg_cd), ctx->dec_cd, ctx->dec_cd_paddr); ctx->dec_cd = NULL; @@ -924,16 +882,6 @@ static void qat_alg_exit(struct crypto_tfm *tfm) if (ctx->dec_cd) dma_free_coherent(dev, sizeof(struct qat_alg_cd), ctx->dec_cd, ctx->dec_cd_paddr); - if (ctx->auth_hw_state_enc) - dma_free_coherent(dev, sizeof(struct qat_auth_state), - ctx->auth_hw_state_enc, - ctx->auth_state_enc_paddr); - - if (ctx->auth_hw_state_dec) - dma_free_coherent(dev, sizeof(struct qat_auth_state), - ctx->auth_hw_state_dec, - ctx->auth_state_dec_paddr); - qat_crypto_put_instance(inst); } diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_isr.c b/drivers/crypto/qat/qat_dh895xcc/adf_isr.c index d4172dedf775..67ec61e51185 100644 --- a/drivers/crypto/qat/qat_dh895xcc/adf_isr.c +++ b/drivers/crypto/qat/qat_dh895xcc/adf_isr.c @@ -70,9 +70,9 @@ static int adf_enable_msix(struct adf_accel_dev *accel_dev) for (i = 0; i < msix_num_entries; i++) pci_dev_info->msix_entries.entries[i].entry = i; - if (pci_enable_msix(pci_dev_info->pci_dev, - pci_dev_info->msix_entries.entries, - msix_num_entries)) { + if (pci_enable_msix_exact(pci_dev_info->pci_dev, + pci_dev_info->msix_entries.entries, + msix_num_entries)) { pr_err("QAT: Failed to enable MSIX IRQ\n"); return -EFAULT; } @@ -89,7 +89,7 @@ static irqreturn_t adf_msix_isr_bundle(int irq, void *bank_ptr) struct adf_etr_bank_data *bank = bank_ptr; WRITE_CSR_INT_FLAG_AND_COL(bank->csr_addr, bank->bank_number, 0); - tasklet_hi_schedule(&bank->resp_hanlder); + tasklet_hi_schedule(&bank->resp_handler); return IRQ_HANDLED; } @@ -217,7 +217,7 @@ static int adf_setup_bh(struct adf_accel_dev *accel_dev) int i; for (i = 0; i < hw_data->num_banks; i++) - tasklet_init(&priv_data->banks[i].resp_hanlder, + tasklet_init(&priv_data->banks[i].resp_handler, adf_response_handler, (unsigned long)&priv_data->banks[i]); return 0; @@ -230,8 +230,8 @@ static void adf_cleanup_bh(struct adf_accel_dev *accel_dev) int i; for (i = 0; i < hw_data->num_banks; i++) { - tasklet_disable(&priv_data->banks[i].resp_hanlder); - tasklet_kill(&priv_data->banks[i].resp_hanlder); + tasklet_disable(&priv_data->banks[i].resp_handler); + tasklet_kill(&priv_data->banks[i].resp_handler); } } diff --git a/include/crypto/drbg.h b/include/crypto/drbg.h index 882675e7c055..5186f750c713 100644 --- a/include/crypto/drbg.h +++ b/include/crypto/drbg.h @@ -82,15 +82,6 @@ typedef uint32_t drbg_flag_t; struct drbg_core { drbg_flag_t flags; /* flags for the cipher */ __u8 statelen; /* maximum state length */ - /* - * maximum length of personalization string or additional input - * string -- exponent for base 2 - */ - __u8 max_addtllen; - /* maximum bits per RNG request -- exponent for base 2*/ - __u8 max_bits; - /* maximum number of requests -- exponent for base 2 */ - __u8 max_req; __u8 blocklen_bytes; /* block size of output in bytes */ char cra_name[CRYPTO_MAX_ALG_NAME]; /* mapping to kernel crypto API */ /* kernel crypto API backend cipher name */ @@ -156,12 +147,13 @@ static inline __u8 drbg_keylen(struct drbg_state *drbg) static inline size_t drbg_max_request_bytes(struct drbg_state *drbg) { - /* max_bits is in bits, but buflen is in bytes */ - return (1 << (drbg->core->max_bits - 3)); + /* SP800-90A requires the limit 2**19 bits, but we return bytes */ + return (1 << 16); } static inline size_t drbg_max_addtl(struct drbg_state *drbg) { + /* SP800-90A requires 2**35 bytes additional info str / pers str */ #if (__BITS_PER_LONG == 32) /* * SP800-90A allows smaller maximum numbers to be returned -- we @@ -170,16 +162,17 @@ static inline size_t drbg_max_addtl(struct drbg_state *drbg) */ return (SIZE_MAX - 1); #else - return (1UL<<(drbg->core->max_addtllen)); + return (1UL<<35); #endif } static inline size_t drbg_max_requests(struct drbg_state *drbg) { + /* SP800-90A requires 2**48 maximum requests before reseeding */ #if (__BITS_PER_LONG == 32) return SIZE_MAX; #else - return (1UL<<(drbg->core->max_req)); + return (1UL<<48); #endif } diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index 9b6f32a6cad1..3b4af1d7c7e9 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -117,6 +117,15 @@ int shash_ahash_update(struct ahash_request *req, struct shash_desc *desc); int shash_ahash_finup(struct ahash_request *req, struct shash_desc *desc); int shash_ahash_digest(struct ahash_request *req, struct shash_desc *desc); +int shash_ahash_mcryptd_update(struct ahash_request *req, + struct shash_desc *desc); +int shash_ahash_mcryptd_final(struct ahash_request *req, + struct shash_desc *desc); +int shash_ahash_mcryptd_finup(struct ahash_request *req, + struct shash_desc *desc); +int shash_ahash_mcryptd_digest(struct ahash_request *req, + struct shash_desc *desc); + int crypto_init_shash_ops_async(struct crypto_tfm *tfm); static inline void *crypto_ahash_ctx(struct crypto_ahash *tfm) diff --git a/include/crypto/mcryptd.h b/include/crypto/mcryptd.h new file mode 100644 index 000000000000..c23ee1f7ee80 --- /dev/null +++ b/include/crypto/mcryptd.h @@ -0,0 +1,112 @@ +/* + * Software async multibuffer crypto daemon headers + * + * Author: + * Tim Chen <tim.c.chen@linux.intel.com> + * + * Copyright (c) 2014, Intel Corporation. + */ + +#ifndef _CRYPTO_MCRYPT_H +#define _CRYPTO_MCRYPT_H + +#include <linux/crypto.h> +#include <linux/kernel.h> +#include <crypto/hash.h> + +struct mcryptd_ahash { + struct crypto_ahash base; +}; + +static inline struct mcryptd_ahash *__mcryptd_ahash_cast( + struct crypto_ahash *tfm) +{ + return (struct mcryptd_ahash *)tfm; +} + +struct mcryptd_cpu_queue { + struct crypto_queue queue; + struct work_struct work; +}; + +struct mcryptd_queue { + struct mcryptd_cpu_queue __percpu *cpu_queue; +}; + +struct mcryptd_instance_ctx { + struct crypto_spawn spawn; + struct mcryptd_queue *queue; +}; + +struct mcryptd_hash_ctx { + struct crypto_shash *child; + struct mcryptd_alg_state *alg_state; +}; + +struct mcryptd_tag { + /* seq number of request */ + unsigned seq_num; + /* arrival time of request */ + unsigned long arrival; + unsigned long expire; + int cpu; +}; + +struct mcryptd_hash_request_ctx { + struct list_head waiter; + crypto_completion_t complete; + struct mcryptd_tag tag; + struct crypto_hash_walk walk; + u8 *out; + int flag; + struct shash_desc desc; +}; + +struct mcryptd_ahash *mcryptd_alloc_ahash(const char *alg_name, + u32 type, u32 mask); +struct crypto_shash *mcryptd_ahash_child(struct mcryptd_ahash *tfm); +struct shash_desc *mcryptd_shash_desc(struct ahash_request *req); +void mcryptd_free_ahash(struct mcryptd_ahash *tfm); +void mcryptd_flusher(struct work_struct *work); + +enum mcryptd_req_type { + MCRYPTD_NONE, + MCRYPTD_UPDATE, + MCRYPTD_FINUP, + MCRYPTD_DIGEST, + MCRYPTD_FINAL +}; + +struct mcryptd_alg_cstate { + unsigned long next_flush; + unsigned next_seq_num; + bool flusher_engaged; + struct delayed_work flush; + int cpu; + struct mcryptd_alg_state *alg_state; + void *mgr; + spinlock_t work_lock; + struct list_head work_list; + struct list_head flush_list; +}; + +struct mcryptd_alg_state { + struct mcryptd_alg_cstate __percpu *alg_cstate; + unsigned long (*flusher)(struct mcryptd_alg_cstate *cstate); +}; + +/* return delay in jiffies from current time */ +static inline unsigned long get_delay(unsigned long t) +{ + long delay; + + delay = (long) t - (long) jiffies; + if (delay <= 0) + return 0; + else + return (unsigned long) delay; +} + +void mcryptd_arm_flusher(struct mcryptd_alg_cstate *cstate, unsigned long delay); + +#endif diff --git a/include/linux/sched.h b/include/linux/sched.h index b867a4dab38a..9c6353d9e63a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -167,6 +167,7 @@ extern int nr_threads; DECLARE_PER_CPU(unsigned long, process_counts); extern int nr_processes(void); extern unsigned long nr_running(void); +extern bool single_task_running(void); extern unsigned long nr_iowait(void); extern unsigned long nr_iowait_cpu(int cpu); extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index ec1a286684a5..59965ec0b7de 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2366,6 +2366,18 @@ unsigned long nr_running(void) return sum; } +/* + * Check if only the current task is running on the cpu. + */ +bool single_task_running(void) +{ + if (cpu_rq(smp_processor_id())->nr_running == 1) + return true; + else + return false; +} +EXPORT_SYMBOL(single_task_running); + unsigned long long nr_context_switches(void) { int i; |