diff options
| -rw-r--r-- | crypto/crc32c.c | 19 | ||||
| -rw-r--r-- | lib/crc/.kunitconfig | 3 | ||||
| -rw-r--r-- | lib/crc/Kconfig | 20 | ||||
| -rw-r--r-- | lib/crc/Makefile | 7 | ||||
| -rw-r--r-- | lib/crc/arm64/crc-t10dif-core.S | 56 | ||||
| -rw-r--r-- | lib/crc/arm64/crc32-core.S | 9 | ||||
| -rw-r--r-- | lib/crc/arm64/crc64-neon-inner.c | 65 | ||||
| -rw-r--r-- | lib/crc/arm64/crc64.h | 28 | ||||
| -rw-r--r-- | lib/crc/tests/crc_kunit.c | 28 | ||||
| -rw-r--r-- | tools/testing/kunit/configs/all_tests.config | 2 |
10 files changed, 172 insertions, 65 deletions
diff --git a/crypto/crc32c.c b/crypto/crc32c.c index 1eff54dde2f7..9c9d0ae21101 100644 --- a/crypto/crc32c.c +++ b/crypto/crc32c.c @@ -1,8 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* - * Cryptographic API. - * - * CRC32C chksum + * crypto_shash support for CRC-32C * *@Article{castagnoli-crc, * author = { Guy Castagnoli and Stefan Braeuer and Martin Herrman}, @@ -15,16 +13,6 @@ * pages = {}, * month = {June}, *} - * Used by the iSCSI driver, possibly others, and derived from - * the iscsi-crc.c module of the linux-iscsi driver at - * http://linux-iscsi.sourceforge.net. - * - * Following the example of lib/crc32, this function is intended to be - * flexible and useful for all users. Modules that currently have their - * own crc32c, but hopefully may be able to use this one are: - * net/sctp (please add all your doco to here if you change to - * use this one!) - * <endoflist> * * Copyright (c) 2004 Cisco Systems, Inc. * Copyright (c) 2008 Herbert Xu <herbert@gondor.apana.org.au> @@ -49,11 +37,6 @@ struct chksum_desc_ctx { u32 crc; }; -/* - * Steps through buffer one byte at a time, calculates reflected - * crc using table. - */ - static int chksum_init(struct shash_desc *desc) { struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm); diff --git a/lib/crc/.kunitconfig b/lib/crc/.kunitconfig new file mode 100644 index 000000000000..0a3671ba573f --- /dev/null +++ b/lib/crc/.kunitconfig @@ -0,0 +1,3 @@ +CONFIG_KUNIT=y +CONFIG_CRC_ENABLE_ALL_FOR_KUNIT=y +CONFIG_CRC_KUNIT_TEST=y diff --git a/lib/crc/Kconfig b/lib/crc/Kconfig index 70e7a6016de3..31038c8d111a 100644 --- a/lib/crc/Kconfig +++ b/lib/crc/Kconfig @@ -48,7 +48,7 @@ config CRC_T10DIF_ARCH bool depends on CRC_T10DIF && CRC_OPTIMIZATIONS default y if ARM && KERNEL_MODE_NEON - default y if ARM64 && KERNEL_MODE_NEON + default y if ARM64 default y if PPC64 && ALTIVEC default y if RISCV && RISCV_ISA_ZBC default y if X86 @@ -82,6 +82,7 @@ config CRC64 config CRC64_ARCH bool depends on CRC64 && CRC_OPTIMIZATIONS + default y if ARM64 default y if RISCV && RISCV_ISA_ZBC && 64BIT default y if X86_64 @@ -99,18 +100,27 @@ config CRC_OPTIMIZATIONS config CRC_KUNIT_TEST tristate "KUnit tests for CRC functions" if !KUNIT_ALL_TESTS - depends on KUNIT + depends on KUNIT && (CRC7 || CRC16 || CRC_T10DIF || CRC32 || CRC64) default KUNIT_ALL_TESTS + help + Unit tests for the CRC library functions. + + This is intended to help people writing architecture-specific + optimized versions. If unsure, say N. + +config CRC_ENABLE_ALL_FOR_KUNIT + tristate "Enable all CRC functions for KUnit test" + depends on KUNIT select CRC7 select CRC16 select CRC_T10DIF select CRC32 select CRC64 help - Unit tests for the CRC library functions. + Enable all CRC functions that have test code in CRC_KUNIT_TEST. - This is intended to help people writing architecture-specific - optimized versions. If unsure, say N. + Enable this only if you'd like the CRC KUnit test suite to test all + the CRC variants, even ones that wouldn't otherwise need to be built. config CRC_BENCHMARK bool "Benchmark for the CRC functions" diff --git a/lib/crc/Makefile b/lib/crc/Makefile index 7543ad295ab6..ff213590e4e3 100644 --- a/lib/crc/Makefile +++ b/lib/crc/Makefile @@ -38,9 +38,14 @@ obj-$(CONFIG_CRC64) += crc64.o crc64-y := crc64-main.o ifeq ($(CONFIG_CRC64_ARCH),y) CFLAGS_crc64-main.o += -I$(src)/$(SRCARCH) + +CFLAGS_REMOVE_arm64/crc64-neon-inner.o += $(CC_FLAGS_NO_FPU) +CFLAGS_arm64/crc64-neon-inner.o += $(CC_FLAGS_FPU) -march=armv8-a+crypto +crc64-$(CONFIG_ARM64) += arm64/crc64-neon-inner.o + crc64-$(CONFIG_RISCV) += riscv/crc64_lsb.o riscv/crc64_msb.o crc64-$(CONFIG_X86) += x86/crc64-pclmul.o -endif +endif # CONFIG_CRC64_ARCH obj-y += tests/ diff --git a/lib/crc/arm64/crc-t10dif-core.S b/lib/crc/arm64/crc-t10dif-core.S index 87dd6d46224d..71388466825b 100644 --- a/lib/crc/arm64/crc-t10dif-core.S +++ b/lib/crc/arm64/crc-t10dif-core.S @@ -181,13 +181,13 @@ SYM_FUNC_END(__pmull_p8_16x64) pmull16x64_\p fold_consts, \reg1, v8 -CPU_LE( rev64 v11.16b, v11.16b ) -CPU_LE( rev64 v12.16b, v12.16b ) + rev64 v11.16b, v11.16b + rev64 v12.16b, v12.16b pmull16x64_\p fold_consts, \reg2, v9 -CPU_LE( ext v11.16b, v11.16b, v11.16b, #8 ) -CPU_LE( ext v12.16b, v12.16b, v12.16b, #8 ) + ext v11.16b, v11.16b, v11.16b, #8 + ext v12.16b, v12.16b, v12.16b, #8 eor \reg1\().16b, \reg1\().16b, v8.16b eor \reg2\().16b, \reg2\().16b, v9.16b @@ -220,22 +220,22 @@ CPU_LE( ext v12.16b, v12.16b, v12.16b, #8 ) ldp q4, q5, [buf, #0x40] ldp q6, q7, [buf, #0x60] add buf, buf, #0x80 -CPU_LE( rev64 v0.16b, v0.16b ) -CPU_LE( rev64 v1.16b, v1.16b ) -CPU_LE( rev64 v2.16b, v2.16b ) -CPU_LE( rev64 v3.16b, v3.16b ) -CPU_LE( rev64 v4.16b, v4.16b ) -CPU_LE( rev64 v5.16b, v5.16b ) -CPU_LE( rev64 v6.16b, v6.16b ) -CPU_LE( rev64 v7.16b, v7.16b ) -CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 ) -CPU_LE( ext v1.16b, v1.16b, v1.16b, #8 ) -CPU_LE( ext v2.16b, v2.16b, v2.16b, #8 ) -CPU_LE( ext v3.16b, v3.16b, v3.16b, #8 ) -CPU_LE( ext v4.16b, v4.16b, v4.16b, #8 ) -CPU_LE( ext v5.16b, v5.16b, v5.16b, #8 ) -CPU_LE( ext v6.16b, v6.16b, v6.16b, #8 ) -CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 ) + rev64 v0.16b, v0.16b + rev64 v1.16b, v1.16b + rev64 v2.16b, v2.16b + rev64 v3.16b, v3.16b + rev64 v4.16b, v4.16b + rev64 v5.16b, v5.16b + rev64 v6.16b, v6.16b + rev64 v7.16b, v7.16b + ext v0.16b, v0.16b, v0.16b, #8 + ext v1.16b, v1.16b, v1.16b, #8 + ext v2.16b, v2.16b, v2.16b, #8 + ext v3.16b, v3.16b, v3.16b, #8 + ext v4.16b, v4.16b, v4.16b, #8 + ext v5.16b, v5.16b, v5.16b, #8 + ext v6.16b, v6.16b, v6.16b, #8 + ext v7.16b, v7.16b, v7.16b, #8 // XOR the first 16 data *bits* with the initial CRC value. movi v8.16b, #0 @@ -288,8 +288,8 @@ CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 ) pmull16x64_\p fold_consts, v7, v8 eor v7.16b, v7.16b, v8.16b ldr q0, [buf], #16 -CPU_LE( rev64 v0.16b, v0.16b ) -CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 ) + rev64 v0.16b, v0.16b + ext v0.16b, v0.16b, v0.16b, #8 eor v7.16b, v7.16b, v0.16b subs len, len, #16 b.ge .Lfold_16_bytes_loop_\@ @@ -310,8 +310,8 @@ CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 ) // v0 = last 16 original data bytes add buf, buf, len ldr q0, [buf, #-16] -CPU_LE( rev64 v0.16b, v0.16b ) -CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 ) + rev64 v0.16b, v0.16b + ext v0.16b, v0.16b, v0.16b, #8 // v1 = high order part of second chunk: v7 left-shifted by 'len' bytes. adr_l x4, .Lbyteshift_table + 16 @@ -344,8 +344,8 @@ CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 ) // Load the first 16 data bytes. ldr q7, [buf], #0x10 -CPU_LE( rev64 v7.16b, v7.16b ) -CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 ) + rev64 v7.16b, v7.16b + ext v7.16b, v7.16b, v7.16b, #8 // XOR the first 16 data *bits* with the initial CRC value. movi v0.16b, #0 @@ -382,8 +382,8 @@ SYM_FUNC_START(crc_t10dif_pmull_p8) crc_t10dif_pmull p8 -CPU_LE( rev64 v7.16b, v7.16b ) -CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 ) + rev64 v7.16b, v7.16b + ext v7.16b, v7.16b, v7.16b, #8 str q7, [x3] frame_pop diff --git a/lib/crc/arm64/crc32-core.S b/lib/crc/arm64/crc32-core.S index 68825317460f..49d02cc485b3 100644 --- a/lib/crc/arm64/crc32-core.S +++ b/lib/crc/arm64/crc32-core.S @@ -29,24 +29,19 @@ .endm .macro hwordle, reg -CPU_BE( rev16 \reg, \reg ) .endm .macro hwordbe, reg -CPU_LE( rev \reg, \reg ) + rev \reg, \reg rbit \reg, \reg -CPU_BE( lsr \reg, \reg, #16 ) .endm .macro le, regs:vararg - .irp r, \regs -CPU_BE( rev \r, \r ) - .endr .endm .macro be, regs:vararg .irp r, \regs -CPU_LE( rev \r, \r ) + rev \r, \r .endr .irp r, \regs rbit \r, \r diff --git a/lib/crc/arm64/crc64-neon-inner.c b/lib/crc/arm64/crc64-neon-inner.c new file mode 100644 index 000000000000..28527e544ff6 --- /dev/null +++ b/lib/crc/arm64/crc64-neon-inner.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Accelerated CRC64 (NVMe) using ARM NEON C intrinsics + */ + +#include <linux/types.h> +#include <asm/neon-intrinsics.h> + +u64 crc64_nvme_arm64_c(u64 crc, const u8 *p, size_t len); + +/* x^191 mod G, x^127 mod G */ +static const u64 fold_consts_val[2] = { 0xeadc41fd2ba3d420ULL, + 0x21e9761e252621acULL }; +/* floor(x^127 / G), (G - x^64) / x */ +static const u64 bconsts_val[2] = { 0x27ecfa329aef9f77ULL, + 0x34d926535897936aULL }; + +static inline uint64x2_t pmull64(uint64x2_t a, uint64x2_t b) +{ + return vreinterpretq_u64_p128(vmull_p64(vgetq_lane_u64(a, 0), + vgetq_lane_u64(b, 0))); +} + +static inline uint64x2_t pmull64_high(uint64x2_t a, uint64x2_t b) +{ + poly64x2_t l = vreinterpretq_p64_u64(a); + poly64x2_t m = vreinterpretq_p64_u64(b); + + return vreinterpretq_u64_p128(vmull_high_p64(l, m)); +} + +static inline uint64x2_t pmull64_hi_lo(uint64x2_t a, uint64x2_t b) +{ + return vreinterpretq_u64_p128(vmull_p64(vgetq_lane_u64(a, 1), + vgetq_lane_u64(b, 0))); +} + +u64 crc64_nvme_arm64_c(u64 crc, const u8 *p, size_t len) +{ + uint64x2_t fold_consts = vld1q_u64(fold_consts_val); + uint64x2_t v0 = { crc, 0 }; + uint64x2_t zero = { }; + + for (;;) { + v0 ^= vreinterpretq_u64_u8(vld1q_u8(p)); + + p += 16; + len -= 16; + if (len < 16) + break; + + v0 = pmull64(fold_consts, v0) ^ pmull64_high(fold_consts, v0); + } + + /* Multiply the 128-bit value by x^64 and reduce it back to 128 bits. */ + v0 = vextq_u64(v0, zero, 1) ^ pmull64_hi_lo(fold_consts, v0); + + /* Final Barrett reduction */ + uint64x2_t bconsts = vld1q_u64(bconsts_val); + uint64x2_t final = pmull64(bconsts, v0); + + v0 ^= vextq_u64(zero, final, 1) ^ pmull64_hi_lo(bconsts, final); + + return vgetq_lane_u64(v0, 1); +} diff --git a/lib/crc/arm64/crc64.h b/lib/crc/arm64/crc64.h new file mode 100644 index 000000000000..60151ec3035a --- /dev/null +++ b/lib/crc/arm64/crc64.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * CRC64 using ARM64 PMULL instructions + */ + +#include <linux/cpufeature.h> +#include <asm/simd.h> +#include <linux/minmax.h> +#include <linux/sizes.h> + +u64 crc64_nvme_arm64_c(u64 crc, const u8 *p, size_t len); + +#define crc64_be_arch crc64_be_generic + +static inline u64 crc64_nvme_arch(u64 crc, const u8 *p, size_t len) +{ + if (len >= 128 && cpu_have_named_feature(PMULL) && + likely(may_use_simd())) { + size_t chunk = len & ~15; + + scoped_ksimd() + crc = crc64_nvme_arm64_c(crc, p, chunk); + + p += chunk; + len &= 15; + } + return crc64_nvme_generic(crc, p, len); +} diff --git a/lib/crc/tests/crc_kunit.c b/lib/crc/tests/crc_kunit.c index 9a450e25ac81..9428cd913625 100644 --- a/lib/crc/tests/crc_kunit.c +++ b/lib/crc/tests/crc_kunit.c @@ -268,8 +268,7 @@ crc_benchmark(struct kunit *test, } } -/* crc7_be */ - +#if IS_REACHABLE(CONFIG_CRC7) static u64 crc7_be_wrapper(u64 crc, const u8 *p, size_t len) { /* @@ -294,9 +293,9 @@ static void crc7_be_benchmark(struct kunit *test) { crc_benchmark(test, crc7_be_wrapper); } +#endif /* CONFIG_CRC7 */ -/* crc16 */ - +#if IS_REACHABLE(CONFIG_CRC16) static u64 crc16_wrapper(u64 crc, const u8 *p, size_t len) { return crc16(crc, p, len); @@ -318,9 +317,9 @@ static void crc16_benchmark(struct kunit *test) { crc_benchmark(test, crc16_wrapper); } +#endif /* CONFIG_CRC16 */ -/* crc_t10dif */ - +#if IS_REACHABLE(CONFIG_CRC_T10DIF) static u64 crc_t10dif_wrapper(u64 crc, const u8 *p, size_t len) { return crc_t10dif_update(crc, p, len); @@ -342,6 +341,9 @@ static void crc_t10dif_benchmark(struct kunit *test) { crc_benchmark(test, crc_t10dif_wrapper); } +#endif /* CONFIG_CRC_T10DIF */ + +#if IS_REACHABLE(CONFIG_CRC32) /* crc32_le */ @@ -414,6 +416,9 @@ static void crc32c_benchmark(struct kunit *test) { crc_benchmark(test, crc32c_wrapper); } +#endif /* CONFIG_CRC32 */ + +#if IS_REACHABLE(CONFIG_CRC64) /* crc64_be */ @@ -463,24 +468,35 @@ static void crc64_nvme_benchmark(struct kunit *test) { crc_benchmark(test, crc64_nvme_wrapper); } +#endif /* CONFIG_CRC64 */ static struct kunit_case crc_test_cases[] = { +#if IS_REACHABLE(CONFIG_CRC7) KUNIT_CASE(crc7_be_test), KUNIT_CASE(crc7_be_benchmark), +#endif +#if IS_REACHABLE(CONFIG_CRC16) KUNIT_CASE(crc16_test), KUNIT_CASE(crc16_benchmark), +#endif +#if IS_REACHABLE(CONFIG_CRC_T10DIF) KUNIT_CASE(crc_t10dif_test), KUNIT_CASE(crc_t10dif_benchmark), +#endif +#if IS_REACHABLE(CONFIG_CRC32) KUNIT_CASE(crc32_le_test), KUNIT_CASE(crc32_le_benchmark), KUNIT_CASE(crc32_be_test), KUNIT_CASE(crc32_be_benchmark), KUNIT_CASE(crc32c_test), KUNIT_CASE(crc32c_benchmark), +#endif +#if IS_REACHABLE(CONFIG_CRC64) KUNIT_CASE(crc64_be_test), KUNIT_CASE(crc64_be_benchmark), KUNIT_CASE(crc64_nvme_test), KUNIT_CASE(crc64_nvme_benchmark), +#endif {}, }; diff --git a/tools/testing/kunit/configs/all_tests.config b/tools/testing/kunit/configs/all_tests.config index 6910b07082da..bccc2c77196d 100644 --- a/tools/testing/kunit/configs/all_tests.config +++ b/tools/testing/kunit/configs/all_tests.config @@ -48,6 +48,8 @@ CONFIG_CRYPTO_LIB_ENABLE_ALL_FOR_KUNIT=y CONFIG_PRIME_NUMBERS=y +CONFIG_CRC_ENABLE_ALL_FOR_KUNIT=y + CONFIG_SECURITY=y CONFIG_SECURITY_APPARMOR=y CONFIG_SECURITY_LANDLOCK=y |
