summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--crypto/crc32c.c19
-rw-r--r--lib/crc/.kunitconfig3
-rw-r--r--lib/crc/Kconfig20
-rw-r--r--lib/crc/Makefile7
-rw-r--r--lib/crc/arm64/crc-t10dif-core.S56
-rw-r--r--lib/crc/arm64/crc32-core.S9
-rw-r--r--lib/crc/arm64/crc64-neon-inner.c65
-rw-r--r--lib/crc/arm64/crc64.h28
-rw-r--r--lib/crc/tests/crc_kunit.c28
-rw-r--r--tools/testing/kunit/configs/all_tests.config2
10 files changed, 172 insertions, 65 deletions
diff --git a/crypto/crc32c.c b/crypto/crc32c.c
index 1eff54dde2f7..9c9d0ae21101 100644
--- a/crypto/crc32c.c
+++ b/crypto/crc32c.c
@@ -1,8 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
- * Cryptographic API.
- *
- * CRC32C chksum
+ * crypto_shash support for CRC-32C
*
*@Article{castagnoli-crc,
* author = { Guy Castagnoli and Stefan Braeuer and Martin Herrman},
@@ -15,16 +13,6 @@
* pages = {},
* month = {June},
*}
- * Used by the iSCSI driver, possibly others, and derived from
- * the iscsi-crc.c module of the linux-iscsi driver at
- * http://linux-iscsi.sourceforge.net.
- *
- * Following the example of lib/crc32, this function is intended to be
- * flexible and useful for all users. Modules that currently have their
- * own crc32c, but hopefully may be able to use this one are:
- * net/sctp (please add all your doco to here if you change to
- * use this one!)
- * <endoflist>
*
* Copyright (c) 2004 Cisco Systems, Inc.
* Copyright (c) 2008 Herbert Xu <herbert@gondor.apana.org.au>
@@ -49,11 +37,6 @@ struct chksum_desc_ctx {
u32 crc;
};
-/*
- * Steps through buffer one byte at a time, calculates reflected
- * crc using table.
- */
-
static int chksum_init(struct shash_desc *desc)
{
struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
diff --git a/lib/crc/.kunitconfig b/lib/crc/.kunitconfig
new file mode 100644
index 000000000000..0a3671ba573f
--- /dev/null
+++ b/lib/crc/.kunitconfig
@@ -0,0 +1,3 @@
+CONFIG_KUNIT=y
+CONFIG_CRC_ENABLE_ALL_FOR_KUNIT=y
+CONFIG_CRC_KUNIT_TEST=y
diff --git a/lib/crc/Kconfig b/lib/crc/Kconfig
index 70e7a6016de3..31038c8d111a 100644
--- a/lib/crc/Kconfig
+++ b/lib/crc/Kconfig
@@ -48,7 +48,7 @@ config CRC_T10DIF_ARCH
bool
depends on CRC_T10DIF && CRC_OPTIMIZATIONS
default y if ARM && KERNEL_MODE_NEON
- default y if ARM64 && KERNEL_MODE_NEON
+ default y if ARM64
default y if PPC64 && ALTIVEC
default y if RISCV && RISCV_ISA_ZBC
default y if X86
@@ -82,6 +82,7 @@ config CRC64
config CRC64_ARCH
bool
depends on CRC64 && CRC_OPTIMIZATIONS
+ default y if ARM64
default y if RISCV && RISCV_ISA_ZBC && 64BIT
default y if X86_64
@@ -99,18 +100,27 @@ config CRC_OPTIMIZATIONS
config CRC_KUNIT_TEST
tristate "KUnit tests for CRC functions" if !KUNIT_ALL_TESTS
- depends on KUNIT
+ depends on KUNIT && (CRC7 || CRC16 || CRC_T10DIF || CRC32 || CRC64)
default KUNIT_ALL_TESTS
+ help
+ Unit tests for the CRC library functions.
+
+ This is intended to help people writing architecture-specific
+ optimized versions. If unsure, say N.
+
+config CRC_ENABLE_ALL_FOR_KUNIT
+ tristate "Enable all CRC functions for KUnit test"
+ depends on KUNIT
select CRC7
select CRC16
select CRC_T10DIF
select CRC32
select CRC64
help
- Unit tests for the CRC library functions.
+ Enable all CRC functions that have test code in CRC_KUNIT_TEST.
- This is intended to help people writing architecture-specific
- optimized versions. If unsure, say N.
+ Enable this only if you'd like the CRC KUnit test suite to test all
+ the CRC variants, even ones that wouldn't otherwise need to be built.
config CRC_BENCHMARK
bool "Benchmark for the CRC functions"
diff --git a/lib/crc/Makefile b/lib/crc/Makefile
index 7543ad295ab6..ff213590e4e3 100644
--- a/lib/crc/Makefile
+++ b/lib/crc/Makefile
@@ -38,9 +38,14 @@ obj-$(CONFIG_CRC64) += crc64.o
crc64-y := crc64-main.o
ifeq ($(CONFIG_CRC64_ARCH),y)
CFLAGS_crc64-main.o += -I$(src)/$(SRCARCH)
+
+CFLAGS_REMOVE_arm64/crc64-neon-inner.o += $(CC_FLAGS_NO_FPU)
+CFLAGS_arm64/crc64-neon-inner.o += $(CC_FLAGS_FPU) -march=armv8-a+crypto
+crc64-$(CONFIG_ARM64) += arm64/crc64-neon-inner.o
+
crc64-$(CONFIG_RISCV) += riscv/crc64_lsb.o riscv/crc64_msb.o
crc64-$(CONFIG_X86) += x86/crc64-pclmul.o
-endif
+endif # CONFIG_CRC64_ARCH
obj-y += tests/
diff --git a/lib/crc/arm64/crc-t10dif-core.S b/lib/crc/arm64/crc-t10dif-core.S
index 87dd6d46224d..71388466825b 100644
--- a/lib/crc/arm64/crc-t10dif-core.S
+++ b/lib/crc/arm64/crc-t10dif-core.S
@@ -181,13 +181,13 @@ SYM_FUNC_END(__pmull_p8_16x64)
pmull16x64_\p fold_consts, \reg1, v8
-CPU_LE( rev64 v11.16b, v11.16b )
-CPU_LE( rev64 v12.16b, v12.16b )
+ rev64 v11.16b, v11.16b
+ rev64 v12.16b, v12.16b
pmull16x64_\p fold_consts, \reg2, v9
-CPU_LE( ext v11.16b, v11.16b, v11.16b, #8 )
-CPU_LE( ext v12.16b, v12.16b, v12.16b, #8 )
+ ext v11.16b, v11.16b, v11.16b, #8
+ ext v12.16b, v12.16b, v12.16b, #8
eor \reg1\().16b, \reg1\().16b, v8.16b
eor \reg2\().16b, \reg2\().16b, v9.16b
@@ -220,22 +220,22 @@ CPU_LE( ext v12.16b, v12.16b, v12.16b, #8 )
ldp q4, q5, [buf, #0x40]
ldp q6, q7, [buf, #0x60]
add buf, buf, #0x80
-CPU_LE( rev64 v0.16b, v0.16b )
-CPU_LE( rev64 v1.16b, v1.16b )
-CPU_LE( rev64 v2.16b, v2.16b )
-CPU_LE( rev64 v3.16b, v3.16b )
-CPU_LE( rev64 v4.16b, v4.16b )
-CPU_LE( rev64 v5.16b, v5.16b )
-CPU_LE( rev64 v6.16b, v6.16b )
-CPU_LE( rev64 v7.16b, v7.16b )
-CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 )
-CPU_LE( ext v1.16b, v1.16b, v1.16b, #8 )
-CPU_LE( ext v2.16b, v2.16b, v2.16b, #8 )
-CPU_LE( ext v3.16b, v3.16b, v3.16b, #8 )
-CPU_LE( ext v4.16b, v4.16b, v4.16b, #8 )
-CPU_LE( ext v5.16b, v5.16b, v5.16b, #8 )
-CPU_LE( ext v6.16b, v6.16b, v6.16b, #8 )
-CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 )
+ rev64 v0.16b, v0.16b
+ rev64 v1.16b, v1.16b
+ rev64 v2.16b, v2.16b
+ rev64 v3.16b, v3.16b
+ rev64 v4.16b, v4.16b
+ rev64 v5.16b, v5.16b
+ rev64 v6.16b, v6.16b
+ rev64 v7.16b, v7.16b
+ ext v0.16b, v0.16b, v0.16b, #8
+ ext v1.16b, v1.16b, v1.16b, #8
+ ext v2.16b, v2.16b, v2.16b, #8
+ ext v3.16b, v3.16b, v3.16b, #8
+ ext v4.16b, v4.16b, v4.16b, #8
+ ext v5.16b, v5.16b, v5.16b, #8
+ ext v6.16b, v6.16b, v6.16b, #8
+ ext v7.16b, v7.16b, v7.16b, #8
// XOR the first 16 data *bits* with the initial CRC value.
movi v8.16b, #0
@@ -288,8 +288,8 @@ CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 )
pmull16x64_\p fold_consts, v7, v8
eor v7.16b, v7.16b, v8.16b
ldr q0, [buf], #16
-CPU_LE( rev64 v0.16b, v0.16b )
-CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 )
+ rev64 v0.16b, v0.16b
+ ext v0.16b, v0.16b, v0.16b, #8
eor v7.16b, v7.16b, v0.16b
subs len, len, #16
b.ge .Lfold_16_bytes_loop_\@
@@ -310,8 +310,8 @@ CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 )
// v0 = last 16 original data bytes
add buf, buf, len
ldr q0, [buf, #-16]
-CPU_LE( rev64 v0.16b, v0.16b )
-CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 )
+ rev64 v0.16b, v0.16b
+ ext v0.16b, v0.16b, v0.16b, #8
// v1 = high order part of second chunk: v7 left-shifted by 'len' bytes.
adr_l x4, .Lbyteshift_table + 16
@@ -344,8 +344,8 @@ CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 )
// Load the first 16 data bytes.
ldr q7, [buf], #0x10
-CPU_LE( rev64 v7.16b, v7.16b )
-CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 )
+ rev64 v7.16b, v7.16b
+ ext v7.16b, v7.16b, v7.16b, #8
// XOR the first 16 data *bits* with the initial CRC value.
movi v0.16b, #0
@@ -382,8 +382,8 @@ SYM_FUNC_START(crc_t10dif_pmull_p8)
crc_t10dif_pmull p8
-CPU_LE( rev64 v7.16b, v7.16b )
-CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 )
+ rev64 v7.16b, v7.16b
+ ext v7.16b, v7.16b, v7.16b, #8
str q7, [x3]
frame_pop
diff --git a/lib/crc/arm64/crc32-core.S b/lib/crc/arm64/crc32-core.S
index 68825317460f..49d02cc485b3 100644
--- a/lib/crc/arm64/crc32-core.S
+++ b/lib/crc/arm64/crc32-core.S
@@ -29,24 +29,19 @@
.endm
.macro hwordle, reg
-CPU_BE( rev16 \reg, \reg )
.endm
.macro hwordbe, reg
-CPU_LE( rev \reg, \reg )
+ rev \reg, \reg
rbit \reg, \reg
-CPU_BE( lsr \reg, \reg, #16 )
.endm
.macro le, regs:vararg
- .irp r, \regs
-CPU_BE( rev \r, \r )
- .endr
.endm
.macro be, regs:vararg
.irp r, \regs
-CPU_LE( rev \r, \r )
+ rev \r, \r
.endr
.irp r, \regs
rbit \r, \r
diff --git a/lib/crc/arm64/crc64-neon-inner.c b/lib/crc/arm64/crc64-neon-inner.c
new file mode 100644
index 000000000000..28527e544ff6
--- /dev/null
+++ b/lib/crc/arm64/crc64-neon-inner.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Accelerated CRC64 (NVMe) using ARM NEON C intrinsics
+ */
+
+#include <linux/types.h>
+#include <asm/neon-intrinsics.h>
+
+u64 crc64_nvme_arm64_c(u64 crc, const u8 *p, size_t len);
+
+/* x^191 mod G, x^127 mod G */
+static const u64 fold_consts_val[2] = { 0xeadc41fd2ba3d420ULL,
+ 0x21e9761e252621acULL };
+/* floor(x^127 / G), (G - x^64) / x */
+static const u64 bconsts_val[2] = { 0x27ecfa329aef9f77ULL,
+ 0x34d926535897936aULL };
+
+static inline uint64x2_t pmull64(uint64x2_t a, uint64x2_t b)
+{
+ return vreinterpretq_u64_p128(vmull_p64(vgetq_lane_u64(a, 0),
+ vgetq_lane_u64(b, 0)));
+}
+
+static inline uint64x2_t pmull64_high(uint64x2_t a, uint64x2_t b)
+{
+ poly64x2_t l = vreinterpretq_p64_u64(a);
+ poly64x2_t m = vreinterpretq_p64_u64(b);
+
+ return vreinterpretq_u64_p128(vmull_high_p64(l, m));
+}
+
+static inline uint64x2_t pmull64_hi_lo(uint64x2_t a, uint64x2_t b)
+{
+ return vreinterpretq_u64_p128(vmull_p64(vgetq_lane_u64(a, 1),
+ vgetq_lane_u64(b, 0)));
+}
+
+u64 crc64_nvme_arm64_c(u64 crc, const u8 *p, size_t len)
+{
+ uint64x2_t fold_consts = vld1q_u64(fold_consts_val);
+ uint64x2_t v0 = { crc, 0 };
+ uint64x2_t zero = { };
+
+ for (;;) {
+ v0 ^= vreinterpretq_u64_u8(vld1q_u8(p));
+
+ p += 16;
+ len -= 16;
+ if (len < 16)
+ break;
+
+ v0 = pmull64(fold_consts, v0) ^ pmull64_high(fold_consts, v0);
+ }
+
+ /* Multiply the 128-bit value by x^64 and reduce it back to 128 bits. */
+ v0 = vextq_u64(v0, zero, 1) ^ pmull64_hi_lo(fold_consts, v0);
+
+ /* Final Barrett reduction */
+ uint64x2_t bconsts = vld1q_u64(bconsts_val);
+ uint64x2_t final = pmull64(bconsts, v0);
+
+ v0 ^= vextq_u64(zero, final, 1) ^ pmull64_hi_lo(bconsts, final);
+
+ return vgetq_lane_u64(v0, 1);
+}
diff --git a/lib/crc/arm64/crc64.h b/lib/crc/arm64/crc64.h
new file mode 100644
index 000000000000..60151ec3035a
--- /dev/null
+++ b/lib/crc/arm64/crc64.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * CRC64 using ARM64 PMULL instructions
+ */
+
+#include <linux/cpufeature.h>
+#include <asm/simd.h>
+#include <linux/minmax.h>
+#include <linux/sizes.h>
+
+u64 crc64_nvme_arm64_c(u64 crc, const u8 *p, size_t len);
+
+#define crc64_be_arch crc64_be_generic
+
+static inline u64 crc64_nvme_arch(u64 crc, const u8 *p, size_t len)
+{
+ if (len >= 128 && cpu_have_named_feature(PMULL) &&
+ likely(may_use_simd())) {
+ size_t chunk = len & ~15;
+
+ scoped_ksimd()
+ crc = crc64_nvme_arm64_c(crc, p, chunk);
+
+ p += chunk;
+ len &= 15;
+ }
+ return crc64_nvme_generic(crc, p, len);
+}
diff --git a/lib/crc/tests/crc_kunit.c b/lib/crc/tests/crc_kunit.c
index 9a450e25ac81..9428cd913625 100644
--- a/lib/crc/tests/crc_kunit.c
+++ b/lib/crc/tests/crc_kunit.c
@@ -268,8 +268,7 @@ crc_benchmark(struct kunit *test,
}
}
-/* crc7_be */
-
+#if IS_REACHABLE(CONFIG_CRC7)
static u64 crc7_be_wrapper(u64 crc, const u8 *p, size_t len)
{
/*
@@ -294,9 +293,9 @@ static void crc7_be_benchmark(struct kunit *test)
{
crc_benchmark(test, crc7_be_wrapper);
}
+#endif /* CONFIG_CRC7 */
-/* crc16 */
-
+#if IS_REACHABLE(CONFIG_CRC16)
static u64 crc16_wrapper(u64 crc, const u8 *p, size_t len)
{
return crc16(crc, p, len);
@@ -318,9 +317,9 @@ static void crc16_benchmark(struct kunit *test)
{
crc_benchmark(test, crc16_wrapper);
}
+#endif /* CONFIG_CRC16 */
-/* crc_t10dif */
-
+#if IS_REACHABLE(CONFIG_CRC_T10DIF)
static u64 crc_t10dif_wrapper(u64 crc, const u8 *p, size_t len)
{
return crc_t10dif_update(crc, p, len);
@@ -342,6 +341,9 @@ static void crc_t10dif_benchmark(struct kunit *test)
{
crc_benchmark(test, crc_t10dif_wrapper);
}
+#endif /* CONFIG_CRC_T10DIF */
+
+#if IS_REACHABLE(CONFIG_CRC32)
/* crc32_le */
@@ -414,6 +416,9 @@ static void crc32c_benchmark(struct kunit *test)
{
crc_benchmark(test, crc32c_wrapper);
}
+#endif /* CONFIG_CRC32 */
+
+#if IS_REACHABLE(CONFIG_CRC64)
/* crc64_be */
@@ -463,24 +468,35 @@ static void crc64_nvme_benchmark(struct kunit *test)
{
crc_benchmark(test, crc64_nvme_wrapper);
}
+#endif /* CONFIG_CRC64 */
static struct kunit_case crc_test_cases[] = {
+#if IS_REACHABLE(CONFIG_CRC7)
KUNIT_CASE(crc7_be_test),
KUNIT_CASE(crc7_be_benchmark),
+#endif
+#if IS_REACHABLE(CONFIG_CRC16)
KUNIT_CASE(crc16_test),
KUNIT_CASE(crc16_benchmark),
+#endif
+#if IS_REACHABLE(CONFIG_CRC_T10DIF)
KUNIT_CASE(crc_t10dif_test),
KUNIT_CASE(crc_t10dif_benchmark),
+#endif
+#if IS_REACHABLE(CONFIG_CRC32)
KUNIT_CASE(crc32_le_test),
KUNIT_CASE(crc32_le_benchmark),
KUNIT_CASE(crc32_be_test),
KUNIT_CASE(crc32_be_benchmark),
KUNIT_CASE(crc32c_test),
KUNIT_CASE(crc32c_benchmark),
+#endif
+#if IS_REACHABLE(CONFIG_CRC64)
KUNIT_CASE(crc64_be_test),
KUNIT_CASE(crc64_be_benchmark),
KUNIT_CASE(crc64_nvme_test),
KUNIT_CASE(crc64_nvme_benchmark),
+#endif
{},
};
diff --git a/tools/testing/kunit/configs/all_tests.config b/tools/testing/kunit/configs/all_tests.config
index 6910b07082da..bccc2c77196d 100644
--- a/tools/testing/kunit/configs/all_tests.config
+++ b/tools/testing/kunit/configs/all_tests.config
@@ -48,6 +48,8 @@ CONFIG_CRYPTO_LIB_ENABLE_ALL_FOR_KUNIT=y
CONFIG_PRIME_NUMBERS=y
+CONFIG_CRC_ENABLE_ALL_FOR_KUNIT=y
+
CONFIG_SECURITY=y
CONFIG_SECURITY_APPARMOR=y
CONFIG_SECURITY_LANDLOCK=y