diff options
| author | Christoph Hellwig <hch@lst.de> | 2026-03-27 07:16:46 +0100 |
|---|---|---|
| committer | Andrew Morton <akpm@linux-foundation.org> | 2026-04-02 23:36:18 -0700 |
| commit | 033bee3e49631bd0c7e081aeafeadc7623495107 (patch) | |
| tree | a4d1d1d9e6d1bf47bfbee336eb43c910394960af /arch | |
| parent | 3786f2ad009549c9e5e2af86e5829b31ad788eb4 (diff) | |
| download | lwn-033bee3e49631bd0c7e081aeafeadc7623495107.tar.gz lwn-033bee3e49631bd0c7e081aeafeadc7623495107.zip | |
loongarch: move the XOR code to lib/raid/
Move the optimized XOR code into lib/raid and include it in xor.ko instead
of always building it into the main kernel image.
Link: https://lkml.kernel.org/r/20260327061704.3707577-15-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Eric Biggers <ebiggers@kernel.org>
Tested-by: Eric Biggers <ebiggers@kernel.org>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Alexandre Ghiti <alex@ghiti.fr>
Cc: Andreas Larsson <andreas@gaisler.com>
Cc: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: "Borislav Petkov (AMD)" <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chris Mason <clm@fb.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: David Sterba <dsterba@suse.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jason A. Donenfeld <jason@zx2c4.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Li Nan <linan122@huawei.com>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Magnus Lindholm <linmag7@gmail.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Richard Henderson <richard.henderson@linaro.org>
Cc: Richard Weinberger <richard@nod.at>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Song Liu <song@kernel.org>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Ted Ts'o <tytso@mit.edu>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: WANG Xuerui <kernel@xen0n.name>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'arch')
| -rw-r--r-- | arch/loongarch/include/asm/xor.h | 24 | ||||
| -rw-r--r-- | arch/loongarch/include/asm/xor_simd.h | 34 | ||||
| -rw-r--r-- | arch/loongarch/lib/Makefile | 2 | ||||
| -rw-r--r-- | arch/loongarch/lib/xor_simd.c | 93 | ||||
| -rw-r--r-- | arch/loongarch/lib/xor_simd.h | 38 | ||||
| -rw-r--r-- | arch/loongarch/lib/xor_simd_glue.c | 72 | ||||
| -rw-r--r-- | arch/loongarch/lib/xor_template.c | 110 |
7 files changed, 3 insertions, 370 deletions
diff --git a/arch/loongarch/include/asm/xor.h b/arch/loongarch/include/asm/xor.h index d17c0e3b047f..7e32f72f8b03 100644 --- a/arch/loongarch/include/asm/xor.h +++ b/arch/loongarch/include/asm/xor.h @@ -6,27 +6,6 @@ #define _ASM_LOONGARCH_XOR_H #include <asm/cpu-features.h> -#include <asm/xor_simd.h> - -#ifdef CONFIG_CPU_HAS_LSX -static struct xor_block_template xor_block_lsx = { - .name = "lsx", - .do_2 = xor_lsx_2, - .do_3 = xor_lsx_3, - .do_4 = xor_lsx_4, - .do_5 = xor_lsx_5, -}; -#endif /* CONFIG_CPU_HAS_LSX */ - -#ifdef CONFIG_CPU_HAS_LASX -static struct xor_block_template xor_block_lasx = { - .name = "lasx", - .do_2 = xor_lasx_2, - .do_3 = xor_lasx_3, - .do_4 = xor_lasx_4, - .do_5 = xor_lasx_5, -}; -#endif /* CONFIG_CPU_HAS_LASX */ /* * For grins, also test the generic routines. @@ -38,6 +17,9 @@ static struct xor_block_template xor_block_lasx = { */ #include <asm-generic/xor.h> +extern struct xor_block_template xor_block_lsx; +extern struct xor_block_template xor_block_lasx; + #define arch_xor_init arch_xor_init static __always_inline void __init arch_xor_init(void) { diff --git a/arch/loongarch/include/asm/xor_simd.h b/arch/loongarch/include/asm/xor_simd.h deleted file mode 100644 index 471b96332f38..000000000000 --- a/arch/loongarch/include/asm/xor_simd.h +++ /dev/null @@ -1,34 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2023 WANG Xuerui <git@xen0n.name> - */ -#ifndef _ASM_LOONGARCH_XOR_SIMD_H -#define _ASM_LOONGARCH_XOR_SIMD_H - -#ifdef CONFIG_CPU_HAS_LSX -void xor_lsx_2(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2); -void xor_lsx_3(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, const unsigned long * __restrict p3); -void xor_lsx_4(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, const unsigned long * __restrict p3, - const unsigned long * __restrict p4); -void xor_lsx_5(unsigned long bytes, 
unsigned long * __restrict p1, - const unsigned long * __restrict p2, const unsigned long * __restrict p3, - const unsigned long * __restrict p4, const unsigned long * __restrict p5); -#endif /* CONFIG_CPU_HAS_LSX */ - -#ifdef CONFIG_CPU_HAS_LASX -void xor_lasx_2(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2); -void xor_lasx_3(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, const unsigned long * __restrict p3); -void xor_lasx_4(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, const unsigned long * __restrict p3, - const unsigned long * __restrict p4); -void xor_lasx_5(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, const unsigned long * __restrict p3, - const unsigned long * __restrict p4, const unsigned long * __restrict p5); -#endif /* CONFIG_CPU_HAS_LASX */ - -#endif /* _ASM_LOONGARCH_XOR_SIMD_H */ diff --git a/arch/loongarch/lib/Makefile b/arch/loongarch/lib/Makefile index ccea3bbd4353..827a88529a42 100644 --- a/arch/loongarch/lib/Makefile +++ b/arch/loongarch/lib/Makefile @@ -8,6 +8,4 @@ lib-y += delay.o memset.o memcpy.o memmove.o \ obj-$(CONFIG_ARCH_SUPPORTS_INT128) += tishift.o -obj-$(CONFIG_CPU_HAS_LSX) += xor_simd.o xor_simd_glue.o - obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o diff --git a/arch/loongarch/lib/xor_simd.c b/arch/loongarch/lib/xor_simd.c deleted file mode 100644 index 84cd24b728c4..000000000000 --- a/arch/loongarch/lib/xor_simd.c +++ /dev/null @@ -1,93 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * LoongArch SIMD XOR operations - * - * Copyright (C) 2023 WANG Xuerui <git@xen0n.name> - */ - -#include "xor_simd.h" - -/* - * Process one cache line (64 bytes) per loop. This is assuming all future - * popular LoongArch cores are similar performance-characteristics-wise to the - * current models. 
- */ -#define LINE_WIDTH 64 - -#ifdef CONFIG_CPU_HAS_LSX - -#define LD(reg, base, offset) \ - "vld $vr" #reg ", %[" #base "], " #offset "\n\t" -#define ST(reg, base, offset) \ - "vst $vr" #reg ", %[" #base "], " #offset "\n\t" -#define XOR(dj, k) "vxor.v $vr" #dj ", $vr" #dj ", $vr" #k "\n\t" - -#define LD_INOUT_LINE(base) \ - LD(0, base, 0) \ - LD(1, base, 16) \ - LD(2, base, 32) \ - LD(3, base, 48) - -#define LD_AND_XOR_LINE(base) \ - LD(4, base, 0) \ - LD(5, base, 16) \ - LD(6, base, 32) \ - LD(7, base, 48) \ - XOR(0, 4) \ - XOR(1, 5) \ - XOR(2, 6) \ - XOR(3, 7) - -#define ST_LINE(base) \ - ST(0, base, 0) \ - ST(1, base, 16) \ - ST(2, base, 32) \ - ST(3, base, 48) - -#define XOR_FUNC_NAME(nr) __xor_lsx_##nr -#include "xor_template.c" - -#undef LD -#undef ST -#undef XOR -#undef LD_INOUT_LINE -#undef LD_AND_XOR_LINE -#undef ST_LINE -#undef XOR_FUNC_NAME - -#endif /* CONFIG_CPU_HAS_LSX */ - -#ifdef CONFIG_CPU_HAS_LASX - -#define LD(reg, base, offset) \ - "xvld $xr" #reg ", %[" #base "], " #offset "\n\t" -#define ST(reg, base, offset) \ - "xvst $xr" #reg ", %[" #base "], " #offset "\n\t" -#define XOR(dj, k) "xvxor.v $xr" #dj ", $xr" #dj ", $xr" #k "\n\t" - -#define LD_INOUT_LINE(base) \ - LD(0, base, 0) \ - LD(1, base, 32) - -#define LD_AND_XOR_LINE(base) \ - LD(2, base, 0) \ - LD(3, base, 32) \ - XOR(0, 2) \ - XOR(1, 3) - -#define ST_LINE(base) \ - ST(0, base, 0) \ - ST(1, base, 32) - -#define XOR_FUNC_NAME(nr) __xor_lasx_##nr -#include "xor_template.c" - -#undef LD -#undef ST -#undef XOR -#undef LD_INOUT_LINE -#undef LD_AND_XOR_LINE -#undef ST_LINE -#undef XOR_FUNC_NAME - -#endif /* CONFIG_CPU_HAS_LASX */ diff --git a/arch/loongarch/lib/xor_simd.h b/arch/loongarch/lib/xor_simd.h deleted file mode 100644 index f50f32514d80..000000000000 --- a/arch/loongarch/lib/xor_simd.h +++ /dev/null @@ -1,38 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Simple interface to link xor_simd.c and xor_simd_glue.c - * - * Separating these files ensures that no SIMD 
instructions are run outside of - * the kfpu critical section. - */ - -#ifndef __LOONGARCH_LIB_XOR_SIMD_H -#define __LOONGARCH_LIB_XOR_SIMD_H - -#ifdef CONFIG_CPU_HAS_LSX -void __xor_lsx_2(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2); -void __xor_lsx_3(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, const unsigned long * __restrict p3); -void __xor_lsx_4(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, const unsigned long * __restrict p3, - const unsigned long * __restrict p4); -void __xor_lsx_5(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, const unsigned long * __restrict p3, - const unsigned long * __restrict p4, const unsigned long * __restrict p5); -#endif /* CONFIG_CPU_HAS_LSX */ - -#ifdef CONFIG_CPU_HAS_LASX -void __xor_lasx_2(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2); -void __xor_lasx_3(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, const unsigned long * __restrict p3); -void __xor_lasx_4(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, const unsigned long * __restrict p3, - const unsigned long * __restrict p4); -void __xor_lasx_5(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, const unsigned long * __restrict p3, - const unsigned long * __restrict p4, const unsigned long * __restrict p5); -#endif /* CONFIG_CPU_HAS_LASX */ - -#endif /* __LOONGARCH_LIB_XOR_SIMD_H */ diff --git a/arch/loongarch/lib/xor_simd_glue.c b/arch/loongarch/lib/xor_simd_glue.c deleted file mode 100644 index 393f689dbcf6..000000000000 --- a/arch/loongarch/lib/xor_simd_glue.c +++ /dev/null @@ -1,72 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * LoongArch SIMD XOR operations - * - * Copyright (C) 2023 WANG Xuerui 
<git@xen0n.name> - */ - -#include <linux/export.h> -#include <linux/sched.h> -#include <asm/fpu.h> -#include <asm/xor_simd.h> -#include "xor_simd.h" - -#define MAKE_XOR_GLUE_2(flavor) \ -void xor_##flavor##_2(unsigned long bytes, unsigned long * __restrict p1, \ - const unsigned long * __restrict p2) \ -{ \ - kernel_fpu_begin(); \ - __xor_##flavor##_2(bytes, p1, p2); \ - kernel_fpu_end(); \ -} \ -EXPORT_SYMBOL_GPL(xor_##flavor##_2) - -#define MAKE_XOR_GLUE_3(flavor) \ -void xor_##flavor##_3(unsigned long bytes, unsigned long * __restrict p1, \ - const unsigned long * __restrict p2, \ - const unsigned long * __restrict p3) \ -{ \ - kernel_fpu_begin(); \ - __xor_##flavor##_3(bytes, p1, p2, p3); \ - kernel_fpu_end(); \ -} \ -EXPORT_SYMBOL_GPL(xor_##flavor##_3) - -#define MAKE_XOR_GLUE_4(flavor) \ -void xor_##flavor##_4(unsigned long bytes, unsigned long * __restrict p1, \ - const unsigned long * __restrict p2, \ - const unsigned long * __restrict p3, \ - const unsigned long * __restrict p4) \ -{ \ - kernel_fpu_begin(); \ - __xor_##flavor##_4(bytes, p1, p2, p3, p4); \ - kernel_fpu_end(); \ -} \ -EXPORT_SYMBOL_GPL(xor_##flavor##_4) - -#define MAKE_XOR_GLUE_5(flavor) \ -void xor_##flavor##_5(unsigned long bytes, unsigned long * __restrict p1, \ - const unsigned long * __restrict p2, \ - const unsigned long * __restrict p3, \ - const unsigned long * __restrict p4, \ - const unsigned long * __restrict p5) \ -{ \ - kernel_fpu_begin(); \ - __xor_##flavor##_5(bytes, p1, p2, p3, p4, p5); \ - kernel_fpu_end(); \ -} \ -EXPORT_SYMBOL_GPL(xor_##flavor##_5) - -#define MAKE_XOR_GLUES(flavor) \ - MAKE_XOR_GLUE_2(flavor); \ - MAKE_XOR_GLUE_3(flavor); \ - MAKE_XOR_GLUE_4(flavor); \ - MAKE_XOR_GLUE_5(flavor) - -#ifdef CONFIG_CPU_HAS_LSX -MAKE_XOR_GLUES(lsx); -#endif - -#ifdef CONFIG_CPU_HAS_LASX -MAKE_XOR_GLUES(lasx); -#endif diff --git a/arch/loongarch/lib/xor_template.c b/arch/loongarch/lib/xor_template.c deleted file mode 100644 index 0358ced7fe33..000000000000 --- 
a/arch/loongarch/lib/xor_template.c +++ /dev/null @@ -1,110 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2023 WANG Xuerui <git@xen0n.name> - * - * Template for XOR operations, instantiated in xor_simd.c. - * - * Expected preprocessor definitions: - * - * - LINE_WIDTH - * - XOR_FUNC_NAME(nr) - * - LD_INOUT_LINE(buf) - * - LD_AND_XOR_LINE(buf) - * - ST_LINE(buf) - */ - -void XOR_FUNC_NAME(2)(unsigned long bytes, - unsigned long * __restrict v1, - const unsigned long * __restrict v2) -{ - unsigned long lines = bytes / LINE_WIDTH; - - do { - __asm__ __volatile__ ( - LD_INOUT_LINE(v1) - LD_AND_XOR_LINE(v2) - ST_LINE(v1) - : : [v1] "r"(v1), [v2] "r"(v2) : "memory" - ); - - v1 += LINE_WIDTH / sizeof(unsigned long); - v2 += LINE_WIDTH / sizeof(unsigned long); - } while (--lines > 0); -} - -void XOR_FUNC_NAME(3)(unsigned long bytes, - unsigned long * __restrict v1, - const unsigned long * __restrict v2, - const unsigned long * __restrict v3) -{ - unsigned long lines = bytes / LINE_WIDTH; - - do { - __asm__ __volatile__ ( - LD_INOUT_LINE(v1) - LD_AND_XOR_LINE(v2) - LD_AND_XOR_LINE(v3) - ST_LINE(v1) - : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3) : "memory" - ); - - v1 += LINE_WIDTH / sizeof(unsigned long); - v2 += LINE_WIDTH / sizeof(unsigned long); - v3 += LINE_WIDTH / sizeof(unsigned long); - } while (--lines > 0); -} - -void XOR_FUNC_NAME(4)(unsigned long bytes, - unsigned long * __restrict v1, - const unsigned long * __restrict v2, - const unsigned long * __restrict v3, - const unsigned long * __restrict v4) -{ - unsigned long lines = bytes / LINE_WIDTH; - - do { - __asm__ __volatile__ ( - LD_INOUT_LINE(v1) - LD_AND_XOR_LINE(v2) - LD_AND_XOR_LINE(v3) - LD_AND_XOR_LINE(v4) - ST_LINE(v1) - : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4) - : "memory" - ); - - v1 += LINE_WIDTH / sizeof(unsigned long); - v2 += LINE_WIDTH / sizeof(unsigned long); - v3 += LINE_WIDTH / sizeof(unsigned long); - v4 += LINE_WIDTH / sizeof(unsigned long); - } 
while (--lines > 0); -} - -void XOR_FUNC_NAME(5)(unsigned long bytes, - unsigned long * __restrict v1, - const unsigned long * __restrict v2, - const unsigned long * __restrict v3, - const unsigned long * __restrict v4, - const unsigned long * __restrict v5) -{ - unsigned long lines = bytes / LINE_WIDTH; - - do { - __asm__ __volatile__ ( - LD_INOUT_LINE(v1) - LD_AND_XOR_LINE(v2) - LD_AND_XOR_LINE(v3) - LD_AND_XOR_LINE(v4) - LD_AND_XOR_LINE(v5) - ST_LINE(v1) - : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4), - [v5] "r"(v5) : "memory" - ); - - v1 += LINE_WIDTH / sizeof(unsigned long); - v2 += LINE_WIDTH / sizeof(unsigned long); - v3 += LINE_WIDTH / sizeof(unsigned long); - v4 += LINE_WIDTH / sizeof(unsigned long); - v5 += LINE_WIDTH / sizeof(unsigned long); - } while (--lines > 0); -} |
