summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
authorChristoph Hellwig <hch@lst.de>2026-03-27 07:16:46 +0100
committerAndrew Morton <akpm@linux-foundation.org>2026-04-02 23:36:18 -0700
commit033bee3e49631bd0c7e081aeafeadc7623495107 (patch)
treea4d1d1d9e6d1bf47bfbee336eb43c910394960af /arch
parent3786f2ad009549c9e5e2af86e5829b31ad788eb4 (diff)
downloadlwn-033bee3e49631bd0c7e081aeafeadc7623495107.tar.gz
lwn-033bee3e49631bd0c7e081aeafeadc7623495107.zip
loongarch: move the XOR code to lib/raid/
Move the optimized XOR into lib/raid and include it in xor.ko instead of always building it into the main kernel image. Link: https://lkml.kernel.org/r/20260327061704.3707577-15-hch@lst.de Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Eric Biggers <ebiggers@kernel.org> Tested-by: Eric Biggers <ebiggers@kernel.org> Cc: Albert Ou <aou@eecs.berkeley.edu> Cc: Alexander Gordeev <agordeev@linux.ibm.com> Cc: Alexandre Ghiti <alex@ghiti.fr> Cc: Andreas Larsson <andreas@gaisler.com> Cc: Anton Ivanov <anton.ivanov@cambridgegreys.com> Cc: Ard Biesheuvel <ardb@kernel.org> Cc: Arnd Bergmann <arnd@arndb.de> Cc: "Borislav Petkov (AMD)" <bp@alien8.de> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Chris Mason <clm@fb.com> Cc: Christian Borntraeger <borntraeger@linux.ibm.com> Cc: Dan Williams <dan.j.williams@intel.com> Cc: David S. Miller <davem@davemloft.net> Cc: David Sterba <dsterba@suse.com> Cc: Heiko Carstens <hca@linux.ibm.com> Cc: Herbert Xu <herbert@gondor.apana.org.au> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Huacai Chen <chenhuacai@kernel.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jason A. Donenfeld <jason@zx2c4.com> Cc: Johannes Berg <johannes@sipsolutions.net> Cc: Li Nan <linan122@huawei.com> Cc: Madhavan Srinivasan <maddy@linux.ibm.com> Cc: Magnus Lindholm <linmag7@gmail.com> Cc: Matt Turner <mattst88@gmail.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Nicholas Piggin <npiggin@gmail.com> Cc: Palmer Dabbelt <palmer@dabbelt.com> Cc: Richard Henderson <richard.henderson@linaro.org> Cc: Richard Weinberger <richard@nod.at> Cc: Russell King <linux@armlinux.org.uk> Cc: Song Liu <song@kernel.org> Cc: Sven Schnelle <svens@linux.ibm.com> Cc: Ted Ts'o <tytso@mit.edu> Cc: Vasily Gorbik <gor@linux.ibm.com> Cc: WANG Xuerui <kernel@xen0n.name> Cc: Will Deacon <will@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'arch')
-rw-r--r--arch/loongarch/include/asm/xor.h24
-rw-r--r--arch/loongarch/include/asm/xor_simd.h34
-rw-r--r--arch/loongarch/lib/Makefile2
-rw-r--r--arch/loongarch/lib/xor_simd.c93
-rw-r--r--arch/loongarch/lib/xor_simd.h38
-rw-r--r--arch/loongarch/lib/xor_simd_glue.c72
-rw-r--r--arch/loongarch/lib/xor_template.c110
7 files changed, 3 insertions, 370 deletions
diff --git a/arch/loongarch/include/asm/xor.h b/arch/loongarch/include/asm/xor.h
index d17c0e3b047f..7e32f72f8b03 100644
--- a/arch/loongarch/include/asm/xor.h
+++ b/arch/loongarch/include/asm/xor.h
@@ -6,27 +6,6 @@
#define _ASM_LOONGARCH_XOR_H
#include <asm/cpu-features.h>
-#include <asm/xor_simd.h>
-
-#ifdef CONFIG_CPU_HAS_LSX
-static struct xor_block_template xor_block_lsx = {
- .name = "lsx",
- .do_2 = xor_lsx_2,
- .do_3 = xor_lsx_3,
- .do_4 = xor_lsx_4,
- .do_5 = xor_lsx_5,
-};
-#endif /* CONFIG_CPU_HAS_LSX */
-
-#ifdef CONFIG_CPU_HAS_LASX
-static struct xor_block_template xor_block_lasx = {
- .name = "lasx",
- .do_2 = xor_lasx_2,
- .do_3 = xor_lasx_3,
- .do_4 = xor_lasx_4,
- .do_5 = xor_lasx_5,
-};
-#endif /* CONFIG_CPU_HAS_LASX */
/*
* For grins, also test the generic routines.
@@ -38,6 +17,9 @@ static struct xor_block_template xor_block_lasx = {
*/
#include <asm-generic/xor.h>
+extern struct xor_block_template xor_block_lsx;
+extern struct xor_block_template xor_block_lasx;
+
#define arch_xor_init arch_xor_init
static __always_inline void __init arch_xor_init(void)
{
diff --git a/arch/loongarch/include/asm/xor_simd.h b/arch/loongarch/include/asm/xor_simd.h
deleted file mode 100644
index 471b96332f38..000000000000
--- a/arch/loongarch/include/asm/xor_simd.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
- */
-#ifndef _ASM_LOONGARCH_XOR_SIMD_H
-#define _ASM_LOONGARCH_XOR_SIMD_H
-
-#ifdef CONFIG_CPU_HAS_LSX
-void xor_lsx_2(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2);
-void xor_lsx_3(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2, const unsigned long * __restrict p3);
-void xor_lsx_4(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2, const unsigned long * __restrict p3,
- const unsigned long * __restrict p4);
-void xor_lsx_5(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2, const unsigned long * __restrict p3,
- const unsigned long * __restrict p4, const unsigned long * __restrict p5);
-#endif /* CONFIG_CPU_HAS_LSX */
-
-#ifdef CONFIG_CPU_HAS_LASX
-void xor_lasx_2(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2);
-void xor_lasx_3(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2, const unsigned long * __restrict p3);
-void xor_lasx_4(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2, const unsigned long * __restrict p3,
- const unsigned long * __restrict p4);
-void xor_lasx_5(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2, const unsigned long * __restrict p3,
- const unsigned long * __restrict p4, const unsigned long * __restrict p5);
-#endif /* CONFIG_CPU_HAS_LASX */
-
-#endif /* _ASM_LOONGARCH_XOR_SIMD_H */
diff --git a/arch/loongarch/lib/Makefile b/arch/loongarch/lib/Makefile
index ccea3bbd4353..827a88529a42 100644
--- a/arch/loongarch/lib/Makefile
+++ b/arch/loongarch/lib/Makefile
@@ -8,6 +8,4 @@ lib-y += delay.o memset.o memcpy.o memmove.o \
obj-$(CONFIG_ARCH_SUPPORTS_INT128) += tishift.o
-obj-$(CONFIG_CPU_HAS_LSX) += xor_simd.o xor_simd_glue.o
-
obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
diff --git a/arch/loongarch/lib/xor_simd.c b/arch/loongarch/lib/xor_simd.c
deleted file mode 100644
index 84cd24b728c4..000000000000
--- a/arch/loongarch/lib/xor_simd.c
+++ /dev/null
@@ -1,93 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * LoongArch SIMD XOR operations
- *
- * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
- */
-
-#include "xor_simd.h"
-
-/*
- * Process one cache line (64 bytes) per loop. This is assuming all future
- * popular LoongArch cores are similar performance-characteristics-wise to the
- * current models.
- */
-#define LINE_WIDTH 64
-
-#ifdef CONFIG_CPU_HAS_LSX
-
-#define LD(reg, base, offset) \
- "vld $vr" #reg ", %[" #base "], " #offset "\n\t"
-#define ST(reg, base, offset) \
- "vst $vr" #reg ", %[" #base "], " #offset "\n\t"
-#define XOR(dj, k) "vxor.v $vr" #dj ", $vr" #dj ", $vr" #k "\n\t"
-
-#define LD_INOUT_LINE(base) \
- LD(0, base, 0) \
- LD(1, base, 16) \
- LD(2, base, 32) \
- LD(3, base, 48)
-
-#define LD_AND_XOR_LINE(base) \
- LD(4, base, 0) \
- LD(5, base, 16) \
- LD(6, base, 32) \
- LD(7, base, 48) \
- XOR(0, 4) \
- XOR(1, 5) \
- XOR(2, 6) \
- XOR(3, 7)
-
-#define ST_LINE(base) \
- ST(0, base, 0) \
- ST(1, base, 16) \
- ST(2, base, 32) \
- ST(3, base, 48)
-
-#define XOR_FUNC_NAME(nr) __xor_lsx_##nr
-#include "xor_template.c"
-
-#undef LD
-#undef ST
-#undef XOR
-#undef LD_INOUT_LINE
-#undef LD_AND_XOR_LINE
-#undef ST_LINE
-#undef XOR_FUNC_NAME
-
-#endif /* CONFIG_CPU_HAS_LSX */
-
-#ifdef CONFIG_CPU_HAS_LASX
-
-#define LD(reg, base, offset) \
- "xvld $xr" #reg ", %[" #base "], " #offset "\n\t"
-#define ST(reg, base, offset) \
- "xvst $xr" #reg ", %[" #base "], " #offset "\n\t"
-#define XOR(dj, k) "xvxor.v $xr" #dj ", $xr" #dj ", $xr" #k "\n\t"
-
-#define LD_INOUT_LINE(base) \
- LD(0, base, 0) \
- LD(1, base, 32)
-
-#define LD_AND_XOR_LINE(base) \
- LD(2, base, 0) \
- LD(3, base, 32) \
- XOR(0, 2) \
- XOR(1, 3)
-
-#define ST_LINE(base) \
- ST(0, base, 0) \
- ST(1, base, 32)
-
-#define XOR_FUNC_NAME(nr) __xor_lasx_##nr
-#include "xor_template.c"
-
-#undef LD
-#undef ST
-#undef XOR
-#undef LD_INOUT_LINE
-#undef LD_AND_XOR_LINE
-#undef ST_LINE
-#undef XOR_FUNC_NAME
-
-#endif /* CONFIG_CPU_HAS_LASX */
diff --git a/arch/loongarch/lib/xor_simd.h b/arch/loongarch/lib/xor_simd.h
deleted file mode 100644
index f50f32514d80..000000000000
--- a/arch/loongarch/lib/xor_simd.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Simple interface to link xor_simd.c and xor_simd_glue.c
- *
- * Separating these files ensures that no SIMD instructions are run outside of
- * the kfpu critical section.
- */
-
-#ifndef __LOONGARCH_LIB_XOR_SIMD_H
-#define __LOONGARCH_LIB_XOR_SIMD_H
-
-#ifdef CONFIG_CPU_HAS_LSX
-void __xor_lsx_2(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2);
-void __xor_lsx_3(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2, const unsigned long * __restrict p3);
-void __xor_lsx_4(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2, const unsigned long * __restrict p3,
- const unsigned long * __restrict p4);
-void __xor_lsx_5(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2, const unsigned long * __restrict p3,
- const unsigned long * __restrict p4, const unsigned long * __restrict p5);
-#endif /* CONFIG_CPU_HAS_LSX */
-
-#ifdef CONFIG_CPU_HAS_LASX
-void __xor_lasx_2(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2);
-void __xor_lasx_3(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2, const unsigned long * __restrict p3);
-void __xor_lasx_4(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2, const unsigned long * __restrict p3,
- const unsigned long * __restrict p4);
-void __xor_lasx_5(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2, const unsigned long * __restrict p3,
- const unsigned long * __restrict p4, const unsigned long * __restrict p5);
-#endif /* CONFIG_CPU_HAS_LASX */
-
-#endif /* __LOONGARCH_LIB_XOR_SIMD_H */
diff --git a/arch/loongarch/lib/xor_simd_glue.c b/arch/loongarch/lib/xor_simd_glue.c
deleted file mode 100644
index 393f689dbcf6..000000000000
--- a/arch/loongarch/lib/xor_simd_glue.c
+++ /dev/null
@@ -1,72 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * LoongArch SIMD XOR operations
- *
- * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
- */
-
-#include <linux/export.h>
-#include <linux/sched.h>
-#include <asm/fpu.h>
-#include <asm/xor_simd.h>
-#include "xor_simd.h"
-
-#define MAKE_XOR_GLUE_2(flavor) \
-void xor_##flavor##_2(unsigned long bytes, unsigned long * __restrict p1, \
- const unsigned long * __restrict p2) \
-{ \
- kernel_fpu_begin(); \
- __xor_##flavor##_2(bytes, p1, p2); \
- kernel_fpu_end(); \
-} \
-EXPORT_SYMBOL_GPL(xor_##flavor##_2)
-
-#define MAKE_XOR_GLUE_3(flavor) \
-void xor_##flavor##_3(unsigned long bytes, unsigned long * __restrict p1, \
- const unsigned long * __restrict p2, \
- const unsigned long * __restrict p3) \
-{ \
- kernel_fpu_begin(); \
- __xor_##flavor##_3(bytes, p1, p2, p3); \
- kernel_fpu_end(); \
-} \
-EXPORT_SYMBOL_GPL(xor_##flavor##_3)
-
-#define MAKE_XOR_GLUE_4(flavor) \
-void xor_##flavor##_4(unsigned long bytes, unsigned long * __restrict p1, \
- const unsigned long * __restrict p2, \
- const unsigned long * __restrict p3, \
- const unsigned long * __restrict p4) \
-{ \
- kernel_fpu_begin(); \
- __xor_##flavor##_4(bytes, p1, p2, p3, p4); \
- kernel_fpu_end(); \
-} \
-EXPORT_SYMBOL_GPL(xor_##flavor##_4)
-
-#define MAKE_XOR_GLUE_5(flavor) \
-void xor_##flavor##_5(unsigned long bytes, unsigned long * __restrict p1, \
- const unsigned long * __restrict p2, \
- const unsigned long * __restrict p3, \
- const unsigned long * __restrict p4, \
- const unsigned long * __restrict p5) \
-{ \
- kernel_fpu_begin(); \
- __xor_##flavor##_5(bytes, p1, p2, p3, p4, p5); \
- kernel_fpu_end(); \
-} \
-EXPORT_SYMBOL_GPL(xor_##flavor##_5)
-
-#define MAKE_XOR_GLUES(flavor) \
- MAKE_XOR_GLUE_2(flavor); \
- MAKE_XOR_GLUE_3(flavor); \
- MAKE_XOR_GLUE_4(flavor); \
- MAKE_XOR_GLUE_5(flavor)
-
-#ifdef CONFIG_CPU_HAS_LSX
-MAKE_XOR_GLUES(lsx);
-#endif
-
-#ifdef CONFIG_CPU_HAS_LASX
-MAKE_XOR_GLUES(lasx);
-#endif
diff --git a/arch/loongarch/lib/xor_template.c b/arch/loongarch/lib/xor_template.c
deleted file mode 100644
index 0358ced7fe33..000000000000
--- a/arch/loongarch/lib/xor_template.c
+++ /dev/null
@@ -1,110 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
- *
- * Template for XOR operations, instantiated in xor_simd.c.
- *
- * Expected preprocessor definitions:
- *
- * - LINE_WIDTH
- * - XOR_FUNC_NAME(nr)
- * - LD_INOUT_LINE(buf)
- * - LD_AND_XOR_LINE(buf)
- * - ST_LINE(buf)
- */
-
-void XOR_FUNC_NAME(2)(unsigned long bytes,
- unsigned long * __restrict v1,
- const unsigned long * __restrict v2)
-{
- unsigned long lines = bytes / LINE_WIDTH;
-
- do {
- __asm__ __volatile__ (
- LD_INOUT_LINE(v1)
- LD_AND_XOR_LINE(v2)
- ST_LINE(v1)
- : : [v1] "r"(v1), [v2] "r"(v2) : "memory"
- );
-
- v1 += LINE_WIDTH / sizeof(unsigned long);
- v2 += LINE_WIDTH / sizeof(unsigned long);
- } while (--lines > 0);
-}
-
-void XOR_FUNC_NAME(3)(unsigned long bytes,
- unsigned long * __restrict v1,
- const unsigned long * __restrict v2,
- const unsigned long * __restrict v3)
-{
- unsigned long lines = bytes / LINE_WIDTH;
-
- do {
- __asm__ __volatile__ (
- LD_INOUT_LINE(v1)
- LD_AND_XOR_LINE(v2)
- LD_AND_XOR_LINE(v3)
- ST_LINE(v1)
- : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3) : "memory"
- );
-
- v1 += LINE_WIDTH / sizeof(unsigned long);
- v2 += LINE_WIDTH / sizeof(unsigned long);
- v3 += LINE_WIDTH / sizeof(unsigned long);
- } while (--lines > 0);
-}
-
-void XOR_FUNC_NAME(4)(unsigned long bytes,
- unsigned long * __restrict v1,
- const unsigned long * __restrict v2,
- const unsigned long * __restrict v3,
- const unsigned long * __restrict v4)
-{
- unsigned long lines = bytes / LINE_WIDTH;
-
- do {
- __asm__ __volatile__ (
- LD_INOUT_LINE(v1)
- LD_AND_XOR_LINE(v2)
- LD_AND_XOR_LINE(v3)
- LD_AND_XOR_LINE(v4)
- ST_LINE(v1)
- : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4)
- : "memory"
- );
-
- v1 += LINE_WIDTH / sizeof(unsigned long);
- v2 += LINE_WIDTH / sizeof(unsigned long);
- v3 += LINE_WIDTH / sizeof(unsigned long);
- v4 += LINE_WIDTH / sizeof(unsigned long);
- } while (--lines > 0);
-}
-
-void XOR_FUNC_NAME(5)(unsigned long bytes,
- unsigned long * __restrict v1,
- const unsigned long * __restrict v2,
- const unsigned long * __restrict v3,
- const unsigned long * __restrict v4,
- const unsigned long * __restrict v5)
-{
- unsigned long lines = bytes / LINE_WIDTH;
-
- do {
- __asm__ __volatile__ (
- LD_INOUT_LINE(v1)
- LD_AND_XOR_LINE(v2)
- LD_AND_XOR_LINE(v3)
- LD_AND_XOR_LINE(v4)
- LD_AND_XOR_LINE(v5)
- ST_LINE(v1)
- : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4),
- [v5] "r"(v5) : "memory"
- );
-
- v1 += LINE_WIDTH / sizeof(unsigned long);
- v2 += LINE_WIDTH / sizeof(unsigned long);
- v3 += LINE_WIDTH / sizeof(unsigned long);
- v4 += LINE_WIDTH / sizeof(unsigned long);
- v5 += LINE_WIDTH / sizeof(unsigned long);
- } while (--lines > 0);
-}