summaryrefslogtreecommitdiff
path: root/arch/loongarch/lib/xor_simd.c
blob: 84cd24b728c47968344ef2b771895e0e321262dd (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * LoongArch SIMD XOR operations
 *
 * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
 */

#include "xor_simd.h"

/*
 * Process one cache line (64 bytes) per loop. This is assuming all future
 * popular LoongArch cores are similar performance-characteristics-wise to the
 * current models.
 */
#define LINE_WIDTH 64

#ifdef CONFIG_CPU_HAS_LSX

#define LD(reg, base, offset)	\
	"vld $vr" #reg ", %[" #base "], " #offset "\n\t"
#define ST(reg, base, offset)	\
	"vst $vr" #reg ", %[" #base "], " #offset "\n\t"
#define XOR(dj, k)	"vxor.v $vr" #dj ", $vr" #dj ", $vr" #k "\n\t"

#define LD_INOUT_LINE(base)	\
	LD(0, base, 0)		\
	LD(1, base, 16)		\
	LD(2, base, 32)		\
	LD(3, base, 48)

#define LD_AND_XOR_LINE(base)	\
	LD(4, base, 0)		\
	LD(5, base, 16)		\
	LD(6, base, 32)		\
	LD(7, base, 48)		\
	XOR(0, 4)		\
	XOR(1, 5)		\
	XOR(2, 6)		\
	XOR(3, 7)

#define ST_LINE(base)		\
	ST(0, base, 0)		\
	ST(1, base, 16)		\
	ST(2, base, 32)		\
	ST(3, base, 48)

#define XOR_FUNC_NAME(nr) __xor_lsx_##nr
#include "xor_template.c"

#undef LD
#undef ST
#undef XOR
#undef LD_INOUT_LINE
#undef LD_AND_XOR_LINE
#undef ST_LINE
#undef XOR_FUNC_NAME

#endif /* CONFIG_CPU_HAS_LSX */

#ifdef CONFIG_CPU_HAS_LASX

#define LD(reg, base, offset)	\
	"xvld $xr" #reg ", %[" #base "], " #offset "\n\t"
#define ST(reg, base, offset)	\
	"xvst $xr" #reg ", %[" #base "], " #offset "\n\t"
#define XOR(dj, k)	"xvxor.v $xr" #dj ", $xr" #dj ", $xr" #k "\n\t"

#define LD_INOUT_LINE(base)	\
	LD(0, base, 0)		\
	LD(1, base, 32)

#define LD_AND_XOR_LINE(base)	\
	LD(2, base, 0)		\
	LD(3, base, 32)		\
	XOR(0, 2)		\
	XOR(1, 3)

#define ST_LINE(base)		\
	ST(0, base, 0)		\
	ST(1, base, 32)

#define XOR_FUNC_NAME(nr) __xor_lasx_##nr
#include "xor_template.c"

#undef LD
#undef ST
#undef XOR
#undef LD_INOUT_LINE
#undef LD_AND_XOR_LINE
#undef ST_LINE
#undef XOR_FUNC_NAME

#endif /* CONFIG_CPU_HAS_LASX */