summaryrefslogtreecommitdiff
path: root/lib/raid6/x86.h
diff options
context:
space:
mode:
authorGayatri Kammela <gayatri.kammela@intel.com>2016-08-12 18:03:19 -0700
committerShaohua Li <shli@fb.com>2016-09-21 09:09:44 -0700
commite0a491c1296874a1aca51cc68452f12a4d950029 (patch)
treed69949fe4db93fc1cc886fa359b8a7c58f4f3021 /lib/raid6/x86.h
parentd6385db94196b253ae5eb3678fa95cdf1f839fcc (diff)
downloadlwn-e0a491c1296874a1aca51cc68452f12a4d950029.tar.gz
lwn-e0a491c1296874a1aca51cc68452f12a4d950029.zip
lib/raid6: Add AVX512 optimized gen_syndrome functions
Optimize RAID6 gen_syndrom functions to take advantage of the 512-bit ZMM integer instructions introduced in AVX512. AVX512 optimized gen_syndrom functions, which is simply based on avx2.c written by Yuanhan Liu and sse2.c written by hpa. The patch was tested and benchmarked before submission on a hardware that has AVX512 flags to support such instructions Cc: H. Peter Anvin <hpa@zytor.com> Cc: Jim Kukunas <james.t.kukunas@linux.intel.com> Cc: Fenghua Yu <fenghua.yu@intel.com> Signed-off-by: Megha Dey <megha.dey@linux.intel.com> Signed-off-by: Gayatri Kammela <gayatri.kammela@intel.com> Reviewed-by: Fenghua Yu <fenghua.yu@intel.com> Signed-off-by: Shaohua Li <shli@fb.com>
Diffstat (limited to 'lib/raid6/x86.h')
-rw-r--r--lib/raid6/x86.h10
1 files changed, 10 insertions, 0 deletions
diff --git a/lib/raid6/x86.h b/lib/raid6/x86.h
index 8fe9d9662abb..834d268a4b05 100644
--- a/lib/raid6/x86.h
+++ b/lib/raid6/x86.h
@@ -46,6 +46,16 @@ static inline void kernel_fpu_end(void)
#define X86_FEATURE_SSSE3 (4*32+ 9) /* Supplemental SSE-3 */
#define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */
#define X86_FEATURE_AVX2 (9*32+ 5) /* AVX2 instructions */
+#define X86_FEATURE_AVX512F (9*32+16) /* AVX-512 Foundation */
+#define X86_FEATURE_AVX512DQ (9*32+17) /* AVX-512 DQ (Double/Quad granular)
+ * Instructions
+ */
+#define X86_FEATURE_AVX512BW (9*32+30) /* AVX-512 BW (Byte/Word granular)
+ * Instructions
+ */
+#define X86_FEATURE_AVX512VL (9*32+31) /* AVX-512 VL (128/256 Vector Length)
+ * Extensions
+ */
#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */
/* Should work well enough on modern CPUs for testing */