crypto: powerpc - Add POWER8 optimised crc32c

Use the vector polynomial multiply-sum instructions in POWER8 to speed up crc32c.

This is just over 41x faster than the slice-by-8 method that it replaces. Measurements on a 4.1 GHz POWER8 show it sustaining 52 GiB/sec.

A simple btrfs write performance test:

    dd if=/dev/zero of=/mnt/tmpfile bs=1M count=4096
    sync

is over 3.7x faster.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
author: Anton Blanchard <anton@samba.org> 2016-07-01 08:19:45 +1000
committer: Herbert Xu <herbert@gondor.apana.org.au> 2016-07-05 23:05:19 +0800
commit: 6dd7a82cc54ebd2936763befd3dcd4beb727a704 (patch)
tree: 4077ad30a801a6e7dcc0b915612e2c815239fd9b /arch/powerpc/include/asm/ppc-opcode.h
parent: 151f25112ff7befc134ed3fc58b0ff8792b3169e (diff)
download: lwn-6dd7a82cc54ebd2936763befd3dcd4beb727a704.tar.gz lwn-6dd7a82cc54ebd2936763befd3dcd4beb727a704.zip
1 files changed, 12 insertions, 0 deletions
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 1d035c1cc889..49cd8760aa7c 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -174,6 +174,8 @@
 #define PPC_INST_MFSPR_DSCR_USER_MASK	0xfc1fffff
 #define PPC_INST_MTSPR_DSCR_USER	0x7c0303a6
 #define PPC_INST_MTSPR_DSCR_USER_MASK	0xfc1fffff
+#define PPC_INST_MFVSRD			0x7c000066
+#define PPC_INST_MTVSRD			0x7c000166
 #define PPC_INST_SLBFEE			0x7c0007a7
 
 #define PPC_INST_STRING			0x7c00042a
@@ -188,6 +190,8 @@
 #define PPC_INST_WAIT			0x7c00007c
 #define PPC_INST_TLBIVAX		0x7c000624
 #define PPC_INST_TLBSRX_DOT		0x7c0006a5
+#define PPC_INST_VPMSUMW		0x10000488
+#define PPC_INST_VPMSUMD		0x100004c8
 #define PPC_INST_XXLOR			0xf0000510
 #define PPC_INST_XXSWAPD		0xf0000250
 #define PPC_INST_XVCPSGNDP		0xf0000780
@@ -359,6 +363,14 @@
 					       VSX_XX1((s), a, b))
 #define LXVD2X(s, a, b)		stringify_in_c(.long PPC_INST_LXVD2X | \
 					       VSX_XX1((s), a, b))
+#define MFVRD(a, t)		stringify_in_c(.long PPC_INST_MFVSRD | \
+					       VSX_XX1((t)+32, a, R0))
+#define MTVRD(t, a)		stringify_in_c(.long PPC_INST_MTVSRD | \
+					       VSX_XX1((t)+32, a, R0))
+#define VPMSUMW(t, a, b)	stringify_in_c(.long PPC_INST_VPMSUMW | \
+					       VSX_XX3((t), a, b))
+#define VPMSUMD(t, a, b)	stringify_in_c(.long PPC_INST_VPMSUMD | \
+					       VSX_XX3((t), a, b))
 #define XXLOR(t, a, b)		stringify_in_c(.long PPC_INST_XXLOR | \
 					       VSX_XX3((t), a, b))
 #define XXSWAPD(t, a)		stringify_in_c(.long PPC_INST_XXSWAPD | \
author	Anton Blanchard <anton@samba.org>	2016-07-01 08:19:45 +1000
committer	Herbert Xu <herbert@gondor.apana.org.au>	2016-07-05 23:05:19 +0800
commit	6dd7a82cc54ebd2936763befd3dcd4beb727a704 (patch)
tree	4077ad30a801a6e7dcc0b915612e2c815239fd9b /arch/powerpc/include/asm/ppc-opcode.h
parent	151f25112ff7befc134ed3fc58b0ff8792b3169e (diff)
download	lwn-6dd7a82cc54ebd2936763befd3dcd4beb727a704.tar.gz lwn-6dd7a82cc54ebd2936763befd3dcd4beb727a704.zip