Merge tag 'v6.2-p1' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

Pull crypto updates from Herbert Xu:
 "API:
   - Optimise away self-test overhead when they are disabled
   - Support symmetric encryption via keyring keys in af_alg
   - Flip hwrng default_quality, the default is now maximum entropy

  Algorithms:
   - Add library version of aesgcm
   - CFI fixes for assembly code
   - Add arm/arm64 accelerated versions of sm3/sm4

  Drivers:
   - Remove assumption on arm64 that kmalloc is DMA-aligned
   - Fix selftest failures in rockchip
   - Add support for RK3328/RK3399 in rockchip
   - Add deflate support in qat
   - Merge ux500 into stm32
   - Add support for TEE for PCI ID 0x14CA in ccp
   - Add mt7986 support in mtk
   - Add MaxLinear platform support in inside-secure
   - Add NPCM8XX support in npcm"

* tag 'v6.2-p1' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (184 commits)
  crypto: ux500/cryp - delete driver
  crypto: stm32/cryp - enable for use with Ux500
  crypto: stm32 - enable drivers to be used on Ux500
  dt-bindings: crypto: Let STM32 define Ux500 CRYP
  hwrng: geode - Fix PCI device refcount leak
  hwrng: amd - Fix PCI device refcount leak
  crypto: qce - Set DMA alignment explicitly
  crypto: octeontx2 - Set DMA alignment explicitly
  crypto: octeontx - Set DMA alignment explicitly
  crypto: keembay - Set DMA alignment explicitly
  crypto: safexcel - Set DMA alignment explicitly
  crypto: hisilicon/hpre - Set DMA alignment explicitly
  crypto: chelsio - Set DMA alignment explicitly
  crypto: ccree - Set DMA alignment explicitly
  crypto: ccp - Set DMA alignment explicitly
  crypto: cavium - Set DMA alignment explicitly
  crypto: img-hash - Fix variable dereferenced before check 'hdev->req'
  crypto: arm64/ghash-ce - use frame_push/pop macros consistently
  crypto: arm64/crct10dif - use frame_push/pop macros consistently
  crypto: arm64/aes-modes - use frame_push/pop macros consistently
  ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2022-12-14 12:31:09 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> 2022-12-14 12:31:09 -0800
commit: 64e7003c6b85626a533a67c1ba938b75a3db24e6 (patch)
tree: 5e3e776d23a9520f51251b4838d4aa66d920dbff /lib
parent: 48ea09cddae0b794cde2070f106ef676703dbcd3 (diff)
parent: 453de3eb08c4b7e31b3019a4b0cc3ebce51a6219 (diff)
download: lwn-64e7003c6b85626a533a67c1ba938b75a3db24e6.tar.gz
lwn-64e7003c6b85626a533a67c1ba938b75a3db24e6.zip
4 files changed, 1177 insertions, 0 deletions
diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
index 7e9683e9f5c6..45436bfc6dff 100644
--- a/lib/crypto/Kconfig
+++ b/lib/crypto/Kconfig
@@ -8,9 +8,18 @@ config CRYPTO_LIB_UTILS
 config CRYPTO_LIB_AES
 	tristate
 
+config CRYPTO_LIB_AESGCM
+	tristate
+	select CRYPTO_LIB_AES
+	select CRYPTO_LIB_GF128MUL
+	select CRYPTO_LIB_UTILS
+
 config CRYPTO_LIB_ARC4
 	tristate
 
+config CRYPTO_LIB_GF128MUL
+	tristate
+
 config CRYPTO_ARCH_HAVE_LIB_BLAKE2S
 	bool
 	help
diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
index c852f067ab06..6ec2d4543d9c 100644
--- a/lib/crypto/Makefile
+++ b/lib/crypto/Makefile
@@ -10,9 +10,14 @@ obj-$(CONFIG_CRYPTO_LIB_CHACHA_GENERIC)		+= libchacha.o
 obj-$(CONFIG_CRYPTO_LIB_AES)			+= libaes.o
 libaes-y					:= aes.o
 
+obj-$(CONFIG_CRYPTO_LIB_AESGCM)			+= libaesgcm.o
+libaesgcm-y					:= aesgcm.o
+
 obj-$(CONFIG_CRYPTO_LIB_ARC4)			+= libarc4.o
 libarc4-y					:= arc4.o
 
+obj-$(CONFIG_CRYPTO_LIB_GF128MUL)		+= gf128mul.o
+
 # blake2s is used by the /dev/random driver which is always builtin
 obj-y						+= libblake2s.o
 libblake2s-y					:= blake2s.o
diff --git a/lib/crypto/aesgcm.c b/lib/crypto/aesgcm.c
new file mode 100644
index 000000000000..c632d6e17af8
--- /dev/null
+++ b/lib/crypto/aesgcm.c
@@ -0,0 +1,727 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Minimal library implementation of GCM
+ *
+ * Copyright 2022 Google LLC
+ */
+
+#include <linux/module.h>
+
+#include <crypto/algapi.h>
+#include <crypto/gcm.h>
+#include <crypto/ghash.h>
+
+#include <asm/irqflags.h>
+
+static void aesgcm_encrypt_block(const struct crypto_aes_ctx *ctx, void *dst,
+				 const void *src)
+{
+	unsigned long irqflags;
+
+	/*
+	 * Both uses of the block cipher in AES-GCM (deriving the GHASH key
+	 * and producing the CTR keystream) encrypt known plaintext, which
+	 * exposes the encryption key to timing attacks. The AES library
+	 * already reduces that risk by pulling the whole S-box into the
+	 * caches before doing any substitutions; this works best when nothing
+	 * can interrupt us in between, so run with interrupts disabled.
+	 */
+	local_irq_save(irqflags);
+	aes_encrypt(ctx, dst, src);
+	local_irq_restore(irqflags);
+}
+
+/**
+ * aesgcm_expandkey - Set up an AES-GCM context: expand the AES key and
+ *		      derive the GHASH key from it
+ *
+ * @ctx:	The context that receives the AES-GCM key schedule
+ * @key:	The AES encryption input key
+ * @keysize:	The size of @key in bytes
+ * @authsize:	The size in bytes of the GCM authentication tag
+ *
+ * Returns: 0 on success, or -EINVAL if @keysize or @authsize contain values
+ * that are not permitted by the GCM specification.
+ */
+int aesgcm_expandkey(struct aesgcm_ctx *ctx, const u8 *key,
+		     unsigned int keysize, unsigned int authsize)
+{
+	u8 zeroes[AES_BLOCK_SIZE] = {};
+	int err = crypto_gcm_check_authsize(authsize);
+
+	if (!err)
+		err = aes_expandkey(&ctx->aes_ctx, key, keysize);
+	if (err)
+		return err;
+
+	/* The GHASH key is the all-zero block encrypted with the AES key */
+	aesgcm_encrypt_block(&ctx->aes_ctx, &ctx->ghash_key, zeroes);
+	ctx->authsize = authsize;
+	return 0;
+}
+EXPORT_SYMBOL(aesgcm_expandkey);
+
+static void aesgcm_ghash(be128 *ghash, const be128 *key, const void *src,
+			 int len)
+{
+	/* A short final block is XORed in as is, i.e. as if zero-padded */
+	for (; len > 0; len -= GHASH_BLOCK_SIZE, src += GHASH_BLOCK_SIZE) {
+		int chunk = min(len, GHASH_BLOCK_SIZE);
+
+		crypto_xor((u8 *)ghash, src, chunk);
+		gf128mul_lle(ghash, key);
+	}
+}
+
+static void aesgcm_mac(const struct aesgcm_ctx *ctx, const u8 *src, int src_len,
+		       const u8 *assoc, int assoc_len, __be32 *ctr, u8 *authtag)
+{
+	be128 tail = { cpu_to_be64(assoc_len * 8ULL), cpu_to_be64(src_len * 8ULL) };
+	u8 buf[AES_BLOCK_SIZE];
+	be128 ghash = {};
+
+	/* GHASH @assoc, @src and their bit lengths (8ULL: no int overflow) */
+	aesgcm_ghash(&ghash, &ctx->ghash_key, assoc, assoc_len);
+	aesgcm_ghash(&ghash, &ctx->ghash_key, src, src_len);
+	aesgcm_ghash(&ghash, &ctx->ghash_key, &tail, sizeof(tail));
+	/* Mask the digest with the encrypted counter block (counter = 1) */
+	ctr[3] = cpu_to_be32(1);
+	aesgcm_encrypt_block(&ctx->aes_ctx, buf, ctr);
+	crypto_xor_cpy(authtag, buf, (u8 *)&ghash, ctx->authsize);
+	memzero_explicit(&ghash, sizeof(ghash));
+	memzero_explicit(buf, sizeof(buf));
+}
+
+static void aesgcm_crypt(const struct aesgcm_ctx *ctx, u8 *dst, const u8 *src,
+			 int len, __be32 *ctr)
+{
+	u8 keystream[AES_BLOCK_SIZE];
+	unsigned int blk;
+
+	/*
+	 * Keystream blocks use counter values 2, 3, ...; value 1 is taken by
+	 * aesgcm_mac() for the tag. Only ctr[3] is ever rewritten, so the
+	 * counter must never wrap or carry into the next 32-bit word: that
+	 * would amount to IV reuse, which must be avoided at all cost for
+	 * stream ciphers such as AES-CTR. Given the range of 'int len', this
+	 * cannot happen, so no explicit test is necessary.
+	 */
+	for (blk = 2; len > 0; blk++, len -= AES_BLOCK_SIZE) {
+		ctr[3] = cpu_to_be32(blk);
+		aesgcm_encrypt_block(&ctx->aes_ctx, keystream, ctr);
+		crypto_xor_cpy(dst, src, keystream, min(len, AES_BLOCK_SIZE));
+
+		dst += AES_BLOCK_SIZE;
+		src += AES_BLOCK_SIZE;
+	}
+	memzero_explicit(keystream, sizeof(keystream));
+}
+
+/**
+ * aesgcm_encrypt - Encrypt and authenticate a block of data with AES-GCM
+ *
+ * @ctx:	The AES-GCM key schedule
+ * @dst:	Pointer to the ciphertext output buffer
+ * @src:	Pointer to the plaintext (may equal @dst to encrypt in place)
+ * @crypt_len:	The size in bytes of the plaintext and ciphertext
+ * @assoc:	Pointer to the associated data
+ * @assoc_len:	The size in bytes of the associated data
+ * @iv:		The initialization vector (IV) to use for this block of data
+ *		(must be 12 bytes in size as per the GCM spec recommendation)
+ * @authtag:	The address of the buffer in memory where the authentication
+ *		tag should be stored. The buffer is assumed to have space for
+ *		@ctx->authsize bytes.
+ */
+void aesgcm_encrypt(const struct aesgcm_ctx *ctx, u8 *dst, const u8 *src,
+		    int crypt_len, const u8 *assoc, int assoc_len,
+		    const u8 iv[GCM_AES_IV_SIZE], u8 *authtag)
+{
+	__be32 ctrblk[4];
+
+	memcpy(ctrblk, iv, GCM_AES_IV_SIZE);
+	aesgcm_crypt(ctx, dst, src, crypt_len, ctrblk);
+	/* Authenticate the output, i.e. @dst (the ciphertext), not @src */
+	aesgcm_mac(ctx, dst, crypt_len, assoc, assoc_len, ctrblk, authtag);
+}
+EXPORT_SYMBOL(aesgcm_encrypt);
+
+/**
+ * aesgcm_decrypt - Authenticate and decrypt a block of data with AES-GCM
+ *
+ * @ctx:	The AES-GCM key schedule
+ * @dst:	Pointer to the plaintext output buffer
+ * @src:	Pointer to the ciphertext (may equal @dst to decrypt in place)
+ * @crypt_len:	The size in bytes of the plaintext and ciphertext
+ * @assoc:	Pointer to the associated data
+ * @assoc_len:	The size in bytes of the associated data
+ * @iv:		The initialization vector (IV) to use for this block of data
+ *		(must be 12 bytes in size as per the GCM spec recommendation)
+ * @authtag:	The address of the buffer in memory where the authentication
+ *		tag is stored.
+ *
+ * Returns: true on success, or false if the ciphertext failed authentication.
+ * On failure, no plaintext will be returned.
+ */
+bool __must_check aesgcm_decrypt(const struct aesgcm_ctx *ctx, u8 *dst,
+				 const u8 *src, int crypt_len, const u8 *assoc,
+				 int assoc_len, const u8 iv[GCM_AES_IV_SIZE],
+				 const u8 *authtag)
+{
+	u8 expected[AES_BLOCK_SIZE];
+	__be32 ctrblk[4];
+
+	memcpy(ctrblk, iv, GCM_AES_IV_SIZE);
+
+	aesgcm_mac(ctx, src, crypt_len, assoc, assoc_len, ctrblk, expected);
+	if (!crypto_memneq(authtag, expected, ctx->authsize)) {
+		aesgcm_crypt(ctx, dst, src, crypt_len, ctrblk);
+		return true;
+	}
+	memzero_explicit(expected, sizeof(expected));
+	return false;
+}
+EXPORT_SYMBOL(aesgcm_decrypt);
+
+MODULE_DESCRIPTION("Generic AES-GCM library");
+MODULE_AUTHOR("Ard Biesheuvel <ardb@kernel.org>");
+MODULE_LICENSE("GPL");
+
+#ifndef CONFIG_CRYPTO_MANAGER_DISABLE_TESTS
+
+/*
+ * Test code below. Vectors taken from crypto/testmgr.h
+ */
+
+static const u8 __initconst ctext0[16] =
+	"\x58\xe2\xfc\xce\xfa\x7e\x30\x61"
+	"\x36\x7f\x1d\x57\xa4\xe7\x45\x5a";
+
+static const u8 __initconst ptext1[16];	/* no initializer: all zeroes */
+
+static const u8 __initconst ctext1[32] =
+	"\x03\x88\xda\xce\x60\xb6\xa3\x92"
+	"\xf3\x28\xc2\xb9\x71\xb2\xfe\x78"
+	"\xab\x6e\x47\xd4\x2c\xec\x13\xbd"
+	"\xf5\x3a\x67\xb2\x12\x57\xbd\xdf";
+
+static const u8 __initconst ptext2[64] =
+	"\xd9\x31\x32\x25\xf8\x84\x06\xe5"
+	"\xa5\x59\x09\xc5\xaf\xf5\x26\x9a"
+	"\x86\xa7\xa9\x53\x15\x34\xf7\xda"
+	"\x2e\x4c\x30\x3d\x8a\x31\x8a\x72"
+	"\x1c\x3c\x0c\x95\x95\x68\x09\x53"
+	"\x2f\xcf\x0e\x24\x49\xa6\xb5\x25"
+	"\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57"
+	"\xba\x63\x7b\x39\x1a\xaf\xd2\x55";
+
+static const u8 __initconst ctext2[80] =
+	"\x42\x83\x1e\xc2\x21\x77\x74\x24"
+	"\x4b\x72\x21\xb7\x84\xd0\xd4\x9c"
+	"\xe3\xaa\x21\x2f\x2c\x02\xa4\xe0"
+	"\x35\xc1\x7e\x23\x29\xac\xa1\x2e"
+	"\x21\xd5\x14\xb2\x54\x66\x93\x1c"
+	"\x7d\x8f\x6a\x5a\xac\x84\xaa\x05"
+	"\x1b\xa3\x0b\x39\x6a\x0a\xac\x97"
+	"\x3d\x58\xe0\x91\x47\x3f\x59\x85"
+	"\x4d\x5c\x2a\xf3\x27\xcd\x64\xa6"
+	"\x2c\xf3\x5a\xbd\x2b\xa6\xfa\xb4";
+
+static const u8 __initconst ptext3[60] =
+	"\xd9\x31\x32\x25\xf8\x84\x06\xe5"
+	"\xa5\x59\x09\xc5\xaf\xf5\x26\x9a"
+	"\x86\xa7\xa9\x53\x15\x34\xf7\xda"
+	"\x2e\x4c\x30\x3d\x8a\x31\x8a\x72"
+	"\x1c\x3c\x0c\x95\x95\x68\x09\x53"
+	"\x2f\xcf\x0e\x24\x49\xa6\xb5\x25"
+	"\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57"
+	"\xba\x63\x7b\x39";
+
+static const u8 __initconst ctext3[76] =
+	"\x42\x83\x1e\xc2\x21\x77\x74\x24"
+	"\x4b\x72\x21\xb7\x84\xd0\xd4\x9c"
+	"\xe3\xaa\x21\x2f\x2c\x02\xa4\xe0"
+	"\x35\xc1\x7e\x23\x29\xac\xa1\x2e"
+	"\x21\xd5\x14\xb2\x54\x66\x93\x1c"
+	"\x7d\x8f\x6a\x5a\xac\x84\xaa\x05"
+	"\x1b\xa3\x0b\x39\x6a\x0a\xac\x97"
+	"\x3d\x58\xe0\x91"
+	"\x5b\xc9\x4f\xbc\x32\x21\xa5\xdb"
+	"\x94\xfa\xe9\x5a\xe7\x12\x1a\x47";
+
+static const u8 __initconst ctext4[16] =
+	"\xcd\x33\xb2\x8a\xc7\x73\xf7\x4b"
+	"\xa0\x0e\xd1\xf3\x12\x57\x24\x35";
+
+static const u8 __initconst ctext5[32] =
+	"\x98\xe7\x24\x7c\x07\xf0\xfe\x41"
+	"\x1c\x26\x7e\x43\x84\xb0\xf6\x00"
+	"\x2f\xf5\x8d\x80\x03\x39\x27\xab"
+	"\x8e\xf4\xd4\x58\x75\x14\xf0\xfb";
+
+static const u8 __initconst ptext6[64] =
+	"\xd9\x31\x32\x25\xf8\x84\x06\xe5"
+	"\xa5\x59\x09\xc5\xaf\xf5\x26\x9a"
+	"\x86\xa7\xa9\x53\x15\x34\xf7\xda"
+	"\x2e\x4c\x30\x3d\x8a\x31\x8a\x72"
+	"\x1c\x3c\x0c\x95\x95\x68\x09\x53"
+	"\x2f\xcf\x0e\x24\x49\xa6\xb5\x25"
+	"\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57"
+	"\xba\x63\x7b\x39\x1a\xaf\xd2\x55";
+
+static const u8 __initconst ctext6[80] =
+	"\x39\x80\xca\x0b\x3c\x00\xe8\x41"
+	"\xeb\x06\xfa\xc4\x87\x2a\x27\x57"
+	"\x85\x9e\x1c\xea\xa6\xef\xd9\x84"
+	"\x62\x85\x93\xb4\x0c\xa1\xe1\x9c"
+	"\x7d\x77\x3d\x00\xc1\x44\xc5\x25"
+	"\xac\x61\x9d\x18\xc8\x4a\x3f\x47"
+	"\x18\xe2\x44\x8b\x2f\xe3\x24\xd9"
+	"\xcc\xda\x27\x10\xac\xad\xe2\x56"
+	"\x99\x24\xa7\xc8\x58\x73\x36\xbf"
+	"\xb1\x18\x02\x4d\xb8\x67\x4a\x14";
+
+static const u8 __initconst ctext7[16] =
+	"\x53\x0f\x8a\xfb\xc7\x45\x36\xb9"
+	"\xa9\x63\xb4\xf1\xc4\xcb\x73\x8b";
+
+static const u8 __initconst ctext8[32] =
+	"\xce\xa7\x40\x3d\x4d\x60\x6b\x6e"
+	"\x07\x4e\xc5\xd3\xba\xf3\x9d\x18"
+	"\xd0\xd1\xc8\xa7\x99\x99\x6b\xf0"
+	"\x26\x5b\x98\xb5\xd4\x8a\xb9\x19";
+
+static const u8 __initconst ptext9[64] =
+	"\xd9\x31\x32\x25\xf8\x84\x06\xe5"
+	"\xa5\x59\x09\xc5\xaf\xf5\x26\x9a"
+	"\x86\xa7\xa9\x53\x15\x34\xf7\xda"
+	"\x2e\x4c\x30\x3d\x8a\x31\x8a\x72"
+	"\x1c\x3c\x0c\x95\x95\x68\x09\x53"
+	"\x2f\xcf\x0e\x24\x49\xa6\xb5\x25"
+	"\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57"
+	"\xba\x63\x7b\x39\x1a\xaf\xd2\x55";
+
+static const u8 __initconst ctext9[80] =
+	"\x52\x2d\xc1\xf0\x99\x56\x7d\x07"
+	"\xf4\x7f\x37\xa3\x2a\x84\x42\x7d"
+	"\x64\x3a\x8c\xdc\xbf\xe5\xc0\xc9"
+	"\x75\x98\xa2\xbd\x25\x55\xd1\xaa"
+	"\x8c\xb0\x8e\x48\x59\x0d\xbb\x3d"
+	"\xa7\xb0\x8b\x10\x56\x82\x88\x38"
+	"\xc5\xf6\x1e\x63\x93\xba\x7a\x0a"
+	"\xbc\xc9\xf6\x62\x89\x80\x15\xad"
+	"\xb0\x94\xda\xc5\xd9\x34\x71\xbd"
+	"\xec\x1a\x50\x22\x70\xe3\xcc\x6c";
+
+static const u8 __initconst ptext10[60] =
+	"\xd9\x31\x32\x25\xf8\x84\x06\xe5"
+	"\xa5\x59\x09\xc5\xaf\xf5\x26\x9a"
+	"\x86\xa7\xa9\x53\x15\x34\xf7\xda"
+	"\x2e\x4c\x30\x3d\x8a\x31\x8a\x72"
+	"\x1c\x3c\x0c\x95\x95\x68\x09\x53"
+	"\x2f\xcf\x0e\x24\x49\xa6\xb5\x25"
+	"\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57"
+	"\xba\x63\x7b\x39";
+
+static const u8 __initconst ctext10[76] =
+	"\x52\x2d\xc1\xf0\x99\x56\x7d\x07"
+	"\xf4\x7f\x37\xa3\x2a\x84\x42\x7d"
+	"\x64\x3a\x8c\xdc\xbf\xe5\xc0\xc9"
+	"\x75\x98\xa2\xbd\x25\x55\xd1\xaa"
+	"\x8c\xb0\x8e\x48\x59\x0d\xbb\x3d"
+	"\xa7\xb0\x8b\x10\x56\x82\x88\x38"
+	"\xc5\xf6\x1e\x63\x93\xba\x7a\x0a"
+	"\xbc\xc9\xf6\x62"
+	"\x76\xfc\x6e\xce\x0f\x4e\x17\x68"
+	"\xcd\xdf\x88\x53\xbb\x2d\x55\x1b";
+
+static const u8 __initconst ptext11[60] =
+	"\xd9\x31\x32\x25\xf8\x84\x06\xe5"
+	"\xa5\x59\x09\xc5\xaf\xf5\x26\x9a"
+	"\x86\xa7\xa9\x53\x15\x34\xf7\xda"
+	"\x2e\x4c\x30\x3d\x8a\x31\x8a\x72"
+	"\x1c\x3c\x0c\x95\x95\x68\x09\x53"
+	"\x2f\xcf\x0e\x24\x49\xa6\xb5\x25"
+	"\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57"
+	"\xba\x63\x7b\x39";
+
+static const u8 __initconst ctext11[76] =
+	"\x39\x80\xca\x0b\x3c\x00\xe8\x41"
+	"\xeb\x06\xfa\xc4\x87\x2a\x27\x57"
+	"\x85\x9e\x1c\xea\xa6\xef\xd9\x84"
+	"\x62\x85\x93\xb4\x0c\xa1\xe1\x9c"
+	"\x7d\x77\x3d\x00\xc1\x44\xc5\x25"
+	"\xac\x61\x9d\x18\xc8\x4a\x3f\x47"
+	"\x18\xe2\x44\x8b\x2f\xe3\x24\xd9"
+	"\xcc\xda\x27\x10"
+	"\x25\x19\x49\x8e\x80\xf1\x47\x8f"
+	"\x37\xba\x55\xbd\x6d\x27\x61\x8c";
+
+static const u8 __initconst ptext12[719] =
+	"\x42\xc1\xcc\x08\x48\x6f\x41\x3f"
+	"\x2f\x11\x66\x8b\x2a\x16\xf0\xe0"
+	"\x58\x83\xf0\xc3\x70\x14\xc0\x5b"
+	"\x3f\xec\x1d\x25\x3c\x51\xd2\x03"
+	"\xcf\x59\x74\x1f\xb2\x85\xb4\x07"
+	"\xc6\x6a\x63\x39\x8a\x5b\xde\xcb"
+	"\xaf\x08\x44\xbd\x6f\x91\x15\xe1"
+	"\xf5\x7a\x6e\x18\xbd\xdd\x61\x50"
+	"\x59\xa9\x97\xab\xbb\x0e\x74\x5c"
+	"\x00\xa4\x43\x54\x04\x54\x9b\x3b"
+	"\x77\xec\xfd\x5c\xa6\xe8\x7b\x08"
+	"\xae\xe6\x10\x3f\x32\x65\xd1\xfc"
+	"\xa4\x1d\x2c\x31\xfb\x33\x7a\xb3"
+	"\x35\x23\xf4\x20\x41\xd4\xad\x82"
+	"\x8b\xa4\xad\x96\x1c\x20\x53\xbe"
+	"\x0e\xa6\xf4\xdc\x78\x49\x3e\x72"
+	"\xb1\xa9\xb5\x83\xcb\x08\x54\xb7"
+	"\xad\x49\x3a\xae\x98\xce\xa6\x66"
+	"\x10\x30\x90\x8c\x55\x83\xd7\x7c"
+	"\x8b\xe6\x53\xde\xd2\x6e\x18\x21"
+	"\x01\x52\xd1\x9f\x9d\xbb\x9c\x73"
+	"\x57\xcc\x89\x09\x75\x9b\x78\x70"
+	"\xed\x26\x97\x4d\xb4\xe4\x0c\xa5"
+	"\xfa\x70\x04\x70\xc6\x96\x1c\x7d"
+	"\x54\x41\x77\xa8\xe3\xb0\x7e\x96"
+	"\x82\xd9\xec\xa2\x87\x68\x55\xf9"
+	"\x8f\x9e\x73\x43\x47\x6a\x08\x36"
+	"\x93\x67\xa8\x2d\xde\xac\x41\xa9"
+	"\x5c\x4d\x73\x97\x0f\x70\x68\xfa"
+	"\x56\x4d\x00\xc2\x3b\x1f\xc8\xb9"
+	"\x78\x1f\x51\x07\xe3\x9a\x13\x4e"
+	"\xed\x2b\x2e\xa3\xf7\x44\xb2\xe7"
+	"\xab\x19\x37\xd9\xba\x76\x5e\xd2"
+	"\xf2\x53\x15\x17\x4c\x6b\x16\x9f"
+	"\x02\x66\x49\xca\x7c\x91\x05\xf2"
+	"\x45\x36\x1e\xf5\x77\xad\x1f\x46"
+	"\xa8\x13\xfb\x63\xb6\x08\x99\x63"
+	"\x82\xa2\xed\xb3\xac\xdf\x43\x19"
+	"\x45\xea\x78\x73\xd9\xb7\x39\x11"
+	"\xa3\x13\x7c\xf8\x3f\xf7\xad\x81"
+	"\x48\x2f\xa9\x5c\x5f\xa0\xf0\x79"
+	"\xa4\x47\x7d\x80\x20\x26\xfd\x63"
+	"\x0a\xc7\x7e\x6d\x75\x47\xff\x76"
+	"\x66\x2e\x8a\x6c\x81\x35\xaf\x0b"
+	"\x2e\x6a\x49\x60\xc1\x10\xe1\xe1"
+	"\x54\x03\xa4\x09\x0c\x37\x7a\x15"
+	"\x23\x27\x5b\x8b\x4b\xa5\x64\x97"
+	"\xae\x4a\x50\x73\x1f\x66\x1c\x5c"
+	"\x03\x25\x3c\x8d\x48\x58\x71\x34"
+	"\x0e\xec\x4e\x55\x1a\x03\x6a\xe5"
+	"\xb6\x19\x2b\x84\x2a\x20\xd1\xea"
+	"\x80\x6f\x96\x0e\x05\x62\xc7\x78"
+	"\x87\x79\x60\x38\x46\xb4\x25\x57"
+	"\x6e\x16\x63\xf8\xad\x6e\xd7\x42"
+	"\x69\xe1\x88\xef\x6e\xd5\xb4\x9a"
+	"\x3c\x78\x6c\x3b\xe5\xa0\x1d\x22"
+	"\x86\x5c\x74\x3a\xeb\x24\x26\xc7"
+	"\x09\xfc\x91\x96\x47\x87\x4f\x1a"
+	"\xd6\x6b\x2c\x18\x47\xc0\xb8\x24"
+	"\xa8\x5a\x4a\x9e\xcb\x03\xe7\x2a"
+	"\x09\xe6\x4d\x9c\x6d\x86\x60\xf5"
+	"\x2f\x48\x69\x37\x9f\xf2\xd2\xcb"
+	"\x0e\x5a\xdd\x6e\x8a\xfb\x6a\xfe"
+	"\x0b\x63\xde\x87\x42\x79\x8a\x68"
+	"\x51\x28\x9b\x7a\xeb\xaf\xb8\x2f"
+	"\x9d\xd1\xc7\x45\x90\x08\xc9\x83"
+	"\xe9\x83\x84\xcb\x28\x69\x09\x69"
+	"\xce\x99\x46\x00\x54\xcb\xd8\x38"
+	"\xf9\x53\x4a\xbf\x31\xce\x57\x15"
+	"\x33\xfa\x96\x04\x33\x42\xe3\xc0"
+	"\xb7\x54\x4a\x65\x7a\x7c\x02\xe6"
+	"\x19\x95\xd0\x0e\x82\x07\x63\xf9"
+	"\xe1\x2b\x2a\xfc\x55\x92\x52\xc9"
+	"\xb5\x9f\x23\x28\x60\xe7\x20\x51"
+	"\x10\xd3\xed\x6d\x9b\xab\xb8\xe2"
+	"\x5d\x9a\x34\xb3\xbe\x9c\x64\xcb"
+	"\x78\xc6\x91\x22\x40\x91\x80\xbe"
+	"\xd7\x78\x5c\x0e\x0a\xdc\x08\xe9"
+	"\x67\x10\xa4\x83\x98\x79\x23\xe7"
+	"\x92\xda\xa9\x22\x16\xb1\xe7\x78"
+	"\xa3\x1c\x6c\x8f\x35\x7c\x4d\x37"
+	"\x2f\x6e\x0b\x50\x5c\x34\xb9\xf9"
+	"\xe6\x3d\x91\x0d\x32\x95\xaa\x3d"
+	"\x48\x11\x06\xbb\x2d\xf2\x63\x88"
+	"\x3f\x73\x09\xe2\x45\x56\x31\x51"
+	"\xfa\x5e\x4e\x62\xf7\x90\xf9\xa9"
+	"\x7d\x7b\x1b\xb1\xc8\x26\x6e\x66"
+	"\xf6\x90\x9a\x7f\xf2\x57\xcc\x23"
+	"\x59\xfa\xfa\xaa\x44\x04\x01\xa7"
+	"\xa4\x78\xdb\x74\x3d\x8b\xb5";
+
+static const u8 __initconst ctext12[735] =
+	"\x84\x0b\xdb\xd5\xb7\xa8\xfe\x20"
+	"\xbb\xb1\x12\x7f\x41\xea\xb3\xc0"
+	"\xa2\xb4\x37\x19\x11\x58\xb6\x0b"
+	"\x4c\x1d\x38\x05\x54\xd1\x16\x73"
+	"\x8e\x1c\x20\x90\xa2\x9a\xb7\x74"
+	"\x47\xe6\xd8\xfc\x18\x3a\xb4\xea"
+	"\xd5\x16\x5a\x2c\x53\x01\x46\xb3"
+	"\x18\x33\x74\x6c\x50\xf2\xe8\xc0"
+	"\x73\xda\x60\x22\xeb\xe3\xe5\x9b"
+	"\x20\x93\x6c\x4b\x37\x99\xb8\x23"
+	"\x3b\x4e\xac\xe8\x5b\xe8\x0f\xb7"
+	"\xc3\x8f\xfb\x4a\x37\xd9\x39\x95"
+	"\x34\xf1\xdb\x8f\x71\xd9\xc7\x0b"
+	"\x02\xf1\x63\xfc\x9b\xfc\xc5\xab"
+	"\xb9\x14\x13\x21\xdf\xce\xaa\x88"
+	"\x44\x30\x1e\xce\x26\x01\x92\xf8"
+	"\x9f\x00\x4b\x0c\x4b\xf7\x5f\xe0"
+	"\x89\xca\x94\x66\x11\x21\x97\xca"
+	"\x3e\x83\x74\x2d\xdb\x4d\x11\xeb"
+	"\x97\xc2\x14\xff\x9e\x1e\xa0\x6b"
+	"\x08\xb4\x31\x2b\x85\xc6\x85\x6c"
+	"\x90\xec\x39\xc0\xec\xb3\xb5\x4e"
+	"\xf3\x9c\xe7\x83\x3a\x77\x0a\xf4"
+	"\x56\xfe\xce\x18\x33\x6d\x0b\x2d"
+	"\x33\xda\xc8\x05\x5c\xb4\x09\x2a"
+	"\xde\x6b\x52\x98\x01\xef\x36\x3d"
+	"\xbd\xf9\x8f\xa8\x3e\xaa\xcd\xd1"
+	"\x01\x2d\x42\x49\xc3\xb6\x84\xbb"
+	"\x48\x96\xe0\x90\x93\x6c\x48\x64"
+	"\xd4\xfa\x7f\x93\x2c\xa6\x21\xc8"
+	"\x7a\x23\x7b\xaa\x20\x56\x12\xae"
+	"\x16\x9d\x94\x0f\x54\xa1\xec\xca"
+	"\x51\x4e\xf2\x39\xf4\xf8\x5f\x04"
+	"\x5a\x0d\xbf\xf5\x83\xa1\x15\xe1"
+	"\xf5\x3c\xd8\x62\xa3\xed\x47\x89"
+	"\x85\x4c\xe5\xdb\xac\x9e\x17\x1d"
+	"\x0c\x09\xe3\x3e\x39\x5b\x4d\x74"
+	"\x0e\xf5\x34\xee\x70\x11\x4c\xfd"
+	"\xdb\x34\xb1\xb5\x10\x3f\x73\xb7"
+	"\xf5\xfa\xed\xb0\x1f\xa5\xcd\x3c"
+	"\x8d\x35\x83\xd4\x11\x44\x6e\x6c"
+	"\x5b\xe0\x0e\x69\xa5\x39\xe5\xbb"
+	"\xa9\x57\x24\x37\xe6\x1f\xdd\xcf"
+	"\x16\x2a\x13\xf9\x6a\x2d\x90\xa0"
+	"\x03\x60\x7a\xed\x69\xd5\x00\x8b"
+	"\x7e\x4f\xcb\xb9\xfa\x91\xb9\x37"
+	"\xc1\x26\xce\x90\x97\x22\x64\x64"
+	"\xc1\x72\x43\x1b\xf6\xac\xc1\x54"
+	"\x8a\x10\x9c\xdd\x8d\xd5\x8e\xb2"
+	"\xe4\x85\xda\xe0\x20\x5f\xf4\xb4"
+	"\x15\xb5\xa0\x8d\x12\x74\x49\x23"
+	"\x3a\xdf\x4a\xd3\xf0\x3b\x89\xeb"
+	"\xf8\xcc\x62\x7b\xfb\x93\x07\x41"
+	"\x61\x26\x94\x58\x70\xa6\x3c\xe4"
+	"\xff\x58\xc4\x13\x3d\xcb\x36\x6b"
+	"\x32\xe5\xb2\x6d\x03\x74\x6f\x76"
+	"\x93\x77\xde\x48\xc4\xfa\x30\x4a"
+	"\xda\x49\x80\x77\x0f\x1c\xbe\x11"
+	"\xc8\x48\xb1\xe5\xbb\xf2\x8a\xe1"
+	"\x96\x2f\x9f\xd1\x8e\x8a\x5c\xe2"
+	"\xf7\xd7\xd8\x54\xf3\x3f\xc4\x91"
+	"\xb8\xfb\x86\xdc\x46\x24\x91\x60"
+	"\x6c\x2f\xc9\x41\x37\x51\x49\x54"
+	"\x09\x81\x21\xf3\x03\x9f\x2b\xe3"
+	"\x1f\x39\x63\xaf\xf4\xd7\x53\x60"
+	"\xa7\xc7\x54\xf9\xee\xb1\xb1\x7d"
+	"\x75\x54\x65\x93\xfe\xb1\x68\x6b"
+	"\x57\x02\xf9\xbb\x0e\xf9\xf8\xbf"
+	"\x01\x12\x27\xb4\xfe\xe4\x79\x7a"
+	"\x40\x5b\x51\x4b\xdf\x38\xec\xb1"
+	"\x6a\x56\xff\x35\x4d\x42\x33\xaa"
+	"\x6f\x1b\xe4\xdc\xe0\xdb\x85\x35"
+	"\x62\x10\xd4\xec\xeb\xc5\x7e\x45"
+	"\x1c\x6f\x17\xca\x3b\x8e\x2d\x66"
+	"\x4f\x4b\x36\x56\xcd\x1b\x59\xaa"
+	"\xd2\x9b\x17\xb9\x58\xdf\x7b\x64"
+	"\x8a\xff\x3b\x9c\xa6\xb5\x48\x9e"
+	"\xaa\xe2\x5d\x09\x71\x32\x5f\xb6"
+	"\x29\xbe\xe7\xc7\x52\x7e\x91\x82"
+	"\x6b\x6d\x33\xe1\x34\x06\x36\x21"
+	"\x5e\xbe\x1e\x2f\x3e\xc1\xfb\xea"
+	"\x49\x2c\xb5\xca\xf7\xb0\x37\xea"
+	"\x1f\xed\x10\x04\xd9\x48\x0d\x1a"
+	"\x1c\xfb\xe7\x84\x0e\x83\x53\x74"
+	"\xc7\x65\xe2\x5c\xe5\xba\x73\x4c"
+	"\x0e\xe1\xb5\x11\x45\x61\x43\x46"
+	"\xaa\x25\x8f\xbd\x85\x08\xfa\x4c"
+	"\x15\xc1\xc0\xd8\xf5\xdc\x16\xbb"
+	"\x7b\x1d\xe3\x87\x57\xa7\x2a\x1d"
+	"\x38\x58\x9e\x8a\x43\xdc\x57"
+	"\xd1\x81\x7d\x2b\xe9\xff\x99\x3a"
+	"\x4b\x24\x52\x58\x55\xe1\x49\x14";
+
+static struct {
+	const u8	*ptext;		/* expected plaintext; NULL if plen == 0 */
+	const u8	*ctext;		/* ciphertext, directly followed by the tag */
+
+	u8		key[AES_MAX_KEY_SIZE];	/* first klen bytes are used */
+	u8		iv[GCM_AES_IV_SIZE];
+	u8		assoc[20];		/* first alen bytes are used */
+
+	int		klen;		/* key size in bytes */
+	int		clen;		/* sizeof ctext; clen - plen is the tag size */
+	int		plen;		/* plaintext size in bytes */
+	int		alen;		/* associated data size in bytes */
+} const aesgcm_tv[] __initconst = {
+	{ /* From McGrew & Viega - http://citeseer.ist.psu.edu/656989.html */
+		.klen	= 16,
+		.ctext	= ctext0,
+		.clen	= sizeof(ctext0),
+	}, {
+		.klen	= 16,
+		.ptext	= ptext1,
+		.plen	= sizeof(ptext1),
+		.ctext	= ctext1,
+		.clen	= sizeof(ctext1),
+	}, {
+		.key	= "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
+			  "\x6d\x6a\x8f\x94\x67\x30\x83\x08",
+		.klen	= 16,
+		.iv	= "\xca\xfe\xba\xbe\xfa\xce\xdb\xad"
+			  "\xde\xca\xf8\x88",
+		.ptext	= ptext2,
+		.plen	= sizeof(ptext2),
+		.ctext	= ctext2,
+		.clen	= sizeof(ctext2),
+	}, {
+		.key	= "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
+			  "\x6d\x6a\x8f\x94\x67\x30\x83\x08",
+		.klen	= 16,
+		.iv	= "\xca\xfe\xba\xbe\xfa\xce\xdb\xad"
+			  "\xde\xca\xf8\x88",
+		.ptext	= ptext3,
+		.plen	= sizeof(ptext3),
+		.assoc	= "\xfe\xed\xfa\xce\xde\xad\xbe\xef"
+			  "\xfe\xed\xfa\xce\xde\xad\xbe\xef"
+			  "\xab\xad\xda\xd2",
+		.alen	= 20,
+		.ctext	= ctext3,
+		.clen	= sizeof(ctext3),
+	}, {
+		.klen	= 24,
+		.ctext	= ctext4,
+		.clen	= sizeof(ctext4),
+	}, {
+		.klen	= 24,
+		.ptext	= ptext1,
+		.plen	= sizeof(ptext1),
+		.ctext	= ctext5,
+		.clen	= sizeof(ctext5),
+	}, {
+		.key	= "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
+			  "\x6d\x6a\x8f\x94\x67\x30\x83\x08"
+			  "\xfe\xff\xe9\x92\x86\x65\x73\x1c",
+		.klen	= 24,
+		.iv	= "\xca\xfe\xba\xbe\xfa\xce\xdb\xad"
+			  "\xde\xca\xf8\x88",
+		.ptext	= ptext6,
+		.plen	= sizeof(ptext6),
+		.ctext	= ctext6,
+		.clen	= sizeof(ctext6),
+	}, {
+		.klen	= 32,
+		.ctext	= ctext7,
+		.clen	= sizeof(ctext7),
+	}, {
+		.klen	= 32,
+		.ptext	= ptext1,
+		.plen	= sizeof(ptext1),
+		.ctext	= ctext8,
+		.clen	= sizeof(ctext8),
+	}, {
+		.key	= "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
+			  "\x6d\x6a\x8f\x94\x67\x30\x83\x08"
+			  "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
+			  "\x6d\x6a\x8f\x94\x67\x30\x83\x08",
+		.klen	= 32,
+		.iv	= "\xca\xfe\xba\xbe\xfa\xce\xdb\xad"
+			  "\xde\xca\xf8\x88",
+		.ptext	= ptext9,
+		.plen	= sizeof(ptext9),
+		.ctext	= ctext9,
+		.clen	= sizeof(ctext9),
+	}, {
+		.key	= "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
+			  "\x6d\x6a\x8f\x94\x67\x30\x83\x08"
+			  "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
+			  "\x6d\x6a\x8f\x94\x67\x30\x83\x08",
+		.klen	= 32,
+		.iv	= "\xca\xfe\xba\xbe\xfa\xce\xdb\xad"
+			  "\xde\xca\xf8\x88",
+		.ptext	= ptext10,
+		.plen	= sizeof(ptext10),
+		.assoc	= "\xfe\xed\xfa\xce\xde\xad\xbe\xef"
+			  "\xfe\xed\xfa\xce\xde\xad\xbe\xef"
+			  "\xab\xad\xda\xd2",
+		.alen	= 20,
+		.ctext	= ctext10,
+		.clen	= sizeof(ctext10),
+	}, {
+		.key	= "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
+			  "\x6d\x6a\x8f\x94\x67\x30\x83\x08"
+			  "\xfe\xff\xe9\x92\x86\x65\x73\x1c",
+		.klen	= 24,
+		.iv	= "\xca\xfe\xba\xbe\xfa\xce\xdb\xad"
+			  "\xde\xca\xf8\x88",
+		.ptext	= ptext11,
+		.plen	= sizeof(ptext11),
+		.assoc	= "\xfe\xed\xfa\xce\xde\xad\xbe\xef"
+			  "\xfe\xed\xfa\xce\xde\xad\xbe\xef"
+			  "\xab\xad\xda\xd2",
+		.alen	= 20,
+		.ctext	= ctext11,
+		.clen	= sizeof(ctext11),
+	}, {
+		.key	= "\x62\x35\xf8\x95\xfc\xa5\xeb\xf6"
+			  "\x0e\x92\x12\x04\xd3\xa1\x3f\x2e"
+			  "\x8b\x32\xcf\xe7\x44\xed\x13\x59"
+			  "\x04\x38\x77\xb0\xb9\xad\xb4\x38",
+		.klen	= 32,
+		.iv	= "\x00\xff\xff\xff\xff\x00\x00\xff"
+			  "\xff\xff\x00\xff",
+		.ptext	= ptext12,
+		.plen	= sizeof(ptext12),
+		.ctext	= ctext12,
+		.clen	= sizeof(ctext12),
+	}
+};
+
+static int __init libaesgcm_init(void)
+{
+	for (int i = 0; i < ARRAY_SIZE(aesgcm_tv); i++) {
+		static u8 buf[sizeof(ptext12)] __initdata; /* not on stack */
+		typeof(&aesgcm_tv[0]) tv = &aesgcm_tv[i];
+		int plen = tv->plen, taglen = tv->clen - plen;
+		u8 tagbuf[AES_BLOCK_SIZE];
+		struct aesgcm_ctx ctx;
+
+		if (aesgcm_expandkey(&ctx, tv->key, tv->klen, taglen)) {
+			pr_err("aesgcm_expandkey() failed on vector %d\n", i);
+			return -ENODEV;
+		}
+
+		if (!aesgcm_decrypt(&ctx, buf, tv->ctext, plen, tv->assoc,
+				    tv->alen, tv->iv, tv->ctext + plen) ||
+		    memcmp(buf, tv->ptext, plen)) {
+			pr_err("aesgcm_decrypt() #1 failed on vector %d\n", i);
+			return -ENODEV;
+		}
+
+		/* encrypt in place; ciphertext and tag must both match */
+		aesgcm_encrypt(&ctx, buf, buf, plen, tv->assoc, tv->alen,
+			       tv->iv, tagbuf);
+		if (memcmp(buf, tv->ctext, plen) ||
+		    memcmp(tagbuf, tv->ctext + plen, taglen)) {
+			pr_err("aesgcm_encrypt() failed on vector %d\n", i);
+			return -ENODEV;
+		}
+
+		/* decrypt in place */
+		if (!aesgcm_decrypt(&ctx, buf, buf, plen, tv->assoc, tv->alen,
+				    tv->iv, tagbuf) ||
+		    memcmp(buf, tv->ptext, plen)) {
+			pr_err("aesgcm_decrypt() #2 failed on vector %d\n", i);
+			return -ENODEV;
+		}
+	}
+	return 0;
+}
+module_init(libaesgcm_init);
+
+static void __exit libaesgcm_exit(void)
+{	/* empty: libaesgcm_init() only runs self-tests, nothing to undo */
+}
+module_exit(libaesgcm_exit);
+#endif
diff --git a/lib/crypto/gf128mul.c b/lib/crypto/gf128mul.c
new file mode 100644
index 000000000000..8f8c45e0cdcf
--- /dev/null
+++ b/lib/crypto/gf128mul.c
@@ -0,0 +1,436 @@
+/* gf128mul.c - GF(2^128) multiplication functions
+ *
+ * Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.
+ * Copyright (c) 2006, Rik Snel <rsnel@cube.dyndns.org>
+ *
+ * Based on Dr Brian Gladman's (GPL'd) work published at
+ * http://gladman.plushost.co.uk/oldsite/cryptography_technology/index.php
+ * See the original copyright notice below.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+/*
+ ---------------------------------------------------------------------------
+ Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.   All rights reserved.
+
+ LICENSE TERMS
+
+ The free distribution and use of this software in both source and binary
+ form is allowed (with or without changes) provided that:
+
+   1. distributions of this source code include the above copyright
+      notice, this list of conditions and the following disclaimer;
+
+   2. distributions in binary form include the above copyright
+      notice, this list of conditions and the following disclaimer
+      in the documentation and/or other associated materials;
+
+   3. the copyright holder's name is not used to endorse products
+      built using this software without specific written permission.
+
+ ALTERNATIVELY, provided that this notice is retained in full, this product
+ may be distributed under the terms of the GNU General Public License (GPL),
+ in which case the provisions of the GPL apply INSTEAD OF those given above.
+
+ DISCLAIMER
+
+ This software is provided 'as is' with no explicit or implied warranties
+ in respect of its properties, including, but not limited to, correctness
+ and/or fitness for purpose.
+ ---------------------------------------------------------------------------
+ Issue 31/01/2006
+
+ This file provides fast multiplication in GF(2^128) as required by several
+ cryptographic authentication modes
+*/
+
+#include <crypto/gf128mul.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+/*
+ * Expands to a brace-enclosed initializer list of 256 elements in which
+ * element n is q(n), for n = 0x00..0xff in ascending order. q must be a
+ * function-like macro (or function usable in a constant initializer).
+ */
+#define gf128mul_dat(q) { \
+	q(0x00), q(0x01), q(0x02), q(0x03), q(0x04), q(0x05), q(0x06), q(0x07),\
+	q(0x08), q(0x09), q(0x0a), q(0x0b), q(0x0c), q(0x0d), q(0x0e), q(0x0f),\
+	q(0x10), q(0x11), q(0x12), q(0x13), q(0x14), q(0x15), q(0x16), q(0x17),\
+	q(0x18), q(0x19), q(0x1a), q(0x1b), q(0x1c), q(0x1d), q(0x1e), q(0x1f),\
+	q(0x20), q(0x21), q(0x22), q(0x23), q(0x24), q(0x25), q(0x26), q(0x27),\
+	q(0x28), q(0x29), q(0x2a), q(0x2b), q(0x2c), q(0x2d), q(0x2e), q(0x2f),\
+	q(0x30), q(0x31), q(0x32), q(0x33), q(0x34), q(0x35), q(0x36), q(0x37),\
+	q(0x38), q(0x39), q(0x3a), q(0x3b), q(0x3c), q(0x3d), q(0x3e), q(0x3f),\
+	q(0x40), q(0x41), q(0x42), q(0x43), q(0x44), q(0x45), q(0x46), q(0x47),\
+	q(0x48), q(0x49), q(0x4a), q(0x4b), q(0x4c), q(0x4d), q(0x4e), q(0x4f),\
+	q(0x50), q(0x51), q(0x52), q(0x53), q(0x54), q(0x55), q(0x56), q(0x57),\
+	q(0x58), q(0x59), q(0x5a), q(0x5b), q(0x5c), q(0x5d), q(0x5e), q(0x5f),\
+	q(0x60), q(0x61), q(0x62), q(0x63), q(0x64), q(0x65), q(0x66), q(0x67),\
+	q(0x68), q(0x69), q(0x6a), q(0x6b), q(0x6c), q(0x6d), q(0x6e), q(0x6f),\
+	q(0x70), q(0x71), q(0x72), q(0x73), q(0x74), q(0x75), q(0x76), q(0x77),\
+	q(0x78), q(0x79), q(0x7a), q(0x7b), q(0x7c), q(0x7d), q(0x7e), q(0x7f),\
+	q(0x80), q(0x81), q(0x82), q(0x83), q(0x84), q(0x85), q(0x86), q(0x87),\
+	q(0x88), q(0x89), q(0x8a), q(0x8b), q(0x8c), q(0x8d), q(0x8e), q(0x8f),\
+	q(0x90), q(0x91), q(0x92), q(0x93), q(0x94), q(0x95), q(0x96), q(0x97),\
+	q(0x98), q(0x99), q(0x9a), q(0x9b), q(0x9c), q(0x9d), q(0x9e), q(0x9f),\
+	q(0xa0), q(0xa1), q(0xa2), q(0xa3), q(0xa4), q(0xa5), q(0xa6), q(0xa7),\
+	q(0xa8), q(0xa9), q(0xaa), q(0xab), q(0xac), q(0xad), q(0xae), q(0xaf),\
+	q(0xb0), q(0xb1), q(0xb2), q(0xb3), q(0xb4), q(0xb5), q(0xb6), q(0xb7),\
+	q(0xb8), q(0xb9), q(0xba), q(0xbb), q(0xbc), q(0xbd), q(0xbe), q(0xbf),\
+	q(0xc0), q(0xc1), q(0xc2), q(0xc3), q(0xc4), q(0xc5), q(0xc6), q(0xc7),\
+	q(0xc8), q(0xc9), q(0xca), q(0xcb), q(0xcc), q(0xcd), q(0xce), q(0xcf),\
+	q(0xd0), q(0xd1), q(0xd2), q(0xd3), q(0xd4), q(0xd5), q(0xd6), q(0xd7),\
+	q(0xd8), q(0xd9), q(0xda), q(0xdb), q(0xdc), q(0xdd), q(0xde), q(0xdf),\
+	q(0xe0), q(0xe1), q(0xe2), q(0xe3), q(0xe4), q(0xe5), q(0xe6), q(0xe7),\
+	q(0xe8), q(0xe9), q(0xea), q(0xeb), q(0xec), q(0xed), q(0xee), q(0xef),\
+	q(0xf0), q(0xf1), q(0xf2), q(0xf3), q(0xf4), q(0xf5), q(0xf6), q(0xf7),\
+	q(0xf8), q(0xf9), q(0xfa), q(0xfb), q(0xfc), q(0xfd), q(0xfe), q(0xff) \
+}
+
+/*
+ * Given a value i in 0..255 as the byte overflow when a field element
+ * in GF(2^128) is multiplied by x^8, the following macro returns the
+ * 16-bit value that must be XOR-ed into the low-degree end of the
+ * product to reduce it modulo the polynomial x^128 + x^7 + x^2 + x + 1.
+ *
+ * There are two versions of the macro, and hence two tables: one for
+ * the "be" convention where the highest-order bit is the coefficient of
+ * the highest-degree polynomial term, and one for the "le" convention
+ * where the highest-order bit is the coefficient of the lowest-degree
+ * polynomial term.  In both cases the values are stored in CPU byte
+ * endianness such that the coefficients are ordered consistently across
+ * bytes, i.e. in the "be" table bits 15..0 of the stored value
+ * correspond to the coefficients of x^15..x^0, and in the "le" table
+ * bits 15..0 correspond to the coefficients of x^0..x^15.
+ *
+ * Therefore, provided that the appropriate byte endianness conversions
+ * are done by the multiplication functions (and these must be in place
+ * anyway to support both little endian and big endian CPUs), the "be"
+ * table can be used for multiplications of both "bbe" and "ble"
+ * elements, and the "le" table can be used for multiplications of both
+ * "lle" and "lbe" elements.
+ */
+
+/*
+ * xda_be(i) / xda_le(i): fold the 8 bits of i into a 16-bit reduction
+ * value, one constant per set bit ("be" and "le" bit conventions
+ * respectively). Each expands to an integer constant expression when i
+ * is one, so both can be used as table initializers.
+ *
+ * The parameter is parenthesised at every use so that an expression
+ * argument (e.g. a | b) expands with the intended precedence. Note that
+ * the argument is evaluated eight times and must be free of side
+ * effects.
+ */
+#define xda_be(i) ( \
+	((i) & 0x80 ? 0x4380 : 0) ^ ((i) & 0x40 ? 0x21c0 : 0) ^ \
+	((i) & 0x20 ? 0x10e0 : 0) ^ ((i) & 0x10 ? 0x0870 : 0) ^ \
+	((i) & 0x08 ? 0x0438 : 0) ^ ((i) & 0x04 ? 0x021c : 0) ^ \
+	((i) & 0x02 ? 0x010e : 0) ^ ((i) & 0x01 ? 0x0087 : 0) \
+)
+
+#define xda_le(i) ( \
+	((i) & 0x80 ? 0xe100 : 0) ^ ((i) & 0x40 ? 0x7080 : 0) ^ \
+	((i) & 0x20 ? 0x3840 : 0) ^ ((i) & 0x10 ? 0x1c20 : 0) ^ \
+	((i) & 0x08 ? 0x0e10 : 0) ^ ((i) & 0x04 ? 0x0708 : 0) ^ \
+	((i) & 0x02 ? 0x0384 : 0) ^ ((i) & 0x01 ? 0x01c2 : 0) \
+)
+
+/*
+ * 256-entry lookup tables: entry n holds xda_le(n) / xda_be(n), filled in
+ * by the initializer list that gf128mul_dat() expands to.
+ */
+static const u16 gf128mul_table_le[256] = gf128mul_dat(xda_le);
+static const u16 gf128mul_table_be[256] = gf128mul_dat(xda_be);
+
+/*
+ * The following functions multiply a field element by x^8 in
+ * the polynomial field representation.  They use 64-bit word operations
+ * to gain speed but compensate for machine endianness and hence work
+ * correctly on both styles of machine.
+ */
+
+/*
+ * Multiply *x by x^8 in place, "lle" convention. Both words move right
+ * by one byte; the byte that falls off the second word indexes
+ * gf128mul_table_le[] and the table value is XOR-ed into the top 16
+ * bits of the first word.
+ */
+static void gf128mul_x8_lle(be128 *x)
+{
+	const u64 hi = be64_to_cpu(x->a);
+	const u64 lo = be64_to_cpu(x->b);
+	const u64 fold = gf128mul_table_le[lo & 0xff];
+
+	x->a = cpu_to_be64((hi >> 8) ^ (fold << 48));
+	x->b = cpu_to_be64((lo >> 8) | (hi << 56));
+}
+
+/*
+ * Time invariant variant of gf128mul_x8_lle(): identical word shuffling,
+ * but the reduction value comes from evaluating xda_le() directly rather
+ * than from a table lookup indexed by the data.
+ */
+static void gf128mul_x8_lle_ti(be128 *x)
+{
+	const u64 hi = be64_to_cpu(x->a);
+	const u64 lo = be64_to_cpu(x->b);
+	const u64 fold = xda_le(lo & 0xff); /* no table access */
+
+	x->a = cpu_to_be64((hi >> 8) ^ (fold << 48));
+	x->b = cpu_to_be64((lo >> 8) | (hi << 56));
+}
+
+/*
+ * Multiply *x by x^8 in place, "bbe" convention. Both words move left by
+ * one byte; the byte that falls off the first word indexes
+ * gf128mul_table_be[] and the table value is XOR-ed into the low end of
+ * the second word.
+ */
+static void gf128mul_x8_bbe(be128 *x)
+{
+	const u64 hi = be64_to_cpu(x->a);
+	const u64 lo = be64_to_cpu(x->b);
+	const u64 fold = gf128mul_table_be[hi >> 56];
+
+	x->b = cpu_to_be64((lo << 8) ^ fold);
+	x->a = cpu_to_be64((hi << 8) | (lo >> 56));
+}
+
+/*
+ * gf128mul_x8_ble - store x * x^8 in *r, "ble" convention
+ * @r: destination
+ * @x: source; both of its words are loaded before *r is written
+ *
+ * Same arithmetic as gf128mul_x8_bbe() (shared gf128mul_table_be[]), but
+ * the words are converted with the little-endian helpers.
+ */
+void gf128mul_x8_ble(le128 *r, const le128 *x)
+{
+	const u64 hi = le64_to_cpu(x->a);
+	const u64 lo = le64_to_cpu(x->b);
+	const u64 fold = gf128mul_table_be[hi >> 56];
+
+	r->b = cpu_to_le64((lo << 8) ^ fold);
+	r->a = cpu_to_le64((hi << 8) | (lo >> 56));
+}
+EXPORT_SYMBOL(gf128mul_x8_ble);
+
+/*
+ * gf128mul_lle - multiply *r by *b in place ("lle" convention)
+ * @r: first factor on entry, overwritten with the product
+ * @b: second factor; only read, one byte at a time
+ *
+ * Written so that the sequence of operations does not depend on the
+ * value of @b; see the comment inside the function.
+ */
+void gf128mul_lle(be128 *r, const be128 *b)
+{
+	/*
+	 * The p array should be aligned to twice the size of its element type,
+	 * so that every even/odd pair is guaranteed to share a cacheline
+	 * (assuming a cacheline size of 32 bytes or more, which is by far the
+	 * most common). This ensures that each be128_xor() call in the loop
+	 * takes the same amount of time regardless of the value of 'ch', which
+	 * is derived from function parameter 'b', which is commonly used as a
+	 * key, e.g., for GHASH. The odd array elements are all set to zero,
+	 * making each be128_xor() a NOP if its associated bit in 'ch' is not
+	 * set, and this is equivalent to calling be128_xor() conditionally.
+	 * This approach aims to avoid leaking information about such keys
+	 * through execution time variances.
+	 *
+	 * Unfortunately, __aligned(16) or higher does not work on x86 for
+	 * variables on the stack so we need to perform the alignment by hand.
+	 */
+	be128 array[16 + 3] = {};
+	be128 *p = PTR_ALIGN(&array[0], 2 * sizeof(be128));
+	int i;
+
+	/* even slots: p[0] = r, p[2k + 2] derived from p[2k]; odd slots stay 0 */
+	p[0] = *r;
+	for (i = 0; i < 7; ++i)
+		gf128mul_x_lle(&p[2 * i + 2], &p[2 * i]);
+
+	memset(r, 0, sizeof(*r));
+	for (i = 0;;) {
+		/* the bytes of b are consumed from index 15 down to index 0 */
+		u8 ch = ((u8 *)b)[15 - i];
+
+		/* !(bit) is 1 for a clear bit, selecting the zero odd slot */
+		be128_xor(r, r, &p[ 0 + !(ch & 0x80)]);
+		be128_xor(r, r, &p[ 2 + !(ch & 0x40)]);
+		be128_xor(r, r, &p[ 4 + !(ch & 0x20)]);
+		be128_xor(r, r, &p[ 6 + !(ch & 0x10)]);
+		be128_xor(r, r, &p[ 8 + !(ch & 0x08)]);
+		be128_xor(r, r, &p[10 + !(ch & 0x04)]);
+		be128_xor(r, r, &p[12 + !(ch & 0x02)]);
+		be128_xor(r, r, &p[14 + !(ch & 0x01)]);
+
+		/* no x^8 step after the sixteenth (last) byte */
+		if (++i >= 16)
+			break;
+
+		gf128mul_x8_lle_ti(r); /* use the time invariant version */
+	}
+}
+EXPORT_SYMBOL(gf128mul_lle);
+
+/*
+ * gf128mul_bbe - multiply *r by *b in place ("bbe" convention)
+ * @r: first factor on entry, overwritten with the product
+ * @b: second factor; read byte by byte from index 0 to index 15
+ *
+ * The XORs below are executed conditionally on the bits of @b.
+ */
+void gf128mul_bbe(be128 *r, const be128 *b)
+{
+	const u8 *bytes = (const u8 *)b;
+	be128 pow[8];
+	int n, bit;
+
+	/* pow[0] = r, pow[k] derived from pow[k - 1] via gf128mul_x_bbe() */
+	pow[0] = *r;
+	for (n = 1; n < 8; n++)
+		gf128mul_x_bbe(&pow[n], &pow[n - 1]);
+
+	memset(r, 0, sizeof(*r));
+	for (n = 0; n < 16; n++) {
+		u8 ch;
+
+		/* x^8 step between consecutive bytes, none before the first */
+		if (n)
+			gf128mul_x8_bbe(r);
+
+		ch = bytes[n];
+		/* bit k of the byte selects pow[k], highest bit first */
+		for (bit = 7; bit >= 0; bit--)
+			if (ch & (1 << bit))
+				be128_xor(r, r, &pow[bit]);
+	}
+}
+EXPORT_SYMBOL(gf128mul_bbe);
+
+/*      This version uses 64k bytes of table space.
+    A 16 byte buffer has to be multiplied by a 16 byte key
+    value in GF(2^128).  If we consider a GF(2^128) value in
+    the buffer's lowest byte, we can construct a table of
+    the 256 16 byte values that result from the 256 values
+    of this byte.  This requires 4096 bytes. But we also
+    need tables for each of the 15 higher bytes in the
+    buffer as well, which makes 64 kbytes in total.
+*/
+/* additional explanation
+ * t[0][BYTE] contains g*BYTE
+ * t[1][BYTE] contains g*x^8*BYTE
+ *  ..
+ * t[15][BYTE] contains g*x^120*BYTE */
+/*
+ * gf128mul_init_64k_bbe - allocate and fill the 16 per-byte tables for g
+ * @g: field element the tables are derived from
+ *
+ * Table 0 is seeded with g at index 1; every later table is seeded from
+ * the power-of-two entries of the previous one, each passed through
+ * gf128mul_x8_bbe(). The remaining entries of a table are XOR
+ * combinations of its power-of-two entries.
+ *
+ * Returns the new table set, or NULL if any allocation failed (whatever
+ * had been allocated by then is released via gf128mul_free_64k()).
+ */
+struct gf128mul_64k *gf128mul_init_64k_bbe(const be128 *g)
+{
+	struct gf128mul_64k *tables;
+	int n, top, low;
+
+	tables = kzalloc(sizeof(*tables), GFP_KERNEL);
+	if (!tables)
+		return NULL;
+
+	for (n = 0; n < 16; n++) {
+		tables->t[n] = kzalloc(sizeof(*tables->t[n]), GFP_KERNEL);
+		if (!tables->t[n]) {
+			gf128mul_free_64k(tables);
+			return NULL;
+		}
+	}
+
+	for (n = 0; n < 16; n++) {
+		be128 *cur = tables->t[n]->t;
+
+		if (n == 0) {
+			/* entries 1, 2, 4, ..., 128 of the first table */
+			cur[1] = *g;
+			for (top = 1; top <= 64; top <<= 1)
+				gf128mul_x_bbe(&cur[top + top], &cur[top]);
+		} else {
+			be128 *prev = tables->t[n - 1]->t;
+
+			/* same entries, taken from the previous table */
+			for (top = 128; top > 0; top >>= 1) {
+				cur[top] = prev[top];
+				gf128mul_x8_bbe(&cur[top]);
+			}
+		}
+
+		/* every other non-zero index is top + low with low < top */
+		for (top = 2; top < 256; top <<= 1)
+			for (low = 1; low < top; low++)
+				be128_xor(&cur[top + low], &cur[top],
+					  &cur[low]);
+	}
+
+	return tables;
+}
+EXPORT_SYMBOL(gf128mul_init_64k_bbe);
+
+/*
+ * gf128mul_free_64k - release a table set from gf128mul_init_64k_bbe()
+ * @t: table set to release; NULL is accepted and ignored
+ *
+ * All 16 sub-tables and the container itself are handed to
+ * kfree_sensitive(). Sub-table slots may still be NULL when this is
+ * called from the allocation failure path of gf128mul_init_64k_bbe().
+ */
+void gf128mul_free_64k(struct gf128mul_64k *t)
+{
+	int i;
+
+	/* tolerate NULL like kfree() so callers need no guard of their own */
+	if (!t)
+		return;
+
+	for (i = 0; i < 16; i++)
+		kfree_sensitive(t->t[i]);
+	kfree_sensitive(t);
+}
+EXPORT_SYMBOL(gf128mul_free_64k);
+
+/*
+ * gf128mul_64k_bbe - multiply *a in place using precomputed tables
+ * @a: value to multiply; replaced by the result
+ * @t: table set built by gf128mul_init_64k_bbe()
+ *
+ * Table i is indexed with byte 15 - i of @a and the 16 looked-up
+ * entries are XOR-ed together.
+ */
+void gf128mul_64k_bbe(be128 *a, const struct gf128mul_64k *t)
+{
+	const u8 *bytes = (const u8 *)a;
+	be128 acc = t->t[0]->t[bytes[15]];
+	int i;
+
+	for (i = 1; i < 16; ++i)
+		be128_xor(&acc, &acc, &t->t[i]->t[bytes[15 - i]]);
+
+	/* *a is only overwritten once all of its bytes have been read */
+	*a = acc;
+}
+EXPORT_SYMBOL(gf128mul_64k_bbe);
+
+/*      This version uses 4k bytes of table space.
+    A 16 byte buffer has to be multiplied by a 16 byte key
+    value in GF(2^128).  If we consider a GF(2^128) value in a
+    single byte, we can construct a table of the 256 16 byte
+    values that result from the 256 values of this byte.
+    This requires 4096 bytes. If we take the highest byte in
+    the buffer and use this table to get the result, we then
+    have to multiply by x^120 to get the final value. For the
+    next highest byte the result has to be multiplied by x^112
+    and so on. But we can do this by accumulating the result
+    in an accumulator starting with the result for the top
+    byte.  We repeatedly multiply the accumulator value by
+    x^8 and then add in (i.e. xor) the 16 bytes of the next
+    lower byte in the buffer, stopping when we reach the
+    lowest byte. This requires a 4096 byte table.
+*/
+/*
+ * gf128mul_init_4k_lle - allocate and fill a 256-entry table for g (lle)
+ * @g: field element the table is derived from
+ *
+ * g is stored at index 128; indices 64, 32, ..., 1 are each derived from
+ * the entry at twice their index with gf128mul_x_lle(). All remaining
+ * entries are XOR combinations of those power-of-two entries.
+ *
+ * Returns the table, or NULL if the allocation failed.
+ */
+struct gf128mul_4k *gf128mul_init_4k_lle(const be128 *g)
+{
+	struct gf128mul_4k *tbl;
+	int top, low;
+
+	tbl = kzalloc(sizeof(*tbl), GFP_KERNEL);
+	if (!tbl)
+		return NULL;
+
+	tbl->t[128] = *g;
+	for (top = 64; top > 0; top >>= 1)
+		gf128mul_x_lle(&tbl->t[top], &tbl->t[2 * top]);
+
+	for (top = 2; top < 256; top <<= 1)
+		for (low = 1; low < top; low++)
+			be128_xor(&tbl->t[top + low], &tbl->t[top],
+				  &tbl->t[low]);
+
+	return tbl;
+}
+EXPORT_SYMBOL(gf128mul_init_4k_lle);
+
+/*
+ * gf128mul_init_4k_bbe - allocate and fill a 256-entry table for g (bbe)
+ * @g: field element the table is derived from
+ *
+ * g is stored at index 1; indices 2, 4, ..., 128 are each derived from
+ * the entry at half their index with gf128mul_x_bbe(). All remaining
+ * entries are XOR combinations of those power-of-two entries.
+ *
+ * Returns the table, or NULL if the allocation failed.
+ */
+struct gf128mul_4k *gf128mul_init_4k_bbe(const be128 *g)
+{
+	struct gf128mul_4k *tbl;
+	int top, low;
+
+	tbl = kzalloc(sizeof(*tbl), GFP_KERNEL);
+	if (!tbl)
+		return NULL;
+
+	tbl->t[1] = *g;
+	for (top = 1; top <= 64; top <<= 1)
+		gf128mul_x_bbe(&tbl->t[2 * top], &tbl->t[top]);
+
+	for (top = 2; top < 256; top <<= 1)
+		for (low = 1; low < top; low++)
+			be128_xor(&tbl->t[top + low], &tbl->t[top],
+				  &tbl->t[low]);
+
+	return tbl;
+}
+EXPORT_SYMBOL(gf128mul_init_4k_bbe);
+
+/*
+ * gf128mul_4k_lle - multiply *a in place using a 4k table (lle)
+ * @a: value to multiply; replaced by the result
+ * @t: table built by gf128mul_init_4k_lle()
+ *
+ * Starts from the table entry for byte 15 of @a, then for bytes 14 down
+ * to 0 applies gf128mul_x8_lle() to the accumulator and XORs in the
+ * entry for that byte.
+ */
+void gf128mul_4k_lle(be128 *a, const struct gf128mul_4k *t)
+{
+	const u8 *bytes = (const u8 *)a;
+	be128 acc = t->t[bytes[15]];
+	int i;
+
+	for (i = 14; i >= 0; i--) {
+		gf128mul_x8_lle(&acc);
+		be128_xor(&acc, &acc, &t->t[bytes[i]]);
+	}
+	*a = acc;
+}
+EXPORT_SYMBOL(gf128mul_4k_lle);
+
+/*
+ * gf128mul_4k_bbe - multiply *a in place using a 4k table (bbe)
+ * @a: value to multiply; replaced by the result
+ * @t: table built by gf128mul_init_4k_bbe()
+ *
+ * Starts from the table entry for byte 0 of @a, then for bytes 1 up to
+ * 15 applies gf128mul_x8_bbe() to the accumulator and XORs in the entry
+ * for that byte.
+ */
+void gf128mul_4k_bbe(be128 *a, const struct gf128mul_4k *t)
+{
+	const u8 *bytes = (const u8 *)a;
+	be128 acc = t->t[bytes[0]];
+	int i;
+
+	for (i = 1; i < 16; i++) {
+		gf128mul_x8_bbe(&acc);
+		be128_xor(&acc, &acc, &t->t[bytes[i]]);
+	}
+	*a = acc;
+}
+EXPORT_SYMBOL(gf128mul_4k_bbe);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Functions for multiplying elements of GF(2^128)");
author	Linus Torvalds <torvalds@linux-foundation.org>	2022-12-14 12:31:09 -0800
committer	Linus Torvalds <torvalds@linux-foundation.org>	2022-12-14 12:31:09 -0800
commit	64e7003c6b85626a533a67c1ba938b75a3db24e6 (patch)
tree	5e3e776d23a9520f51251b4838d4aa66d920dbff /lib
parent	48ea09cddae0b794cde2070f106ef676703dbcd3 (diff)
parent	453de3eb08c4b7e31b3019a4b0cc3ebce51a6219 (diff)
download	lwn-64e7003c6b85626a533a67c1ba938b75a3db24e6.tar.gz lwn-64e7003c6b85626a533a67c1ba938b75a3db24e6.zip