80 files changed, 1460 insertions, 1702 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index a9dd4c39ec00..a0ce777f9706 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -704,6 +704,10 @@ config RELOCATABLE_TEST
 config ARCH_SUPPORTS_CRASH_DUMP
 	def_bool PPC64 || PPC_BOOK3S_32 || PPC_85xx || (44x && !SMP)
 
+config ARCH_DEFAULT_CRASH_DUMP
+	bool
+	default y if !PPC_BOOK3S_32
+
 config ARCH_SELECTS_CRASH_DUMP
 	def_bool y
 	depends on CRASH_DUMP
@@ -1318,6 +1322,14 @@ config MODULES_SIZE
 
 endmenu
 
+config PPC64_PROC_SYSTEMCFG
+	def_bool y
+	depends on PPC64 && PROC_FS
+	help
+	  This option enables the presence of /proc/ppc64/systemcfg through
+	  which the systemcfg page can be accessed.
+	  This interface only exists for backwards-compatibility.
+
 if PPC64
 # This value must have zeroes in the bottom 60 bits otherwise lots will break
 config PAGE_OFFSET
diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig
index c06344db0eb3..4d77e17541e9 100644
--- a/arch/powerpc/configs/ppc6xx_defconfig
+++ b/arch/powerpc/configs/ppc6xx_defconfig
@@ -435,7 +435,6 @@ CONFIG_DM9102=m
 CONFIG_ULI526X=m
 CONFIG_PCMCIA_XIRCOM=m
 CONFIG_DL2K=m
-CONFIG_SUNDANCE=m
 CONFIG_S2IO=m
 CONFIG_FEC_MPC52xx=m
 CONFIG_GIANFAR=m
diff --git a/arch/powerpc/crypto/Kconfig b/arch/powerpc/crypto/Kconfig
index 46a4c85e85e2..951a43726461 100644
--- a/arch/powerpc/crypto/Kconfig
+++ b/arch/powerpc/crypto/Kconfig
@@ -107,12 +107,12 @@ config CRYPTO_AES_PPC_SPE
 
 config CRYPTO_AES_GCM_P10
 	tristate "Stitched AES/GCM acceleration support on P10 or later CPU (PPC)"
-	depends on BROKEN
 	depends on PPC64 && CPU_LITTLE_ENDIAN && VSX
 	select CRYPTO_LIB_AES
 	select CRYPTO_ALGAPI
 	select CRYPTO_AEAD
 	select CRYPTO_SKCIPHER
+	select CRYPTO_SIMD
 	help
 	  AEAD cipher: AES cipher algorithms (FIPS-197)
 	  GCM (Galois/Counter Mode) authenticated encryption mode (NIST SP800-38D)
diff --git a/arch/powerpc/crypto/aes-gcm-p10-glue.c b/arch/powerpc/crypto/aes-gcm-p10-glue.c
index f66ad56e765f..f37b3d13fc53 100644
--- a/arch/powerpc/crypto/aes-gcm-p10-glue.c
+++ b/arch/powerpc/crypto/aes-gcm-p10-glue.c
@@ -8,6 +8,7 @@
 #include <linux/unaligned.h>
 #include <asm/simd.h>
 #include <asm/switch_to.h>
+#include <crypto/gcm.h>
 #include <crypto/aes.h>
 #include <crypto/algapi.h>
 #include <crypto/b128ops.h>
@@ -24,6 +25,7 @@
 
 #define	PPC_ALIGN		16
 #define GCM_IV_SIZE		12
+#define RFC4106_NONCE_SIZE	4
 
 MODULE_DESCRIPTION("PPC64le AES-GCM with Stitched implementation");
 MODULE_AUTHOR("Danny Tsen <dtsen@linux.ibm.com");
@@ -31,7 +33,7 @@ MODULE_LICENSE("GPL v2");
 MODULE_ALIAS_CRYPTO("aes");
 
 asmlinkage int aes_p10_set_encrypt_key(const u8 *userKey, const int bits,
-				      void *key);
+				       void *key);
 asmlinkage void aes_p10_encrypt(const u8 *in, u8 *out, const void *key);
 asmlinkage void aes_p10_gcm_encrypt(u8 *in, u8 *out, size_t len,
 				    void *rkey, u8 *iv, void *Xi);
@@ -39,7 +41,8 @@ asmlinkage void aes_p10_gcm_decrypt(u8 *in, u8 *out, size_t len,
 				    void *rkey, u8 *iv, void *Xi);
 asmlinkage void gcm_init_htable(unsigned char htable[], unsigned char Xi[]);
 asmlinkage void gcm_ghash_p10(unsigned char *Xi, unsigned char *Htable,
-		unsigned char *aad, unsigned int alen);
+			      unsigned char *aad, unsigned int alen);
+asmlinkage void gcm_update(u8 *iv, void *Xi);
 
 struct aes_key {
 	u8 key[AES_MAX_KEYLENGTH];
@@ -52,6 +55,7 @@ struct gcm_ctx {
 	u8 aad_hash[16];
 	u64 aadLen;
 	u64 Plen;	/* offset 56 - used in aes_p10_gcm_{en/de}crypt */
+	u8 pblock[16];
 };
 struct Hash_ctx {
 	u8 H[16];	/* subkey */
@@ -60,17 +64,20 @@ struct Hash_ctx {
 
 struct p10_aes_gcm_ctx {
 	struct aes_key enc_key;
+	u8 nonce[RFC4106_NONCE_SIZE];
 };
 
 static void vsx_begin(void)
 {
 	preempt_disable();
+	pagefault_disable();
 	enable_kernel_vsx();
 }
 
 static void vsx_end(void)
 {
 	disable_kernel_vsx();
+	pagefault_enable();
 	preempt_enable();
 }
 
@@ -185,7 +192,7 @@ static int set_authsize(struct crypto_aead *tfm, unsigned int authsize)
 }
 
 static int p10_aes_gcm_setkey(struct crypto_aead *aead, const u8 *key,
-			     unsigned int keylen)
+			      unsigned int keylen)
 {
 	struct crypto_tfm *tfm = crypto_aead_tfm(aead);
 	struct p10_aes_gcm_ctx *ctx = crypto_tfm_ctx(tfm);
@@ -198,7 +205,8 @@ static int p10_aes_gcm_setkey(struct crypto_aead *aead, const u8 *key,
 	return ret ? -EINVAL : 0;
 }
 
-static int p10_aes_gcm_crypt(struct aead_request *req, int enc)
+static int p10_aes_gcm_crypt(struct aead_request *req, u8 *riv,
+			     int assoclen, int enc)
 {
 	struct crypto_tfm *tfm = req->base.tfm;
 	struct p10_aes_gcm_ctx *ctx = crypto_tfm_ctx(tfm);
@@ -210,7 +218,6 @@ static int p10_aes_gcm_crypt(struct aead_request *req, int enc)
 	struct skcipher_walk walk;
 	u8 *assocmem = NULL;
 	u8 *assoc;
-	unsigned int assoclen = req->assoclen;
 	unsigned int cryptlen = req->cryptlen;
 	unsigned char ivbuf[AES_BLOCK_SIZE+PPC_ALIGN];
 	unsigned char *iv = PTR_ALIGN((void *)ivbuf, PPC_ALIGN);
@@ -218,11 +225,12 @@ static int p10_aes_gcm_crypt(struct aead_request *req, int enc)
 	unsigned long auth_tag_len = crypto_aead_authsize(__crypto_aead_cast(tfm));
 	u8 otag[16];
 	int total_processed = 0;
+	int nbytes;
 
 	memset(databuf, 0, sizeof(databuf));
 	memset(hashbuf, 0, sizeof(hashbuf));
 	memset(ivbuf, 0, sizeof(ivbuf));
-	memcpy(iv, req->iv, GCM_IV_SIZE);
+	memcpy(iv, riv, GCM_IV_SIZE);
 
 	/* Linearize assoc, if not already linear */
 	if (req->src->length >= assoclen && req->src->length) {
@@ -257,19 +265,25 @@ static int p10_aes_gcm_crypt(struct aead_request *req, int enc)
 	if (ret)
 		return ret;
 
-	while (walk.nbytes > 0 && ret == 0) {
+	while ((nbytes = walk.nbytes) > 0 && ret == 0) {
+		u8 *src = walk.src.virt.addr;
+		u8 *dst = walk.dst.virt.addr;
+		u8 buf[AES_BLOCK_SIZE];
+
+		if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE))
+			src = dst = memcpy(buf, src, nbytes);
 
 		vsx_begin();
 		if (enc)
-			aes_p10_gcm_encrypt(walk.src.virt.addr,
-					    walk.dst.virt.addr,
-					    walk.nbytes,
+			aes_p10_gcm_encrypt(src, dst, nbytes,
 					    &ctx->enc_key, gctx->iv, hash->Htable);
 		else
-			aes_p10_gcm_decrypt(walk.src.virt.addr,
-					    walk.dst.virt.addr,
-					    walk.nbytes,
+			aes_p10_gcm_decrypt(src, dst, nbytes,
 					    &ctx->enc_key, gctx->iv, hash->Htable);
+
+		if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE))
+			memcpy(walk.dst.virt.addr, buf, nbytes);
+
 		vsx_end();
 
 		total_processed += walk.nbytes;
@@ -281,6 +295,7 @@ static int p10_aes_gcm_crypt(struct aead_request *req, int enc)
 
 	/* Finalize hash */
 	vsx_begin();
+	gcm_update(gctx->iv, hash->Htable);
 	finish_tag(gctx, hash, total_processed);
 	vsx_end();
 
@@ -302,17 +317,63 @@ static int p10_aes_gcm_crypt(struct aead_request *req, int enc)
 	return 0;
 }
 
+static int rfc4106_setkey(struct crypto_aead *tfm, const u8 *inkey,
+			  unsigned int keylen)
+{
+	struct p10_aes_gcm_ctx *ctx = crypto_aead_ctx(tfm);
+	int err;
+
+	keylen -= RFC4106_NONCE_SIZE;
+	err = p10_aes_gcm_setkey(tfm, inkey, keylen);
+	if (err)
+		return err;
+
+	memcpy(ctx->nonce, inkey + keylen, RFC4106_NONCE_SIZE);
+	return 0;
+}
+
+static int rfc4106_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
+{
+	return crypto_rfc4106_check_authsize(authsize);
+}
+
+static int rfc4106_encrypt(struct aead_request *req)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct p10_aes_gcm_ctx *ctx = crypto_aead_ctx(aead);
+	u8 iv[AES_BLOCK_SIZE];
+
+	memcpy(iv, ctx->nonce, RFC4106_NONCE_SIZE);
+	memcpy(iv + RFC4106_NONCE_SIZE, req->iv, GCM_RFC4106_IV_SIZE);
+
+	return crypto_ipsec_check_assoclen(req->assoclen) ?:
+	       p10_aes_gcm_crypt(req, iv, req->assoclen - GCM_RFC4106_IV_SIZE, 1);
+}
+
+static int rfc4106_decrypt(struct aead_request *req)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct p10_aes_gcm_ctx *ctx = crypto_aead_ctx(aead);
+	u8 iv[AES_BLOCK_SIZE];
+
+	memcpy(iv, ctx->nonce, RFC4106_NONCE_SIZE);
+	memcpy(iv + RFC4106_NONCE_SIZE, req->iv, GCM_RFC4106_IV_SIZE);
+
+	return crypto_ipsec_check_assoclen(req->assoclen) ?:
+	       p10_aes_gcm_crypt(req, iv, req->assoclen - GCM_RFC4106_IV_SIZE, 0);
+}
+
 static int p10_aes_gcm_encrypt(struct aead_request *req)
 {
-	return p10_aes_gcm_crypt(req, 1);
+	return p10_aes_gcm_crypt(req, req->iv, req->assoclen, 1);
 }
 
 static int p10_aes_gcm_decrypt(struct aead_request *req)
 {
-	return p10_aes_gcm_crypt(req, 0);
+	return p10_aes_gcm_crypt(req, req->iv, req->assoclen, 0);
 }
 
-static struct aead_alg gcm_aes_alg = {
+static struct aead_alg gcm_aes_algs[] = {{
 	.ivsize			= GCM_IV_SIZE,
 	.maxauthsize		= 16,
 
@@ -321,23 +382,57 @@ static struct aead_alg gcm_aes_alg = {
 	.encrypt		= p10_aes_gcm_encrypt,
 	.decrypt		= p10_aes_gcm_decrypt,
 
-	.base.cra_name		= "gcm(aes)",
-	.base.cra_driver_name	= "aes_gcm_p10",
+	.base.cra_name		= "__gcm(aes)",
+	.base.cra_driver_name	= "__aes_gcm_p10",
 	.base.cra_priority	= 2100,
 	.base.cra_blocksize	= 1,
-	.base.cra_ctxsize	= sizeof(struct p10_aes_gcm_ctx),
+	.base.cra_ctxsize	= sizeof(struct p10_aes_gcm_ctx)+
+				  4 * sizeof(u64[2]),
 	.base.cra_module	= THIS_MODULE,
-};
+	.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+}, {
+	.ivsize			= GCM_RFC4106_IV_SIZE,
+	.maxauthsize		= 16,
+	.setkey			= rfc4106_setkey,
+	.setauthsize		= rfc4106_setauthsize,
+	.encrypt		= rfc4106_encrypt,
+	.decrypt		= rfc4106_decrypt,
+
+	.base.cra_name		= "__rfc4106(gcm(aes))",
+	.base.cra_driver_name	= "__rfc4106_aes_gcm_p10",
+	.base.cra_priority	= 2100,
+	.base.cra_blocksize	= 1,
+	.base.cra_ctxsize	= sizeof(struct p10_aes_gcm_ctx) +
+				  4 * sizeof(u64[2]),
+	.base.cra_module	= THIS_MODULE,
+	.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+}};
+
+static struct simd_aead_alg *p10_simd_aeads[ARRAY_SIZE(gcm_aes_algs)];
 
 static int __init p10_init(void)
 {
-	return crypto_register_aead(&gcm_aes_alg);
+	int ret;
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_31))
+		return 0;
+
+	ret = simd_register_aeads_compat(gcm_aes_algs,
+					 ARRAY_SIZE(gcm_aes_algs),
+					 p10_simd_aeads);
+	if (ret) {
+		simd_unregister_aeads(gcm_aes_algs, ARRAY_SIZE(gcm_aes_algs),
+				      p10_simd_aeads);
+		return ret;
+	}
+	return 0;
 }
 
 static void __exit p10_exit(void)
 {
-	crypto_unregister_aead(&gcm_aes_alg);
+	simd_unregister_aeads(gcm_aes_algs, ARRAY_SIZE(gcm_aes_algs),
+			      p10_simd_aeads);
 }
 
-module_cpu_feature_match(PPC_MODULE_FEATURE_P10, p10_init);
+module_init(p10_init);
 module_exit(p10_exit);
diff --git a/arch/powerpc/crypto/aes-gcm-p10.S b/arch/powerpc/crypto/aes-gcm-p10.S
index a51f4b265308..89f50eef3512 100644
--- a/arch/powerpc/crypto/aes-gcm-p10.S
+++ b/arch/powerpc/crypto/aes-gcm-p10.S
@@ -1,42 +1,42 @@
 /* SPDX-License-Identifier: GPL-2.0-or-later */
- #
- # Accelerated AES-GCM stitched implementation for ppc64le.
- #
- # Copyright 2022- IBM Inc. All rights reserved
- #
- #===================================================================================
- # Written by Danny Tsen <dtsen@linux.ibm.com>
- #
- # GHASH is based on the Karatsuba multiplication method.
- #
- #    Xi xor X1
- #
- #    X1 * H^4 + X2 * H^3 + x3 * H^2 + X4 * H =
- #      (X1.h * H4.h + xX.l * H4.l + X1 * H4) +
- #      (X2.h * H3.h + X2.l * H3.l + X2 * H3) +
- #      (X3.h * H2.h + X3.l * H2.l + X3 * H2) +
- #      (X4.h * H.h + X4.l * H.l + X4 * H)
- #
- # Xi = v0
- # H Poly = v2
- # Hash keys = v3 - v14
- #     ( H.l, H, H.h)
- #     ( H^2.l, H^2, H^2.h)
- #     ( H^3.l, H^3, H^3.h)
- #     ( H^4.l, H^4, H^4.h)
- #
- # v30 is IV
- # v31 - counter 1
- #
- # AES used,
- #     vs0 - vs14 for round keys
- #     v15, v16, v17, v18, v19, v20, v21, v22 for 8 blocks (encrypted)
- #
- # This implementation uses stitched AES-GCM approach to improve overall performance.
- # AES is implemented with 8x blocks and GHASH is using 2 4x blocks.
- #
- # ===================================================================================
- #
+#
+# Accelerated AES-GCM stitched implementation for ppc64le.
+#
+# Copyright 2024- IBM Inc.
+#
+#===================================================================================
+# Written by Danny Tsen <dtsen@us.ibm.com>
+#
+# GHASH is based on the Karatsuba multiplication method.
+#
+#    Xi xor X1
+#
+#    X1 * H^4 + X2 * H^3 + x3 * H^2 + X4 * H =
+#      (X1.h * H4.h + xX.l * H4.l + X1 * H4) +
+#      (X2.h * H3.h + X2.l * H3.l + X2 * H3) +
+#      (X3.h * H2.h + X3.l * H2.l + X3 * H2) +
+#      (X4.h * H.h + X4.l * H.l + X4 * H)
+#
+# Xi = v0
+# H Poly = v2
+# Hash keys = v3 - v14
+#     ( H.l, H, H.h)
+#     ( H^2.l, H^2, H^2.h)
+#     ( H^3.l, H^3, H^3.h)
+#     ( H^4.l, H^4, H^4.h)
+#
+# v30 is IV
+# v31 - counter 1
+#
+# AES used,
+#     vs0 - round key 0
+#     v15, v16, v17, v18, v19, v20, v21, v22 for 8 blocks (encrypted)
+#
+# This implementation uses stitched AES-GCM approach to improve overall performance.
+# AES is implemented with 8x blocks and GHASH is using 2 4x blocks.
+#
+# ===================================================================================
+#
 
 #include <asm/ppc_asm.h>
 #include <linux/linkage.h>
@@ -44,483 +44,224 @@
 .machine        "any"
 .text
 
- # 4x loops
- # v15 - v18 - input states
- # vs1 - vs9 - round keys
- #
-.macro Loop_aes_middle4x
-	xxlor	19+32, 1, 1
-	xxlor	20+32, 2, 2
-	xxlor	21+32, 3, 3
-	xxlor	22+32, 4, 4
-
-	vcipher	15, 15, 19
-	vcipher	16, 16, 19
-	vcipher	17, 17, 19
-	vcipher	18, 18, 19
-
-	vcipher	15, 15, 20
-	vcipher	16, 16, 20
-	vcipher	17, 17, 20
-	vcipher	18, 18, 20
-
-	vcipher	15, 15, 21
-	vcipher	16, 16, 21
-	vcipher	17, 17, 21
-	vcipher	18, 18, 21
-
-	vcipher	15, 15, 22
-	vcipher	16, 16, 22
-	vcipher	17, 17, 22
-	vcipher	18, 18, 22
-
-	xxlor	19+32, 5, 5
-	xxlor	20+32, 6, 6
-	xxlor	21+32, 7, 7
-	xxlor	22+32, 8, 8
-
-	vcipher	15, 15, 19
-	vcipher	16, 16, 19
-	vcipher	17, 17, 19
-	vcipher	18, 18, 19
-
-	vcipher	15, 15, 20
-	vcipher	16, 16, 20
-	vcipher	17, 17, 20
-	vcipher	18, 18, 20
-
-	vcipher	15, 15, 21
-	vcipher	16, 16, 21
-	vcipher	17, 17, 21
-	vcipher	18, 18, 21
-
-	vcipher	15, 15, 22
-	vcipher	16, 16, 22
-	vcipher	17, 17, 22
-	vcipher	18, 18, 22
-
-	xxlor	23+32, 9, 9
-	vcipher	15, 15, 23
-	vcipher	16, 16, 23
-	vcipher	17, 17, 23
-	vcipher	18, 18, 23
+.macro	SAVE_GPR GPR OFFSET FRAME
+	std	\GPR,\OFFSET(\FRAME)
 .endm
 
- # 8x loops
- # v15 - v22 - input states
- # vs1 - vs9 - round keys
- #
-.macro Loop_aes_middle8x
-	xxlor	23+32, 1, 1
-	xxlor	24+32, 2, 2
-	xxlor	25+32, 3, 3
-	xxlor	26+32, 4, 4
-
-	vcipher	15, 15, 23
-	vcipher	16, 16, 23
-	vcipher	17, 17, 23
-	vcipher	18, 18, 23
-	vcipher	19, 19, 23
-	vcipher	20, 20, 23
-	vcipher	21, 21, 23
-	vcipher	22, 22, 23
-
-	vcipher	15, 15, 24
-	vcipher	16, 16, 24
-	vcipher	17, 17, 24
-	vcipher	18, 18, 24
-	vcipher	19, 19, 24
-	vcipher	20, 20, 24
-	vcipher	21, 21, 24
-	vcipher	22, 22, 24
-
-	vcipher	15, 15, 25
-	vcipher	16, 16, 25
-	vcipher	17, 17, 25
-	vcipher	18, 18, 25
-	vcipher	19, 19, 25
-	vcipher	20, 20, 25
-	vcipher	21, 21, 25
-	vcipher	22, 22, 25
-
-	vcipher	15, 15, 26
-	vcipher	16, 16, 26
-	vcipher	17, 17, 26
-	vcipher	18, 18, 26
-	vcipher	19, 19, 26
-	vcipher	20, 20, 26
-	vcipher	21, 21, 26
-	vcipher	22, 22, 26
-
-	xxlor	23+32, 5, 5
-	xxlor	24+32, 6, 6
-	xxlor	25+32, 7, 7
-	xxlor	26+32, 8, 8
-
-	vcipher	15, 15, 23
-	vcipher	16, 16, 23
-	vcipher	17, 17, 23
-	vcipher	18, 18, 23
-	vcipher	19, 19, 23
-	vcipher	20, 20, 23
-	vcipher	21, 21, 23
-	vcipher	22, 22, 23
-
-	vcipher	15, 15, 24
-	vcipher	16, 16, 24
-	vcipher	17, 17, 24
-	vcipher	18, 18, 24
-	vcipher	19, 19, 24
-	vcipher	20, 20, 24
-	vcipher	21, 21, 24
-	vcipher	22, 22, 24
-
-	vcipher	15, 15, 25
-	vcipher	16, 16, 25
-	vcipher	17, 17, 25
-	vcipher	18, 18, 25
-	vcipher	19, 19, 25
-	vcipher	20, 20, 25
-	vcipher	21, 21, 25
-	vcipher	22, 22, 25
-
-	vcipher	15, 15, 26
-	vcipher	16, 16, 26
-	vcipher	17, 17, 26
-	vcipher	18, 18, 26
-	vcipher	19, 19, 26
-	vcipher	20, 20, 26
-	vcipher	21, 21, 26
-	vcipher	22, 22, 26
-
-	xxlor	23+32, 9, 9
-	vcipher	15, 15, 23
-	vcipher	16, 16, 23
-	vcipher	17, 17, 23
-	vcipher	18, 18, 23
-	vcipher	19, 19, 23
-	vcipher	20, 20, 23
-	vcipher	21, 21, 23
-	vcipher	22, 22, 23
+.macro	SAVE_VRS VRS OFFSET FRAME
+	stxv	\VRS+32, \OFFSET(\FRAME)
 .endm
 
-.macro Loop_aes_middle_1x
-	xxlor	19+32, 1, 1
-	xxlor	20+32, 2, 2
-	xxlor	21+32, 3, 3
-	xxlor	22+32, 4, 4
-
-	vcipher 15, 15, 19
-	vcipher 15, 15, 20
-	vcipher 15, 15, 21
-	vcipher 15, 15, 22
-
-	xxlor	19+32, 5, 5
-	xxlor	20+32, 6, 6
-	xxlor	21+32, 7, 7
-	xxlor	22+32, 8, 8
-
-	vcipher 15, 15, 19
-	vcipher 15, 15, 20
-	vcipher 15, 15, 21
-	vcipher 15, 15, 22
-
-	xxlor	19+32, 9, 9
-	vcipher 15, 15, 19
+.macro	RESTORE_GPR GPR OFFSET FRAME
+	ld	\GPR,\OFFSET(\FRAME)
 .endm
 
- #
- # Compute 4x hash values based on Karatsuba method.
- #
-.macro ppc_aes_gcm_ghash
-	vxor		15, 15, 0
-
-	vpmsumd		23, 12, 15		# H4.L * X.L
-	vpmsumd		24, 9, 16
-	vpmsumd		25, 6, 17
-	vpmsumd		26, 3, 18
-
-	vxor		23, 23, 24
-	vxor		23, 23, 25
-	vxor		23, 23, 26		# L
-
-	vpmsumd		24, 13, 15		# H4.L * X.H + H4.H * X.L
-	vpmsumd		25, 10, 16		# H3.L * X1.H + H3.H * X1.L
-	vpmsumd		26, 7, 17
-	vpmsumd		27, 4, 18
-
-	vxor		24, 24, 25
-	vxor		24, 24, 26
-	vxor		24, 24, 27		# M
-
-	# sum hash and reduction with H Poly
-	vpmsumd		28, 23, 2		# reduction
-
-	vxor		29, 29, 29
-	vsldoi		26, 24, 29, 8		# mL
-	vsldoi		29, 29, 24, 8		# mH
-	vxor		23, 23, 26		# mL + L
-
-	vsldoi		23, 23, 23, 8		# swap
-	vxor		23, 23, 28
-
-	vpmsumd		24, 14, 15		# H4.H * X.H
-	vpmsumd		25, 11, 16
-	vpmsumd		26, 8, 17
-	vpmsumd		27, 5, 18
-
-	vxor		24, 24, 25
-	vxor		24, 24, 26
-	vxor		24, 24, 27
-
-	vxor		24, 24, 29
-
-	# sum hash and reduction with H Poly
-	vsldoi		27, 23, 23, 8		# swap
-	vpmsumd		23, 23, 2
-	vxor		27, 27, 24
-	vxor		23, 23, 27
-
-	xxlor		32, 23+32, 23+32		# update hash
-
+.macro	RESTORE_VRS VRS OFFSET FRAME
+	lxv	\VRS+32, \OFFSET(\FRAME)
 .endm
 
- #
- # Combine two 4x ghash
- # v15 - v22 - input blocks
- #
-.macro ppc_aes_gcm_ghash2_4x
-	# first 4x hash
-	vxor		15, 15, 0		# Xi + X
-
-	vpmsumd		23, 12, 15		# H4.L * X.L
-	vpmsumd		24, 9, 16
-	vpmsumd		25, 6, 17
-	vpmsumd		26, 3, 18
-
-	vxor		23, 23, 24
-	vxor		23, 23, 25
-	vxor		23, 23, 26		# L
-
-	vpmsumd		24, 13, 15		# H4.L * X.H + H4.H * X.L
-	vpmsumd		25, 10, 16		# H3.L * X1.H + H3.H * X1.L
-	vpmsumd		26, 7, 17
-	vpmsumd		27, 4, 18
-
-	vxor		24, 24, 25
-	vxor		24, 24, 26
-
-	# sum hash and reduction with H Poly
-	vpmsumd		28, 23, 2		# reduction
-
-	vxor		29, 29, 29
-
-	vxor		24, 24, 27		# M
-	vsldoi		26, 24, 29, 8		# mL
-	vsldoi		29, 29, 24, 8		# mH
-	vxor		23, 23, 26		# mL + L
-
-	vsldoi		23, 23, 23, 8		# swap
-	vxor		23, 23, 28
+.macro SAVE_REGS
+	mflr 0
+	std 0, 16(1)
+	stdu 1,-512(1)
+
+	SAVE_GPR 14, 112, 1
+	SAVE_GPR 15, 120, 1
+	SAVE_GPR 16, 128, 1
+	SAVE_GPR 17, 136, 1
+	SAVE_GPR 18, 144, 1
+	SAVE_GPR 19, 152, 1
+	SAVE_GPR 20, 160, 1
+	SAVE_GPR 21, 168, 1
+	SAVE_GPR 22, 176, 1
+	SAVE_GPR 23, 184, 1
+	SAVE_GPR 24, 192, 1
+
+	addi	9, 1, 256
+	SAVE_VRS 20, 0, 9
+	SAVE_VRS 21, 16, 9
+	SAVE_VRS 22, 32, 9
+	SAVE_VRS 23, 48, 9
+	SAVE_VRS 24, 64, 9
+	SAVE_VRS 25, 80, 9
+	SAVE_VRS 26, 96, 9
+	SAVE_VRS 27, 112, 9
+	SAVE_VRS 28, 128, 9
+	SAVE_VRS 29, 144, 9
+	SAVE_VRS 30, 160, 9
+	SAVE_VRS 31, 176, 9
+.endm # SAVE_REGS
 
-	vpmsumd		24, 14, 15		# H4.H * X.H
-	vpmsumd		25, 11, 16
-	vpmsumd		26, 8, 17
-	vpmsumd		27, 5, 18
+.macro RESTORE_REGS
+	addi	9, 1, 256
+	RESTORE_VRS 20, 0, 9
+	RESTORE_VRS 21, 16, 9
+	RESTORE_VRS 22, 32, 9
+	RESTORE_VRS 23, 48, 9
+	RESTORE_VRS 24, 64, 9
+	RESTORE_VRS 25, 80, 9
+	RESTORE_VRS 26, 96, 9
+	RESTORE_VRS 27, 112, 9
+	RESTORE_VRS 28, 128, 9
+	RESTORE_VRS 29, 144, 9
+	RESTORE_VRS 30, 160, 9
+	RESTORE_VRS 31, 176, 9
+
+	RESTORE_GPR 14, 112, 1
+	RESTORE_GPR 15, 120, 1
+	RESTORE_GPR 16, 128, 1
+	RESTORE_GPR 17, 136, 1
+	RESTORE_GPR 18, 144, 1
+	RESTORE_GPR 19, 152, 1
+	RESTORE_GPR 20, 160, 1
+	RESTORE_GPR 21, 168, 1
+	RESTORE_GPR 22, 176, 1
+	RESTORE_GPR 23, 184, 1
+	RESTORE_GPR 24, 192, 1
+
+	addi    1, 1, 512
+	ld 0, 16(1)
+	mtlr 0
+.endm # RESTORE_REGS
+
+# 4x loops
+.macro AES_CIPHER_4x _VCIPHER ST r
+	\_VCIPHER	\ST, \ST, \r
+	\_VCIPHER	\ST+1, \ST+1, \r
+	\_VCIPHER	\ST+2, \ST+2, \r
+	\_VCIPHER	\ST+3, \ST+3, \r
+.endm
 
-	vxor		24, 24, 25
-	vxor		24, 24, 26
-	vxor		24, 24, 27		# H
+# 8x loops
+.macro AES_CIPHER_8x _VCIPHER ST r
+	\_VCIPHER	\ST, \ST, \r
+	\_VCIPHER	\ST+1, \ST+1, \r
+	\_VCIPHER	\ST+2, \ST+2, \r
+	\_VCIPHER	\ST+3, \ST+3, \r
+	\_VCIPHER	\ST+4, \ST+4, \r
+	\_VCIPHER	\ST+5, \ST+5, \r
+	\_VCIPHER	\ST+6, \ST+6, \r
+	\_VCIPHER	\ST+7, \ST+7, \r
+.endm
 
-	vxor		24, 24, 29		# H + mH
+.macro LOOP_8AES_STATE
+	xxlor	32+23, 1, 1
+	xxlor	32+24, 2, 2
+	xxlor	32+25, 3, 3
+	xxlor	32+26, 4, 4
+	AES_CIPHER_8x vcipher, 15, 23
+	AES_CIPHER_8x vcipher, 15, 24
+	AES_CIPHER_8x vcipher, 15, 25
+	AES_CIPHER_8x vcipher, 15, 26
+	xxlor	32+23, 5, 5
+	xxlor	32+24, 6, 6
+	xxlor	32+25, 7, 7
+	xxlor	32+26, 8, 8
+	AES_CIPHER_8x vcipher, 15, 23
+	AES_CIPHER_8x vcipher, 15, 24
+	AES_CIPHER_8x vcipher, 15, 25
+	AES_CIPHER_8x vcipher, 15, 26
+.endm
 
-	# sum hash and reduction with H Poly
-	vsldoi		27, 23, 23, 8		# swap
-	vpmsumd		23, 23, 2
-	vxor		27, 27, 24
-	vxor		27, 23, 27		# 1st Xi
-
-	# 2nd 4x hash
-	vpmsumd		24, 9, 20
-	vpmsumd		25, 6, 21
-	vpmsumd		26, 3, 22
-	vxor		19, 19, 27		# Xi + X
-	vpmsumd		23, 12, 19		# H4.L * X.L
-
-	vxor		23, 23, 24
-	vxor		23, 23, 25
-	vxor		23, 23, 26		# L
-
-	vpmsumd		24, 13, 19		# H4.L * X.H + H4.H * X.L
-	vpmsumd		25, 10, 20		# H3.L * X1.H + H3.H * X1.L
-	vpmsumd		26, 7, 21
-	vpmsumd		27, 4, 22
-
-	vxor		24, 24, 25
-	vxor		24, 24, 26
+#
+# PPC_GHASH4x(H, S1, S2, S3, S4): Compute 4x hash values based on Karatsuba method.
+# H: returning digest
+# S#: states
+#
+# S1 should xor with the previous digest
+#
+# Xi = v0
+# H Poly = v2
+# Hash keys = v3 - v14
+# Scratch: v23 - v29
+#
+.macro PPC_GHASH4x H S1 S2 S3 S4
+
+	vpmsumd	23, 12, \S1		# H4.L * X.L
+	vpmsumd	24, 9, \S2
+	vpmsumd	25, 6, \S3
+	vpmsumd	26, 3, \S4
+
+	vpmsumd	27, 13, \S1		# H4.L * X.H + H4.H * X.L
+	vpmsumd	28, 10, \S2		# H3.L * X1.H + H3.H * X1.L
+
+	vxor	23, 23, 24
+	vxor	23, 23, 25
+	vxor	23, 23, 26		# L
+
+	vxor	24, 27, 28
+	vpmsumd	25, 7, \S3
+	vpmsumd	26, 4, \S4
+
+	vxor	24, 24, 25
+	vxor	24, 24, 26		# M
 
 	# sum hash and reduction with H Poly
-	vpmsumd		28, 23, 2		# reduction
-
-	vxor		29, 29, 29
+	vpmsumd	28, 23, 2		# reduction
 
-	vxor		24, 24, 27		# M
-	vsldoi		26, 24, 29, 8		# mL
-	vsldoi		29, 29, 24, 8		# mH
-	vxor		23, 23, 26		# mL + L
+	vxor	1, 1, 1
+	vsldoi	25, 24, 1, 8		# mL
+	vsldoi	1, 1, 24, 8		# mH
+	vxor	23, 23, 25		# mL + L
 
-	vsldoi		23, 23, 23, 8		# swap
-	vxor		23, 23, 28
+	# This performs swap and xor like,
+	#   vsldoi	23, 23, 23, 8		# swap
+	#   vxor	23, 23, 28
+	xxlor	32+25, 10, 10
+	vpermxor 23, 23, 28, 25
 
-	vpmsumd		24, 14, 19		# H4.H * X.H
-	vpmsumd		25, 11, 20
-	vpmsumd		26, 8, 21
-	vpmsumd		27, 5, 22
+	vpmsumd	26, 14, \S1		# H4.H * X.H
+	vpmsumd	27, 11, \S2
+	vpmsumd	28, 8, \S3
+	vpmsumd	29, 5, \S4
 
-	vxor		24, 24, 25
-	vxor		24, 24, 26
-	vxor		24, 24, 27		# H
+	vxor	24, 26, 27
+	vxor	24, 24, 28
+	vxor	24, 24, 29
 
-	vxor		24, 24, 29		# H + mH
+	vxor	24, 24, 1
 
 	# sum hash and reduction with H Poly
-	vsldoi		27, 23, 23, 8		# swap
-	vpmsumd		23, 23, 2
-	vxor		27, 27, 24
-	vxor		23, 23, 27
-
-	xxlor		32, 23+32, 23+32		# update hash
-
+	vsldoi	25, 23, 23, 8		# swap
+	vpmsumd	23, 23, 2
+	vxor	27, 25, 24
+	vxor	\H, 23, 27
 .endm
 
- #
- # Compute update single hash
- #
-.macro ppc_update_hash_1x
-	vxor		28, 28, 0
-
-	vxor		19, 19, 19
+#
+# Compute update single ghash
+# scratch: v1, v22..v27
+#
+.macro PPC_GHASH1x H S1
 
-	vpmsumd		22, 3, 28		# L
-	vpmsumd		23, 4, 28		# M
-	vpmsumd		24, 5, 28		# H
+	vxor	1, 1, 1
 
-	vpmsumd		27, 22, 2		# reduction
+	vpmsumd	22, 3, \S1		# L
+	vpmsumd	23, 4, \S1		# M
+	vpmsumd	24, 5, \S1		# H
 
-	vsldoi		25, 23, 19, 8		# mL
-	vsldoi		26, 19, 23, 8		# mH
-	vxor		22, 22, 25		# LL + LL
-	vxor		24, 24, 26		# HH + HH
+	vpmsumd	27, 22, 2		# reduction
 
-	vsldoi		22, 22, 22, 8		# swap
-	vxor		22, 22, 27
+	vsldoi	25, 23, 1, 8		# mL
+	vsldoi	26, 1, 23, 8		# mH
+	vxor	22, 22, 25		# LL + LL
+	vxor	24, 24, 26		# HH + HH
 
-	vsldoi		20, 22, 22, 8		# swap
-	vpmsumd		22, 22, 2		# reduction
-	vxor		20, 20, 24
-	vxor		22, 22, 20
+	xxlor	32+25, 10, 10
+	vpermxor 22, 22, 27, 25
 
-	vmr		0, 22			# update hash
-
-.endm
-
-.macro SAVE_REGS
-	stdu 1,-640(1)
-	mflr 0
-
-	std	14,112(1)
-	std	15,120(1)
-	std	16,128(1)
-	std	17,136(1)
-	std	18,144(1)
-	std	19,152(1)
-	std	20,160(1)
-	std	21,168(1)
-	li	9, 256
-	stvx	20, 9, 1
-	addi	9, 9, 16
-	stvx	21, 9, 1
-	addi	9, 9, 16
-	stvx	22, 9, 1
-	addi	9, 9, 16
-	stvx	23, 9, 1
-	addi	9, 9, 16
-	stvx	24, 9, 1
-	addi	9, 9, 16
-	stvx	25, 9, 1
-	addi	9, 9, 16
-	stvx	26, 9, 1
-	addi	9, 9, 16
-	stvx	27, 9, 1
-	addi	9, 9, 16
-	stvx	28, 9, 1
-	addi	9, 9, 16
-	stvx	29, 9, 1
-	addi	9, 9, 16
-	stvx	30, 9, 1
-	addi	9, 9, 16
-	stvx	31, 9, 1
-	stxv	14, 464(1)
-	stxv	15, 480(1)
-	stxv	16, 496(1)
-	stxv	17, 512(1)
-	stxv	18, 528(1)
-	stxv	19, 544(1)
-	stxv	20, 560(1)
-	stxv	21, 576(1)
-	stxv	22, 592(1)
-	std	0, 656(1)
-.endm
-
-.macro RESTORE_REGS
-	lxv	14, 464(1)
-	lxv	15, 480(1)
-	lxv	16, 496(1)
-	lxv	17, 512(1)
-	lxv	18, 528(1)
-	lxv	19, 544(1)
-	lxv	20, 560(1)
-	lxv	21, 576(1)
-	lxv	22, 592(1)
-	li	9, 256
-	lvx	20, 9, 1
-	addi	9, 9, 16
-	lvx	21, 9, 1
-	addi	9, 9, 16
-	lvx	22, 9, 1
-	addi	9, 9, 16
-	lvx	23, 9, 1
-	addi	9, 9, 16
-	lvx	24, 9, 1
-	addi	9, 9, 16
-	lvx	25, 9, 1
-	addi	9, 9, 16
-	lvx	26, 9, 1
-	addi	9, 9, 16
-	lvx	27, 9, 1
-	addi	9, 9, 16
-	lvx	28, 9, 1
-	addi	9, 9, 16
-	lvx	29, 9, 1
-	addi	9, 9, 16
-	lvx	30, 9, 1
-	addi	9, 9, 16
-	lvx	31, 9, 1
-
-	ld	0, 656(1)
-	ld      14,112(1)
-	ld      15,120(1)
-	ld      16,128(1)
-	ld      17,136(1)
-	ld      18,144(1)
-	ld      19,152(1)
-	ld      20,160(1)
-	ld	21,168(1)
-
-	mtlr	0
-	addi	1, 1, 640
+	vsldoi	23, 22, 22, 8		# swap
+	vpmsumd	22, 22, 2		# reduction
+	vxor	23, 23, 24
+	vxor	\H, 22, 23
 .endm
 
+#
+# LOAD_HASH_TABLE
+# Xi = v0
+# H Poly = v2
+# Hash keys = v3 - v14
+#
 .macro LOAD_HASH_TABLE
 	# Load Xi
 	lxvb16x	32, 0, 8	# load Xi
@@ -557,657 +298,434 @@
 	lxvd2x	14+32, 10, 8	# H^4h
 .endm
 
- #
- # aes_p10_gcm_encrypt (const void *inp, void *out, size_t len,
- #               const char *rk, unsigned char iv[16], void *Xip);
- #
- #    r3 - inp
- #    r4 - out
- #    r5 - len
- #    r6 - AES round keys
- #    r7 - iv and other data
- #    r8 - Xi, HPoli, hash keys
- #
- #    rounds is at offset 240 in rk
- #    Xi is at 0 in gcm_table (Xip).
- #
-_GLOBAL(aes_p10_gcm_encrypt)
-.align 5
-
-	SAVE_REGS
-
-	LOAD_HASH_TABLE
-
-	# initialize ICB: GHASH( IV ), IV - r7
-	lxvb16x	30+32, 0, 7	# load IV  - v30
-
-	mr	12, 5		# length
-	li	11, 0		# block index
-
-	# counter 1
-	vxor	31, 31, 31
-	vspltisb 22, 1
-	vsldoi	31, 31, 22,1	# counter 1
-
-	# load round key to VSR
-	lxv	0, 0(6)
-	lxv	1, 0x10(6)
-	lxv	2, 0x20(6)
-	lxv	3, 0x30(6)
-	lxv	4, 0x40(6)
-	lxv	5, 0x50(6)
-	lxv	6, 0x60(6)
-	lxv	7, 0x70(6)
-	lxv	8, 0x80(6)
-	lxv	9, 0x90(6)
-	lxv	10, 0xa0(6)
-
-	# load rounds - 10 (128), 12 (192), 14 (256)
-	lwz	9,240(6)
-
-	#
-	# vxor	state, state, w # addroundkey
-	xxlor	32+29, 0, 0
-	vxor	15, 30, 29	# IV + round key - add round key 0
-
-	cmpdi	9, 10
-	beq	Loop_aes_gcm_8x
-
-	# load 2 more round keys (v11, v12)
-	lxv	11, 0xb0(6)
-	lxv	12, 0xc0(6)
-
-	cmpdi	9, 12
-	beq	Loop_aes_gcm_8x
-
-	# load 2 more round keys (v11, v12, v13, v14)
-	lxv	13, 0xd0(6)
-	lxv	14, 0xe0(6)
-	cmpdi	9, 14
-	beq	Loop_aes_gcm_8x
-
-	b	aes_gcm_out
-
-.align 5
-Loop_aes_gcm_8x:
-	mr	14, 3
-	mr	9, 4
-
-	#
-	# check partial block
-	#
-Continue_partial_check:
-	ld	15, 56(7)
-	cmpdi	15, 0
-	beq	Continue
-	bgt	Final_block
-	cmpdi	15, 16
-	blt	Final_block
-
-Continue:
-	# n blcoks
-	li	10, 128
-	divdu	10, 12, 10	# n 128 bytes-blocks
-	cmpdi	10, 0
-	beq	Loop_last_block
-
-	vaddudm	30, 30, 31	# IV + counter
-	vxor	16, 30, 29
-	vaddudm	30, 30, 31
-	vxor	17, 30, 29
-	vaddudm	30, 30, 31
-	vxor	18, 30, 29
-	vaddudm	30, 30, 31
-	vxor	19, 30, 29
-	vaddudm	30, 30, 31
-	vxor	20, 30, 29
-	vaddudm	30, 30, 31
-	vxor	21, 30, 29
-	vaddudm	30, 30, 31
-	vxor	22, 30, 29
-
-	mtctr	10
-
-	li	15, 16
-	li	16, 32
-	li	17, 48
-	li	18, 64
-	li	19, 80
-	li	20, 96
-	li	21, 112
-
-	lwz	10, 240(6)
-
-Loop_8x_block:
-
-	lxvb16x		15, 0, 14	# load block
-	lxvb16x		16, 15, 14	# load block
-	lxvb16x		17, 16, 14	# load block
-	lxvb16x		18, 17, 14	# load block
-	lxvb16x		19, 18, 14	# load block
-	lxvb16x		20, 19, 14	# load block
-	lxvb16x		21, 20, 14	# load block
-	lxvb16x		22, 21, 14	# load block
-	addi		14, 14, 128
-
-	Loop_aes_middle8x
-
-	xxlor	23+32, 10, 10
-
-	cmpdi	10, 10
-	beq	Do_next_ghash
-
-	# 192 bits
-	xxlor	24+32, 11, 11
-
-	vcipher	15, 15, 23
-	vcipher	16, 16, 23
-	vcipher	17, 17, 23
-	vcipher	18, 18, 23
-	vcipher	19, 19, 23
-	vcipher	20, 20, 23
-	vcipher	21, 21, 23
-	vcipher	22, 22, 23
-
-	vcipher	15, 15, 24
-	vcipher	16, 16, 24
-	vcipher	17, 17, 24
-	vcipher	18, 18, 24
-	vcipher	19, 19, 24
-	vcipher	20, 20, 24
-	vcipher	21, 21, 24
-	vcipher	22, 22, 24
-
-	xxlor	23+32, 12, 12
-
-	cmpdi	10, 12
-	beq	Do_next_ghash
-
-	# 256 bits
-	xxlor	24+32, 13, 13
-
-	vcipher	15, 15, 23
-	vcipher	16, 16, 23
-	vcipher	17, 17, 23
-	vcipher	18, 18, 23
-	vcipher	19, 19, 23
-	vcipher	20, 20, 23
-	vcipher	21, 21, 23
-	vcipher	22, 22, 23
-
-	vcipher	15, 15, 24
-	vcipher	16, 16, 24
-	vcipher	17, 17, 24
-	vcipher	18, 18, 24
-	vcipher	19, 19, 24
-	vcipher	20, 20, 24
-	vcipher	21, 21, 24
-	vcipher	22, 22, 24
-
-	xxlor	23+32, 14, 14
-
-	cmpdi	10, 14
-	beq	Do_next_ghash
-	b	aes_gcm_out
-
-Do_next_ghash:
-
-	#
-	# last round
-	vcipherlast     15, 15, 23
-	vcipherlast     16, 16, 23
-
-	xxlxor		47, 47, 15
-	stxvb16x        47, 0, 9	# store output
-	xxlxor		48, 48, 16
-	stxvb16x        48, 15, 9	# store output
-
-	vcipherlast     17, 17, 23
-	vcipherlast     18, 18, 23
-
-	xxlxor		49, 49, 17
-	stxvb16x        49, 16, 9	# store output
-	xxlxor		50, 50, 18
-	stxvb16x        50, 17, 9	# store output
-
-	vcipherlast     19, 19, 23
-	vcipherlast     20, 20, 23
-
-	xxlxor		51, 51, 19
-	stxvb16x        51, 18, 9	# store output
-	xxlxor		52, 52, 20
-	stxvb16x        52, 19, 9	# store output
-
-	vcipherlast     21, 21, 23
-	vcipherlast     22, 22, 23
-
-	xxlxor		53, 53, 21
-	stxvb16x        53, 20, 9	# store output
-	xxlxor		54, 54, 22
-	stxvb16x        54, 21, 9	# store output
-
-	addi		9, 9, 128
-
-	# ghash here
-	ppc_aes_gcm_ghash2_4x
-
-	xxlor	27+32, 0, 0
-	vaddudm 30, 30, 31		# IV + counter
-	vmr	29, 30
-	vxor    15, 30, 27		# add round key
-	vaddudm 30, 30, 31
-	vxor    16, 30, 27
-	vaddudm 30, 30, 31
-	vxor    17, 30, 27
-	vaddudm 30, 30, 31
-	vxor    18, 30, 27
-	vaddudm 30, 30, 31
-	vxor    19, 30, 27
-	vaddudm 30, 30, 31
-	vxor    20, 30, 27
-	vaddudm 30, 30, 31
-	vxor    21, 30, 27
-	vaddudm 30, 30, 31
-	vxor    22, 30, 27
-
-	addi    12, 12, -128
-	addi    11, 11, 128
-
-	bdnz	Loop_8x_block
-
-	vmr	30, 29
-	stxvb16x 30+32, 0, 7		# update IV
-
-Loop_last_block:
-	cmpdi   12, 0
-	beq     aes_gcm_out
-
-	# loop last few blocks
+################################################################################
+# Compute AES and ghash one block at a time.
+# r23: AES rounds
+# v30: current IV
+# vs0: roundkey 0
+#
+################################################################################
+SYM_FUNC_START_LOCAL(aes_gcm_crypt_1x)
+
+	cmpdi	5, 16
+	bge	__More_1x
+	blr
+__More_1x:
 	li      10, 16
-	divdu   10, 12, 10
-
-	mtctr   10
-
-	lwz	10, 240(6)
-
-	cmpdi   12, 16
-	blt     Final_block
-
-Next_rem_block:
-	lxvb16x 15, 0, 14		# load block
-
-	Loop_aes_middle_1x
-
-	xxlor	23+32, 10, 10
-
-	cmpdi	10, 10
-	beq	Do_next_1x
-
-	# 192 bits
-	xxlor	24+32, 11, 11
-
-	vcipher	15, 15, 23
-	vcipher	15, 15, 24
-
-	xxlor	23+32, 12, 12
+	divdu   12, 5, 10
+
+	xxlxor	32+15, 32+30, 0
+
+	# Pre-load 8 AES rounds to scratch vectors.
+	xxlor	32+16, 1, 1
+	xxlor	32+17, 2, 2
+	xxlor	32+18, 3, 3
+	xxlor	32+19, 4, 4
+	xxlor	32+20, 5, 5
+	xxlor	32+21, 6, 6
+	xxlor	32+28, 7, 7
+	xxlor	32+29, 8, 8
+	lwz	23, 240(6)	# n rounds
+	addi	22, 23, -9	# remaing AES rounds
 
-	cmpdi	10, 12
-	beq	Do_next_1x
-
-	# 256 bits
-	xxlor	24+32, 13, 13
-
-	vcipher	15, 15, 23
-	vcipher	15, 15, 24
-
-	xxlor	23+32, 14, 14
-
-	cmpdi	10, 14
-	beq	Do_next_1x
-
-Do_next_1x:
-	vcipherlast     15, 15, 23
-
-	xxlxor		47, 47, 15
-	stxvb16x	47, 0, 9	# store output
-	addi		14, 14, 16
-	addi		9, 9, 16
-
-	vmr		28, 15
-	ppc_update_hash_1x
-
-	addi		12, 12, -16
-	addi		11, 11, 16
-	xxlor		19+32, 0, 0
-	vaddudm		30, 30, 31		# IV + counter
-	vxor		15, 30, 19		# add round key
-
-	bdnz	Next_rem_block
-
-	li	15, 0
-	std	15, 56(7)		# clear partial?
-	stxvb16x 30+32, 0, 7		# update IV
 	cmpdi	12, 0
-	beq	aes_gcm_out
-
-Final_block:
-	lwz	10, 240(6)
-	Loop_aes_middle_1x
-
-	xxlor	23+32, 10, 10
-
-	cmpdi	10, 10
-	beq	Do_final_1x
-
-	# 192 bits
-	xxlor	24+32, 11, 11
-
-	vcipher	15, 15, 23
-	vcipher	15, 15, 24
-
-	xxlor	23+32, 12, 12
-
-	cmpdi	10, 12
-	beq	Do_final_1x
-
-	# 256 bits
-	xxlor	24+32, 13, 13
-
-	vcipher	15, 15, 23
-	vcipher	15, 15, 24
+	bgt	__Loop_1x
+	blr
 
-	xxlor	23+32, 14, 14
+__Loop_1x:
+	mtctr	22
+	addi	10, 6, 144
+	vcipher	15, 15, 16
+	vcipher	15, 15, 17
+	vcipher	15, 15, 18
+	vcipher	15, 15, 19
+	vcipher	15, 15, 20
+	vcipher	15, 15, 21
+	vcipher	15, 15, 28
+	vcipher	15, 15, 29
 
-	cmpdi	10, 14
-	beq	Do_final_1x
+__Loop_aes_1state:
+	lxv	32+1, 0(10)
+	vcipher	15, 15, 1
+	addi	10, 10, 16
+	bdnz	__Loop_aes_1state
+	lxv	32+1, 0(10)		# last round key
+	lxvb16x 11, 0, 14		# load input block
+	vcipherlast 15, 15, 1
+
+	xxlxor	32+15, 32+15, 11
+	stxvb16x 32+15, 0, 9	# store output
+	addi	14, 14, 16
+	addi	9, 9, 16
 
-Do_final_1x:
-	vcipherlast     15, 15, 23
+	cmpdi	24, 0	# decrypt?
+	bne	__Encrypt_1x
+	xxlor	15+32, 11, 11
+__Encrypt_1x:
+	vxor	15, 15, 0
+	PPC_GHASH1x 0, 15
 
-	# check partial block
-	li	21, 0			# encrypt
-	ld	15, 56(7)		# partial?
-	cmpdi	15, 0
-	beq	Normal_block
-	bl	Do_partial_block
+	addi	5, 5, -16
+	addi	11, 11, 16
 
+	vadduwm 30, 30, 31		# IV + counter
+	xxlxor	32+15, 32+30, 0
+	addi	12, 12, -1
 	cmpdi	12, 0
-	ble aes_gcm_out
+	bgt	__Loop_1x
 
-	b Continue_partial_check
-
-Normal_block:
-	lxvb16x	15, 0, 14		# load last block
-	xxlxor	47, 47, 15
-
-	# create partial block mask
-	li	15, 16
-	sub	15, 15, 12		# index to the mask
-
-	vspltisb	16, -1		# first 16 bytes - 0xffff...ff
-	vspltisb	17, 0		# second 16 bytes - 0x0000...00
-	li	10, 192
-	stvx	16, 10, 1
+	stxvb16x 32+30, 0, 7		# update IV
+	stxvb16x 32+0, 0, 8		# update Xi
+	blr
+SYM_FUNC_END(aes_gcm_crypt_1x)
+
+################################################################################
+# Process a normal partial block when we come here.
+#  Compute partial mask, Load and store partial block to stack.
+#  Update partial_len and pblock.
+#  pblock is (encrypted ^ AES state) for encrypt
+#        and (input ^ AES state) for decrypt.
+#
+################################################################################
+SYM_FUNC_START_LOCAL(__Process_partial)
+
+	# create partial mask
+	vspltisb 16, -1
+	li	12, 16
+	sub	12, 12, 5
+	sldi	12, 12, 3
+	mtvsrdd	32+17, 0, 12
+	vslo	16, 16, 17		# partial block mask
+
+	lxvb16x 11, 0, 14		# load partial block
+	xxland	11, 11, 32+16
+
+	# AES crypt partial
+	xxlxor	32+15, 32+30, 0
+	lwz	23, 240(6)		# n rounds
+	addi	22, 23, -1		# loop - 1
+	mtctr	22
+	addi	10, 6, 16
+
+__Loop_aes_pstate:
+	lxv	32+1, 0(10)
+	vcipher	15, 15, 1
 	addi	10, 10, 16
-	stvx	17, 10, 1
-
-	addi	10, 1, 192
-	lxvb16x	16, 15, 10		# load partial block mask
-	xxland	47, 47, 16
-
-	vmr	28, 15
-	ppc_update_hash_1x
+	bdnz	__Loop_aes_pstate
+	lxv	32+1, 0(10)		# last round key
+	vcipherlast 15, 15, 1
 
-	# * should store only the remaining bytes.
-	bl	Write_partial_block
-
-	stxvb16x 30+32, 0, 7		# update IV
-	std	12, 56(7)		# update partial?
-	li	16, 16
+	xxlxor	32+15, 32+15, 11
+	vand	15, 15, 16
 
-	stxvb16x	32, 0, 8		# write out Xi
-	stxvb16x	32, 16, 8		# write out Xi
-	b aes_gcm_out
-
- #
- # Compute data mask
- #
-.macro GEN_MASK _mask _start _end
-	vspltisb	16, -1		# first 16 bytes - 0xffff...ff
-	vspltisb	17, 0		# second 16 bytes - 0x0000...00
-	li	10, 192
-	stxvb16x	17+32, 10, 1
-	add	10, 10, \_start
-	stxvb16x	16+32, 10, 1
-	add	10, 10, \_end
-	stxvb16x	17+32, 10, 1
-
-	addi	10, 1, 192
-	lxvb16x	\_mask, 0, 10		# load partial block mask
-.endm
+	# AES crypt output v15
+	# Write partial
+	li	10, 224
+	stxvb16x 15+32, 10, 1		# write v15 to stack
+	addi	10, 1, 223
+	addi	12, 9, -1
+        mtctr	5			# partial block len
+__Write_partial:
+        lbzu	22, 1(10)
+	stbu	22, 1(12)
+        bdnz	__Write_partial
+
+	cmpdi	24, 0			# decrypt?
+	bne	__Encrypt_partial
+	xxlor	32+15, 11, 11		# decrypt using the input block
+__Encrypt_partial:
+	#vxor	15, 15, 0		# ^ previous hash
+	#PPC_GHASH1x 0, 15
+
+	add	14, 14, 5
+	add	9, 9, 5
+	std	5, 56(7)		# update partial
+	sub	11, 11, 5
+	li	5, 0			# done last byte
 
- #
- # Handle multiple partial blocks for encrypt and decrypt
- #   operations.
- #
-SYM_FUNC_START_LOCAL(Do_partial_block)
-	add	17, 15, 5
-	cmpdi	17, 16
-	bgt	Big_block
-	GEN_MASK 18, 15, 5
-	b	_Partial
-SYM_FUNC_END(Do_partial_block)
-Big_block:
+	#
+	# Don't increase IV since this is the last partial.
+	# It should get updated in gcm_update if no more data blocks.
+	#vadduwm	30, 30, 31		# increase IV
+	stxvb16x 32+30, 0, 7		# update IV
+	li	10, 64
+	stxvb16x 32+0, 0, 8		# Update X1
+	stxvb16x 32+15, 10, 7		# Update pblock
+	blr
+SYM_FUNC_END(__Process_partial)
+
+################################################################################
+# Combine partial blocks and ghash when we come here.
+#
+# The partial block has to be shifted to the right location to encrypt/decrypt
+# and compute ghash if combing the previous partial block is needed.
+# - Compute ghash for a full block. Clear Partial_len and pblock. Update IV.
+#   Write Xi.
+# - Don't compute ghash if not full block.  gcm_update will take care of it
+#   is the last block. Update Partial_len and pblock.
+#
+################################################################################
+SYM_FUNC_START_LOCAL(__Combine_partial)
+
+	ld	12, 56(7)
+	mr	21, 5			# these bytes to be processed
+
+	li	17, 0
 	li	16, 16
-	GEN_MASK 18, 15, 16
-
-_Partial:
-	lxvb16x	17+32, 0, 14		# load last block
-	sldi	16, 15, 3
-	mtvsrdd	32+16, 0, 16
-	vsro	17, 17, 16
-	xxlxor	47, 47, 17+32
-	xxland	47, 47, 18
-
-	vxor	0, 0, 0			# clear Xi
-	vmr	28, 15
-
-	cmpdi	21, 0			# encrypt/decrypt ops?
-	beq	Skip_decrypt
-	xxland	32+28, 32+17, 18
-
-Skip_decrypt:
-
-	ppc_update_hash_1x
+	sub	22, 16, 12		# bytes to complete a block
+	sub	17, 22, 5		# remaining bytes in a block
+	cmpdi	5, 16
+	ble	__Inp_msg_less16
+	li	17, 0
+	mr	21, 22
+	b	__Combine_continue
+__Inp_msg_less16:
+	cmpd	22, 5
+	bgt	__Combine_continue
+	li	17, 0
+	mr	21, 22			# these bytes to be processed
+
+__Combine_continue:
+	# load msg and shift to the proper location and mask
+	vspltisb 16, -1
+	sldi	15, 12, 3
+	mtvsrdd	32+17, 0, 15
+	vslo	16, 16, 17
+	vsro	16, 16, 17
+	sldi	15, 17, 3
+	mtvsrdd	32+17, 0, 15
+	vsro	16, 16, 17
+	vslo	16, 16, 17		# mask
+
+	lxvb16x 32+19, 0, 14		# load partial block
+	sldi	15, 12, 3
+	mtvsrdd	32+17, 0, 15
+	vsro	19, 19, 17		# 0x00..xxxx??..??
+	sldi	15, 17, 3
+	mtvsrdd	32+17, 0, 15
+	vsro	19, 19, 17		# 0x00..xxxx
+	vslo	19, 19, 17		# shift back to form 0x00..xxxx00..00
+
+	# AES crypt partial
+	xxlxor	32+15, 32+30, 0
+	lwz	23, 240(6)	# n rounds
+	addi	22, 23, -1	# loop - 1
+	mtctr	22
+	addi	10, 6, 16
+
+__Loop_aes_cpstate:
+	lxv	32+1, 0(10)
+	vcipher	15, 15, 1
+	addi	10, 10, 16
+	bdnz	__Loop_aes_cpstate
+	lxv	32+1, 0(10)		# last round key
+	vcipherlast 15, 15, 1
 
-	li	16, 16
-	lxvb16x 32+29, 16, 8
-	vxor	0, 0, 29
-	stxvb16x 32, 0, 8		# save Xi
-	stxvb16x 32, 16, 8		# save Xi
-
-	# store partial block
-	# loop the rest of the stream if any
-	sldi	16, 15, 3
-	mtvsrdd	32+16, 0, 16
-	vslo	15, 15, 16
-	#stxvb16x 15+32, 0, 9		# last block
+	vxor	15, 15, 19
+	vand	15, 15, 16
 
-	li	16, 16
-	sub	17, 16, 15		# 16 - partial
-
-	add	16, 15, 5
-	cmpdi	16, 16
-	bgt	Larger_16
-	mr	17, 5
-Larger_16:
-
-	# write partial
-	li		10, 192
-	stxvb16x	15+32, 10, 1	# save current block
-
-	addi		10, 9, -1
-	addi		16, 1, 191
-	mtctr		17		# move partial byte count
-
-Write_last_partial:
-        lbzu		18, 1(16)
-	stbu		18, 1(10)
-        bdnz		Write_last_partial
-	# Complete loop partial
-
-	add	14, 14, 17
-	add	9, 9, 17
-	sub	12, 12, 17
-	add	11, 11, 17
-
-	add	15, 15, 5
-	cmpdi	15, 16
-	blt	Save_partial
-
-	vaddudm	30, 30, 31
-	stxvb16x 30+32, 0, 7		# update IV
-	xxlor	32+29, 0, 0
-	vxor	15, 30, 29		# IV + round key - add round key 0
-	li	15, 0
-	std	15, 56(7)		# partial done - clear
-	b	Partial_done
-Save_partial:
-	std	15, 56(7)		# partial
-
-Partial_done:
+	# AES crypt output v15
+	# Write partial
+	li	10, 224
+	stxvb16x 15+32, 10, 1		# write v15 to stack
+	addi	10, 1, 223
+	add	10, 10, 12		# add offset
+	addi	15, 9, -1
+        mtctr	21			# partial block len
+__Write_combine_partial:
+        lbzu	22, 1(10)
+	stbu	22, 1(15)
+        bdnz	__Write_combine_partial
+
+	add	14, 14, 21
+	add	11, 11, 21
+	add	9, 9, 21
+	sub	5, 5, 21
+
+	# Encrypt/Decrypt?
+	cmpdi	24, 0			# decrypt?
+	bne	__Encrypt_combine_partial
+	vmr	15, 19		# decrypt using the input block
+
+__Encrypt_combine_partial:
+	#
+	# Update partial flag and combine ghash.
+__Update_partial_ghash:
+	li	10, 64
+	lxvb16x 32+17, 10, 7		# load previous pblock
+	add	12, 12, 21		# combined pprocessed
+	vxor	15, 15, 17		# combined pblock
+
+	cmpdi	12, 16
+	beq	__Clear_partial_flag
+	std	12, 56(7)		# update partial len
+	stxvb16x 32+15, 10, 7		# Update current pblock
 	blr
 
- #
- # Write partial block
- # r9 - output
- # r12 - remaining bytes
- # v15 - partial input data
- #
-SYM_FUNC_START_LOCAL(Write_partial_block)
-	li		10, 192
-	stxvb16x	15+32, 10, 1		# last block
-
-	addi		10, 9, -1
-	addi		16, 1, 191
-
-        mtctr		12			# remaining bytes
-	li		15, 0
-
-Write_last_byte:
-        lbzu		14, 1(16)
-	stbu		14, 1(10)
-        bdnz		Write_last_byte
+__Clear_partial_flag:
+	li	12, 0
+	std	12, 56(7)
+	# Update IV and ghash here
+	vadduwm	30, 30, 31		# increase IV
+	stxvb16x 32+30, 0, 7		# update IV
+
+	# v15 either is either (input blockor encrypted)^(AES state)
+	vxor	15, 15, 0
+	PPC_GHASH1x 0, 15
+	stxvb16x 32+0, 10, 7		# update pblock for debug?
+	stxvb16x 32+0, 0, 8		# update Xi
 	blr
-SYM_FUNC_END(Write_partial_block)
+SYM_FUNC_END(__Combine_partial)
 
-aes_gcm_out:
-	# out = state
-	stxvb16x	32, 0, 8		# write out Xi
-	add	3, 11, 12		# return count
+################################################################################
+# gcm_update(iv, Xi) - compute last hash
+#
+################################################################################
+SYM_FUNC_START(gcm_update)
 
-	RESTORE_REGS
-	blr
+	ld	10, 56(3)
+	cmpdi	10, 0
+	beq	__no_update
 
- #
- # 8x Decrypt
- #
-_GLOBAL(aes_p10_gcm_decrypt)
-.align 5
+	lxvb16x	32, 0, 4	# load Xi
+	# load Hash - h^4, h^3, h^2, h
+	li	10, 32
+	lxvd2x	2+32, 10, 4	# H Poli
+	li	10, 48
+	lxvd2x	3+32, 10, 4	# Hl
+	li	10, 64
+	lxvd2x	4+32, 10, 4	# H
+	li	10, 80
+	lxvd2x	5+32, 10, 4	# Hh
+
+	addis	11, 2, permx@toc@ha
+	addi	11, 11, permx@toc@l
+	lxv	10, 0(11)	# vs10: vpermxor vector
+
+	li	9, 64
+	lxvb16x 32+6, 9, 3		# load pblock
+	vxor	6, 6, 0
+
+	vxor	1, 1, 1
+	vpmsumd	12, 3, 6		# L
+	vpmsumd	13, 4, 6		# M
+	vpmsumd	14, 5, 6		# H
+	vpmsumd	17, 12, 2		# reduction
+	vsldoi	15, 13, 1, 8		# mL
+	vsldoi	16, 1, 13, 8		# mH
+	vxor	12, 12, 15		# LL + LL
+	vxor	14, 14, 16		# HH + HH
+	xxlor	32+15, 10, 10
+	vpermxor 12, 12, 17, 15
+	vsldoi	13, 12, 12, 8		# swap
+	vpmsumd	12, 12, 2		# reduction
+	vxor	13, 13, 14
+	vxor	7, 12, 13
+
+	#vxor	0, 0, 0
+	#stxvb16x 32+0, 9, 3
+	li	10, 0
+	std	10, 56(3)
+	stxvb16x 32+7, 0, 4
+
+__no_update:
+	blr
+SYM_FUNC_END(gcm_update)
+
+################################################################################
+# aes_p10_gcm_encrypt (const void *inp, void *out, size_t len,
+#               const char *rk, unsigned char iv[16], void *Xip);
+#
+#    r3 - inp
+#    r4 - out
+#    r5 - len
+#    r6 - AES round keys
+#    r7 - iv and other data
+#    r8 - Xi, HPoli, hash keys
+#
+#    rounds is at offset 240 in rk
+#    Xi is at 0 in gcm_table (Xip).
+#
+################################################################################
+SYM_FUNC_START(aes_p10_gcm_encrypt)
+
+	cmpdi	5, 0
+	ble	__Invalid_msg_len
 
 	SAVE_REGS
-
 	LOAD_HASH_TABLE
 
 	# initialize ICB: GHASH( IV ), IV - r7
 	lxvb16x	30+32, 0, 7	# load IV  - v30
 
-	mr	12, 5		# length
-	li	11, 0		# block index
+	mr	14, 3
+	mr	9, 4
 
 	# counter 1
 	vxor	31, 31, 31
 	vspltisb 22, 1
 	vsldoi	31, 31, 22,1	# counter 1
 
-	# load round key to VSR
-	lxv	0, 0(6)
-	lxv	1, 0x10(6)
-	lxv	2, 0x20(6)
-	lxv	3, 0x30(6)
-	lxv	4, 0x40(6)
-	lxv	5, 0x50(6)
-	lxv	6, 0x60(6)
-	lxv	7, 0x70(6)
-	lxv	8, 0x80(6)
-	lxv	9, 0x90(6)
-	lxv	10, 0xa0(6)
+	addis	11, 2, permx@toc@ha
+	addi	11, 11, permx@toc@l
+	lxv	10, 0(11)	# vs10: vpermxor vector
+	li	11, 0
+
+	# load 9 round keys to VSR
+	lxv	0, 0(6)			# round key 0
+	lxv	1, 16(6)		# round key 1
+	lxv	2, 32(6)		# round key 2
+	lxv	3, 48(6)		# round key 3
+	lxv	4, 64(6)		# round key 4
+	lxv	5, 80(6)		# round key 5
+	lxv	6, 96(6)		# round key 6
+	lxv	7, 112(6)		# round key 7
+	lxv	8, 128(6)		# round key 8
 
 	# load rounds - 10 (128), 12 (192), 14 (256)
-	lwz	9,240(6)
+	lwz	23, 240(6)		# n rounds
+	li	24, 1			# encrypt
 
+__Process_encrypt:
 	#
-	# vxor	state, state, w # addroundkey
-	xxlor	32+29, 0, 0
-	vxor	15, 30, 29	# IV + round key - add round key 0
-
-	cmpdi	9, 10
-	beq	Loop_aes_gcm_8x_dec
-
-	# load 2 more round keys (v11, v12)
-	lxv	11, 0xb0(6)
-	lxv	12, 0xc0(6)
-
-	cmpdi	9, 12
-	beq	Loop_aes_gcm_8x_dec
-
-	# load 2 more round keys (v11, v12, v13, v14)
-	lxv	13, 0xd0(6)
-	lxv	14, 0xe0(6)
-	cmpdi	9, 14
-	beq	Loop_aes_gcm_8x_dec
+	# Process different blocks
+	#
+	ld	12, 56(7)
+	cmpdi	12, 0
+	bgt	__Do_combine_enc
+	cmpdi	5, 128
+	blt	__Process_more_enc
+
+#
+# Process 8x AES/GCM blocks
+#
+__Process_8x_enc:
+	# 8x blcoks
+	li	10, 128
+	divdu	12, 5, 10	# n 128 bytes-blocks
 
-	b	aes_gcm_out
+	addi	12, 12, -1	# loop - 1
 
-.align 5
-Loop_aes_gcm_8x_dec:
-	mr	14, 3
-	mr	9, 4
+	vmr	15, 30		# first state: IV
+	vadduwm	16, 15, 31	# state + counter
+	vadduwm	17, 16, 31
+	vadduwm	18, 17, 31
+	vadduwm	19, 18, 31
+	vadduwm	20, 19, 31
+	vadduwm	21, 20, 31
+	vadduwm	22, 21, 31
+	xxlor	9, 32+22, 32+22	# save last state
 
-	#
-	# check partial block
-	#
-Continue_partial_check_dec:
-	ld	15, 56(7)
-	cmpdi	15, 0
-	beq	Continue_dec
-	bgt	Final_block_dec
-	cmpdi	15, 16
-	blt	Final_block_dec
-
-Continue_dec:
-	# n blcoks
-	li	10, 128
-	divdu	10, 12, 10	# n 128 bytes-blocks
-	cmpdi	10, 0
-	beq	Loop_last_block_dec
-
-	vaddudm	30, 30, 31	# IV + counter
-	vxor	16, 30, 29
-	vaddudm	30, 30, 31
-	vxor	17, 30, 29
-	vaddudm	30, 30, 31
-	vxor	18, 30, 29
-	vaddudm	30, 30, 31
-	vxor	19, 30, 29
-	vaddudm	30, 30, 31
-	vxor	20, 30, 29
-	vaddudm	30, 30, 31
-	vxor	21, 30, 29
-	vaddudm	30, 30, 31
-	vxor	22, 30, 29
-
-	mtctr	10
+	# vxor  state, state, w # addroundkey
+	xxlor	32+29, 0, 0
+        vxor    15, 15, 29      # IV + round key - add round key 0
+	vxor	16, 16, 29
+	vxor	17, 17, 29
+	vxor	18, 18, 29
+	vxor	19, 19, 29
+	vxor	20, 20, 29
+	vxor	21, 21, 29
+	vxor	22, 22, 29
 
 	li	15, 16
 	li	16, 32
@@ -1217,305 +735,502 @@ Continue_dec:
 	li	20, 96
 	li	21, 112
 
-	lwz	10, 240(6)
-
-Loop_8x_block_dec:
-
-	lxvb16x		15, 0, 14	# load block
-	lxvb16x		16, 15, 14	# load block
-	lxvb16x		17, 16, 14	# load block
-	lxvb16x		18, 17, 14	# load block
-	lxvb16x		19, 18, 14	# load block
-	lxvb16x		20, 19, 14	# load block
-	lxvb16x		21, 20, 14	# load block
-	lxvb16x		22, 21, 14	# load block
-	addi		14, 14, 128
-
-	Loop_aes_middle8x
-
-	xxlor	23+32, 10, 10
-
-	cmpdi	10, 10
-	beq	Do_next_ghash_dec
-
-	# 192 bits
-	xxlor	24+32, 11, 11
-
-	vcipher	15, 15, 23
-	vcipher	16, 16, 23
-	vcipher	17, 17, 23
-	vcipher	18, 18, 23
-	vcipher	19, 19, 23
-	vcipher	20, 20, 23
-	vcipher	21, 21, 23
-	vcipher	22, 22, 23
-
-	vcipher	15, 15, 24
-	vcipher	16, 16, 24
-	vcipher	17, 17, 24
-	vcipher	18, 18, 24
-	vcipher	19, 19, 24
-	vcipher	20, 20, 24
-	vcipher	21, 21, 24
-	vcipher	22, 22, 24
-
-	xxlor	23+32, 12, 12
-
-	cmpdi	10, 12
-	beq	Do_next_ghash_dec
-
-	# 256 bits
-	xxlor	24+32, 13, 13
-
-	vcipher	15, 15, 23
-	vcipher	16, 16, 23
-	vcipher	17, 17, 23
-	vcipher	18, 18, 23
-	vcipher	19, 19, 23
-	vcipher	20, 20, 23
-	vcipher	21, 21, 23
-	vcipher	22, 22, 23
-
-	vcipher	15, 15, 24
-	vcipher	16, 16, 24
-	vcipher	17, 17, 24
-	vcipher	18, 18, 24
-	vcipher	19, 19, 24
-	vcipher	20, 20, 24
-	vcipher	21, 21, 24
-	vcipher	22, 22, 24
-
-	xxlor	23+32, 14, 14
-
-	cmpdi	10, 14
-	beq	Do_next_ghash_dec
-	b	aes_gcm_out
+	#
+	# Pre-compute first 8 AES state and leave 1/3/5 more rounds
+	# for the loop.
+	#
+	addi	22, 23, -9		# process 8 keys
+	mtctr	22			# AES key loop
+	addi	10, 6, 144
 
-Do_next_ghash_dec:
+	LOOP_8AES_STATE			# process 8 AES keys
 
-	#
-	# last round
-	vcipherlast     15, 15, 23
-	vcipherlast     16, 16, 23
-
-	xxlxor		47, 47, 15
-	stxvb16x        47, 0, 9	# store output
-	xxlxor		48, 48, 16
-	stxvb16x        48, 15, 9	# store output
-
-	vcipherlast     17, 17, 23
-	vcipherlast     18, 18, 23
-
-	xxlxor		49, 49, 17
-	stxvb16x        49, 16, 9	# store output
-	xxlxor		50, 50, 18
-	stxvb16x        50, 17, 9	# store output
-
-	vcipherlast     19, 19, 23
-	vcipherlast     20, 20, 23
-
-	xxlxor		51, 51, 19
-	stxvb16x        51, 18, 9	# store output
-	xxlxor		52, 52, 20
-	stxvb16x        52, 19, 9	# store output
-
-	vcipherlast     21, 21, 23
-	vcipherlast     22, 22, 23
-
-	xxlxor		53, 53, 21
-	stxvb16x        53, 20, 9	# store output
-	xxlxor		54, 54, 22
-	stxvb16x        54, 21, 9	# store output
-
-	addi		9, 9, 128
-
-	xxlor           15+32, 15, 15
-	xxlor           16+32, 16, 16
-	xxlor           17+32, 17, 17
-	xxlor           18+32, 18, 18
-	xxlor           19+32, 19, 19
-	xxlor           20+32, 20, 20
-	xxlor           21+32, 21, 21
-	xxlor           22+32, 22, 22
+__PreLoop_aes_state:
+	lxv	32+1, 0(10)		# round key
+	AES_CIPHER_8x vcipher 15 1
+	addi	10, 10, 16
+	bdnz	__PreLoop_aes_state
+	lxv	32+1, 0(10)		# last round key (v1)
+
+	cmpdi	12, 0			# Only one loop (8 block)
+	beq	__Finish_ghash
+
+#
+# Loop 8x blocks and compute ghash
+#
+__Loop_8x_block_enc:
+	vcipherlast     15, 15, 1
+	vcipherlast     16, 16, 1
+	vcipherlast     17, 17, 1
+	vcipherlast     18, 18, 1
+	vcipherlast     19, 19, 1
+	vcipherlast     20, 20, 1
+	vcipherlast     21, 21, 1
+	vcipherlast     22, 22, 1
+
+	lxvb16x	32+23, 0, 14	# load block
+	lxvb16x	32+24, 15, 14	# load block
+	lxvb16x	32+25, 16, 14	# load block
+	lxvb16x	32+26, 17, 14	# load block
+	lxvb16x	32+27, 18, 14	# load block
+	lxvb16x	32+28, 19, 14	# load block
+	lxvb16x	32+29, 20, 14	# load block
+	lxvb16x	32+30, 21, 14	# load block
+	addi	14, 14, 128
+
+	vxor	15, 15, 23
+	vxor	16, 16, 24
+	vxor	17, 17, 25
+	vxor	18, 18, 26
+	vxor	19, 19, 27
+	vxor	20, 20, 28
+	vxor	21, 21, 29
+	vxor	22, 22, 30
+
+	stxvb16x 47, 0, 9	# store output
+	stxvb16x 48, 15, 9	# store output
+	stxvb16x 49, 16, 9	# store output
+	stxvb16x 50, 17, 9	# store output
+	stxvb16x 51, 18, 9	# store output
+	stxvb16x 52, 19, 9	# store output
+	stxvb16x 53, 20, 9	# store output
+	stxvb16x 54, 21, 9	# store output
+	addi	9, 9, 128
 
 	# ghash here
-	ppc_aes_gcm_ghash2_4x
-
-	xxlor	27+32, 0, 0
-	vaddudm 30, 30, 31		# IV + counter
-	vmr	29, 30
-	vxor    15, 30, 27		# add round key
-	vaddudm 30, 30, 31
-	vxor    16, 30, 27
-	vaddudm 30, 30, 31
-	vxor    17, 30, 27
-	vaddudm 30, 30, 31
-	vxor    18, 30, 27
-	vaddudm 30, 30, 31
-	vxor    19, 30, 27
-	vaddudm 30, 30, 31
-	vxor    20, 30, 27
-	vaddudm 30, 30, 31
-	vxor    21, 30, 27
-	vaddudm 30, 30, 31
-	vxor    22, 30, 27
-
-	addi    12, 12, -128
+	vxor	15, 15, 0
+	PPC_GHASH4x 0, 15, 16, 17, 18
+
+	vxor	19, 19, 0
+	PPC_GHASH4x 0, 19, 20, 21, 22
+
+	xxlor	32+15, 9, 9		# last state
+	vadduwm 15, 15, 31		# state + counter
+	vadduwm 16, 15, 31
+	vadduwm 17, 16, 31
+	vadduwm 18, 17, 31
+	vadduwm 19, 18, 31
+	vadduwm 20, 19, 31
+	vadduwm 21, 20, 31
+	vadduwm 22, 21, 31
+	xxlor	9, 32+22, 32+22		# save last state
+
+	xxlor	32+27, 0, 0		# restore roundkey 0
+        vxor    15, 15, 27		# IV + round key - add round key 0
+	vxor	16, 16, 27
+	vxor	17, 17, 27
+	vxor	18, 18, 27
+	vxor	19, 19, 27
+	vxor	20, 20, 27
+	vxor	21, 21, 27
+	vxor	22, 22, 27
+
+	addi    5, 5, -128
 	addi    11, 11, 128
 
-	bdnz	Loop_8x_block_dec
-
-	vmr	30, 29
-	stxvb16x 30+32, 0, 7		# update IV
-
-Loop_last_block_dec:
-	cmpdi   12, 0
-	beq     aes_gcm_out
-
-	# loop last few blocks
-	li      10, 16
-	divdu   10, 12, 10
-
-	mtctr   10
-
-	lwz	10, 240(6)
-
-	cmpdi   12, 16
-	blt     Final_block_dec
-
-Next_rem_block_dec:
-	lxvb16x 15, 0, 14		# load block
-
-	Loop_aes_middle_1x
-
-	xxlor	23+32, 10, 10
+	LOOP_8AES_STATE			# process 8 AES keys
+	mtctr	22			# AES key loop
+	addi	10, 6, 144
+__LastLoop_aes_state:
+	lxv	32+1, 0(10)		# round key
+	AES_CIPHER_8x vcipher 15 1
+	addi	10, 10, 16
+	bdnz	__LastLoop_aes_state
+	lxv	32+1, 0(10)		# last round key (v1)
 
-	cmpdi	10, 10
-	beq	Do_next_1x_dec
+	addi	12, 12, -1
+	cmpdi	12, 0
+	bne	__Loop_8x_block_enc
+
+__Finish_ghash:
+	vcipherlast     15, 15, 1
+	vcipherlast     16, 16, 1
+	vcipherlast     17, 17, 1
+	vcipherlast     18, 18, 1
+	vcipherlast     19, 19, 1
+	vcipherlast     20, 20, 1
+	vcipherlast     21, 21, 1
+	vcipherlast     22, 22, 1
+
+	lxvb16x	32+23, 0, 14	# load block
+	lxvb16x	32+24, 15, 14	# load block
+	lxvb16x	32+25, 16, 14	# load block
+	lxvb16x	32+26, 17, 14	# load block
+	lxvb16x	32+27, 18, 14	# load block
+	lxvb16x	32+28, 19, 14	# load block
+	lxvb16x	32+29, 20, 14	# load block
+	lxvb16x	32+30, 21, 14	# load block
+	addi	14, 14, 128
+
+	vxor	15, 15, 23
+	vxor	16, 16, 24
+	vxor	17, 17, 25
+	vxor	18, 18, 26
+	vxor	19, 19, 27
+	vxor	20, 20, 28
+	vxor	21, 21, 29
+	vxor	22, 22, 30
+
+	stxvb16x 47, 0, 9	# store output
+	stxvb16x 48, 15, 9	# store output
+	stxvb16x 49, 16, 9	# store output
+	stxvb16x 50, 17, 9	# store output
+	stxvb16x 51, 18, 9	# store output
+	stxvb16x 52, 19, 9	# store output
+	stxvb16x 53, 20, 9	# store output
+	stxvb16x 54, 21, 9	# store output
+	addi	9, 9, 128
+
+	vxor	15, 15, 0
+	PPC_GHASH4x 0, 15, 16, 17, 18
+
+	vxor	19, 19, 0
+	PPC_GHASH4x 0, 19, 20, 21, 22
+
+	xxlor	30+32, 9, 9		# last ctr
+	vadduwm	30, 30, 31		# increase ctr
+	stxvb16x 32+30, 0, 7		# update IV
+	stxvb16x 32+0, 0, 8		# update Xi
+
+	addi    5, 5, -128
+	addi    11, 11, 128
 
-	# 192 bits
-	xxlor	24+32, 11, 11
+	#
+	# Done 8x blocks
+	#
 
-	vcipher	15, 15, 23
-	vcipher	15, 15, 24
+	cmpdi   5, 0
+	beq     aes_gcm_out
 
-	xxlor	23+32, 12, 12
+__Process_more_enc:
+	li	24, 1			# encrypt
+	bl	aes_gcm_crypt_1x
+	cmpdi   5, 0
+	beq     aes_gcm_out
 
-	cmpdi	10, 12
-	beq	Do_next_1x_dec
+	bl	__Process_partial
+	cmpdi   5, 0
+	beq     aes_gcm_out
+__Do_combine_enc:
+	bl	__Combine_partial
+	cmpdi	5, 0
+	bgt	__Process_encrypt
+	b	aes_gcm_out
 
-	# 256 bits
-	xxlor	24+32, 13, 13
+SYM_FUNC_END(aes_p10_gcm_encrypt)
 
-	vcipher	15, 15, 23
-	vcipher	15, 15, 24
+################################################################################
+# aes_p10_gcm_decrypt (const void *inp, void *out, size_t len,
+#               const char *rk, unsigned char iv[16], void *Xip);
+# 8x Decrypt
+#
+################################################################################
+SYM_FUNC_START(aes_p10_gcm_decrypt)
 
-	xxlor	23+32, 14, 14
+	cmpdi	5, 0
+	ble	__Invalid_msg_len
 
-	cmpdi	10, 14
-	beq	Do_next_1x_dec
+	SAVE_REGS
+	LOAD_HASH_TABLE
 
-Do_next_1x_dec:
-	vcipherlast     15, 15, 23
+	# initialize ICB: GHASH( IV ), IV - r7
+	lxvb16x	30+32, 0, 7	# load IV  - v30
 
-	xxlxor		47, 47, 15
-	stxvb16x	47, 0, 9	# store output
-	addi		14, 14, 16
-	addi		9, 9, 16
+	mr	14, 3
+	mr	9, 4
 
-	xxlor           28+32, 15, 15
-	#vmr		28, 15
-	ppc_update_hash_1x
+	# counter 1
+	vxor	31, 31, 31
+	vspltisb 22, 1
+	vsldoi	31, 31, 22,1	# counter 1
 
-	addi		12, 12, -16
-	addi		11, 11, 16
-	xxlor		19+32, 0, 0
-	vaddudm		30, 30, 31		# IV + counter
-	vxor		15, 30, 19		# add round key
+	addis	11, 2, permx@toc@ha
+	addi	11, 11, permx@toc@l
+	lxv	10, 0(11)	# vs10: vpermxor vector
+	li	11, 0
+
+	# load 9 round keys to VSR
+	lxv	0, 0(6)			# round key 0
+	lxv	1, 16(6)		# round key 1
+	lxv	2, 32(6)		# round key 2
+	lxv	3, 48(6)		# round key 3
+	lxv	4, 64(6)		# round key 4
+	lxv	5, 80(6)		# round key 5
+	lxv	6, 96(6)		# round key 6
+	lxv	7, 112(6)		# round key 7
+	lxv	8, 128(6)		# round key 8
 
-	bdnz	Next_rem_block_dec
+	# load rounds - 10 (128), 12 (192), 14 (256)
+	lwz	23, 240(6)		# n rounds
+	li	24, 0			# decrypt
 
-	li	15, 0
-	std	15, 56(7)		# clear partial?
-	stxvb16x 30+32, 0, 7		# update IV
+__Process_decrypt:
+	#
+	# Process different blocks
+	#
+	ld	12, 56(7)
 	cmpdi	12, 0
-	beq	aes_gcm_out
-
-Final_block_dec:
-	lwz	10, 240(6)
-	Loop_aes_middle_1x
-
-	xxlor	23+32, 10, 10
-
-	cmpdi	10, 10
-	beq	Do_final_1x_dec
+	bgt	__Do_combine_dec
+	cmpdi	5, 128
+	blt	__Process_more_dec
+
+#
+# Process 8x AES/GCM blocks
+#
+__Process_8x_dec:
+	# 8x blcoks
+	li	10, 128
+	divdu	12, 5, 10	# n 128 bytes-blocks
 
-	# 192 bits
-	xxlor	24+32, 11, 11
+	addi	12, 12, -1	# loop - 1
 
-	vcipher	15, 15, 23
-	vcipher	15, 15, 24
+	vmr	15, 30		# first state: IV
+	vadduwm	16, 15, 31	# state + counter
+	vadduwm	17, 16, 31
+	vadduwm	18, 17, 31
+	vadduwm	19, 18, 31
+	vadduwm	20, 19, 31
+	vadduwm	21, 20, 31
+	vadduwm	22, 21, 31
+	xxlor	9, 32+22, 32+22	# save last state
 
-	xxlor	23+32, 12, 12
+	# vxor  state, state, w # addroundkey
+	xxlor	32+29, 0, 0
+        vxor    15, 15, 29      # IV + round key - add round key 0
+	vxor	16, 16, 29
+	vxor	17, 17, 29
+	vxor	18, 18, 29
+	vxor	19, 19, 29
+	vxor	20, 20, 29
+	vxor	21, 21, 29
+	vxor	22, 22, 29
 
-	cmpdi	10, 12
-	beq	Do_final_1x_dec
+	li	15, 16
+	li	16, 32
+	li	17, 48
+	li	18, 64
+	li	19, 80
+	li	20, 96
+	li	21, 112
 
-	# 256 bits
-	xxlor	24+32, 13, 13
+	#
+	# Pre-compute first 8 AES state and leave 1/3/5 more rounds
+	# for the loop.
+	#
+	addi	22, 23, -9		# process 8 keys
+	mtctr	22			# AES key loop
+	addi	10, 6, 144
 
-	vcipher	15, 15, 23
-	vcipher	15, 15, 24
+	LOOP_8AES_STATE			# process 8 AES keys
 
-	xxlor	23+32, 14, 14
+__PreLoop_aes_state_dec:
+	lxv	32+1, 0(10)		# round key
+	AES_CIPHER_8x vcipher 15 1
+	addi	10, 10, 16
+	bdnz	__PreLoop_aes_state_dec
+	lxv	32+1, 0(10)		# last round key (v1)
+
+	cmpdi	12, 0			# Only one loop (8 block)
+	beq	__Finish_ghash_dec
+
+#
+# Loop 8x blocks and compute ghash
+#
+__Loop_8x_block_dec:
+	vcipherlast     15, 15, 1
+	vcipherlast     16, 16, 1
+	vcipherlast     17, 17, 1
+	vcipherlast     18, 18, 1
+	vcipherlast     19, 19, 1
+	vcipherlast     20, 20, 1
+	vcipherlast     21, 21, 1
+	vcipherlast     22, 22, 1
+
+	lxvb16x	32+23, 0, 14	# load block
+	lxvb16x	32+24, 15, 14	# load block
+	lxvb16x	32+25, 16, 14	# load block
+	lxvb16x	32+26, 17, 14	# load block
+	lxvb16x	32+27, 18, 14	# load block
+	lxvb16x	32+28, 19, 14	# load block
+	lxvb16x	32+29, 20, 14	# load block
+	lxvb16x	32+30, 21, 14	# load block
+	addi	14, 14, 128
+
+	vxor	15, 15, 23
+	vxor	16, 16, 24
+	vxor	17, 17, 25
+	vxor	18, 18, 26
+	vxor	19, 19, 27
+	vxor	20, 20, 28
+	vxor	21, 21, 29
+	vxor	22, 22, 30
+
+	stxvb16x 47, 0, 9	# store output
+	stxvb16x 48, 15, 9	# store output
+	stxvb16x 49, 16, 9	# store output
+	stxvb16x 50, 17, 9	# store output
+	stxvb16x 51, 18, 9	# store output
+	stxvb16x 52, 19, 9	# store output
+	stxvb16x 53, 20, 9	# store output
+	stxvb16x 54, 21, 9	# store output
+
+	addi	9, 9, 128
+
+	vmr	15, 23
+	vmr	16, 24
+	vmr	17, 25
+	vmr	18, 26
+	vmr	19, 27
+	vmr	20, 28
+	vmr	21, 29
+	vmr	22, 30
 
-	cmpdi	10, 14
-	beq	Do_final_1x_dec
+	# ghash here
+	vxor	15, 15, 0
+	PPC_GHASH4x 0, 15, 16, 17, 18
+
+	vxor	19, 19, 0
+	PPC_GHASH4x 0, 19, 20, 21, 22
+
+	xxlor	32+15, 9, 9		# last state
+	vadduwm 15, 15, 31		# state + counter
+	vadduwm 16, 15, 31
+	vadduwm 17, 16, 31
+	vadduwm 18, 17, 31
+	vadduwm 19, 18, 31
+	vadduwm 20, 19, 31
+	vadduwm 21, 20, 31
+	vadduwm 22, 21, 31
+	xxlor	9, 32+22, 32+22		# save last state
+
+	xxlor	32+27, 0, 0		# restore roundkey 0
+        vxor    15, 15, 27		# IV + round key - add round key 0
+	vxor	16, 16, 27
+	vxor	17, 17, 27
+	vxor	18, 18, 27
+	vxor	19, 19, 27
+	vxor	20, 20, 27
+	vxor	21, 21, 27
+	vxor	22, 22, 27
+
+	addi    5, 5, -128
+	addi    11, 11, 128
 
-Do_final_1x_dec:
-	vcipherlast     15, 15, 23
+	LOOP_8AES_STATE			# process 8 AES keys
+	mtctr	22			# AES key loop
+	addi	10, 6, 144
+__LastLoop_aes_state_dec:
+	lxv	32+1, 0(10)		# round key
+	AES_CIPHER_8x vcipher 15 1
+	addi	10, 10, 16
+	bdnz	__LastLoop_aes_state_dec
+	lxv	32+1, 0(10)		# last round key (v1)
 
-	# check partial block
-	li	21, 1			# decrypt
-	ld	15, 56(7)		# partial?
-	cmpdi	15, 0
-	beq	Normal_block_dec
-	bl	Do_partial_block
+	addi	12, 12, -1
 	cmpdi	12, 0
-	ble aes_gcm_out
-
-	b Continue_partial_check_dec
+	bne	__Loop_8x_block_dec
+
+__Finish_ghash_dec:
+	vcipherlast     15, 15, 1
+	vcipherlast     16, 16, 1
+	vcipherlast     17, 17, 1
+	vcipherlast     18, 18, 1
+	vcipherlast     19, 19, 1
+	vcipherlast     20, 20, 1
+	vcipherlast     21, 21, 1
+	vcipherlast     22, 22, 1
+
+	lxvb16x	32+23, 0, 14	# load block
+	lxvb16x	32+24, 15, 14	# load block
+	lxvb16x	32+25, 16, 14	# load block
+	lxvb16x	32+26, 17, 14	# load block
+	lxvb16x	32+27, 18, 14	# load block
+	lxvb16x	32+28, 19, 14	# load block
+	lxvb16x	32+29, 20, 14	# load block
+	lxvb16x	32+30, 21, 14	# load block
+	addi	14, 14, 128
+
+	vxor	15, 15, 23
+	vxor	16, 16, 24
+	vxor	17, 17, 25
+	vxor	18, 18, 26
+	vxor	19, 19, 27
+	vxor	20, 20, 28
+	vxor	21, 21, 29
+	vxor	22, 22, 30
+
+	stxvb16x 47, 0, 9	# store output
+	stxvb16x 48, 15, 9	# store output
+	stxvb16x 49, 16, 9	# store output
+	stxvb16x 50, 17, 9	# store output
+	stxvb16x 51, 18, 9	# store output
+	stxvb16x 52, 19, 9	# store output
+	stxvb16x 53, 20, 9	# store output
+	stxvb16x 54, 21, 9	# store output
+	addi	9, 9, 128
+
+	#vmr	15, 23
+	vxor	15, 23, 0
+	vmr	16, 24
+	vmr	17, 25
+	vmr	18, 26
+	vmr	19, 27
+	vmr	20, 28
+	vmr	21, 29
+	vmr	22, 30
+
+	#vxor	15, 15, 0
+	PPC_GHASH4x 0, 15, 16, 17, 18
+
+	vxor	19, 19, 0
+	PPC_GHASH4x 0, 19, 20, 21, 22
+
+	xxlor	30+32, 9, 9		# last ctr
+	vadduwm	30, 30, 31		# increase ctr
+	stxvb16x 32+30, 0, 7		# update IV
+	stxvb16x 32+0, 0, 8		# update Xi
+
+	addi    5, 5, -128
+	addi    11, 11, 128
 
-Normal_block_dec:
-	lxvb16x	15, 0, 14		# load last block
-	xxlxor	47, 47, 15
+	#
+	# Done 8x blocks
+	#
 
-	# create partial block mask
-	li	15, 16
-	sub	15, 15, 12		# index to the mask
+	cmpdi   5, 0
+	beq     aes_gcm_out
 
-	vspltisb	16, -1		# first 16 bytes - 0xffff...ff
-	vspltisb	17, 0		# second 16 bytes - 0x0000...00
-	li	10, 192
-	stvx	16, 10, 1
-	addi	10, 10, 16
-	stvx	17, 10, 1
+__Process_more_dec:
+	li	24, 0			# decrypt
+	bl	aes_gcm_crypt_1x
+	cmpdi   5, 0
+	beq     aes_gcm_out
 
-	addi	10, 1, 192
-	lxvb16x	16, 15, 10		# load partial block mask
-	xxland	47, 47, 16
+	bl	__Process_partial
+	cmpdi   5, 0
+	beq     aes_gcm_out
+__Do_combine_dec:
+	bl	__Combine_partial
+	cmpdi	5, 0
+	bgt	__Process_decrypt
+	b	aes_gcm_out
+SYM_FUNC_END(aes_p10_gcm_decrypt)
 
-	xxland	32+28, 15, 16
-	#vmr	28, 15
-	ppc_update_hash_1x
+SYM_FUNC_START_LOCAL(aes_gcm_out)
 
-	# * should store only the remaining bytes.
-	bl	Write_partial_block
+	mr	3, 11			# return count
 
-	stxvb16x 30+32, 0, 7		# update IV
-	std	12, 56(7)		# update partial?
-	li	16, 16
+	RESTORE_REGS
+	blr
 
-	stxvb16x	32, 0, 8		# write out Xi
-	stxvb16x	32, 16, 8		# write out Xi
-	b aes_gcm_out
+__Invalid_msg_len:
+	li	3, 0
+	blr
+SYM_FUNC_END(aes_gcm_out)
+
+SYM_DATA_START_LOCAL(PERMX)
+.align 4
+# for vector permute and xor
+permx:
+.long 0x4c5d6e7f, 0x08192a3b, 0xc4d5e6f7, 0x8091a2b3
+SYM_DATA_END(permx)
diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h
index 5eb7631355a1..db481b336bca 100644
--- a/arch/powerpc/include/asm/ftrace.h
+++ b/arch/powerpc/include/asm/ftrace.h
@@ -35,42 +35,21 @@ struct dyn_arch_ftrace {
 int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
 #define ftrace_init_nop ftrace_init_nop
 
-struct ftrace_regs {
-	struct pt_regs regs;
-};
+#include <linux/ftrace_regs.h>
 
 static __always_inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *fregs)
 {
 	/* We clear regs.msr in ftrace_call */
-	return fregs->regs.msr ? &fregs->regs : NULL;
+	return arch_ftrace_regs(fregs)->regs.msr ? &arch_ftrace_regs(fregs)->regs : NULL;
 }
 
 static __always_inline void
 ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs,
 				    unsigned long ip)
 {
-	regs_set_return_ip(&fregs->regs, ip);
+	regs_set_return_ip(&arch_ftrace_regs(fregs)->regs, ip);
 }
 
-static __always_inline unsigned long
-ftrace_regs_get_instruction_pointer(struct ftrace_regs *fregs)
-{
-	return instruction_pointer(&fregs->regs);
-}
-
-#define ftrace_regs_get_argument(fregs, n) \
-	regs_get_kernel_argument(&(fregs)->regs, n)
-#define ftrace_regs_get_stack_pointer(fregs) \
-	kernel_stack_pointer(&(fregs)->regs)
-#define ftrace_regs_return_value(fregs) \
-	regs_return_value(&(fregs)->regs)
-#define ftrace_regs_set_return_value(fregs, ret) \
-	regs_set_return_value(&(fregs)->regs, ret)
-#define ftrace_override_function_with_return(fregs) \
-	override_function_with_return(&(fregs)->regs)
-#define ftrace_regs_query_register_offset(name) \
-	regs_query_register_offset(name)
-
 struct ftrace_ops;
 
 #define ftrace_graph_func ftrace_graph_func
@@ -159,7 +138,7 @@ unsigned long ftrace_call_adjust(unsigned long addr);
  */
 static inline void arch_ftrace_set_direct_caller(struct ftrace_regs *fregs, unsigned long addr)
 {
-	struct pt_regs *regs = &fregs->regs;
+	struct pt_regs *regs = &arch_ftrace_regs(fregs)->regs;
 
 	regs->orig_gpr3 = addr;
 }
diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h
index 52e1b1d15ff6..fd92ac450169 100644
--- a/arch/powerpc/include/asm/io.h
+++ b/arch/powerpc/include/asm/io.h
@@ -970,18 +970,6 @@ static inline void * phys_to_virt(unsigned long address)
 #define phys_to_virt phys_to_virt
 
 /*
- * Change "struct page" to physical address.
- */
-static inline phys_addr_t page_to_phys(struct page *page)
-{
-	unsigned long pfn = page_to_pfn(page);
-
-	WARN_ON(IS_ENABLED(CONFIG_DEBUG_VIRTUAL) && !pfn_valid(pfn));
-
-	return PFN_PHYS(pfn);
-}
-
-/*
  * 32 bits still uses virt_to_bus() for its implementation of DMA
  * mappings se we have to keep it defined here. We also have some old
  * drivers (shame shame shame) that use bus_to_virt() and haven't been
diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h
index 4525a9c68260..dfe2e5ad3b21 100644
--- a/arch/powerpc/include/asm/kprobes.h
+++ b/arch/powerpc/include/asm/kprobes.h
@@ -21,7 +21,7 @@
 #include <linux/percpu.h>
 #include <linux/module.h>
 #include <asm/probes.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 
 #ifdef CONFIG_KPROBES
 #define  __ARCH_WANT_KPROBES_INSN_SLOT
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index 83d0a4fc5f75..af9a2628d1df 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -21,8 +21,7 @@
  * page size. When using 64K pages however, whether we are really supporting
  * 64K pages in HW or not is irrelevant to those definitions.
  */
-#define PAGE_SHIFT		CONFIG_PAGE_SHIFT
-#define PAGE_SIZE		(ASM_CONST(1) << PAGE_SHIFT)
+#include <vdso/page.h>
 
 #ifndef __ASSEMBLY__
 #ifndef CONFIG_HUGETLB_PAGE
@@ -42,13 +41,6 @@ extern unsigned int hpage_shift;
 #endif
 
 /*
- * Subtle: (1 << PAGE_SHIFT) is an int, not an unsigned long. So if we
- * assign PAGE_MASK to a larger type it gets extended the way we want
- * (i.e. with 1s in the high bits)
- */
-#define PAGE_MASK      (~((1 << PAGE_SHIFT) - 1))
-
-/*
  * KERNELBASE is the virtual address of the start of the kernel, it's often
  * the same as PAGE_OFFSET, but _might not be_.
  *
diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h
index 5995614e9062..af0f46e2373b 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -102,8 +102,8 @@ struct power_pmu {
 int __init register_power_pmu(struct power_pmu *pmu);
 
 struct pt_regs;
-extern unsigned long perf_misc_flags(struct pt_regs *regs);
-extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
+extern unsigned long perf_arch_misc_flags(struct pt_regs *regs);
+extern unsigned long perf_arch_instruction_pointer(struct pt_regs *regs);
 extern unsigned long int read_bhrb(int n);
 
 /*
@@ -111,7 +111,7 @@ extern unsigned long int read_bhrb(int n);
  * if we have hardware PMU support.
  */
 #ifdef CONFIG_PPC_PERF_CTRS
-#define perf_misc_flags(regs)	perf_misc_flags(regs)
+#define perf_arch_misc_flags(regs)	perf_arch_misc_flags(regs)
 #endif
 
 /*
diff --git a/arch/powerpc/include/asm/systemcfg.h b/arch/powerpc/include/asm/systemcfg.h
new file mode 100644
index 000000000000..2f9b1d6a5c98
--- /dev/null
+++ b/arch/powerpc/include/asm/systemcfg.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _SYSTEMCFG_H
+#define _SYSTEMCFG_H
+
+/*
+ * Copyright (C) 2002 Peter Bergner <bergner@vnet.ibm.com>, IBM
+ * Copyright (C) 2005 Benjamin Herrenschmidy <benh@kernel.crashing.org>,
+ * 		      IBM Corp.
+ */
+
+#ifdef CONFIG_PPC64
+
+/*
+ * If the major version changes we are incompatible.
+ * Minor version changes are a hint.
+ */
+#define SYSTEMCFG_MAJOR 1
+#define SYSTEMCFG_MINOR 1
+
+#include <linux/types.h>
+
+struct systemcfg {
+	__u8  eye_catcher[16];		/* Eyecatcher: SYSTEMCFG:PPC64	0x00 */
+	struct {			/* Systemcfg version numbers	     */
+		__u32 major;		/* Major number			0x10 */
+		__u32 minor;		/* Minor number			0x14 */
+	} version;
+
+	/* Note about the platform flags: it now only contains the lpar
+	 * bit. The actual platform number is dead and buried
+	 */
+	__u32 platform;			/* Platform flags		0x18 */
+	__u32 processor;		/* Processor type		0x1C */
+	__u64 processorCount;		/* # of physical processors	0x20 */
+	__u64 physicalMemorySize;	/* Size of real memory(B)	0x28 */
+	__u64 tb_orig_stamp;		/* (NU) Timebase at boot	0x30 */
+	__u64 tb_ticks_per_sec;		/* Timebase tics / sec		0x38 */
+	__u64 tb_to_xs;			/* (NU) Inverse of TB to 2^20	0x40 */
+	__u64 stamp_xsec;		/* (NU)				0x48 */
+	__u64 tb_update_count;		/* (NU) Timebase atomicity ctr	0x50 */
+	__u32 tz_minuteswest;		/* (NU) Min. west of Greenwich	0x58 */
+	__u32 tz_dsttime;		/* (NU) Type of dst correction	0x5C */
+	__u32 dcache_size;		/* L1 d-cache size		0x60 */
+	__u32 dcache_line_size;		/* L1 d-cache line size		0x64 */
+	__u32 icache_size;		/* L1 i-cache size		0x68 */
+	__u32 icache_line_size;		/* L1 i-cache line size		0x6C */
+};
+
+extern struct systemcfg *systemcfg;
+
+#endif /* CONFIG_PPC64 */
+#endif /* _SYSTEMCFG_H */
diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/text-patching.h
index e7f14720f630..e7f14720f630 100644
--- a/arch/powerpc/include/asm/code-patching.h
+++ b/arch/powerpc/include/asm/text-patching.h
diff --git a/arch/powerpc/include/asm/vdso/vsyscall.h b/arch/powerpc/include/asm/vdso/vsyscall.h
index 92f480d8cc6d..48560a119559 100644
--- a/arch/powerpc/include/asm/vdso/vsyscall.h
+++ b/arch/powerpc/include/asm/vdso/vsyscall.h
@@ -4,12 +4,8 @@
 
 #ifndef __ASSEMBLY__
 
-#include <linux/timekeeper_internal.h>
 #include <asm/vdso_datapage.h>
 
-/*
- * Update the vDSO data page to keep in sync with kernel timekeeping.
- */
 static __always_inline
 struct vdso_data *__arch_get_k_vdso_data(void)
 {
diff --git a/arch/powerpc/include/asm/vdso_datapage.h b/arch/powerpc/include/asm/vdso_datapage.h
index 33069afccb3c..a202f5b63479 100644
--- a/arch/powerpc/include/asm/vdso_datapage.h
+++ b/arch/powerpc/include/asm/vdso_datapage.h
@@ -9,29 +9,6 @@
  * 		      IBM Corp.
  */
 
-
-/*
- * Note about this structure:
- *
- * This structure was historically called systemcfg and exposed to
- * userland via /proc/ppc64/systemcfg. Unfortunately, this became an
- * ABI issue as some proprietary software started relying on being able
- * to mmap() it, thus we have to keep the base layout at least for a
- * few kernel versions.
- *
- * However, since ppc32 doesn't suffer from this backward handicap,
- * a simpler version of the data structure is used there with only the
- * fields actually used by the vDSO.
- *
- */
-
-/*
- * If the major version changes we are incompatible.
- * Minor version changes are a hint.
- */
-#define SYSTEMCFG_MAJOR 1
-#define SYSTEMCFG_MINOR 1
-
 #ifndef __ASSEMBLY__
 
 #include <linux/unistd.h>
@@ -40,41 +17,10 @@
 
 #define SYSCALL_MAP_SIZE      ((NR_syscalls + 31) / 32)
 
-/*
- * So here is the ppc64 backward compatible version
- */
-
 #ifdef CONFIG_PPC64
 
 struct vdso_arch_data {
-	__u8  eye_catcher[16];		/* Eyecatcher: SYSTEMCFG:PPC64	0x00 */
-	struct {			/* Systemcfg version numbers	     */
-		__u32 major;		/* Major number			0x10 */
-		__u32 minor;		/* Minor number			0x14 */
-	} version;
-
-	/* Note about the platform flags: it now only contains the lpar
-	 * bit. The actual platform number is dead and buried
-	 */
-	__u32 platform;			/* Platform flags		0x18 */
-	__u32 processor;		/* Processor type		0x1C */
-	__u64 processorCount;		/* # of physical processors	0x20 */
-	__u64 physicalMemorySize;	/* Size of real memory(B)	0x28 */
-	__u64 tb_orig_stamp;		/* (NU) Timebase at boot	0x30 */
-	__u64 tb_ticks_per_sec;		/* Timebase tics / sec		0x38 */
-	__u64 tb_to_xs;			/* (NU) Inverse of TB to 2^20	0x40 */
-	__u64 stamp_xsec;		/* (NU)				0x48 */
-	__u64 tb_update_count;		/* (NU) Timebase atomicity ctr	0x50 */
-	__u32 tz_minuteswest;		/* (NU) Min. west of Greenwich	0x58 */
-	__u32 tz_dsttime;		/* (NU) Type of dst correction	0x5C */
-	__u32 dcache_size;		/* L1 d-cache size		0x60 */
-	__u32 dcache_line_size;		/* L1 d-cache line size		0x64 */
-	__u32 icache_size;		/* L1 i-cache size		0x68 */
-	__u32 icache_line_size;		/* L1 i-cache line size		0x6C */
-
-	/* those additional ones don't have to be located anywhere
-	 * special as they were not part of the original systemcfg
-	 */
+	__u64 tb_ticks_per_sec;			/* Timebase tics / sec */
 	__u32 dcache_block_size;		/* L1 d-cache block size     */
 	__u32 icache_block_size;		/* L1 i-cache block size     */
 	__u32 dcache_log_block_size;		/* L1 d-cache log block size */
@@ -89,11 +35,8 @@ struct vdso_arch_data {
 
 #else /* CONFIG_PPC64 */
 
-/*
- * And here is the simpler 32 bits version
- */
 struct vdso_arch_data {
-	__u64 tb_ticks_per_sec;		/* Timebase tics / sec		0x38 */
+	__u64 tb_ticks_per_sec;		/* Timebase tics / sec */
 	__u32 syscall_map[SYSCALL_MAP_SIZE]; /* Map of syscalls */
 	__u32 compat_syscall_map[0];	/* No compat syscalls on PPC32 */
 	struct vdso_rng_data rng_data;
diff --git a/arch/powerpc/include/asm/vga.h b/arch/powerpc/include/asm/vga.h
index fcf721682a71..f2dc40e1c52a 100644
--- a/arch/powerpc/include/asm/vga.h
+++ b/arch/powerpc/include/asm/vga.h
@@ -40,11 +40,6 @@ static inline void scr_memsetw(u16 *s, u16 v, unsigned int n)
 	memset16(s, cpu_to_le16(v), n / 2);
 }
 
-#define VT_BUF_HAVE_MEMCPYW
-#define VT_BUF_HAVE_MEMMOVEW
-#define scr_memcpyw	memcpy
-#define scr_memmovew	memmove
-
 #endif /* !CONFIG_VGA_CONSOLE && !CONFIG_MDA_CONSOLE */
 
 #ifdef __powerpc64__
diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c
index 2086fa6cdc25..103b6605dd68 100644
--- a/arch/powerpc/kernel/crash_dump.c
+++ b/arch/powerpc/kernel/crash_dump.c
@@ -13,7 +13,7 @@
 #include <linux/io.h>
 #include <linux/memblock.h>
 #include <linux/of.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 #include <asm/kdump.h>
 #include <asm/firmware.h>
 #include <linux/uio.h>
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index af4263594eb2..1bee15c013e7 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -867,7 +867,7 @@ bool __init dt_cpu_ftrs_init(void *fdt)
 	using_dt_cpu_ftrs = false;
 
 	/* Setup and verify the FDT, if it fails we just bail */
-	if (!early_init_dt_verify(fdt))
+	if (!early_init_dt_verify(fdt, __pa(fdt)))
 		return false;
 
 	if (!of_scan_flat_dt(fdt_find_cpu_features, NULL))
diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c
index d4b8aff20815..247ab2acaccc 100644
--- a/arch/powerpc/kernel/epapr_paravirt.c
+++ b/arch/powerpc/kernel/epapr_paravirt.c
@@ -9,7 +9,7 @@
 #include <linux/of_fdt.h>
 #include <asm/epapr_hcalls.h>
 #include <asm/cacheflush.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 #include <asm/machdep.h>
 #include <asm/inst.h>
 
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 811a7130505c..56c5ebe21b99 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -494,6 +494,7 @@ FixupDAR:/* Entry point for dcbx workaround. */
 	bctr				/* jump into table */
 152:
 	mfdar	r11
+	mtdar	r10
 	mtctr	r11			/* restore ctr reg from DAR */
 	mfspr	r11, SPRN_SPRG_THREAD
 	stw	r10, DAR(r11)
diff --git a/arch/powerpc/kernel/jump_label.c b/arch/powerpc/kernel/jump_label.c
index 5277cf582c16..2659e1ac8604 100644
--- a/arch/powerpc/kernel/jump_label.c
+++ b/arch/powerpc/kernel/jump_label.c
@@ -5,7 +5,7 @@
 
 #include <linux/kernel.h>
 #include <linux/jump_label.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 #include <asm/inst.h>
 
 void arch_jump_label_transform(struct jump_entry *entry,
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index 7a8bc03a00af..5081334b7bd2 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -21,7 +21,7 @@
 #include <asm/processor.h>
 #include <asm/machdep.h>
 #include <asm/debug.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 #include <linux/slab.h>
 #include <asm/inst.h>
 
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index bf382c459e1f..c0d9f12cb441 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -21,7 +21,7 @@
 #include <linux/slab.h>
 #include <linux/set_memory.h>
 #include <linux/execmem.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 #include <asm/cacheflush.h>
 #include <asm/sstep.h>
 #include <asm/sections.h>
diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c
index 816a63fd71fb..f930e3395a7f 100644
--- a/arch/powerpc/kernel/module_32.c
+++ b/arch/powerpc/kernel/module_32.c
@@ -18,7 +18,7 @@
 #include <linux/bug.h>
 #include <linux/sort.h>
 #include <asm/setup.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 
 /* Count how many different relocations (different symbol, different
    addend) */
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 6816e9967cab..45dac7b46aa3 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -17,7 +17,7 @@
 #include <linux/kernel.h>
 #include <asm/module.h>
 #include <asm/firmware.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 #include <linux/sort.h>
 #include <asm/setup.h>
 #include <asm/sections.h>
diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
index c0b351d61058..2e83702bf9ba 100644
--- a/arch/powerpc/kernel/optprobes.c
+++ b/arch/powerpc/kernel/optprobes.c
@@ -13,7 +13,7 @@
 #include <asm/kprobes.h>
 #include <asm/ptrace.h>
 #include <asm/cacheflush.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 #include <asm/sstep.h>
 #include <asm/ppc-opcode.h>
 #include <asm/inst.h>
diff --git a/arch/powerpc/kernel/proc_powerpc.c b/arch/powerpc/kernel/proc_powerpc.c
index b109cd7b5d01..3816a2bf2b84 100644
--- a/arch/powerpc/kernel/proc_powerpc.c
+++ b/arch/powerpc/kernel/proc_powerpc.c
@@ -4,6 +4,7 @@
  */
 
 #include <linux/init.h>
+#include <linux/memblock.h>
 #include <linux/mm.h>
 #include <linux/proc_fs.h>
 #include <linux/kernel.h>
@@ -12,9 +13,10 @@
 #include <asm/machdep.h>
 #include <asm/vdso_datapage.h>
 #include <asm/rtas.h>
+#include <asm/systemcfg.h>
 #include <linux/uaccess.h>
 
-#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC64_PROC_SYSTEMCFG
 
 static loff_t page_map_seek(struct file *file, loff_t off, int whence)
 {
@@ -33,10 +35,9 @@ static int page_map_mmap( struct file *file, struct vm_area_struct *vma )
 	if ((vma->vm_end - vma->vm_start) > PAGE_SIZE)
 		return -EINVAL;
 
-	remap_pfn_range(vma, vma->vm_start,
-			__pa(pde_data(file_inode(file))) >> PAGE_SHIFT,
-			PAGE_SIZE, vma->vm_page_prot);
-	return 0;
+	return remap_pfn_range(vma, vma->vm_start,
+			       __pa(pde_data(file_inode(file))) >> PAGE_SHIFT,
+			       PAGE_SIZE, vma->vm_page_prot);
 }
 
 static const struct proc_ops page_map_proc_ops = {
@@ -45,13 +46,35 @@ static const struct proc_ops page_map_proc_ops = {
 	.proc_mmap	= page_map_mmap,
 };
 
+static union {
+	struct systemcfg	data;
+	u8			page[PAGE_SIZE];
+} systemcfg_data_store __page_aligned_data;
+struct systemcfg *systemcfg = &systemcfg_data_store.data;
 
 static int __init proc_ppc64_init(void)
 {
 	struct proc_dir_entry *pde;
 
+	strcpy((char *)systemcfg->eye_catcher, "SYSTEMCFG:PPC64");
+	systemcfg->version.major = SYSTEMCFG_MAJOR;
+	systemcfg->version.minor = SYSTEMCFG_MINOR;
+	systemcfg->processor = mfspr(SPRN_PVR);
+	/*
+	 * Fake the old platform number for pSeries and add
+	 * in LPAR bit if necessary
+	 */
+	systemcfg->platform = 0x100;
+	if (firmware_has_feature(FW_FEATURE_LPAR))
+		systemcfg->platform |= 1;
+	systemcfg->physicalMemorySize = memblock_phys_mem_size();
+	systemcfg->dcache_size = ppc64_caches.l1d.size;
+	systemcfg->dcache_line_size = ppc64_caches.l1d.line_size;
+	systemcfg->icache_size = ppc64_caches.l1i.size;
+	systemcfg->icache_line_size = ppc64_caches.l1i.line_size;
+
 	pde = proc_create_data("powerpc/systemcfg", S_IFREG | 0444, NULL,
-			       &page_map_proc_ops, vdso_data);
+			       &page_map_proc_ops, systemcfg);
 	if (!pde)
 		return 1;
 	proc_set_size(pde, PAGE_SIZE);
@@ -60,7 +83,7 @@ static int __init proc_ppc64_init(void)
 }
 __initcall(proc_ppc64_init);
 
-#endif /* CONFIG_PPC64 */
+#endif /* CONFIG_PPC64_PROC_SYSTEMCFG */
 
 /*
  * Create the ppc64 and ppc64/rtas directories early. This allows us to
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index ff61a3e7984c..7b739b9a91ab 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -54,7 +54,7 @@
 #include <asm/firmware.h>
 #include <asm/hw_irq.h>
 #endif
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 #include <asm/exec.h>
 #include <asm/livepatch.h>
 #include <asm/cpu_has_feature.h>
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 47db1b1aef25..e0059842a1c6 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -791,7 +791,7 @@ void __init early_init_devtree(void *params)
 	DBG(" -> early_init_devtree(%px)\n", params);
 
 	/* Too early to BUG_ON(), do it by hand */
-	if (!early_init_dt_verify(params))
+	if (!early_init_dt_verify(params, __pa(params)))
 		panic("BUG: Failed verifying flat device tree, bad version?");
 
 	of_scan_flat_dt(early_init_dt_scan_model, NULL);
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index f7e86e09c49f..d31c9799cab2 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -1390,21 +1390,14 @@ bool __ref rtas_busy_delay(int status)
 		 */
 		ms = clamp(ms, 1U, 1000U);
 		/*
-		 * The delay hint is an order-of-magnitude suggestion, not
-		 * a minimum. It is fine, possibly even advantageous, for
-		 * us to pause for less time than hinted. For small values,
-		 * use usleep_range() to ensure we don't sleep much longer
-		 * than actually needed.
-		 *
-		 * See Documentation/timers/timers-howto.rst for
-		 * explanation of the threshold used here. In effect we use
-		 * usleep_range() for 9900 and 9901, msleep() for
-		 * 9902-9905.
+		 * The delay hint is an order-of-magnitude suggestion, not a
+		 * minimum. It is fine, possibly even advantageous, for us to
+		 * pause for less time than hinted. To make sure pause time will
+		 * not be way longer than requested independent of HZ
+		 * configuration, use fsleep(). See fsleep() for details of
+		 * used sleeping functions.
 		 */
-		if (ms <= 20)
-			usleep_range(ms * 100, ms * 1000);
-		else
-			msleep(ms);
+		fsleep(ms * 1000);
 		break;
 	case RTAS_BUSY:
 		ret = true;
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index 4856e1a5161c..fbb7ebd8aa08 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -14,7 +14,7 @@
 #include <linux/debugfs.h>
 
 #include <asm/asm-prototypes.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 #include <asm/security_features.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index b6b01502e504..6fa179448c33 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -67,6 +67,7 @@
 #include <asm/cpu_has_feature.h>
 #include <asm/kasan.h>
 #include <asm/mce.h>
+#include <asm/systemcfg.h>
 
 #include "setup.h"
 
@@ -560,7 +561,9 @@ void __init smp_setup_cpu_maps(void)
 	out:
 		of_node_put(dn);
 	}
-	vdso_data->processorCount = num_present_cpus();
+#endif
+#ifdef CONFIG_PPC64_PROC_SYSTEMCFG
+	systemcfg->processorCount = num_present_cpus();
 #endif /* CONFIG_PPC64 */
 
         /* Initialize CPU <=> thread mapping/
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index e515c1f7d8d3..75dbf3e0d9c4 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -40,7 +40,7 @@
 #include <asm/time.h>
 #include <asm/serial.h>
 #include <asm/udbg.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 #include <asm/cpu_has_feature.h>
 #include <asm/asm-prototypes.h>
 #include <asm/kdump.h>
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 1edc7cd68c10..e67f3048611f 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -60,7 +60,7 @@
 #include <asm/xmon.h>
 #include <asm/udbg.h>
 #include <asm/kexec.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 #include <asm/ftrace.h>
 #include <asm/opal.h>
 #include <asm/cputhreads.h>
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 4ab9b8cee77a..5ac7084eebc0 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -61,6 +61,7 @@
 #include <asm/ftrace.h>
 #include <asm/kup.h>
 #include <asm/fadump.h>
+#include <asm/systemcfg.h>
 
 #include <trace/events/ipi.h>
 
@@ -1186,8 +1187,8 @@ int generic_cpu_disable(void)
 		return -EBUSY;
 
 	set_cpu_online(cpu, false);
-#ifdef CONFIG_PPC64
-	vdso_data->processorCount--;
+#ifdef CONFIG_PPC64_PROC_SYSTEMCFG
+	systemcfg->processorCount--;
 #endif
 	/* Update affinity of all IRQs previously aimed at this CPU */
 	irq_migrate_all_off_this_cpu();
@@ -1642,10 +1643,12 @@ void start_secondary(void *unused)
 
 	secondary_cpu_time_init();
 
-#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC64_PROC_SYSTEMCFG
 	if (system_state == SYSTEM_RUNNING)
-		vdso_data->processorCount++;
+		systemcfg->processorCount++;
+#endif
 
+#ifdef CONFIG_PPC64
 	vdso_getcpu_init();
 #endif
 	set_numa_node(numa_cpu_lookup_table[cpu]);
diff --git a/arch/powerpc/kernel/static_call.c b/arch/powerpc/kernel/static_call.c
index 1502b7e439ca..7cfd0710e757 100644
--- a/arch/powerpc/kernel/static_call.c
+++ b/arch/powerpc/kernel/static_call.c
@@ -2,7 +2,7 @@
 #include <linux/memory.h>
 #include <linux/static_call.h>
 
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 
 void arch_static_call_transform(void *site, void *tramp, void *func, bool tail)
 {
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index ebae8415dfbb..d8b4ab78bef0 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -553,3 +553,7 @@
 460	common	lsm_set_self_attr		sys_lsm_set_self_attr
 461	common	lsm_list_modules		sys_lsm_list_modules
 462	common	mseal				sys_mseal
+463	common	setxattrat			sys_setxattrat
+464	common	getxattrat			sys_getxattrat
+465	common	listxattrat			sys_listxattrat
+466	common	removexattrat			sys_removexattrat
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 0ff9f038e800..0727332ad86f 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -71,11 +71,11 @@
 #include <asm/vdso_datapage.h>
 #include <asm/firmware.h>
 #include <asm/mce.h>
+#include <asm/systemcfg.h>
 
 /* powerpc clocksource/clockevent code */
 
 #include <linux/clockchips.h>
-#include <linux/timekeeper_internal.h>
 
 static u64 timebase_read(struct clocksource *);
 static struct clocksource clocksource_timebase = {
@@ -951,6 +951,9 @@ void __init time_init(void)
 	}
 
 	vdso_data->tb_ticks_per_sec = tb_ticks_per_sec;
+#ifdef CONFIG_PPC64_PROC_SYSTEMCFG
+	systemcfg->tb_ticks_per_sec = tb_ticks_per_sec;
+#endif
 
 	/* initialise and enable the large decrementer (if we have one) */
 	set_decrementer_max();
diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
index 051f3db14606..5ccd791761e8 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -23,7 +23,7 @@
 #include <linux/list.h>
 
 #include <asm/cacheflush.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 #include <asm/ftrace.h>
 #include <asm/syscall.h>
 #include <asm/inst.h>
@@ -657,7 +657,7 @@ int __init ftrace_dyn_arch_init(void)
 void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
 		       struct ftrace_ops *op, struct ftrace_regs *fregs)
 {
-	unsigned long sp = fregs->regs.gpr[1];
+	unsigned long sp = arch_ftrace_regs(fregs)->regs.gpr[1];
 	int bit;
 
 	if (unlikely(ftrace_graph_is_dead()))
@@ -675,6 +675,6 @@ void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
 
 	ftrace_test_recursion_unlock(bit);
 out:
-	fregs->regs.link = parent_ip;
+	arch_ftrace_regs(fregs)->regs.link = parent_ip;
 }
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.c b/arch/powerpc/kernel/trace/ftrace_64_pg.c
index 8a551dfca3d0..98787376eb87 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_pg.c
+++ b/arch/powerpc/kernel/trace/ftrace_64_pg.c
@@ -23,7 +23,7 @@
 #include <linux/list.h>
 
 #include <asm/cacheflush.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 #include <asm/ftrace.h>
 #include <asm/syscall.h>
 #include <asm/inst.h>
@@ -816,7 +816,7 @@ out:
 void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
 		       struct ftrace_ops *op, struct ftrace_regs *fregs)
 {
-	fregs->regs.link = __prepare_ftrace_return(parent_ip, ip, fregs->regs.gpr[1]);
+	arch_ftrace_regs(fregs)->regs.link = __prepare_ftrace_return(parent_ip, ip, arch_ftrace_regs(fregs)->regs.gpr[1]);
 }
 #else
 unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip,
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index 6166c1862c06..43379365ce1b 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -16,7 +16,6 @@
 #include <linux/user.h>
 #include <linux/elf.h>
 #include <linux/security.h>
-#include <linux/memblock.h>
 #include <linux/syscalls.h>
 #include <linux/time_namespace.h>
 #include <vdso/datapage.h>
@@ -353,25 +352,6 @@ static struct page ** __init vdso_setup_pages(void *start, void *end)
 static int __init vdso_init(void)
 {
 #ifdef CONFIG_PPC64
-	/*
-	 * Fill up the "systemcfg" stuff for backward compatibility
-	 */
-	strcpy((char *)vdso_data->eye_catcher, "SYSTEMCFG:PPC64");
-	vdso_data->version.major = SYSTEMCFG_MAJOR;
-	vdso_data->version.minor = SYSTEMCFG_MINOR;
-	vdso_data->processor = mfspr(SPRN_PVR);
-	/*
-	 * Fake the old platform number for pSeries and add
-	 * in LPAR bit if necessary
-	 */
-	vdso_data->platform = 0x100;
-	if (firmware_has_feature(FW_FEATURE_LPAR))
-		vdso_data->platform |= 1;
-	vdso_data->physicalMemorySize = memblock_phys_mem_size();
-	vdso_data->dcache_size = ppc64_caches.l1d.size;
-	vdso_data->dcache_line_size = ppc64_caches.l1d.line_size;
-	vdso_data->icache_size = ppc64_caches.l1i.size;
-	vdso_data->icache_line_size = ppc64_caches.l1i.line_size;
 	vdso_data->dcache_block_size = ppc64_caches.l1d.block_size;
 	vdso_data->icache_block_size = ppc64_caches.l1i.block_size;
 	vdso_data->dcache_log_block_size = ppc64_caches.l1d.log_block_size;
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 34c0adb9fdbf..742aa58a7c7e 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -115,10 +115,9 @@ long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
 	struct iommu_table_group *table_group;
 	long i;
 	struct kvmppc_spapr_tce_iommu_table *stit;
-	struct fd f;
+	CLASS(fd, f)(tablefd);
 
-	f = fdget(tablefd);
-	if (!fd_file(f))
+	if (fd_empty(f))
 		return -EBADF;
 
 	rcu_read_lock();
@@ -130,16 +129,12 @@ long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
 	}
 	rcu_read_unlock();
 
-	if (!found) {
-		fdput(f);
+	if (!found)
 		return -EINVAL;
-	}
 
 	table_group = iommu_group_get_iommudata(grp);
-	if (WARN_ON(!table_group)) {
-		fdput(f);
+	if (WARN_ON(!table_group))
 		return -EFAULT;
-	}
 
 	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
 		struct iommu_table *tbltmp = table_group->tables[i];
@@ -160,10 +155,8 @@ long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
 			break;
 		}
 	}
-	if (!tbl) {
-		fdput(f);
+	if (!tbl)
 		return -EINVAL;
-	}
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(stit, &stt->iommu_tables, next) {
@@ -174,7 +167,6 @@ long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
 			/* stit is being destroyed */
 			iommu_tce_table_put(tbl);
 			rcu_read_unlock();
-			fdput(f);
 			return -ENOTTY;
 		}
 		/*
@@ -182,7 +174,6 @@ long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
 		 * its KVM reference counter and can return.
 		 */
 		rcu_read_unlock();
-		fdput(f);
 		return 0;
 	}
 	rcu_read_unlock();
@@ -190,7 +181,6 @@ long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
 	stit = kzalloc(sizeof(*stit), GFP_KERNEL);
 	if (!stit) {
 		iommu_tce_table_put(tbl);
-		fdput(f);
 		return -ENOMEM;
 	}
 
@@ -199,7 +189,6 @@ long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
 
 	list_add_rcu(&stit->next, &stt->iommu_tables);
 
-	fdput(f);
 	return 0;
 }
 
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index d8938fdf1936..25429905ae90 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -4950,6 +4950,18 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
 							   BOOK3S_INTERRUPT_EXTERNAL, 0);
 			else
 				lpcr |= LPCR_MER;
+		} else {
+			/*
+			 * L1's copy of L2's LPCR (vcpu->arch.vcore->lpcr) can get its MER bit
+			 * unexpectedly set - for e.g. during NMI handling when all register
+			 * states are synchronized from L0 to L1. L1 needs to inform L0 about
+			 * MER=1 only when there are pending external interrupts.
+			 * In the above if check, MER bit is set if there are pending
+			 * external interrupts. Hence, explicity mask off MER bit
+			 * here as otherwise it may generate spurious interrupts in L2 KVM
+			 * causing an endless loop, which results in L2 guest getting hung.
+			 */
+			lpcr &= ~LPCR_MER;
 		}
 	} else if (vcpu->arch.pending_exceptions ||
 		   xive_interrupt_pending(vcpu)) {
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index f14329989e9a..b3b37ea77849 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -1933,12 +1933,11 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 #endif
 #ifdef CONFIG_KVM_MPIC
 	case KVM_CAP_IRQ_MPIC: {
-		struct fd f;
+		CLASS(fd, f)(cap->args[0]);
 		struct kvm_device *dev;
 
 		r = -EBADF;
-		f = fdget(cap->args[0]);
-		if (!fd_file(f))
+		if (fd_empty(f))
 			break;
 
 		r = -EPERM;
@@ -1946,18 +1945,16 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 		if (dev)
 			r = kvmppc_mpic_connect_vcpu(dev, vcpu, cap->args[1]);
 
-		fdput(f);
 		break;
 	}
 #endif
 #ifdef CONFIG_KVM_XICS
 	case KVM_CAP_IRQ_XICS: {
-		struct fd f;
+		CLASS(fd, f)(cap->args[0]);
 		struct kvm_device *dev;
 
 		r = -EBADF;
-		f = fdget(cap->args[0]);
-		if (!fd_file(f))
+		if (fd_empty(f))
 			break;
 
 		r = -EPERM;
@@ -1968,34 +1965,27 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 			else
 				r = kvmppc_xics_connect_vcpu(dev, vcpu, cap->args[1]);
 		}
-
-		fdput(f);
 		break;
 	}
 #endif /* CONFIG_KVM_XICS */
 #ifdef CONFIG_KVM_XIVE
 	case KVM_CAP_PPC_IRQ_XIVE: {
-		struct fd f;
+		CLASS(fd, f)(cap->args[0]);
 		struct kvm_device *dev;
 
 		r = -EBADF;
-		f = fdget(cap->args[0]);
-		if (!fd_file(f))
+		if (fd_empty(f))
 			break;
 
 		r = -ENXIO;
-		if (!xive_enabled()) {
-			fdput(f);
+		if (!xive_enabled())
 			break;
-		}
 
 		r = -EPERM;
 		dev = kvm_device_from_filp(fd_file(f));
 		if (dev)
 			r = kvmppc_xive_native_connect_vcpu(dev, vcpu,
 							    cap->args[1]);
-
-		fdput(f);
 		break;
 	}
 #endif /* CONFIG_KVM_XIVE */
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index acdab294b340..af97fbb3c257 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -17,7 +17,7 @@
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
 #include <asm/page.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 #include <asm/inst.h>
 
 static int __patch_mem(void *exec_addr, unsigned long val, void *patch_addr, bool is_dword)
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index b7201ba50b2e..587c8cf1230f 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -16,7 +16,7 @@
 #include <linux/sched/mm.h>
 #include <linux/stop_machine.h>
 #include <asm/cputable.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 #include <asm/interrupt.h>
 #include <asm/page.h>
 #include <asm/sections.h>
diff --git a/arch/powerpc/lib/test-code-patching.c b/arch/powerpc/lib/test-code-patching.c
index 8cd3b32f805b..1440d99630b3 100644
--- a/arch/powerpc/lib/test-code-patching.c
+++ b/arch/powerpc/lib/test-code-patching.c
@@ -6,7 +6,7 @@
 #include <linux/vmalloc.h>
 #include <linux/init.h>
 
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 
 static int __init instr_is_branch_to_addr(const u32 *instr, unsigned long addr)
 {
diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c
index 23c7805fb7b3..66b5b4fa1686 100644
--- a/arch/powerpc/lib/test_emulate_step.c
+++ b/arch/powerpc/lib/test_emulate_step.c
@@ -11,7 +11,7 @@
 #include <asm/cpu_has_feature.h>
 #include <asm/sstep.h>
 #include <asm/ppc-opcode.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 #include <asm/inst.h>
 
 #define MAX_SUBTESTS	16
diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c
index 2db167f4233f..6978344edcb4 100644
--- a/arch/powerpc/mm/book3s32/mmu.c
+++ b/arch/powerpc/mm/book3s32/mmu.c
@@ -25,7 +25,7 @@
 
 #include <asm/mmu.h>
 #include <asm/machdep.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 #include <asm/sections.h>
 
 #include <mm/mmu_decl.h>
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index db87c2cc2fb6..c8b4fa71d4a7 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -58,7 +58,7 @@
 #include <asm/sections.h>
 #include <asm/copro.h>
 #include <asm/udbg.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 #include <asm/fadump.h>
 #include <asm/firmware.h>
 #include <asm/tm.h>
diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c
index f2708c8629a5..6b783552403c 100644
--- a/arch/powerpc/mm/book3s64/slb.c
+++ b/arch/powerpc/mm/book3s64/slb.c
@@ -24,7 +24,7 @@
 #include <linux/pgtable.h>
 
 #include <asm/udbg.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 
 #include "internal.h"
 
diff --git a/arch/powerpc/mm/book3s64/slice.c b/arch/powerpc/mm/book3s64/slice.c
index 87307d0fc3b8..bc9a39821d1c 100644
--- a/arch/powerpc/mm/book3s64/slice.c
+++ b/arch/powerpc/mm/book3s64/slice.c
@@ -633,6 +633,20 @@ return_addr:
 }
 EXPORT_SYMBOL_GPL(slice_get_unmapped_area);
 
+#ifdef CONFIG_HUGETLB_PAGE
+static int file_to_psize(struct file *file)
+{
+	struct hstate *hstate = hstate_file(file);
+
+	return shift_to_mmu_psize(huge_page_shift(hstate));
+}
+#else
+static int file_to_psize(struct file *file)
+{
+	return 0;
+}
+#endif
+
 unsigned long arch_get_unmapped_area(struct file *filp,
 				     unsigned long addr,
 				     unsigned long len,
@@ -640,11 +654,17 @@ unsigned long arch_get_unmapped_area(struct file *filp,
 				     unsigned long flags,
 				     vm_flags_t vm_flags)
 {
+	unsigned int psize;
+
 	if (radix_enabled())
 		return generic_get_unmapped_area(filp, addr, len, pgoff, flags, vm_flags);
 
-	return slice_get_unmapped_area(addr, len, flags,
-				       mm_ctx_user_psize(&current->mm->context), 0);
+	if (filp && is_file_hugepages(filp))
+		psize = file_to_psize(filp);
+	else
+		psize = mm_ctx_user_psize(&current->mm->context);
+
+	return slice_get_unmapped_area(addr, len, flags, psize, 0);
 }
 
 unsigned long arch_get_unmapped_area_topdown(struct file *filp,
@@ -654,11 +674,17 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp,
 					     const unsigned long flags,
 					     vm_flags_t vm_flags)
 {
+	unsigned int psize;
+
 	if (radix_enabled())
 		return generic_get_unmapped_area_topdown(filp, addr0, len, pgoff, flags, vm_flags);
 
-	return slice_get_unmapped_area(addr0, len, flags,
-				       mm_ctx_user_psize(&current->mm->context), 1);
+	if (filp && is_file_hugepages(filp))
+		psize = file_to_psize(filp);
+	else
+		psize = mm_ctx_user_psize(&current->mm->context);
+
+	return slice_get_unmapped_area(addr0, len, flags, psize, 1);
 }
 
 unsigned int notrace get_slice_psize(struct mm_struct *mm, unsigned long addr)
@@ -788,20 +814,4 @@ unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
 
 	return 1UL << mmu_psize_to_shift(get_slice_psize(vma->vm_mm, vma->vm_start));
 }
-
-static int file_to_psize(struct file *file)
-{
-	struct hstate *hstate = hstate_file(file);
-	return shift_to_mmu_psize(huge_page_shift(hstate));
-}
-
-unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
-					unsigned long len, unsigned long pgoff,
-					unsigned long flags)
-{
-	if (radix_enabled())
-		return generic_hugetlb_get_unmapped_area(file, addr, len, pgoff, flags);
-
-	return slice_get_unmapped_area(addr, len, flags, file_to_psize(file), 1);
-}
 #endif
diff --git a/arch/powerpc/mm/kasan/init_32.c b/arch/powerpc/mm/kasan/init_32.c
index aa9aa11927b2..03666d790a53 100644
--- a/arch/powerpc/mm/kasan/init_32.c
+++ b/arch/powerpc/mm/kasan/init_32.c
@@ -7,7 +7,7 @@
 #include <linux/memblock.h>
 #include <linux/sched/task.h>
 #include <asm/pgalloc.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 #include <mm/mmu_decl.h>
 
 static pgprot_t __init kasan_prot_ro(void)
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 1221c561b43a..c7708c8fad29 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -26,7 +26,7 @@
 #include <asm/svm.h>
 #include <asm/mmzone.h>
 #include <asm/ftrace.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 #include <asm/setup.h>
 #include <asm/fixmap.h>
 
diff --git a/arch/powerpc/mm/nohash/44x.c b/arch/powerpc/mm/nohash/44x.c
index 1beae802bb1c..6d10c6d8be71 100644
--- a/arch/powerpc/mm/nohash/44x.c
+++ b/arch/powerpc/mm/nohash/44x.c
@@ -24,7 +24,7 @@
 #include <asm/mmu.h>
 #include <asm/page.h>
 #include <asm/cacheflush.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 #include <asm/smp.h>
 
 #include <mm/mmu_decl.h>
diff --git a/arch/powerpc/mm/nohash/book3e_pgtable.c b/arch/powerpc/mm/nohash/book3e_pgtable.c
index ad2a7c26f2a0..062e8785c1bb 100644
--- a/arch/powerpc/mm/nohash/book3e_pgtable.c
+++ b/arch/powerpc/mm/nohash/book3e_pgtable.c
@@ -10,7 +10,7 @@
 #include <asm/pgalloc.h>
 #include <asm/tlb.h>
 #include <asm/dma.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 
 #include <mm/mmu_decl.h>
 
diff --git a/arch/powerpc/mm/nohash/tlb.c b/arch/powerpc/mm/nohash/tlb.c
index b653a7be4cb1..0a650742f3a0 100644
--- a/arch/powerpc/mm/nohash/tlb.c
+++ b/arch/powerpc/mm/nohash/tlb.c
@@ -37,7 +37,7 @@
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 #include <asm/tlb.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 #include <asm/cputhreads.h>
 #include <asm/hugetlb.h>
 #include <asm/paca.h>
diff --git a/arch/powerpc/mm/nohash/tlb_64e.c b/arch/powerpc/mm/nohash/tlb_64e.c
index d26656b07b72..4f925adf2695 100644
--- a/arch/powerpc/mm/nohash/tlb_64e.c
+++ b/arch/powerpc/mm/nohash/tlb_64e.c
@@ -24,7 +24,7 @@
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 #include <asm/tlb.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 #include <asm/cputhreads.h>
 
 #include <mm/mmu_decl.h>
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 7316396e452d..61df5aed7989 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -398,7 +398,7 @@ void assert_pte_locked(struct mm_struct *mm, unsigned long addr)
 	 */
 	if (pmd_none(*pmd))
 		return;
-	pte = pte_offset_map_nolock(mm, pmd, addr, &ptl);
+	pte = pte_offset_map_ro_nolock(mm, pmd, addr, &ptl);
 	BUG_ON(!pte);
 	assert_spin_locked(ptl);
 	pte_unmap(pte);
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 28e2fd8b7900..2991bb171a9b 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -18,7 +18,7 @@
 #include <linux/bpf.h>
 
 #include <asm/kprobes.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 
 #include "bpf_jit.h"
 
diff --git a/arch/powerpc/perf/8xx-pmu.c b/arch/powerpc/perf/8xx-pmu.c
index 308a2e40d7be..1d2972229e3a 100644
--- a/arch/powerpc/perf/8xx-pmu.c
+++ b/arch/powerpc/perf/8xx-pmu.c
@@ -14,7 +14,7 @@
 #include <asm/machdep.h>
 #include <asm/firmware.h>
 #include <asm/ptrace.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 #include <asm/inst.h>
 
 #define PERF_8xx_ID_CPU_CYCLES		1
diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c
index 6b4434dd0ff3..26aa26482c9a 100644
--- a/arch/powerpc/perf/callchain.c
+++ b/arch/powerpc/perf/callchain.c
@@ -51,7 +51,7 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re
 
 	lr = regs->link;
 	sp = regs->gpr[1];
-	perf_callchain_store(entry, perf_instruction_pointer(regs));
+	perf_callchain_store(entry, perf_arch_instruction_pointer(regs));
 
 	if (!validate_sp(sp, current))
 		return;
diff --git a/arch/powerpc/perf/callchain_32.c b/arch/powerpc/perf/callchain_32.c
index ea8cfe3806dc..ddcc2d8aa64a 100644
--- a/arch/powerpc/perf/callchain_32.c
+++ b/arch/powerpc/perf/callchain_32.c
@@ -139,7 +139,7 @@ void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry,
 	long level = 0;
 	unsigned int __user *fp, *uregs;
 
-	next_ip = perf_instruction_pointer(regs);
+	next_ip = perf_arch_instruction_pointer(regs);
 	lr = regs->link;
 	sp = regs->gpr[1];
 	perf_callchain_store(entry, next_ip);
diff --git a/arch/powerpc/perf/callchain_64.c b/arch/powerpc/perf/callchain_64.c
index 488e8a21a11e..115d1c105e8a 100644
--- a/arch/powerpc/perf/callchain_64.c
+++ b/arch/powerpc/perf/callchain_64.c
@@ -74,7 +74,7 @@ void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry,
 	struct signal_frame_64 __user *sigframe;
 	unsigned long __user *fp, *uregs;
 
-	next_ip = perf_instruction_pointer(regs);
+	next_ip = perf_arch_instruction_pointer(regs);
 	lr = regs->link;
 	sp = regs->gpr[1];
 	perf_callchain_store(entry, next_ip);
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 42867469752d..2b79171ee185 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -16,7 +16,7 @@
 #include <asm/machdep.h>
 #include <asm/firmware.h>
 #include <asm/ptrace.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 #include <asm/hw_irq.h>
 #include <asm/interrupt.h>
 
@@ -2332,7 +2332,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
  * Called from generic code to get the misc flags (i.e. processor mode)
  * for an event_id.
  */
-unsigned long perf_misc_flags(struct pt_regs *regs)
+unsigned long perf_arch_misc_flags(struct pt_regs *regs)
 {
 	u32 flags = perf_get_misc_flags(regs);
 
@@ -2346,7 +2346,7 @@ unsigned long perf_misc_flags(struct pt_regs *regs)
  * Called from generic code to get the instruction pointer
  * for an event_id.
  */
-unsigned long perf_instruction_pointer(struct pt_regs *regs)
+unsigned long perf_arch_instruction_pointer(struct pt_regs *regs)
 {
 	unsigned long siar = mfspr(SPRN_SIAR);
 
diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c
index e52b848b64b7..32fa5fb557c0 100644
--- a/arch/powerpc/platforms/85xx/smp.c
+++ b/arch/powerpc/platforms/85xx/smp.c
@@ -23,7 +23,7 @@
 #include <asm/mpic.h>
 #include <asm/cacheflush.h>
 #include <asm/dbell.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 #include <asm/cputhreads.h>
 #include <asm/fsl_pm.h>
 
diff --git a/arch/powerpc/platforms/86xx/mpc86xx_smp.c b/arch/powerpc/platforms/86xx/mpc86xx_smp.c
index 8a7e55acf090..9be33e41af6d 100644
--- a/arch/powerpc/platforms/86xx/mpc86xx_smp.c
+++ b/arch/powerpc/platforms/86xx/mpc86xx_smp.c
@@ -12,7 +12,7 @@
 #include <linux/delay.h>
 #include <linux/pgtable.h>
 
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 #include <asm/page.h>
 #include <asm/pci-bridge.h>
 #include <asm/mpic.h>
diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c
index 28dc86744cac..d243f7fd8982 100644
--- a/arch/powerpc/platforms/cell/axon_msi.c
+++ b/arch/powerpc/platforms/cell/axon_msi.c
@@ -112,7 +112,7 @@ static void axon_msi_cascade(struct irq_desc *desc)
 		pr_devel("axon_msi: woff %x roff %x msi %x\n",
 			  write_offset, msic->read_offset, msi);
 
-		if (msi < nr_irqs && irq_get_chip_data(msi) == msic) {
+		if (msi < irq_get_nr_irqs() && irq_get_chip_data(msi) == msic) {
 			generic_handle_irq(msi);
 			msic->fifo_virt[idx] = cpu_to_le32(0xffffffff);
 		} else {
diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c
index fee638fd8970..0e8f20ecca08 100644
--- a/arch/powerpc/platforms/cell/smp.c
+++ b/arch/powerpc/platforms/cell/smp.c
@@ -35,7 +35,7 @@
 #include <asm/firmware.h>
 #include <asm/rtas.h>
 #include <asm/cputhreads.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 
 #include "interrupt.h"
 #include <asm/udbg.h>
diff --git a/arch/powerpc/platforms/cell/spu_syscalls.c b/arch/powerpc/platforms/cell/spu_syscalls.c
index cd7d42fc12a6..000894e07b02 100644
--- a/arch/powerpc/platforms/cell/spu_syscalls.c
+++ b/arch/powerpc/platforms/cell/spu_syscalls.c
@@ -36,6 +36,9 @@ static inline struct spufs_calls *spufs_calls_get(void)
 
 static inline void spufs_calls_put(struct spufs_calls *calls)
 {
+	if (!calls)
+		return;
+
 	BUG_ON(calls != spufs_calls);
 
 	/* we don't need to rcu this, as we hold a reference to the module */
@@ -53,82 +56,55 @@ static inline void spufs_calls_put(struct spufs_calls *calls) { }
 
 #endif /* CONFIG_SPU_FS_MODULE */
 
+DEFINE_CLASS(spufs_calls, struct spufs_calls *, spufs_calls_put(_T), spufs_calls_get(), void)
+
 SYSCALL_DEFINE4(spu_create, const char __user *, name, unsigned int, flags,
 	umode_t, mode, int, neighbor_fd)
 {
-	long ret;
-	struct spufs_calls *calls;
-
-	calls = spufs_calls_get();
+	CLASS(spufs_calls, calls)();
 	if (!calls)
 		return -ENOSYS;
 
 	if (flags & SPU_CREATE_AFFINITY_SPU) {
-		struct fd neighbor = fdget(neighbor_fd);
-		ret = -EBADF;
-		if (fd_file(neighbor)) {
-			ret = calls->create_thread(name, flags, mode, fd_file(neighbor));
-			fdput(neighbor);
-		}
-	} else
-		ret = calls->create_thread(name, flags, mode, NULL);
-
-	spufs_calls_put(calls);
-	return ret;
+		CLASS(fd, neighbor)(neighbor_fd);
+		if (fd_empty(neighbor))
+			return -EBADF;
+		return calls->create_thread(name, flags, mode, fd_file(neighbor));
+	} else {
+		return calls->create_thread(name, flags, mode, NULL);
+	}
 }
 
 SYSCALL_DEFINE3(spu_run,int, fd, __u32 __user *, unpc, __u32 __user *, ustatus)
 {
-	long ret;
-	struct fd arg;
-	struct spufs_calls *calls;
-
-	calls = spufs_calls_get();
+	CLASS(spufs_calls, calls)();
 	if (!calls)
 		return -ENOSYS;
 
-	ret = -EBADF;
-	arg = fdget(fd);
-	if (fd_file(arg)) {
-		ret = calls->spu_run(fd_file(arg), unpc, ustatus);
-		fdput(arg);
-	}
+	CLASS(fd, arg)(fd);
+	if (fd_empty(arg))
+		return -EBADF;
 
-	spufs_calls_put(calls);
-	return ret;
+	return calls->spu_run(fd_file(arg), unpc, ustatus);
 }
 
 #ifdef CONFIG_COREDUMP
 int elf_coredump_extra_notes_size(void)
 {
-	struct spufs_calls *calls;
-	int ret;
-
-	calls = spufs_calls_get();
+	CLASS(spufs_calls, calls)();
 	if (!calls)
 		return 0;
 
-	ret = calls->coredump_extra_notes_size();
-
-	spufs_calls_put(calls);
-
-	return ret;
+	return calls->coredump_extra_notes_size();
 }
 
 int elf_coredump_extra_notes_write(struct coredump_params *cprm)
 {
-	struct spufs_calls *calls;
-	int ret;
-
-	calls = spufs_calls_get();
+	CLASS(spufs_calls, calls)();
 	if (!calls)
 		return 0;
 
-	ret = calls->coredump_extra_notes_write(cprm);
-
-	spufs_calls_put(calls);
-
-	return ret;
+	return calls->coredump_extra_notes_write(cprm);
 }
 #endif
 
diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c
index 18daafbe2e65..301ee7d8b7df 100644
--- a/arch/powerpc/platforms/cell/spufs/coredump.c
+++ b/arch/powerpc/platforms/cell/spufs/coredump.c
@@ -73,9 +73,7 @@ static struct spu_context *coredump_next_context(int *fd)
 		return NULL;
 	*fd = n - 1;
 
-	rcu_read_lock();
-	file = lookup_fdget_rcu(*fd);
-	rcu_read_unlock();
+	file = fget_raw(*fd);
 	if (file) {
 		ctx = SPUFS_I(file_inode(file))->i_ctx;
 		get_spu_context(ctx);
diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c
index d21b681f52fb..09e7fe24fac1 100644
--- a/arch/powerpc/platforms/powermac/smp.c
+++ b/arch/powerpc/platforms/powermac/smp.c
@@ -35,7 +35,7 @@
 
 #include <asm/ptrace.h>
 #include <linux/atomic.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 #include <asm/irq.h>
 #include <asm/page.h>
 #include <asm/sections.h>
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index ad41dffe4d92..d98b933e4984 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -18,7 +18,7 @@
 #include <asm/opal.h>
 #include <asm/cputhreads.h>
 #include <asm/cpuidle.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 #include <asm/smp.h>
 #include <asm/runlatch.h>
 #include <asm/dbell.h>
diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c
index 56a1f7ce78d2..d92759c21fae 100644
--- a/arch/powerpc/platforms/powernv/opal-irqchip.c
+++ b/arch/powerpc/platforms/powernv/opal-irqchip.c
@@ -282,6 +282,7 @@ int __init opal_event_init(void)
 				 name, NULL);
 		if (rc) {
 			pr_warn("Error %d requesting OPAL irq %d\n", rc, (int)r->start);
+			kfree(name);
 			continue;
 		}
 	}
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index 8f14f0581a21..8f41ef364fc6 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -28,7 +28,7 @@
 #include <asm/xive.h>
 #include <asm/opal.h>
 #include <asm/runlatch.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 #include <asm/dbell.h>
 #include <asm/kvm_ppc.h>
 #include <asm/ppc-opcode.h>
@@ -36,6 +36,7 @@
 #include <asm/kexec.h>
 #include <asm/reg.h>
 #include <asm/powernv.h>
+#include <asm/systemcfg.h>
 
 #include "powernv.h"
 
@@ -136,7 +137,9 @@ static int pnv_smp_cpu_disable(void)
 	 * the generic fixup_irqs. --BenH.
 	 */
 	set_cpu_online(cpu, false);
-	vdso_data->processorCount--;
+#ifdef CONFIG_PPC64_PROC_SYSTEMCFG
+	systemcfg->processorCount--;
+#endif
 	if (cpu == boot_cpuid)
 		boot_cpuid = cpumask_any(cpu_online_mask);
 	if (xive_enabled())
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 6838a0fcda29..bc6926dbf148 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -33,6 +33,7 @@
 #include <asm/xive.h>
 #include <asm/plpar_wrappers.h>
 #include <asm/topology.h>
+#include <asm/systemcfg.h>
 
 #include "pseries.h"
 
@@ -83,7 +84,9 @@ static int pseries_cpu_disable(void)
 	int cpu = smp_processor_id();
 
 	set_cpu_online(cpu, false);
-	vdso_data->processorCount--;
+#ifdef CONFIG_PPC64_PROC_SYSTEMCFG
+	systemcfg->processorCount--;
+#endif
 
 	/*fix boot_cpuid here*/
 	if (cpu == boot_cpuid)
diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c
index 62da20f9700a..cc22924f159f 100644
--- a/arch/powerpc/platforms/pseries/lparcfg.c
+++ b/arch/powerpc/platforms/pseries/lparcfg.c
@@ -29,7 +29,6 @@
 #include <asm/firmware.h>
 #include <asm/rtas.h>
 #include <asm/time.h>
-#include <asm/vdso_datapage.h>
 #include <asm/vio.h>
 #include <asm/mmu.h>
 #include <asm/machdep.h>
@@ -530,7 +529,7 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
 		lrdrp = of_get_property(rtas_node, "ibm,lrdr-capacity", NULL);
 
 	if (lrdrp == NULL) {
-		partition_potential_processors = vdso_data->processorCount;
+		partition_potential_processors = num_possible_cpus();
 	} else {
 		partition_potential_processors = be32_to_cpup(lrdrp + 4);
 	}
@@ -553,7 +552,7 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
 	} else {		/* non SPLPAR case */
 
 		seq_printf(m, "system_active_processors=%d\n",
-			   partition_potential_processors);
+			   partition_active_processors);
 
 		seq_printf(m, "system_potential_processors=%d\n",
 			   partition_potential_processors);
diff --git a/arch/powerpc/platforms/pseries/plpks.c b/arch/powerpc/platforms/pseries/plpks.c
index 4a595493d28a..b1667ed05f98 100644
--- a/arch/powerpc/platforms/pseries/plpks.c
+++ b/arch/powerpc/platforms/pseries/plpks.c
@@ -683,7 +683,7 @@ void __init plpks_early_init_devtree(void)
 out:
 	fdt_nop_property(fdt, chosen_node, "ibm,plpks-pw");
 	// Since we've cleared the password, we must update the FDT checksum
-	early_init_dt_verify(fdt);
+	early_init_dt_verify(fdt, __pa(fdt));
 }
 
 static __init int pseries_plpks_init(void)
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index c597711ef20a..db99725e752b 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -39,7 +39,7 @@
 #include <asm/xive.h>
 #include <asm/dbell.h>
 #include <asm/plpar_wrappers.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 #include <asm/svm.h>
 #include <asm/kvm_guest.h>
 
diff --git a/arch/powerpc/platforms/pseries/svm.c b/arch/powerpc/platforms/pseries/svm.c
index 10b8eb6bff39..c5d0f92c7969 100644
--- a/arch/powerpc/platforms/pseries/svm.c
+++ b/arch/powerpc/platforms/pseries/svm.c
@@ -8,6 +8,7 @@
 
 #include <linux/mm.h>
 #include <linux/memblock.h>
+#include <linux/mem_encrypt.h>
 #include <linux/cc_platform.h>
 #include <linux/mem_encrypt.h>
 #include <asm/machdep.h>
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 22b8b5cc4df0..f4e841a36458 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -50,7 +50,7 @@
 #include <asm/xive.h>
 #include <asm/opal.h>
 #include <asm/firmware.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 #include <asm/sections.h>
 #include <asm/inst.h>
 #include <asm/interrupt.h>