diff options
Diffstat (limited to 'arch/powerpc')
80 files changed, 1460 insertions, 1702 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index a9dd4c39ec00..a0ce777f9706 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -704,6 +704,10 @@ config RELOCATABLE_TEST config ARCH_SUPPORTS_CRASH_DUMP def_bool PPC64 || PPC_BOOK3S_32 || PPC_85xx || (44x && !SMP) +config ARCH_DEFAULT_CRASH_DUMP + bool + default y if !PPC_BOOK3S_32 + config ARCH_SELECTS_CRASH_DUMP def_bool y depends on CRASH_DUMP @@ -1318,6 +1322,14 @@ config MODULES_SIZE endmenu +config PPC64_PROC_SYSTEMCFG + def_bool y + depends on PPC64 && PROC_FS + help + This option enables the presence of /proc/ppc64/systemcfg through + which the systemcfg page can be accessed. + This interface only exists for backwards-compatibility. + if PPC64 # This value must have zeroes in the bottom 60 bits otherwise lots will break config PAGE_OFFSET diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index c06344db0eb3..4d77e17541e9 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -435,7 +435,6 @@ CONFIG_DM9102=m CONFIG_ULI526X=m CONFIG_PCMCIA_XIRCOM=m CONFIG_DL2K=m -CONFIG_SUNDANCE=m CONFIG_S2IO=m CONFIG_FEC_MPC52xx=m CONFIG_GIANFAR=m diff --git a/arch/powerpc/crypto/Kconfig b/arch/powerpc/crypto/Kconfig index 46a4c85e85e2..951a43726461 100644 --- a/arch/powerpc/crypto/Kconfig +++ b/arch/powerpc/crypto/Kconfig @@ -107,12 +107,12 @@ config CRYPTO_AES_PPC_SPE config CRYPTO_AES_GCM_P10 tristate "Stitched AES/GCM acceleration support on P10 or later CPU (PPC)" - depends on BROKEN depends on PPC64 && CPU_LITTLE_ENDIAN && VSX select CRYPTO_LIB_AES select CRYPTO_ALGAPI select CRYPTO_AEAD select CRYPTO_SKCIPHER + select CRYPTO_SIMD help AEAD cipher: AES cipher algorithms (FIPS-197) GCM (Galois/Counter Mode) authenticated encryption mode (NIST SP800-38D) diff --git a/arch/powerpc/crypto/aes-gcm-p10-glue.c b/arch/powerpc/crypto/aes-gcm-p10-glue.c index f66ad56e765f..f37b3d13fc53 100644 --- a/arch/powerpc/crypto/aes-gcm-p10-glue.c +++ b/arch/powerpc/crypto/aes-gcm-p10-glue.c @@ -8,6 +8,7 @@ #include <linux/unaligned.h> #include <asm/simd.h> #include <asm/switch_to.h> +#include <crypto/gcm.h> #include <crypto/aes.h> #include <crypto/algapi.h> #include <crypto/b128ops.h> @@ -24,6 +25,7 @@ #define PPC_ALIGN 16 #define GCM_IV_SIZE 12 +#define RFC4106_NONCE_SIZE 4 MODULE_DESCRIPTION("PPC64le AES-GCM with Stitched implementation"); MODULE_AUTHOR("Danny Tsen <dtsen@linux.ibm.com"); @@ -31,7 +33,7 @@ MODULE_LICENSE("GPL v2"); MODULE_ALIAS_CRYPTO("aes"); asmlinkage int aes_p10_set_encrypt_key(const u8 *userKey, const int bits, - void *key); + void *key); asmlinkage void aes_p10_encrypt(const u8 *in, u8 *out, const void *key); asmlinkage void aes_p10_gcm_encrypt(u8 *in, u8 *out, size_t len, void *rkey, u8 *iv, void *Xi); @@ -39,7 +41,8 @@ asmlinkage void aes_p10_gcm_decrypt(u8 *in, u8 *out, size_t len, void *rkey, u8 *iv, void *Xi); asmlinkage void gcm_init_htable(unsigned char htable[], unsigned char Xi[]); asmlinkage void gcm_ghash_p10(unsigned char *Xi, unsigned char *Htable, - unsigned char *aad, unsigned int alen); + unsigned char *aad, unsigned int alen); +asmlinkage void gcm_update(u8 *iv, void *Xi); struct aes_key { u8 key[AES_MAX_KEYLENGTH]; @@ -52,6 +55,7 @@ struct gcm_ctx { u8 aad_hash[16]; u64 aadLen; u64 Plen; /* offset 56 - used in aes_p10_gcm_{en/de}crypt */ + u8 pblock[16]; }; struct Hash_ctx { u8 H[16]; /* subkey */ @@ -60,17 +64,20 @@ struct Hash_ctx { struct p10_aes_gcm_ctx { struct aes_key enc_key; + u8 nonce[RFC4106_NONCE_SIZE]; }; static void vsx_begin(void) { preempt_disable(); + pagefault_disable(); enable_kernel_vsx(); } static void vsx_end(void) { disable_kernel_vsx(); + pagefault_enable(); preempt_enable(); } @@ -185,7 +192,7 @@ static int set_authsize(struct crypto_aead *tfm, unsigned int authsize) } static int p10_aes_gcm_setkey(struct crypto_aead *aead, const u8 *key, - unsigned int keylen) + unsigned int keylen) { struct crypto_tfm *tfm = crypto_aead_tfm(aead); struct p10_aes_gcm_ctx *ctx = crypto_tfm_ctx(tfm); @@ -198,7 +205,8 @@ static int p10_aes_gcm_setkey(struct crypto_aead *aead, const u8 *key, return ret ? -EINVAL : 0; } -static int p10_aes_gcm_crypt(struct aead_request *req, int enc) +static int p10_aes_gcm_crypt(struct aead_request *req, u8 *riv, + int assoclen, int enc) { struct crypto_tfm *tfm = req->base.tfm; struct p10_aes_gcm_ctx *ctx = crypto_tfm_ctx(tfm); @@ -210,7 +218,6 @@ static int p10_aes_gcm_crypt(struct aead_request *req, int enc) struct skcipher_walk walk; u8 *assocmem = NULL; u8 *assoc; - unsigned int assoclen = req->assoclen; unsigned int cryptlen = req->cryptlen; unsigned char ivbuf[AES_BLOCK_SIZE+PPC_ALIGN]; unsigned char *iv = PTR_ALIGN((void *)ivbuf, PPC_ALIGN); @@ -218,11 +225,12 @@ static int p10_aes_gcm_crypt(struct aead_request *req, int enc) unsigned long auth_tag_len = crypto_aead_authsize(__crypto_aead_cast(tfm)); u8 otag[16]; int total_processed = 0; + int nbytes; memset(databuf, 0, sizeof(databuf)); memset(hashbuf, 0, sizeof(hashbuf)); memset(ivbuf, 0, sizeof(ivbuf)); - memcpy(iv, req->iv, GCM_IV_SIZE); + memcpy(iv, riv, GCM_IV_SIZE); /* Linearize assoc, if not already linear */ if (req->src->length >= assoclen && req->src->length) { @@ -257,19 +265,25 @@ static int p10_aes_gcm_crypt(struct aead_request *req, int enc) if (ret) return ret; - while (walk.nbytes > 0 && ret == 0) { + while ((nbytes = walk.nbytes) > 0 && ret == 0) { + u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + u8 buf[AES_BLOCK_SIZE]; + + if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE)) + src = dst = memcpy(buf, src, nbytes); vsx_begin(); if (enc) - aes_p10_gcm_encrypt(walk.src.virt.addr, - walk.dst.virt.addr, - walk.nbytes, + aes_p10_gcm_encrypt(src, dst, nbytes, &ctx->enc_key, gctx->iv, hash->Htable); else - aes_p10_gcm_decrypt(walk.src.virt.addr, - walk.dst.virt.addr, - walk.nbytes, + aes_p10_gcm_decrypt(src, dst, nbytes, &ctx->enc_key, gctx->iv, hash->Htable); + + if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE)) + memcpy(walk.dst.virt.addr, buf, nbytes); + vsx_end(); total_processed += walk.nbytes; @@ -281,6 +295,7 @@ static int p10_aes_gcm_crypt(struct aead_request *req, int enc) /* Finalize hash */ vsx_begin(); + gcm_update(gctx->iv, hash->Htable); finish_tag(gctx, hash, total_processed); vsx_end(); @@ -302,17 +317,63 @@ static int p10_aes_gcm_crypt(struct aead_request *req, int enc) return 0; } +static int rfc4106_setkey(struct crypto_aead *tfm, const u8 *inkey, + unsigned int keylen) +{ + struct p10_aes_gcm_ctx *ctx = crypto_aead_ctx(tfm); + int err; + + keylen -= RFC4106_NONCE_SIZE; + err = p10_aes_gcm_setkey(tfm, inkey, keylen); + if (err) + return err; + + memcpy(ctx->nonce, inkey + keylen, RFC4106_NONCE_SIZE); + return 0; +} + +static int rfc4106_setauthsize(struct crypto_aead *tfm, unsigned int authsize) +{ + return crypto_rfc4106_check_authsize(authsize); +} + +static int rfc4106_encrypt(struct aead_request *req) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct p10_aes_gcm_ctx *ctx = crypto_aead_ctx(aead); + u8 iv[AES_BLOCK_SIZE]; + + memcpy(iv, ctx->nonce, RFC4106_NONCE_SIZE); + memcpy(iv + RFC4106_NONCE_SIZE, req->iv, GCM_RFC4106_IV_SIZE); + + return crypto_ipsec_check_assoclen(req->assoclen) ?: + p10_aes_gcm_crypt(req, iv, req->assoclen - GCM_RFC4106_IV_SIZE, 1); +} + +static int rfc4106_decrypt(struct aead_request *req) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct p10_aes_gcm_ctx *ctx = crypto_aead_ctx(aead); + u8 iv[AES_BLOCK_SIZE]; + + memcpy(iv, ctx->nonce, RFC4106_NONCE_SIZE); + memcpy(iv + RFC4106_NONCE_SIZE, req->iv, GCM_RFC4106_IV_SIZE); + + return crypto_ipsec_check_assoclen(req->assoclen) ?: + p10_aes_gcm_crypt(req, iv, req->assoclen - GCM_RFC4106_IV_SIZE, 0); +} + static int p10_aes_gcm_encrypt(struct aead_request *req) { - return p10_aes_gcm_crypt(req, 1); + return p10_aes_gcm_crypt(req, req->iv, req->assoclen, 1); } static int p10_aes_gcm_decrypt(struct aead_request *req) { - return p10_aes_gcm_crypt(req, 0); + return p10_aes_gcm_crypt(req, req->iv, req->assoclen, 0); } -static struct aead_alg gcm_aes_alg = { +static struct aead_alg gcm_aes_algs[] = {{ .ivsize = GCM_IV_SIZE, .maxauthsize = 16, @@ -321,23 +382,57 @@ static struct aead_alg gcm_aes_alg = { .encrypt = p10_aes_gcm_encrypt, .decrypt = p10_aes_gcm_decrypt, - .base.cra_name = "gcm(aes)", - .base.cra_driver_name = "aes_gcm_p10", + .base.cra_name = "__gcm(aes)", + .base.cra_driver_name = "__aes_gcm_p10", .base.cra_priority = 2100, .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct p10_aes_gcm_ctx), + .base.cra_ctxsize = sizeof(struct p10_aes_gcm_ctx)+ + 4 * sizeof(u64[2]), .base.cra_module = THIS_MODULE, -}; + .base.cra_flags = CRYPTO_ALG_INTERNAL, +}, { + .ivsize = GCM_RFC4106_IV_SIZE, + .maxauthsize = 16, + .setkey = rfc4106_setkey, + .setauthsize = rfc4106_setauthsize, + .encrypt = rfc4106_encrypt, + .decrypt = rfc4106_decrypt, + + .base.cra_name = "__rfc4106(gcm(aes))", + .base.cra_driver_name = "__rfc4106_aes_gcm_p10", + .base.cra_priority = 2100, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct p10_aes_gcm_ctx) + + 4 * sizeof(u64[2]), + .base.cra_module = THIS_MODULE, + .base.cra_flags = CRYPTO_ALG_INTERNAL, +}}; + +static struct simd_aead_alg *p10_simd_aeads[ARRAY_SIZE(gcm_aes_algs)]; static int __init p10_init(void) { - return crypto_register_aead(&gcm_aes_alg); + int ret; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + return 0; + + ret = simd_register_aeads_compat(gcm_aes_algs, + ARRAY_SIZE(gcm_aes_algs), + p10_simd_aeads); + if (ret) { + simd_unregister_aeads(gcm_aes_algs, ARRAY_SIZE(gcm_aes_algs), + p10_simd_aeads); + return ret; + } + return 0; } static void __exit p10_exit(void) { - crypto_unregister_aead(&gcm_aes_alg); + simd_unregister_aeads(gcm_aes_algs, ARRAY_SIZE(gcm_aes_algs), + p10_simd_aeads); } -module_cpu_feature_match(PPC_MODULE_FEATURE_P10, p10_init); +module_init(p10_init); module_exit(p10_exit); diff --git a/arch/powerpc/crypto/aes-gcm-p10.S b/arch/powerpc/crypto/aes-gcm-p10.S index a51f4b265308..89f50eef3512 100644 --- a/arch/powerpc/crypto/aes-gcm-p10.S +++ b/arch/powerpc/crypto/aes-gcm-p10.S @@ -1,42 +1,42 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ - # - # Accelerated AES-GCM stitched implementation for ppc64le. - # - # Copyright 2022- IBM Inc. All rights reserved - # - #=================================================================================== - # Written by Danny Tsen <dtsen@linux.ibm.com> - # - # GHASH is based on the Karatsuba multiplication method. - # - # Xi xor X1 - # - # X1 * H^4 + X2 * H^3 + x3 * H^2 + X4 * H = - # (X1.h * H4.h + xX.l * H4.l + X1 * H4) + - # (X2.h * H3.h + X2.l * H3.l + X2 * H3) + - # (X3.h * H2.h + X3.l * H2.l + X3 * H2) + - # (X4.h * H.h + X4.l * H.l + X4 * H) - # - # Xi = v0 - # H Poly = v2 - # Hash keys = v3 - v14 - # ( H.l, H, H.h) - # ( H^2.l, H^2, H^2.h) - # ( H^3.l, H^3, H^3.h) - # ( H^4.l, H^4, H^4.h) - # - # v30 is IV - # v31 - counter 1 - # - # AES used, - # vs0 - vs14 for round keys - # v15, v16, v17, v18, v19, v20, v21, v22 for 8 blocks (encrypted) - # - # This implementation uses stitched AES-GCM approach to improve overall performance. - # AES is implemented with 8x blocks and GHASH is using 2 4x blocks. - # - # =================================================================================== - # +# +# Accelerated AES-GCM stitched implementation for ppc64le. +# +# Copyright 2024- IBM Inc. +# +#=================================================================================== +# Written by Danny Tsen <dtsen@us.ibm.com> +# +# GHASH is based on the Karatsuba multiplication method. +# +# Xi xor X1 +# +# X1 * H^4 + X2 * H^3 + x3 * H^2 + X4 * H = +# (X1.h * H4.h + xX.l * H4.l + X1 * H4) + +# (X2.h * H3.h + X2.l * H3.l + X2 * H3) + +# (X3.h * H2.h + X3.l * H2.l + X3 * H2) + +# (X4.h * H.h + X4.l * H.l + X4 * H) +# +# Xi = v0 +# H Poly = v2 +# Hash keys = v3 - v14 +# ( H.l, H, H.h) +# ( H^2.l, H^2, H^2.h) +# ( H^3.l, H^3, H^3.h) +# ( H^4.l, H^4, H^4.h) +# +# v30 is IV +# v31 - counter 1 +# +# AES used, +# vs0 - round key 0 +# v15, v16, v17, v18, v19, v20, v21, v22 for 8 blocks (encrypted) +# +# This implementation uses stitched AES-GCM approach to improve overall performance. +# AES is implemented with 8x blocks and GHASH is using 2 4x blocks. +# +# =================================================================================== +# #include <asm/ppc_asm.h> #include <linux/linkage.h> @@ -44,483 +44,224 @@ .machine "any" .text - # 4x loops - # v15 - v18 - input states - # vs1 - vs9 - round keys - # -.macro Loop_aes_middle4x - xxlor 19+32, 1, 1 - xxlor 20+32, 2, 2 - xxlor 21+32, 3, 3 - xxlor 22+32, 4, 4 - - vcipher 15, 15, 19 - vcipher 16, 16, 19 - vcipher 17, 17, 19 - vcipher 18, 18, 19 - - vcipher 15, 15, 20 - vcipher 16, 16, 20 - vcipher 17, 17, 20 - vcipher 18, 18, 20 - - vcipher 15, 15, 21 - vcipher 16, 16, 21 - vcipher 17, 17, 21 - vcipher 18, 18, 21 - - vcipher 15, 15, 22 - vcipher 16, 16, 22 - vcipher 17, 17, 22 - vcipher 18, 18, 22 - - xxlor 19+32, 5, 5 - xxlor 20+32, 6, 6 - xxlor 21+32, 7, 7 - xxlor 22+32, 8, 8 - - vcipher 15, 15, 19 - vcipher 16, 16, 19 - vcipher 17, 17, 19 - vcipher 18, 18, 19 - - vcipher 15, 15, 20 - vcipher 16, 16, 20 - vcipher 17, 17, 20 - vcipher 18, 18, 20 - - vcipher 15, 15, 21 - vcipher 16, 16, 21 - vcipher 17, 17, 21 - vcipher 18, 18, 21 - - vcipher 15, 15, 22 - vcipher 16, 16, 22 - vcipher 17, 17, 22 - vcipher 18, 18, 22 - - xxlor 23+32, 9, 9 - vcipher 15, 15, 23 - vcipher 16, 16, 23 - vcipher 17, 17, 23 - vcipher 18, 18, 23 +.macro SAVE_GPR GPR OFFSET FRAME + std \GPR,\OFFSET(\FRAME) .endm - # 8x loops - # v15 - v22 - input states - # vs1 - vs9 - round keys - # -.macro Loop_aes_middle8x - xxlor 23+32, 1, 1 - xxlor 24+32, 2, 2 - xxlor 25+32, 3, 3 - xxlor 26+32, 4, 4 - - vcipher 15, 15, 23 - vcipher 16, 16, 23 - vcipher 17, 17, 23 - vcipher 18, 18, 23 - vcipher 19, 19, 23 - vcipher 20, 20, 23 - vcipher 21, 21, 23 - vcipher 22, 22, 23 - - vcipher 15, 15, 24 - vcipher 16, 16, 24 - vcipher 17, 17, 24 - vcipher 18, 18, 24 - vcipher 19, 19, 24 - vcipher 20, 20, 24 - vcipher 21, 21, 24 - vcipher 22, 22, 24 - - vcipher 15, 15, 25 - vcipher 16, 16, 25 - vcipher 17, 17, 25 - vcipher 18, 18, 25 - vcipher 19, 19, 25 - vcipher 20, 20, 25 - vcipher 21, 21, 25 - vcipher 22, 22, 25 - - vcipher 15, 15, 26 - vcipher 16, 16, 26 - vcipher 17, 17, 26 - vcipher 18, 18, 26 - vcipher 19, 19, 26 - vcipher 20, 20, 26 - vcipher 21, 21, 26 - vcipher 22, 22, 26 - - xxlor 23+32, 5, 5 - xxlor 24+32, 6, 6 - xxlor 25+32, 7, 7 - xxlor 26+32, 8, 8 - - vcipher 15, 15, 23 - vcipher 16, 16, 23 - vcipher 17, 17, 23 - vcipher 18, 18, 23 - vcipher 19, 19, 23 - vcipher 20, 20, 23 - vcipher 21, 21, 23 - vcipher 22, 22, 23 - - vcipher 15, 15, 24 - vcipher 16, 16, 24 - vcipher 17, 17, 24 - vcipher 18, 18, 24 - vcipher 19, 19, 24 - vcipher 20, 20, 24 - vcipher 21, 21, 24 - vcipher 22, 22, 24 - - vcipher 15, 15, 25 - vcipher 16, 16, 25 - vcipher 17, 17, 25 - vcipher 18, 18, 25 - vcipher 19, 19, 25 - vcipher 20, 20, 25 - vcipher 21, 21, 25 - vcipher 22, 22, 25 - - vcipher 15, 15, 26 - vcipher 16, 16, 26 - vcipher 17, 17, 26 - vcipher 18, 18, 26 - vcipher 19, 19, 26 - vcipher 20, 20, 26 - vcipher 21, 21, 26 - vcipher 22, 22, 26 - - xxlor 23+32, 9, 9 - vcipher 15, 15, 23 - vcipher 16, 16, 23 - vcipher 17, 17, 23 - vcipher 18, 18, 23 - vcipher 19, 19, 23 - vcipher 20, 20, 23 - vcipher 21, 21, 23 - vcipher 22, 22, 23 +.macro SAVE_VRS VRS OFFSET FRAME + stxv \VRS+32, \OFFSET(\FRAME) .endm -.macro Loop_aes_middle_1x - xxlor 19+32, 1, 1 - xxlor 20+32, 2, 2 - xxlor 21+32, 3, 3 - xxlor 22+32, 4, 4 - - vcipher 15, 15, 19 - vcipher 15, 15, 20 - vcipher 15, 15, 21 - vcipher 15, 15, 22 - - xxlor 19+32, 5, 5 - xxlor 20+32, 6, 6 - xxlor 21+32, 7, 7 - xxlor 22+32, 8, 8 - - vcipher 15, 15, 19 - vcipher 15, 15, 20 - vcipher 15, 15, 21 - vcipher 15, 15, 22 - - xxlor 19+32, 9, 9 - vcipher 15, 15, 19 +.macro RESTORE_GPR GPR OFFSET FRAME + ld \GPR,\OFFSET(\FRAME) .endm - # - # Compute 4x hash values based on Karatsuba method. - # -.macro ppc_aes_gcm_ghash - vxor 15, 15, 0 - - vpmsumd 23, 12, 15 # H4.L * X.L - vpmsumd 24, 9, 16 - vpmsumd 25, 6, 17 - vpmsumd 26, 3, 18 - - vxor 23, 23, 24 - vxor 23, 23, 25 - vxor 23, 23, 26 # L - - vpmsumd 24, 13, 15 # H4.L * X.H + H4.H * X.L - vpmsumd 25, 10, 16 # H3.L * X1.H + H3.H * X1.L - vpmsumd 26, 7, 17 - vpmsumd 27, 4, 18 - - vxor 24, 24, 25 - vxor 24, 24, 26 - vxor 24, 24, 27 # M - - # sum hash and reduction with H Poly - vpmsumd 28, 23, 2 # reduction - - vxor 29, 29, 29 - vsldoi 26, 24, 29, 8 # mL - vsldoi 29, 29, 24, 8 # mH - vxor 23, 23, 26 # mL + L - - vsldoi 23, 23, 23, 8 # swap - vxor 23, 23, 28 - - vpmsumd 24, 14, 15 # H4.H * X.H - vpmsumd 25, 11, 16 - vpmsumd 26, 8, 17 - vpmsumd 27, 5, 18 - - vxor 24, 24, 25 - vxor 24, 24, 26 - vxor 24, 24, 27 - - vxor 24, 24, 29 - - # sum hash and reduction with H Poly - vsldoi 27, 23, 23, 8 # swap - vpmsumd 23, 23, 2 - vxor 27, 27, 24 - vxor 23, 23, 27 - - xxlor 32, 23+32, 23+32 # update hash - +.macro RESTORE_VRS VRS OFFSET FRAME + lxv \VRS+32, \OFFSET(\FRAME) .endm - # - # Combine two 4x ghash - # v15 - v22 - input blocks - # -.macro ppc_aes_gcm_ghash2_4x - # first 4x hash - vxor 15, 15, 0 # Xi + X - - vpmsumd 23, 12, 15 # H4.L * X.L - vpmsumd 24, 9, 16 - vpmsumd 25, 6, 17 - vpmsumd 26, 3, 18 - - vxor 23, 23, 24 - vxor 23, 23, 25 - vxor 23, 23, 26 # L - - vpmsumd 24, 13, 15 # H4.L * X.H + H4.H * X.L - vpmsumd 25, 10, 16 # H3.L * X1.H + H3.H * X1.L - vpmsumd 26, 7, 17 - vpmsumd 27, 4, 18 - - vxor 24, 24, 25 - vxor 24, 24, 26 - - # sum hash and reduction with H Poly - vpmsumd 28, 23, 2 # reduction - - vxor 29, 29, 29 - - vxor 24, 24, 27 # M - vsldoi 26, 24, 29, 8 # mL - vsldoi 29, 29, 24, 8 # mH - vxor 23, 23, 26 # mL + L - - vsldoi 23, 23, 23, 8 # swap - vxor 23, 23, 28 +.macro SAVE_REGS + mflr 0 + std 0, 16(1) + stdu 1,-512(1) + + SAVE_GPR 14, 112, 1 + SAVE_GPR 15, 120, 1 + SAVE_GPR 16, 128, 1 + SAVE_GPR 17, 136, 1 + SAVE_GPR 18, 144, 1 + SAVE_GPR 19, 152, 1 + SAVE_GPR 20, 160, 1 + SAVE_GPR 21, 168, 1 + SAVE_GPR 22, 176, 1 + SAVE_GPR 23, 184, 1 + SAVE_GPR 24, 192, 1 + + addi 9, 1, 256 + SAVE_VRS 20, 0, 9 + SAVE_VRS 21, 16, 9 + SAVE_VRS 22, 32, 9 + SAVE_VRS 23, 48, 9 + SAVE_VRS 24, 64, 9 + SAVE_VRS 25, 80, 9 + SAVE_VRS 26, 96, 9 + SAVE_VRS 27, 112, 9 + SAVE_VRS 28, 128, 9 + SAVE_VRS 29, 144, 9 + SAVE_VRS 30, 160, 9 + SAVE_VRS 31, 176, 9 +.endm # SAVE_REGS - vpmsumd 24, 14, 15 # H4.H * X.H - vpmsumd 25, 11, 16 - vpmsumd 26, 8, 17 - vpmsumd 27, 5, 18 +.macro RESTORE_REGS + addi 9, 1, 256 + RESTORE_VRS 20, 0, 9 + RESTORE_VRS 21, 16, 9 + RESTORE_VRS 22, 32, 9 + RESTORE_VRS 23, 48, 9 + RESTORE_VRS 24, 64, 9 + RESTORE_VRS 25, 80, 9 + RESTORE_VRS 26, 96, 9 + RESTORE_VRS 27, 112, 9 + RESTORE_VRS 28, 128, 9 + RESTORE_VRS 29, 144, 9 + RESTORE_VRS 30, 160, 9 + RESTORE_VRS 31, 176, 9 + + RESTORE_GPR 14, 112, 1 + RESTORE_GPR 15, 120, 1 + RESTORE_GPR 16, 128, 1 + RESTORE_GPR 17, 136, 1 + RESTORE_GPR 18, 144, 1 + RESTORE_GPR 19, 152, 1 + RESTORE_GPR 20, 160, 1 + RESTORE_GPR 21, 168, 1 + RESTORE_GPR 22, 176, 1 + RESTORE_GPR 23, 184, 1 + RESTORE_GPR 24, 192, 1 + + addi 1, 1, 512 + ld 0, 16(1) + mtlr 0 +.endm # RESTORE_REGS + +# 4x loops +.macro AES_CIPHER_4x _VCIPHER ST r + \_VCIPHER \ST, \ST, \r + \_VCIPHER \ST+1, \ST+1, \r + \_VCIPHER \ST+2, \ST+2, \r + \_VCIPHER \ST+3, \ST+3, \r +.endm - vxor 24, 24, 25 - vxor 24, 24, 26 - vxor 24, 24, 27 # H +# 8x loops +.macro AES_CIPHER_8x _VCIPHER ST r + \_VCIPHER \ST, \ST, \r + \_VCIPHER \ST+1, \ST+1, \r + \_VCIPHER \ST+2, \ST+2, \r + \_VCIPHER \ST+3, \ST+3, \r + \_VCIPHER \ST+4, \ST+4, \r + \_VCIPHER \ST+5, \ST+5, \r + \_VCIPHER \ST+6, \ST+6, \r + \_VCIPHER \ST+7, \ST+7, \r +.endm - vxor 24, 24, 29 # H + mH +.macro LOOP_8AES_STATE + xxlor 32+23, 1, 1 + xxlor 32+24, 2, 2 + xxlor 32+25, 3, 3 + xxlor 32+26, 4, 4 + AES_CIPHER_8x vcipher, 15, 23 + AES_CIPHER_8x vcipher, 15, 24 + AES_CIPHER_8x vcipher, 15, 25 + AES_CIPHER_8x vcipher, 15, 26 + xxlor 32+23, 5, 5 + xxlor 32+24, 6, 6 + xxlor 32+25, 7, 7 + xxlor 32+26, 8, 8 + AES_CIPHER_8x vcipher, 15, 23 + AES_CIPHER_8x vcipher, 15, 24 + AES_CIPHER_8x vcipher, 15, 25 + AES_CIPHER_8x vcipher, 15, 26 +.endm - # sum hash and reduction with H Poly - vsldoi 27, 23, 23, 8 # swap - vpmsumd 23, 23, 2 - vxor 27, 27, 24 - vxor 27, 23, 27 # 1st Xi - - # 2nd 4x hash - vpmsumd 24, 9, 20 - vpmsumd 25, 6, 21 - vpmsumd 26, 3, 22 - vxor 19, 19, 27 # Xi + X - vpmsumd 23, 12, 19 # H4.L * X.L - - vxor 23, 23, 24 - vxor 23, 23, 25 - vxor 23, 23, 26 # L - - vpmsumd 24, 13, 19 # H4.L * X.H + H4.H * X.L - vpmsumd 25, 10, 20 # H3.L * X1.H + H3.H * X1.L - vpmsumd 26, 7, 21 - vpmsumd 27, 4, 22 - - vxor 24, 24, 25 - vxor 24, 24, 26 +# +# PPC_GHASH4x(H, S1, S2, S3, S4): Compute 4x hash values based on Karatsuba method. +# H: returning digest +# S#: states +# +# S1 should xor with the previous digest +# +# Xi = v0 +# H Poly = v2 +# Hash keys = v3 - v14 +# Scratch: v23 - v29 +# +.macro PPC_GHASH4x H S1 S2 S3 S4 + + vpmsumd 23, 12, \S1 # H4.L * X.L + vpmsumd 24, 9, \S2 + vpmsumd 25, 6, \S3 + vpmsumd 26, 3, \S4 + + vpmsumd 27, 13, \S1 # H4.L * X.H + H4.H * X.L + vpmsumd 28, 10, \S2 # H3.L * X1.H + H3.H * X1.L + + vxor 23, 23, 24 + vxor 23, 23, 25 + vxor 23, 23, 26 # L + + vxor 24, 27, 28 + vpmsumd 25, 7, \S3 + vpmsumd 26, 4, \S4 + + vxor 24, 24, 25 + vxor 24, 24, 26 # M # sum hash and reduction with H Poly - vpmsumd 28, 23, 2 # reduction - - vxor 29, 29, 29 + vpmsumd 28, 23, 2 # reduction - vxor 24, 24, 27 # M - vsldoi 26, 24, 29, 8 # mL - vsldoi 29, 29, 24, 8 # mH - vxor 23, 23, 26 # mL + L + vxor 1, 1, 1 + vsldoi 25, 24, 1, 8 # mL + vsldoi 1, 1, 24, 8 # mH + vxor 23, 23, 25 # mL + L - vsldoi 23, 23, 23, 8 # swap - vxor 23, 23, 28 + # This performs swap and xor like, + # vsldoi 23, 23, 23, 8 # swap + # vxor 23, 23, 28 + xxlor 32+25, 10, 10 + vpermxor 23, 23, 28, 25 - vpmsumd 24, 14, 19 # H4.H * X.H - vpmsumd 25, 11, 20 - vpmsumd 26, 8, 21 - vpmsumd 27, 5, 22 + vpmsumd 26, 14, \S1 # H4.H * X.H + vpmsumd 27, 11, \S2 + vpmsumd 28, 8, \S3 + vpmsumd 29, 5, \S4 - vxor 24, 24, 25 - vxor 24, 24, 26 - vxor 24, 24, 27 # H + vxor 24, 26, 27 + vxor 24, 24, 28 + vxor 24, 24, 29 - vxor 24, 24, 29 # H + mH + vxor 24, 24, 1 # sum hash and reduction with H Poly - vsldoi 27, 23, 23, 8 # swap - vpmsumd 23, 23, 2 - vxor 27, 27, 24 - vxor 23, 23, 27 - - xxlor 32, 23+32, 23+32 # update hash - + vsldoi 25, 23, 23, 8 # swap + vpmsumd 23, 23, 2 + vxor 27, 25, 24 + vxor \H, 23, 27 .endm - # - # Compute update single hash - # -.macro ppc_update_hash_1x - vxor 28, 28, 0 - - vxor 19, 19, 19 +# +# Compute update single ghash +# scratch: v1, v22..v27 +# +.macro PPC_GHASH1x H S1 - vpmsumd 22, 3, 28 # L - vpmsumd 23, 4, 28 # M - vpmsumd 24, 5, 28 # H + vxor 1, 1, 1 - vpmsumd 27, 22, 2 # reduction + vpmsumd 22, 3, \S1 # L + vpmsumd 23, 4, \S1 # M + vpmsumd 24, 5, \S1 # H - vsldoi 25, 23, 19, 8 # mL - vsldoi 26, 19, 23, 8 # mH - vxor 22, 22, 25 # LL + LL - vxor 24, 24, 26 # HH + HH + vpmsumd 27, 22, 2 # reduction - vsldoi 22, 22, 22, 8 # swap - vxor 22, 22, 27 + vsldoi 25, 23, 1, 8 # mL + vsldoi 26, 1, 23, 8 # mH + vxor 22, 22, 25 # LL + LL + vxor 24, 24, 26 # HH + HH - vsldoi 20, 22, 22, 8 # swap - vpmsumd 22, 22, 2 # reduction - vxor 20, 20, 24 - vxor 22, 22, 20 + xxlor 32+25, 10, 10 + vpermxor 22, 22, 27, 25 - vmr 0, 22 # update hash - -.endm - -.macro SAVE_REGS - stdu 1,-640(1) - mflr 0 - - std 14,112(1) - std 15,120(1) - std 16,128(1) - std 17,136(1) - std 18,144(1) - std 19,152(1) - std 20,160(1) - std 21,168(1) - li 9, 256 - stvx 20, 9, 1 - addi 9, 9, 16 - stvx 21, 9, 1 - addi 9, 9, 16 - stvx 22, 9, 1 - addi 9, 9, 16 - stvx 23, 9, 1 - addi 9, 9, 16 - stvx 24, 9, 1 - addi 9, 9, 16 - stvx 25, 9, 1 - addi 9, 9, 16 - stvx 26, 9, 1 - addi 9, 9, 16 - stvx 27, 9, 1 - addi 9, 9, 16 - stvx 28, 9, 1 - addi 9, 9, 16 - stvx 29, 9, 1 - addi 9, 9, 16 - stvx 30, 9, 1 - addi 9, 9, 16 - stvx 31, 9, 1 - stxv 14, 464(1) - stxv 15, 480(1) - stxv 16, 496(1) - stxv 17, 512(1) - stxv 18, 528(1) - stxv 19, 544(1) - stxv 20, 560(1) - stxv 21, 576(1) - stxv 22, 592(1) - std 0, 656(1) -.endm - -.macro RESTORE_REGS - lxv 14, 464(1) - lxv 15, 480(1) - lxv 16, 496(1) - lxv 17, 512(1) - lxv 18, 528(1) - lxv 19, 544(1) - lxv 20, 560(1) - lxv 21, 576(1) - lxv 22, 592(1) - li 9, 256 - lvx 20, 9, 1 - addi 9, 9, 16 - lvx 21, 9, 1 - addi 9, 9, 16 - lvx 22, 9, 1 - addi 9, 9, 16 - lvx 23, 9, 1 - addi 9, 9, 16 - lvx 24, 9, 1 - addi 9, 9, 16 - lvx 25, 9, 1 - addi 9, 9, 16 - lvx 26, 9, 1 - addi 9, 9, 16 - lvx 27, 9, 1 - addi 9, 9, 16 - lvx 28, 9, 1 - addi 9, 9, 16 - lvx 29, 9, 1 - addi 9, 9, 16 - lvx 30, 9, 1 - addi 9, 9, 16 - lvx 31, 9, 1 - - ld 0, 656(1) - ld 14,112(1) - ld 15,120(1) - ld 16,128(1) - ld 17,136(1) - ld 18,144(1) - ld 19,152(1) - ld 20,160(1) - ld 21,168(1) - - mtlr 0 - addi 1, 1, 640 + vsldoi 23, 22, 22, 8 # swap + vpmsumd 22, 22, 2 # reduction + vxor 23, 23, 24 + vxor \H, 22, 23 .endm +# +# LOAD_HASH_TABLE +# Xi = v0 +# H Poly = v2 +# Hash keys = v3 - v14 +# .macro LOAD_HASH_TABLE # Load Xi lxvb16x 32, 0, 8 # load Xi @@ -557,657 +298,434 @@ lxvd2x 14+32, 10, 8 # H^4h .endm - # - # aes_p10_gcm_encrypt (const void *inp, void *out, size_t len, - # const char *rk, unsigned char iv[16], void *Xip); - # - # r3 - inp - # r4 - out - # r5 - len - # r6 - AES round keys - # r7 - iv and other data - # r8 - Xi, HPoli, hash keys - # - # rounds is at offset 240 in rk - # Xi is at 0 in gcm_table (Xip). - # -_GLOBAL(aes_p10_gcm_encrypt) -.align 5 - - SAVE_REGS - - LOAD_HASH_TABLE - - # initialize ICB: GHASH( IV ), IV - r7 - lxvb16x 30+32, 0, 7 # load IV - v30 - - mr 12, 5 # length - li 11, 0 # block index - - # counter 1 - vxor 31, 31, 31 - vspltisb 22, 1 - vsldoi 31, 31, 22,1 # counter 1 - - # load round key to VSR - lxv 0, 0(6) - lxv 1, 0x10(6) - lxv 2, 0x20(6) - lxv 3, 0x30(6) - lxv 4, 0x40(6) - lxv 5, 0x50(6) - lxv 6, 0x60(6) - lxv 7, 0x70(6) - lxv 8, 0x80(6) - lxv 9, 0x90(6) - lxv 10, 0xa0(6) - - # load rounds - 10 (128), 12 (192), 14 (256) - lwz 9,240(6) - - # - # vxor state, state, w # addroundkey - xxlor 32+29, 0, 0 - vxor 15, 30, 29 # IV + round key - add round key 0 - - cmpdi 9, 10 - beq Loop_aes_gcm_8x - - # load 2 more round keys (v11, v12) - lxv 11, 0xb0(6) - lxv 12, 0xc0(6) - - cmpdi 9, 12 - beq Loop_aes_gcm_8x - - # load 2 more round keys (v11, v12, v13, v14) - lxv 13, 0xd0(6) - lxv 14, 0xe0(6) - cmpdi 9, 14 - beq Loop_aes_gcm_8x - - b aes_gcm_out - -.align 5 -Loop_aes_gcm_8x: - mr 14, 3 - mr 9, 4 - - # - # check partial block - # -Continue_partial_check: - ld 15, 56(7) - cmpdi 15, 0 - beq Continue - bgt Final_block - cmpdi 15, 16 - blt Final_block - -Continue: - # n blcoks - li 10, 128 - divdu 10, 12, 10 # n 128 bytes-blocks - cmpdi 10, 0 - beq Loop_last_block - - vaddudm 30, 30, 31 # IV + counter - vxor 16, 30, 29 - vaddudm 30, 30, 31 - vxor 17, 30, 29 - vaddudm 30, 30, 31 - vxor 18, 30, 29 - vaddudm 30, 30, 31 - vxor 19, 30, 29 - vaddudm 30, 30, 31 - vxor 20, 30, 29 - vaddudm 30, 30, 31 - vxor 21, 30, 29 - vaddudm 30, 30, 31 - vxor 22, 30, 29 - - mtctr 10 - - li 15, 16 - li 16, 32 - li 17, 48 - li 18, 64 - li 19, 80 - li 20, 96 - li 21, 112 - - lwz 10, 240(6) - -Loop_8x_block: - - lxvb16x 15, 0, 14 # load block - lxvb16x 16, 15, 14 # load block - lxvb16x 17, 16, 14 # load block - lxvb16x 18, 17, 14 # load block - lxvb16x 19, 18, 14 # load block - lxvb16x 20, 19, 14 # load block - lxvb16x 21, 20, 14 # load block - lxvb16x 22, 21, 14 # load block - addi 14, 14, 128 - - Loop_aes_middle8x - - xxlor 23+32, 10, 10 - - cmpdi 10, 10 - beq Do_next_ghash - - # 192 bits - xxlor 24+32, 11, 11 - - vcipher 15, 15, 23 - vcipher 16, 16, 23 - vcipher 17, 17, 23 - vcipher 18, 18, 23 - vcipher 19, 19, 23 - vcipher 20, 20, 23 - vcipher 21, 21, 23 - vcipher 22, 22, 23 - - vcipher 15, 15, 24 - vcipher 16, 16, 24 - vcipher 17, 17, 24 - vcipher 18, 18, 24 - vcipher 19, 19, 24 - vcipher 20, 20, 24 - vcipher 21, 21, 24 - vcipher 22, 22, 24 - - xxlor 23+32, 12, 12 - - cmpdi 10, 12 - beq Do_next_ghash - - # 256 bits - xxlor 24+32, 13, 13 - - vcipher 15, 15, 23 - vcipher 16, 16, 23 - vcipher 17, 17, 23 - vcipher 18, 18, 23 - vcipher 19, 19, 23 - vcipher 20, 20, 23 - vcipher 21, 21, 23 - vcipher 22, 22, 23 - - vcipher 15, 15, 24 - vcipher 16, 16, 24 - vcipher 17, 17, 24 - vcipher 18, 18, 24 - vcipher 19, 19, 24 - vcipher 20, 20, 24 - vcipher 21, 21, 24 - vcipher 22, 22, 24 - - xxlor 23+32, 14, 14 - - cmpdi 10, 14 - beq Do_next_ghash - b aes_gcm_out - -Do_next_ghash: - - # - # last round - vcipherlast 15, 15, 23 - vcipherlast 16, 16, 23 - - xxlxor 47, 47, 15 - stxvb16x 47, 0, 9 # store output - xxlxor 48, 48, 16 - stxvb16x 48, 15, 9 # store output - - vcipherlast 17, 17, 23 - vcipherlast 18, 18, 23 - - xxlxor 49, 49, 17 - stxvb16x 49, 16, 9 # store output - xxlxor 50, 50, 18 - stxvb16x 50, 17, 9 # store output - - vcipherlast 19, 19, 23 - vcipherlast 20, 20, 23 - - xxlxor 51, 51, 19 - stxvb16x 51, 18, 9 # store output - xxlxor 52, 52, 20 - stxvb16x 52, 19, 9 # store output - - vcipherlast 21, 21, 23 - vcipherlast 22, 22, 23 - - xxlxor 53, 53, 21 - stxvb16x 53, 20, 9 # store output - xxlxor 54, 54, 22 - stxvb16x 54, 21, 9 # store output - - addi 9, 9, 128 - - # ghash here - ppc_aes_gcm_ghash2_4x - - xxlor 27+32, 0, 0 - vaddudm 30, 30, 31 # IV + counter - vmr 29, 30 - vxor 15, 30, 27 # add round key - vaddudm 30, 30, 31 - vxor 16, 30, 27 - vaddudm 30, 30, 31 - vxor 17, 30, 27 - vaddudm 30, 30, 31 - vxor 18, 30, 27 - vaddudm 30, 30, 31 - vxor 19, 30, 27 - vaddudm 30, 30, 31 - vxor 20, 30, 27 - vaddudm 30, 30, 31 - vxor 21, 30, 27 - vaddudm 30, 30, 31 - vxor 22, 30, 27 - - addi 12, 12, -128 - addi 11, 11, 128 - - bdnz Loop_8x_block - - vmr 30, 29 - stxvb16x 30+32, 0, 7 # update IV - -Loop_last_block: - cmpdi 12, 0 - beq aes_gcm_out - - # loop last few blocks +################################################################################ +# Compute AES and ghash one block at a time. +# r23: AES rounds +# v30: current IV +# vs0: roundkey 0 +# +################################################################################ +SYM_FUNC_START_LOCAL(aes_gcm_crypt_1x) + + cmpdi 5, 16 + bge __More_1x + blr +__More_1x: li 10, 16 - divdu 10, 12, 10 - - mtctr 10 - - lwz 10, 240(6) - - cmpdi 12, 16 - blt Final_block - -Next_rem_block: - lxvb16x 15, 0, 14 # load block - - Loop_aes_middle_1x - - xxlor 23+32, 10, 10 - - cmpdi 10, 10 - beq Do_next_1x - - # 192 bits - xxlor 24+32, 11, 11 - - vcipher 15, 15, 23 - vcipher 15, 15, 24 - - xxlor 23+32, 12, 12 + divdu 12, 5, 10 + + xxlxor 32+15, 32+30, 0 + + # Pre-load 8 AES rounds to scratch vectors. + xxlor 32+16, 1, 1 + xxlor 32+17, 2, 2 + xxlor 32+18, 3, 3 + xxlor 32+19, 4, 4 + xxlor 32+20, 5, 5 + xxlor 32+21, 6, 6 + xxlor 32+28, 7, 7 + xxlor 32+29, 8, 8 + lwz 23, 240(6) # n rounds + addi 22, 23, -9 # remaing AES rounds - cmpdi 10, 12 - beq Do_next_1x - - # 256 bits - xxlor 24+32, 13, 13 - - vcipher 15, 15, 23 - vcipher 15, 15, 24 - - xxlor 23+32, 14, 14 - - cmpdi 10, 14 - beq Do_next_1x - -Do_next_1x: - vcipherlast 15, 15, 23 - - xxlxor 47, 47, 15 - stxvb16x 47, 0, 9 # store output - addi 14, 14, 16 - addi 9, 9, 16 - - vmr 28, 15 - ppc_update_hash_1x - - addi 12, 12, -16 - addi 11, 11, 16 - xxlor 19+32, 0, 0 - vaddudm 30, 30, 31 # IV + counter - vxor 15, 30, 19 # add round key - - bdnz Next_rem_block - - li 15, 0 - std 15, 56(7) # clear partial? - stxvb16x 30+32, 0, 7 # update IV cmpdi 12, 0 - beq aes_gcm_out - -Final_block: - lwz 10, 240(6) - Loop_aes_middle_1x - - xxlor 23+32, 10, 10 - - cmpdi 10, 10 - beq Do_final_1x - - # 192 bits - xxlor 24+32, 11, 11 - - vcipher 15, 15, 23 - vcipher 15, 15, 24 - - xxlor 23+32, 12, 12 - - cmpdi 10, 12 - beq Do_final_1x - - # 256 bits - xxlor 24+32, 13, 13 - - vcipher 15, 15, 23 - vcipher 15, 15, 24 + bgt __Loop_1x + blr - xxlor 23+32, 14, 14 +__Loop_1x: + mtctr 22 + addi 10, 6, 144 + vcipher 15, 15, 16 + vcipher 15, 15, 17 + vcipher 15, 15, 18 + vcipher 15, 15, 19 + vcipher 15, 15, 20 + vcipher 15, 15, 21 + vcipher 15, 15, 28 + vcipher 15, 15, 29 - cmpdi 10, 14 - beq Do_final_1x +__Loop_aes_1state: + lxv 32+1, 0(10) + vcipher 15, 15, 1 + addi 10, 10, 16 + bdnz __Loop_aes_1state + lxv 32+1, 0(10) # last round key + lxvb16x 11, 0, 14 # load input block + vcipherlast 15, 15, 1 + + xxlxor 32+15, 32+15, 11 + stxvb16x 32+15, 0, 9 # store output + addi 14, 14, 16 + addi 9, 9, 16 -Do_final_1x: - vcipherlast 15, 15, 23 + cmpdi 24, 0 # decrypt? + bne __Encrypt_1x + xxlor 15+32, 11, 11 +__Encrypt_1x: + vxor 15, 15, 0 + PPC_GHASH1x 0, 15 - # check partial block - li 21, 0 # encrypt - ld 15, 56(7) # partial? - cmpdi 15, 0 - beq Normal_block - bl Do_partial_block + addi 5, 5, -16 + addi 11, 11, 16 + vadduwm 30, 30, 31 # IV + counter + xxlxor 32+15, 32+30, 0 + addi 12, 12, -1 cmpdi 12, 0 - ble aes_gcm_out + bgt __Loop_1x - b Continue_partial_check - -Normal_block: - lxvb16x 15, 0, 14 # load last block - xxlxor 47, 47, 15 - - # create partial block mask - li 15, 16 - sub 15, 15, 12 # index to the mask - - vspltisb 16, -1 # first 16 bytes - 0xffff...ff - vspltisb 17, 0 # second 16 bytes - 0x0000...00 - li 10, 192 - stvx 16, 10, 1 + stxvb16x 32+30, 0, 7 # update IV + stxvb16x 32+0, 0, 8 # update Xi + blr +SYM_FUNC_END(aes_gcm_crypt_1x) + +################################################################################ +# Process a normal partial block when we come here. +# Compute partial mask, Load and store partial block to stack. +# Update partial_len and pblock. +# pblock is (encrypted ^ AES state) for encrypt +# and (input ^ AES state) for decrypt. +# +################################################################################ +SYM_FUNC_START_LOCAL(__Process_partial) + + # create partial mask + vspltisb 16, -1 + li 12, 16 + sub 12, 12, 5 + sldi 12, 12, 3 + mtvsrdd 32+17, 0, 12 + vslo 16, 16, 17 # partial block mask + + lxvb16x 11, 0, 14 # load partial block + xxland 11, 11, 32+16 + + # AES crypt partial + xxlxor 32+15, 32+30, 0 + lwz 23, 240(6) # n rounds + addi 22, 23, -1 # loop - 1 + mtctr 22 + addi 10, 6, 16 + +__Loop_aes_pstate: + lxv 32+1, 0(10) + vcipher 15, 15, 1 addi 10, 10, 16 - stvx 17, 10, 1 - - addi 10, 1, 192 - lxvb16x 16, 15, 10 # load partial block mask - xxland 47, 47, 16 - - vmr 28, 15 - ppc_update_hash_1x + bdnz __Loop_aes_pstate + lxv 32+1, 0(10) # last round key + vcipherlast 15, 15, 1 - # * should store only the remaining bytes. - bl Write_partial_block - - stxvb16x 30+32, 0, 7 # update IV - std 12, 56(7) # update partial? - li 16, 16 + xxlxor 32+15, 32+15, 11 + vand 15, 15, 16 - stxvb16x 32, 0, 8 # write out Xi - stxvb16x 32, 16, 8 # write out Xi - b aes_gcm_out - - # - # Compute data mask - # -.macro GEN_MASK _mask _start _end - vspltisb 16, -1 # first 16 bytes - 0xffff...ff - vspltisb 17, 0 # second 16 bytes - 0x0000...00 - li 10, 192 - stxvb16x 17+32, 10, 1 - add 10, 10, \_start - stxvb16x 16+32, 10, 1 - add 10, 10, \_end - stxvb16x 17+32, 10, 1 - - addi 10, 1, 192 - lxvb16x \_mask, 0, 10 # load partial block mask -.endm + # AES crypt output v15 + # Write partial + li 10, 224 + stxvb16x 15+32, 10, 1 # write v15 to stack + addi 10, 1, 223 + addi 12, 9, -1 + mtctr 5 # partial block len +__Write_partial: + lbzu 22, 1(10) + stbu 22, 1(12) + bdnz __Write_partial + + cmpdi 24, 0 # decrypt? + bne __Encrypt_partial + xxlor 32+15, 11, 11 # decrypt using the input block +__Encrypt_partial: + #vxor 15, 15, 0 # ^ previous hash + #PPC_GHASH1x 0, 15 + + add 14, 14, 5 + add 9, 9, 5 + std 5, 56(7) # update partial + sub 11, 11, 5 + li 5, 0 # done last byte - # - # Handle multiple partial blocks for encrypt and decrypt - # operations. - # -SYM_FUNC_START_LOCAL(Do_partial_block) - add 17, 15, 5 - cmpdi 17, 16 - bgt Big_block - GEN_MASK 18, 15, 5 - b _Partial -SYM_FUNC_END(Do_partial_block) -Big_block: + # + # Don't increase IV since this is the last partial. + # It should get updated in gcm_update if no more data blocks. + #vadduwm 30, 30, 31 # increase IV + stxvb16x 32+30, 0, 7 # update IV + li 10, 64 + stxvb16x 32+0, 0, 8 # Update X1 + stxvb16x 32+15, 10, 7 # Update pblock + blr +SYM_FUNC_END(__Process_partial) + +################################################################################ +# Combine partial blocks and ghash when we come here. +# +# The partial block has to be shifted to the right location to encrypt/decrypt +# and compute ghash if combing the previous partial block is needed. +# - Compute ghash for a full block. Clear Partial_len and pblock. Update IV. +# Write Xi. +# - Don't compute ghash if not full block. gcm_update will take care of it +# is the last block. Update Partial_len and pblock. +# +################################################################################ +SYM_FUNC_START_LOCAL(__Combine_partial) + + ld 12, 56(7) + mr 21, 5 # these bytes to be processed + + li 17, 0 li 16, 16 - GEN_MASK 18, 15, 16 - -_Partial: - lxvb16x 17+32, 0, 14 # load last block - sldi 16, 15, 3 - mtvsrdd 32+16, 0, 16 - vsro 17, 17, 16 - xxlxor 47, 47, 17+32 - xxland 47, 47, 18 - - vxor 0, 0, 0 # clear Xi - vmr 28, 15 - - cmpdi 21, 0 # encrypt/decrypt ops? - beq Skip_decrypt - xxland 32+28, 32+17, 18 - -Skip_decrypt: - - ppc_update_hash_1x + sub 22, 16, 12 # bytes to complete a block + sub 17, 22, 5 # remaining bytes in a block + cmpdi 5, 16 + ble __Inp_msg_less16 + li 17, 0 + mr 21, 22 + b __Combine_continue +__Inp_msg_less16: + cmpd 22, 5 + bgt __Combine_continue + li 17, 0 + mr 21, 22 # these bytes to be processed + +__Combine_continue: + # load msg and shift to the proper location and mask + vspltisb 16, -1 + sldi 15, 12, 3 + mtvsrdd 32+17, 0, 15 + vslo 16, 16, 17 + vsro 16, 16, 17 + sldi 15, 17, 3 + mtvsrdd 32+17, 0, 15 + vsro 16, 16, 17 + vslo 16, 16, 17 # mask + + lxvb16x 32+19, 0, 14 # load partial block + sldi 15, 12, 3 + mtvsrdd 32+17, 0, 15 + vsro 19, 19, 17 # 0x00..xxxx??..?? + sldi 15, 17, 3 + mtvsrdd 32+17, 0, 15 + vsro 19, 19, 17 # 0x00..xxxx + vslo 19, 19, 17 # shift back to form 0x00..xxxx00..00 + + # AES crypt partial + xxlxor 32+15, 32+30, 0 + lwz 23, 240(6) # n rounds + addi 22, 23, -1 # loop - 1 + mtctr 22 + addi 10, 6, 16 + +__Loop_aes_cpstate: + lxv 32+1, 0(10) + vcipher 15, 15, 1 + addi 10, 10, 16 + bdnz __Loop_aes_cpstate + lxv 32+1, 0(10) # last round key + vcipherlast 15, 15, 1 - li 16, 16 - lxvb16x 32+29, 16, 8 - vxor 0, 0, 29 - stxvb16x 32, 0, 8 # save Xi - stxvb16x 32, 16, 8 # save Xi - - # store partial block - # loop the rest of the stream if any - sldi 16, 15, 3 - mtvsrdd 32+16, 0, 16 - vslo 15, 15, 16 - #stxvb16x 15+32, 0, 9 # last block + vxor 15, 15, 19 + vand 15, 15, 16 - li 16, 16 - sub 17, 16, 15 # 16 - partial - - add 16, 15, 5 - cmpdi 16, 16 - bgt Larger_16 - mr 17, 5 -Larger_16: - - # write partial - li 10, 192 - stxvb16x 15+32, 10, 1 # save current block - - addi 10, 9, -1 - addi 16, 1, 191 - mtctr 17 # move partial byte count - -Write_last_partial: - lbzu 18, 1(16) - stbu 18, 1(10) - bdnz Write_last_partial - # Complete loop partial - - add 14, 14, 17 - add 9, 9, 17 - sub 12, 12, 17 - add 11, 11, 17 - - add 15, 15, 5 - cmpdi 15, 16 - blt Save_partial - - vaddudm 30, 30, 31 - stxvb16x 30+32, 0, 7 # update IV - xxlor 32+29, 0, 0 - vxor 15, 30, 29 # IV + round key - add round key 0 - li 15, 0 - std 15, 56(7) # partial done - clear - b Partial_done -Save_partial: - std 15, 56(7) # partial - -Partial_done: + # AES crypt output v15 + # Write partial + li 10, 224 + stxvb16x 15+32, 10, 1 # write v15 to stack + addi 10, 1, 223 + add 10, 10, 12 # add offset + addi 15, 9, -1 + mtctr 21 # partial block len +__Write_combine_partial: + lbzu 22, 1(10) + stbu 22, 1(15) + bdnz __Write_combine_partial + + add 14, 14, 21 + add 11, 11, 21 + add 9, 9, 21 + sub 5, 5, 21 + + # Encrypt/Decrypt? + cmpdi 24, 0 # decrypt? + bne __Encrypt_combine_partial + vmr 15, 19 # decrypt using the input block + +__Encrypt_combine_partial: + # + # Update partial flag and combine ghash. +__Update_partial_ghash: + li 10, 64 + lxvb16x 32+17, 10, 7 # load previous pblock + add 12, 12, 21 # combined pprocessed + vxor 15, 15, 17 # combined pblock + + cmpdi 12, 16 + beq __Clear_partial_flag + std 12, 56(7) # update partial len + stxvb16x 32+15, 10, 7 # Update current pblock blr - # - # Write partial block - # r9 - output - # r12 - remaining bytes - # v15 - partial input data - # -SYM_FUNC_START_LOCAL(Write_partial_block) - li 10, 192 - stxvb16x 15+32, 10, 1 # last block - - addi 10, 9, -1 - addi 16, 1, 191 - - mtctr 12 # remaining bytes - li 15, 0 - -Write_last_byte: - lbzu 14, 1(16) - stbu 14, 1(10) - bdnz Write_last_byte +__Clear_partial_flag: + li 12, 0 + std 12, 56(7) + # Update IV and ghash here + vadduwm 30, 30, 31 # increase IV + stxvb16x 32+30, 0, 7 # update IV + + # v15 either is either (input blockor encrypted)^(AES state) + vxor 15, 15, 0 + PPC_GHASH1x 0, 15 + stxvb16x 32+0, 10, 7 # update pblock for debug? + stxvb16x 32+0, 0, 8 # update Xi blr -SYM_FUNC_END(Write_partial_block) +SYM_FUNC_END(__Combine_partial) -aes_gcm_out: - # out = state - stxvb16x 32, 0, 8 # write out Xi - add 3, 11, 12 # return count +################################################################################ +# gcm_update(iv, Xi) - compute last hash +# +################################################################################ +SYM_FUNC_START(gcm_update) - RESTORE_REGS - blr + ld 10, 56(3) + cmpdi 10, 0 + beq __no_update - # - # 8x Decrypt - # -_GLOBAL(aes_p10_gcm_decrypt) -.align 5 + lxvb16x 32, 0, 4 # load Xi + # load Hash - h^4, h^3, h^2, h + li 10, 32 + lxvd2x 2+32, 10, 4 # H Poli + li 10, 48 + lxvd2x 3+32, 10, 4 # Hl + li 10, 64 + lxvd2x 4+32, 10, 4 # H + li 10, 80 + lxvd2x 5+32, 10, 4 # Hh + + addis 11, 2, permx@toc@ha + addi 11, 11, permx@toc@l + lxv 10, 0(11) # vs10: vpermxor vector + + li 9, 64 + lxvb16x 32+6, 9, 3 # load pblock + vxor 6, 6, 0 + + vxor 1, 1, 1 + vpmsumd 12, 3, 6 # L + vpmsumd 13, 4, 6 # M + vpmsumd 14, 5, 6 # H + vpmsumd 17, 12, 2 # reduction + vsldoi 15, 13, 1, 8 # mL + vsldoi 16, 1, 13, 8 # mH + vxor 12, 12, 15 # LL + LL + vxor 14, 14, 16 # HH + HH + xxlor 32+15, 10, 10 + vpermxor 12, 12, 17, 15 + vsldoi 13, 12, 12, 8 # swap + vpmsumd 12, 12, 2 # reduction + vxor 13, 13, 14 + vxor 7, 12, 13 + + #vxor 0, 0, 0 + #stxvb16x 32+0, 9, 3 + li 10, 0 + std 10, 56(3) + stxvb16x 32+7, 0, 4 + +__no_update: + blr +SYM_FUNC_END(gcm_update) + +################################################################################ +# aes_p10_gcm_encrypt (const void *inp, void *out, size_t len, +# const char *rk, unsigned char iv[16], void *Xip); +# +# r3 - inp +# r4 - out +# r5 - len +# r6 - AES round keys +# r7 - iv and other data +# r8 - Xi, HPoli, hash keys +# +# rounds is at offset 240 in rk +# Xi is at 0 in gcm_table (Xip). +# +################################################################################ +SYM_FUNC_START(aes_p10_gcm_encrypt) + + cmpdi 5, 0 + ble __Invalid_msg_len SAVE_REGS - LOAD_HASH_TABLE # initialize ICB: GHASH( IV ), IV - r7 lxvb16x 30+32, 0, 7 # load IV - v30 - mr 12, 5 # length - li 11, 0 # block index + mr 14, 3 + mr 9, 4 # counter 1 vxor 31, 31, 31 vspltisb 22, 1 vsldoi 31, 31, 22,1 # counter 1 - # load round key to VSR - lxv 0, 0(6) - lxv 1, 0x10(6) - lxv 2, 0x20(6) - lxv 3, 0x30(6) - lxv 4, 0x40(6) - lxv 5, 0x50(6) - lxv 6, 0x60(6) - lxv 7, 0x70(6) - lxv 8, 0x80(6) - lxv 9, 0x90(6) - lxv 10, 0xa0(6) + addis 11, 2, permx@toc@ha + addi 11, 11, permx@toc@l + lxv 10, 0(11) # vs10: vpermxor vector + li 11, 0 + + # load 9 round keys to VSR + lxv 0, 0(6) # round key 0 + lxv 1, 16(6) # round key 1 + lxv 2, 32(6) # round key 2 + lxv 3, 48(6) # round key 3 + lxv 4, 64(6) # round key 4 + lxv 5, 80(6) # round key 5 + lxv 6, 96(6) # round key 6 + lxv 7, 112(6) # round key 7 + lxv 8, 128(6) # round key 8 # load rounds - 10 (128), 12 (192), 14 (256) - lwz 9,240(6) + lwz 23, 240(6) # n rounds + li 24, 1 # encrypt +__Process_encrypt: # - # vxor state, state, w # addroundkey - xxlor 32+29, 0, 0 - vxor 15, 30, 29 # IV + round key - add round key 0 - - cmpdi 9, 10 - beq Loop_aes_gcm_8x_dec - - # load 2 more round keys (v11, v12) - lxv 11, 0xb0(6) - lxv 12, 0xc0(6) - - cmpdi 9, 12 - beq Loop_aes_gcm_8x_dec - - # load 2 more round keys (v11, v12, v13, v14) - lxv 13, 0xd0(6) - lxv 14, 0xe0(6) - cmpdi 9, 14 - beq Loop_aes_gcm_8x_dec + # Process different blocks + # + ld 12, 56(7) + cmpdi 12, 0 + bgt __Do_combine_enc + cmpdi 5, 128 + blt __Process_more_enc + +# +# Process 8x AES/GCM blocks +# +__Process_8x_enc: + # 8x blcoks + li 10, 128 + divdu 12, 5, 10 # n 128 bytes-blocks - b aes_gcm_out + addi 12, 12, -1 # loop - 1 -.align 5 -Loop_aes_gcm_8x_dec: - mr 14, 3 - mr 9, 4 + vmr 15, 30 # first state: IV + vadduwm 16, 15, 31 # state + counter + vadduwm 17, 16, 31 + vadduwm 18, 17, 31 + vadduwm 19, 18, 31 + vadduwm 20, 19, 31 + vadduwm 21, 20, 31 + vadduwm 22, 21, 31 + xxlor 9, 32+22, 32+22 # save last state - # - # check partial block - # -Continue_partial_check_dec: - ld 15, 56(7) - cmpdi 15, 0 - beq Continue_dec - bgt Final_block_dec - cmpdi 15, 16 - blt Final_block_dec - -Continue_dec: - # n blcoks - li 10, 128 - divdu 10, 12, 10 # n 128 bytes-blocks - cmpdi 10, 0 - beq Loop_last_block_dec - - vaddudm 30, 30, 31 # IV + counter - vxor 16, 30, 29 - vaddudm 30, 30, 31 - vxor 17, 30, 29 - vaddudm 30, 30, 31 - vxor 18, 30, 29 - vaddudm 30, 30, 31 - vxor 19, 30, 29 - vaddudm 30, 30, 31 - vxor 20, 30, 29 - vaddudm 30, 30, 31 - vxor 21, 30, 29 - vaddudm 30, 30, 31 - vxor 22, 30, 29 - - mtctr 10 + # vxor state, state, w # addroundkey + xxlor 32+29, 0, 0 + vxor 15, 15, 29 # IV + round key - add round key 0 + vxor 16, 16, 29 + vxor 17, 17, 29 + vxor 18, 18, 29 + vxor 19, 19, 29 + vxor 20, 20, 29 + vxor 21, 21, 29 + vxor 22, 22, 29 li 15, 16 li 16, 32 @@ -1217,305 +735,502 @@ Continue_dec: li 20, 96 li 21, 112 - lwz 10, 240(6) - -Loop_8x_block_dec: - - lxvb16x 15, 0, 14 # load block - lxvb16x 16, 15, 14 # load block - lxvb16x 17, 16, 14 # load block - lxvb16x 18, 17, 14 # load block - lxvb16x 19, 18, 14 # load block - lxvb16x 20, 19, 14 # load block - lxvb16x 21, 20, 14 # load block - lxvb16x 22, 21, 14 # load block - addi 14, 14, 128 - - Loop_aes_middle8x - - xxlor 23+32, 10, 10 - - cmpdi 10, 10 - beq Do_next_ghash_dec - - # 192 bits - xxlor 24+32, 11, 11 - - vcipher 15, 15, 23 - vcipher 16, 16, 23 - vcipher 17, 17, 23 - vcipher 18, 18, 23 - vcipher 19, 19, 23 - vcipher 20, 20, 23 - vcipher 21, 21, 23 - vcipher 22, 22, 23 - - vcipher 15, 15, 24 - vcipher 16, 16, 24 - vcipher 17, 17, 24 - vcipher 18, 18, 24 - vcipher 19, 19, 24 - vcipher 20, 20, 24 - vcipher 21, 21, 24 - vcipher 22, 22, 24 - - xxlor 23+32, 12, 12 - - cmpdi 10, 12 - beq Do_next_ghash_dec - - # 256 bits - xxlor 24+32, 13, 13 - - vcipher 15, 15, 23 - vcipher 16, 16, 23 - vcipher 17, 17, 23 - vcipher 18, 18, 23 - vcipher 19, 19, 23 - vcipher 20, 20, 23 - vcipher 21, 21, 23 - vcipher 22, 22, 23 - - vcipher 15, 15, 24 - vcipher 16, 16, 24 - vcipher 17, 17, 24 - vcipher 18, 18, 24 - vcipher 19, 19, 24 - vcipher 20, 20, 24 - vcipher 21, 21, 24 - vcipher 22, 22, 24 - - xxlor 23+32, 14, 14 - - cmpdi 10, 14 - beq Do_next_ghash_dec - b aes_gcm_out + # + # Pre-compute first 8 AES state and leave 1/3/5 more rounds + # for the loop. + # + addi 22, 23, -9 # process 8 keys + mtctr 22 # AES key loop + addi 10, 6, 144 -Do_next_ghash_dec: + LOOP_8AES_STATE # process 8 AES keys - # - # last round - vcipherlast 15, 15, 23 - vcipherlast 16, 16, 23 - - xxlxor 47, 47, 15 - stxvb16x 47, 0, 9 # store output - xxlxor 48, 48, 16 - stxvb16x 48, 15, 9 # store output - - vcipherlast 17, 17, 23 - vcipherlast 18, 18, 23 - - xxlxor 49, 49, 17 - stxvb16x 49, 16, 9 # store output - xxlxor 50, 50, 18 - stxvb16x 50, 17, 9 # store output - - vcipherlast 19, 19, 23 - vcipherlast 20, 20, 23 - - xxlxor 51, 51, 19 - stxvb16x 51, 18, 9 # store output - xxlxor 52, 52, 20 - stxvb16x 52, 19, 9 # store output - - vcipherlast 21, 21, 23 - vcipherlast 22, 22, 23 - - xxlxor 53, 53, 21 - stxvb16x 53, 20, 9 # store output - xxlxor 54, 54, 22 - stxvb16x 54, 21, 9 # store output - - addi 9, 9, 128 - - xxlor 15+32, 15, 15 - xxlor 16+32, 16, 16 - xxlor 17+32, 17, 17 - xxlor 18+32, 18, 18 - xxlor 19+32, 19, 19 - xxlor 20+32, 20, 20 - xxlor 21+32, 21, 21 - xxlor 22+32, 22, 22 +__PreLoop_aes_state: + lxv 32+1, 0(10) # round key + AES_CIPHER_8x vcipher 15 1 + addi 10, 10, 16 + bdnz __PreLoop_aes_state + lxv 32+1, 0(10) # last round key (v1) + + cmpdi 12, 0 # Only one loop (8 block) + beq __Finish_ghash + +# +# Loop 8x blocks and compute ghash +# +__Loop_8x_block_enc: + vcipherlast 15, 15, 1 + vcipherlast 16, 16, 1 + vcipherlast 17, 17, 1 + vcipherlast 18, 18, 1 + vcipherlast 19, 19, 1 + vcipherlast 20, 20, 1 + vcipherlast 21, 21, 1 + vcipherlast 22, 22, 1 + + lxvb16x 32+23, 0, 14 # load block + lxvb16x 32+24, 15, 14 # load block + lxvb16x 32+25, 16, 14 # load block + lxvb16x 32+26, 17, 14 # load block + lxvb16x 32+27, 18, 14 # load block + lxvb16x 32+28, 19, 14 # load block + lxvb16x 32+29, 20, 14 # load block + lxvb16x 32+30, 21, 14 # load block + addi 14, 14, 128 + + vxor 15, 15, 23 + vxor 16, 16, 24 + vxor 17, 17, 25 + vxor 18, 18, 26 + vxor 19, 19, 27 + vxor 20, 20, 28 + vxor 21, 21, 29 + vxor 22, 22, 30 + + stxvb16x 47, 0, 9 # store output + stxvb16x 48, 15, 9 # store output + stxvb16x 49, 16, 9 # store output + stxvb16x 50, 17, 9 # store output + stxvb16x 51, 18, 9 # store output + stxvb16x 52, 19, 9 # store output + stxvb16x 53, 20, 9 # store output + stxvb16x 54, 21, 9 # store output + addi 9, 9, 128 # ghash here - ppc_aes_gcm_ghash2_4x - - xxlor 27+32, 0, 0 - vaddudm 30, 30, 31 # IV + counter - vmr 29, 30 - vxor 15, 30, 27 # add round key - vaddudm 30, 30, 31 - vxor 16, 30, 27 - vaddudm 30, 30, 31 - vxor 17, 30, 27 - vaddudm 30, 30, 31 - vxor 18, 30, 27 - vaddudm 30, 30, 31 - vxor 19, 30, 27 - vaddudm 30, 30, 31 - vxor 20, 30, 27 - vaddudm 30, 30, 31 - vxor 21, 30, 27 - vaddudm 30, 30, 31 - vxor 22, 30, 27 - - addi 12, 12, -128 + vxor 15, 15, 0 + PPC_GHASH4x 0, 15, 16, 17, 18 + + vxor 19, 19, 0 + PPC_GHASH4x 0, 19, 20, 21, 22 + + xxlor 32+15, 9, 9 # last state + vadduwm 15, 15, 31 # state + counter + vadduwm 16, 15, 31 + vadduwm 17, 16, 31 + vadduwm 18, 17, 31 + vadduwm 19, 18, 31 + vadduwm 20, 19, 31 + vadduwm 21, 20, 31 + vadduwm 22, 21, 31 + xxlor 9, 32+22, 32+22 # save last state + + xxlor 32+27, 0, 0 # restore roundkey 0 + vxor 15, 15, 27 # IV + round key - add round key 0 + vxor 16, 16, 27 + vxor 17, 17, 27 + vxor 18, 18, 27 + vxor 19, 19, 27 + vxor 20, 20, 27 + vxor 21, 21, 27 + vxor 22, 22, 27 + + addi 5, 5, -128 addi 11, 11, 128 - bdnz Loop_8x_block_dec - - vmr 30, 29 - stxvb16x 30+32, 0, 7 # update IV - -Loop_last_block_dec: - cmpdi 12, 0 - beq aes_gcm_out - - # loop last few blocks - li 10, 16 - divdu 10, 12, 10 - - mtctr 10 - - lwz 10, 240(6) - - cmpdi 12, 16 - blt Final_block_dec - -Next_rem_block_dec: - lxvb16x 15, 0, 14 # load block - - Loop_aes_middle_1x - - xxlor 23+32, 10, 10 + LOOP_8AES_STATE # process 8 AES keys + mtctr 22 # AES key loop + addi 10, 6, 144 +__LastLoop_aes_state: + lxv 32+1, 0(10) # round key + AES_CIPHER_8x vcipher 15 1 + addi 10, 10, 16 + bdnz __LastLoop_aes_state + lxv 32+1, 0(10) # last round key (v1) - cmpdi 10, 10 - beq Do_next_1x_dec + addi 12, 12, -1 + cmpdi 12, 0 + bne __Loop_8x_block_enc + +__Finish_ghash: + vcipherlast 15, 15, 1 + vcipherlast 16, 16, 1 + vcipherlast 17, 17, 1 + vcipherlast 18, 18, 1 + vcipherlast 19, 19, 1 + vcipherlast 20, 20, 1 + vcipherlast 21, 21, 1 + vcipherlast 22, 22, 1 + + lxvb16x 32+23, 0, 14 # load block + lxvb16x 32+24, 15, 14 # load block + lxvb16x 32+25, 16, 14 # load block + lxvb16x 32+26, 17, 14 # load block + lxvb16x 32+27, 18, 14 # load block + lxvb16x 32+28, 19, 14 # load block + lxvb16x 32+29, 20, 14 # load block + lxvb16x 32+30, 21, 14 # load block + addi 14, 14, 128 + + vxor 15, 15, 23 + vxor 16, 16, 24 + vxor 17, 17, 25 + vxor 18, 18, 26 + vxor 19, 19, 27 + vxor 20, 20, 28 + vxor 21, 21, 29 + vxor 22, 22, 30 + + stxvb16x 47, 0, 9 # store output + stxvb16x 48, 15, 9 # store output + stxvb16x 49, 16, 9 # store output + stxvb16x 50, 17, 9 # store output + stxvb16x 51, 18, 9 # store output + stxvb16x 52, 19, 9 # store output + stxvb16x 53, 20, 9 # store output + stxvb16x 54, 21, 9 # store output + addi 9, 9, 128 + + vxor 15, 15, 0 + PPC_GHASH4x 0, 15, 16, 17, 18 + + vxor 19, 19, 0 + PPC_GHASH4x 0, 19, 20, 21, 22 + + xxlor 30+32, 9, 9 # last ctr + vadduwm 30, 30, 31 # increase ctr + stxvb16x 32+30, 0, 7 # update IV + stxvb16x 32+0, 0, 8 # update Xi + + addi 5, 5, -128 + addi 11, 11, 128 - # 192 bits - xxlor 24+32, 11, 11 + # + # Done 8x blocks + # - vcipher 15, 15, 23 - vcipher 15, 15, 24 + cmpdi 5, 0 + beq aes_gcm_out - xxlor 23+32, 12, 12 +__Process_more_enc: + li 24, 1 # encrypt + bl aes_gcm_crypt_1x + cmpdi 5, 0 + beq aes_gcm_out - cmpdi 10, 12 - beq Do_next_1x_dec + bl __Process_partial + cmpdi 5, 0 + beq aes_gcm_out +__Do_combine_enc: + bl __Combine_partial + cmpdi 5, 0 + bgt __Process_encrypt + b aes_gcm_out - # 256 bits - xxlor 24+32, 13, 13 +SYM_FUNC_END(aes_p10_gcm_encrypt) - vcipher 15, 15, 23 - vcipher 15, 15, 24 +################################################################################ +# aes_p10_gcm_decrypt (const void *inp, void *out, size_t len, +# const char *rk, unsigned char iv[16], void *Xip); +# 8x Decrypt +# +################################################################################ +SYM_FUNC_START(aes_p10_gcm_decrypt) - xxlor 23+32, 14, 14 + cmpdi 5, 0 + ble __Invalid_msg_len - cmpdi 10, 14 - beq Do_next_1x_dec + SAVE_REGS + LOAD_HASH_TABLE -Do_next_1x_dec: - vcipherlast 15, 15, 23 + # initialize ICB: GHASH( IV ), IV - r7 + lxvb16x 30+32, 0, 7 # load IV - v30 - xxlxor 47, 47, 15 - stxvb16x 47, 0, 9 # store output - addi 14, 14, 16 - addi 9, 9, 16 + mr 14, 3 + mr 9, 4 - xxlor 28+32, 15, 15 - #vmr 28, 15 - ppc_update_hash_1x + # counter 1 + vxor 31, 31, 31 + vspltisb 22, 1 + vsldoi 31, 31, 22,1 # counter 1 - addi 12, 12, -16 - addi 11, 11, 16 - xxlor 19+32, 0, 0 - vaddudm 30, 30, 31 # IV + counter - vxor 15, 30, 19 # add round key + addis 11, 2, permx@toc@ha + addi 11, 11, permx@toc@l + lxv 10, 0(11) # vs10: vpermxor vector + li 11, 0 + + # load 9 round keys to VSR + lxv 0, 0(6) # round key 0 + lxv 1, 16(6) # round key 1 + lxv 2, 32(6) # round key 2 + lxv 3, 48(6) # round key 3 + lxv 4, 64(6) # round key 4 + lxv 5, 80(6) # round key 5 + lxv 6, 96(6) # round key 6 + lxv 7, 112(6) # round key 7 + lxv 8, 128(6) # round key 8 - bdnz Next_rem_block_dec + # load rounds - 10 (128), 12 (192), 14 (256) + lwz 23, 240(6) # n rounds + li 24, 0 # decrypt - li 15, 0 - std 15, 56(7) # clear partial? - stxvb16x 30+32, 0, 7 # update IV +__Process_decrypt: + # + # Process different blocks + # + ld 12, 56(7) cmpdi 12, 0 - beq aes_gcm_out - -Final_block_dec: - lwz 10, 240(6) - Loop_aes_middle_1x - - xxlor 23+32, 10, 10 - - cmpdi 10, 10 - beq Do_final_1x_dec + bgt __Do_combine_dec + cmpdi 5, 128 + blt __Process_more_dec + +# +# Process 8x AES/GCM blocks +# +__Process_8x_dec: + # 8x blcoks + li 10, 128 + divdu 12, 5, 10 # n 128 bytes-blocks - # 192 bits - xxlor 24+32, 11, 11 + addi 12, 12, -1 # loop - 1 - vcipher 15, 15, 23 - vcipher 15, 15, 24 + vmr 15, 30 # first state: IV + vadduwm 16, 15, 31 # state + counter + vadduwm 17, 16, 31 + vadduwm 18, 17, 31 + vadduwm 19, 18, 31 + vadduwm 20, 19, 31 + vadduwm 21, 20, 31 + vadduwm 22, 21, 31 + xxlor 9, 32+22, 32+22 # save last state - xxlor 23+32, 12, 12 + # vxor state, state, w # addroundkey + xxlor 32+29, 0, 0 + vxor 15, 15, 29 # IV + round key - add round key 0 + vxor 16, 16, 29 + vxor 17, 17, 29 + vxor 18, 18, 29 + vxor 19, 19, 29 + vxor 20, 20, 29 + vxor 21, 21, 29 + vxor 22, 22, 29 - cmpdi 10, 12 - beq Do_final_1x_dec + li 15, 16 + li 16, 32 + li 17, 48 + li 18, 64 + li 19, 80 + li 20, 96 + li 21, 112 - # 256 bits - xxlor 24+32, 13, 13 + # + # Pre-compute first 8 AES state and leave 1/3/5 more rounds + # for the loop. + # + addi 22, 23, -9 # process 8 keys + mtctr 22 # AES key loop + addi 10, 6, 144 - vcipher 15, 15, 23 - vcipher 15, 15, 24 + LOOP_8AES_STATE # process 8 AES keys - xxlor 23+32, 14, 14 +__PreLoop_aes_state_dec: + lxv 32+1, 0(10) # round key + AES_CIPHER_8x vcipher 15 1 + addi 10, 10, 16 + bdnz __PreLoop_aes_state_dec + lxv 32+1, 0(10) # last round key (v1) + + cmpdi 12, 0 # Only one loop (8 block) + beq __Finish_ghash_dec + +# +# Loop 8x blocks and compute ghash +# +__Loop_8x_block_dec: + vcipherlast 15, 15, 1 + vcipherlast 16, 16, 1 + vcipherlast 17, 17, 1 + vcipherlast 18, 18, 1 + vcipherlast 19, 19, 1 + vcipherlast 20, 20, 1 + vcipherlast 21, 21, 1 + vcipherlast 22, 22, 1 + + lxvb16x 32+23, 0, 14 # load block + lxvb16x 32+24, 15, 14 # load block + lxvb16x 32+25, 16, 14 # load block + lxvb16x 32+26, 17, 14 # load block + lxvb16x 32+27, 18, 14 # load block + lxvb16x 32+28, 19, 14 # load block + lxvb16x 32+29, 20, 14 # load block + lxvb16x 32+30, 21, 14 # load block + addi 14, 14, 128 + + vxor 15, 15, 23 + vxor 16, 16, 24 + vxor 17, 17, 25 + vxor 18, 18, 26 + vxor 19, 19, 27 + vxor 20, 20, 28 + vxor 21, 21, 29 + vxor 22, 22, 30 + + stxvb16x 47, 0, 9 # store output + stxvb16x 48, 15, 9 # store output + stxvb16x 49, 16, 9 # store output + stxvb16x 50, 17, 9 # store output + stxvb16x 51, 18, 9 # store output + stxvb16x 52, 19, 9 # store output + stxvb16x 53, 20, 9 # store output + stxvb16x 54, 21, 9 # store output + + addi 9, 9, 128 + + vmr 15, 23 + vmr 16, 24 + vmr 17, 25 + vmr 18, 26 + vmr 19, 27 + vmr 20, 28 + vmr 21, 29 + vmr 22, 30 - cmpdi 10, 14 - beq Do_final_1x_dec + # ghash here + vxor 15, 15, 0 + PPC_GHASH4x 0, 15, 16, 17, 18 + + vxor 19, 19, 0 + PPC_GHASH4x 0, 19, 20, 21, 22 + + xxlor 32+15, 9, 9 # last state + vadduwm 15, 15, 31 # state + counter + vadduwm 16, 15, 31 + vadduwm 17, 16, 31 + vadduwm 18, 17, 31 + vadduwm 19, 18, 31 + vadduwm 20, 19, 31 + vadduwm 21, 20, 31 + vadduwm 22, 21, 31 + xxlor 9, 32+22, 32+22 # save last state + + xxlor 32+27, 0, 0 # restore roundkey 0 + vxor 15, 15, 27 # IV + round key - add round key 0 + vxor 16, 16, 27 + vxor 17, 17, 27 + vxor 18, 18, 27 + vxor 19, 19, 27 + vxor 20, 20, 27 + vxor 21, 21, 27 + vxor 22, 22, 27 + + addi 5, 5, -128 + addi 11, 11, 128 -Do_final_1x_dec: - vcipherlast 15, 15, 23 + LOOP_8AES_STATE # process 8 AES keys + mtctr 22 # AES key loop + addi 10, 6, 144 +__LastLoop_aes_state_dec: + lxv 32+1, 0(10) # round key + AES_CIPHER_8x vcipher 15 1 + addi 10, 10, 16 + bdnz __LastLoop_aes_state_dec + lxv 32+1, 0(10) # last round key (v1) - # check partial block - li 21, 1 # decrypt - ld 15, 56(7) # partial? - cmpdi 15, 0 - beq Normal_block_dec - bl Do_partial_block + addi 12, 12, -1 cmpdi 12, 0 - ble aes_gcm_out - - b Continue_partial_check_dec + bne __Loop_8x_block_dec + +__Finish_ghash_dec: + vcipherlast 15, 15, 1 + vcipherlast 16, 16, 1 + vcipherlast 17, 17, 1 + vcipherlast 18, 18, 1 + vcipherlast 19, 19, 1 + vcipherlast 20, 20, 1 + vcipherlast 21, 21, 1 + vcipherlast 22, 22, 1 + + lxvb16x 32+23, 0, 14 # load block + lxvb16x 32+24, 15, 14 # load block + lxvb16x 32+25, 16, 14 # load block + lxvb16x 32+26, 17, 14 # load block + lxvb16x 32+27, 18, 14 # load block + lxvb16x 32+28, 19, 14 # load block + lxvb16x 32+29, 20, 14 # load block + lxvb16x 32+30, 21, 14 # load block + addi 14, 14, 128 + + vxor 15, 15, 23 + vxor 16, 16, 24 + vxor 17, 17, 25 + vxor 18, 18, 26 + vxor 19, 19, 27 + vxor 20, 20, 28 + vxor 21, 21, 29 + vxor 22, 22, 30 + + stxvb16x 47, 0, 9 # store output + stxvb16x 48, 15, 9 # store output + stxvb16x 49, 16, 9 # store output + stxvb16x 50, 17, 9 # store output + stxvb16x 51, 18, 9 # store output + stxvb16x 52, 19, 9 # store output + stxvb16x 53, 20, 9 # store output + stxvb16x 54, 21, 9 # store output + addi 9, 9, 128 + + #vmr 15, 23 + vxor 15, 23, 0 + vmr 16, 24 + vmr 17, 25 + vmr 18, 26 + vmr 19, 27 + vmr 20, 28 + vmr 21, 29 + vmr 22, 30 + + #vxor 15, 15, 0 + PPC_GHASH4x 0, 15, 16, 17, 18 + + vxor 19, 19, 0 + PPC_GHASH4x 0, 19, 20, 21, 22 + + xxlor 30+32, 9, 9 # last ctr + vadduwm 30, 30, 31 # increase ctr + stxvb16x 32+30, 0, 7 # update IV + stxvb16x 32+0, 0, 8 # update Xi + + addi 5, 5, -128 + addi 11, 11, 128 -Normal_block_dec: - lxvb16x 15, 0, 14 # load last block - xxlxor 47, 47, 15 + # + # Done 8x blocks + # - # create partial block mask - li 15, 16 - sub 15, 15, 12 # index to the mask + cmpdi 5, 0 + beq aes_gcm_out - vspltisb 16, -1 # first 16 bytes - 0xffff...ff - vspltisb 17, 0 # second 16 bytes - 0x0000...00 - li 10, 192 - stvx 16, 10, 1 - addi 10, 10, 16 - stvx 17, 10, 1 +__Process_more_dec: + li 24, 0 # decrypt + bl aes_gcm_crypt_1x + cmpdi 5, 0 + beq aes_gcm_out - addi 10, 1, 192 - lxvb16x 16, 15, 10 # load partial block mask - xxland 47, 47, 16 + bl __Process_partial + cmpdi 5, 0 + beq aes_gcm_out +__Do_combine_dec: + bl __Combine_partial + cmpdi 5, 0 + bgt __Process_decrypt + b aes_gcm_out +SYM_FUNC_END(aes_p10_gcm_decrypt) - xxland 32+28, 15, 16 - #vmr 28, 15 - ppc_update_hash_1x +SYM_FUNC_START_LOCAL(aes_gcm_out) - # * should store only the remaining bytes. - bl Write_partial_block + mr 3, 11 # return count - stxvb16x 30+32, 0, 7 # update IV - std 12, 56(7) # update partial? - li 16, 16 + RESTORE_REGS + blr - stxvb16x 32, 0, 8 # write out Xi - stxvb16x 32, 16, 8 # write out Xi - b aes_gcm_out +__Invalid_msg_len: + li 3, 0 + blr +SYM_FUNC_END(aes_gcm_out) + +SYM_DATA_START_LOCAL(PERMX) +.align 4 +# for vector permute and xor +permx: +.long 0x4c5d6e7f, 0x08192a3b, 0xc4d5e6f7, 0x8091a2b3 +SYM_DATA_END(permx) diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index 5eb7631355a1..db481b336bca 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -35,42 +35,21 @@ struct dyn_arch_ftrace { int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec); #define ftrace_init_nop ftrace_init_nop -struct ftrace_regs { - struct pt_regs regs; -}; +#include <linux/ftrace_regs.h> static __always_inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *fregs) { /* We clear regs.msr in ftrace_call */ - return fregs->regs.msr ? &fregs->regs : NULL; + return arch_ftrace_regs(fregs)->regs.msr ? &arch_ftrace_regs(fregs)->regs : NULL; } static __always_inline void ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs, unsigned long ip) { - regs_set_return_ip(&fregs->regs, ip); + regs_set_return_ip(&arch_ftrace_regs(fregs)->regs, ip); } -static __always_inline unsigned long -ftrace_regs_get_instruction_pointer(struct ftrace_regs *fregs) -{ - return instruction_pointer(&fregs->regs); -} - -#define ftrace_regs_get_argument(fregs, n) \ - regs_get_kernel_argument(&(fregs)->regs, n) -#define ftrace_regs_get_stack_pointer(fregs) \ - kernel_stack_pointer(&(fregs)->regs) -#define ftrace_regs_return_value(fregs) \ - regs_return_value(&(fregs)->regs) -#define ftrace_regs_set_return_value(fregs, ret) \ - regs_set_return_value(&(fregs)->regs, ret) -#define ftrace_override_function_with_return(fregs) \ - override_function_with_return(&(fregs)->regs) -#define ftrace_regs_query_register_offset(name) \ - regs_query_register_offset(name) - struct ftrace_ops; #define ftrace_graph_func ftrace_graph_func @@ -159,7 +138,7 @@ unsigned long ftrace_call_adjust(unsigned long addr); */ static inline void arch_ftrace_set_direct_caller(struct ftrace_regs *fregs, unsigned long addr) { - struct pt_regs *regs = &fregs->regs; + struct pt_regs *regs = &arch_ftrace_regs(fregs)->regs; regs->orig_gpr3 = addr; } diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index 52e1b1d15ff6..fd92ac450169 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -970,18 +970,6 @@ static inline void * phys_to_virt(unsigned long address) #define phys_to_virt phys_to_virt /* - * Change "struct page" to physical address. - */ -static inline phys_addr_t page_to_phys(struct page *page) -{ - unsigned long pfn = page_to_pfn(page); - - WARN_ON(IS_ENABLED(CONFIG_DEBUG_VIRTUAL) && !pfn_valid(pfn)); - - return PFN_PHYS(pfn); -} - -/* * 32 bits still uses virt_to_bus() for its implementation of DMA * mappings se we have to keep it defined here. We also have some old * drivers (shame shame shame) that use bus_to_virt() and haven't been diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h index 4525a9c68260..dfe2e5ad3b21 100644 --- a/arch/powerpc/include/asm/kprobes.h +++ b/arch/powerpc/include/asm/kprobes.h @@ -21,7 +21,7 @@ #include <linux/percpu.h> #include <linux/module.h> #include <asm/probes.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #ifdef CONFIG_KPROBES #define __ARCH_WANT_KPROBES_INSN_SLOT diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 83d0a4fc5f75..af9a2628d1df 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -21,8 +21,7 @@ * page size. When using 64K pages however, whether we are really supporting * 64K pages in HW or not is irrelevant to those definitions. */ -#define PAGE_SHIFT CONFIG_PAGE_SHIFT -#define PAGE_SIZE (ASM_CONST(1) << PAGE_SHIFT) +#include <vdso/page.h> #ifndef __ASSEMBLY__ #ifndef CONFIG_HUGETLB_PAGE @@ -42,13 +41,6 @@ extern unsigned int hpage_shift; #endif /* - * Subtle: (1 << PAGE_SHIFT) is an int, not an unsigned long. So if we - * assign PAGE_MASK to a larger type it gets extended the way we want - * (i.e. with 1s in the high bits) - */ -#define PAGE_MASK (~((1 << PAGE_SHIFT) - 1)) - -/* * KERNELBASE is the virtual address of the start of the kernel, it's often * the same as PAGE_OFFSET, but _might not be_. * diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h index 5995614e9062..af0f46e2373b 100644 --- a/arch/powerpc/include/asm/perf_event_server.h +++ b/arch/powerpc/include/asm/perf_event_server.h @@ -102,8 +102,8 @@ struct power_pmu { int __init register_power_pmu(struct power_pmu *pmu); struct pt_regs; -extern unsigned long perf_misc_flags(struct pt_regs *regs); -extern unsigned long perf_instruction_pointer(struct pt_regs *regs); +extern unsigned long perf_arch_misc_flags(struct pt_regs *regs); +extern unsigned long perf_arch_instruction_pointer(struct pt_regs *regs); extern unsigned long int read_bhrb(int n); /* @@ -111,7 +111,7 @@ extern unsigned long int read_bhrb(int n); * if we have hardware PMU support. */ #ifdef CONFIG_PPC_PERF_CTRS -#define perf_misc_flags(regs) perf_misc_flags(regs) +#define perf_arch_misc_flags(regs) perf_arch_misc_flags(regs) #endif /* diff --git a/arch/powerpc/include/asm/systemcfg.h b/arch/powerpc/include/asm/systemcfg.h new file mode 100644 index 000000000000..2f9b1d6a5c98 --- /dev/null +++ b/arch/powerpc/include/asm/systemcfg.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _SYSTEMCFG_H +#define _SYSTEMCFG_H + +/* + * Copyright (C) 2002 Peter Bergner <bergner@vnet.ibm.com>, IBM + * Copyright (C) 2005 Benjamin Herrenschmidy <benh@kernel.crashing.org>, + * IBM Corp. + */ + +#ifdef CONFIG_PPC64 + +/* + * If the major version changes we are incompatible. + * Minor version changes are a hint. + */ +#define SYSTEMCFG_MAJOR 1 +#define SYSTEMCFG_MINOR 1 + +#include <linux/types.h> + +struct systemcfg { + __u8 eye_catcher[16]; /* Eyecatcher: SYSTEMCFG:PPC64 0x00 */ + struct { /* Systemcfg version numbers */ + __u32 major; /* Major number 0x10 */ + __u32 minor; /* Minor number 0x14 */ + } version; + + /* Note about the platform flags: it now only contains the lpar + * bit. The actual platform number is dead and buried + */ + __u32 platform; /* Platform flags 0x18 */ + __u32 processor; /* Processor type 0x1C */ + __u64 processorCount; /* # of physical processors 0x20 */ + __u64 physicalMemorySize; /* Size of real memory(B) 0x28 */ + __u64 tb_orig_stamp; /* (NU) Timebase at boot 0x30 */ + __u64 tb_ticks_per_sec; /* Timebase tics / sec 0x38 */ + __u64 tb_to_xs; /* (NU) Inverse of TB to 2^20 0x40 */ + __u64 stamp_xsec; /* (NU) 0x48 */ + __u64 tb_update_count; /* (NU) Timebase atomicity ctr 0x50 */ + __u32 tz_minuteswest; /* (NU) Min. west of Greenwich 0x58 */ + __u32 tz_dsttime; /* (NU) Type of dst correction 0x5C */ + __u32 dcache_size; /* L1 d-cache size 0x60 */ + __u32 dcache_line_size; /* L1 d-cache line size 0x64 */ + __u32 icache_size; /* L1 i-cache size 0x68 */ + __u32 icache_line_size; /* L1 i-cache line size 0x6C */ +}; + +extern struct systemcfg *systemcfg; + +#endif /* CONFIG_PPC64 */ +#endif /* _SYSTEMCFG_H */ diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/text-patching.h index e7f14720f630..e7f14720f630 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/text-patching.h diff --git a/arch/powerpc/include/asm/vdso/vsyscall.h b/arch/powerpc/include/asm/vdso/vsyscall.h index 92f480d8cc6d..48560a119559 100644 --- a/arch/powerpc/include/asm/vdso/vsyscall.h +++ b/arch/powerpc/include/asm/vdso/vsyscall.h @@ -4,12 +4,8 @@ #ifndef __ASSEMBLY__ -#include <linux/timekeeper_internal.h> #include <asm/vdso_datapage.h> -/* - * Update the vDSO data page to keep in sync with kernel timekeeping. - */ static __always_inline struct vdso_data *__arch_get_k_vdso_data(void) { diff --git a/arch/powerpc/include/asm/vdso_datapage.h b/arch/powerpc/include/asm/vdso_datapage.h index 33069afccb3c..a202f5b63479 100644 --- a/arch/powerpc/include/asm/vdso_datapage.h +++ b/arch/powerpc/include/asm/vdso_datapage.h @@ -9,29 +9,6 @@ * IBM Corp. */ - -/* - * Note about this structure: - * - * This structure was historically called systemcfg and exposed to - * userland via /proc/ppc64/systemcfg. Unfortunately, this became an - * ABI issue as some proprietary software started relying on being able - * to mmap() it, thus we have to keep the base layout at least for a - * few kernel versions. - * - * However, since ppc32 doesn't suffer from this backward handicap, - * a simpler version of the data structure is used there with only the - * fields actually used by the vDSO. - * - */ - -/* - * If the major version changes we are incompatible. - * Minor version changes are a hint. - */ -#define SYSTEMCFG_MAJOR 1 -#define SYSTEMCFG_MINOR 1 - #ifndef __ASSEMBLY__ #include <linux/unistd.h> @@ -40,41 +17,10 @@ #define SYSCALL_MAP_SIZE ((NR_syscalls + 31) / 32) -/* - * So here is the ppc64 backward compatible version - */ - #ifdef CONFIG_PPC64 struct vdso_arch_data { - __u8 eye_catcher[16]; /* Eyecatcher: SYSTEMCFG:PPC64 0x00 */ - struct { /* Systemcfg version numbers */ - __u32 major; /* Major number 0x10 */ - __u32 minor; /* Minor number 0x14 */ - } version; - - /* Note about the platform flags: it now only contains the lpar - * bit. The actual platform number is dead and buried - */ - __u32 platform; /* Platform flags 0x18 */ - __u32 processor; /* Processor type 0x1C */ - __u64 processorCount; /* # of physical processors 0x20 */ - __u64 physicalMemorySize; /* Size of real memory(B) 0x28 */ - __u64 tb_orig_stamp; /* (NU) Timebase at boot 0x30 */ - __u64 tb_ticks_per_sec; /* Timebase tics / sec 0x38 */ - __u64 tb_to_xs; /* (NU) Inverse of TB to 2^20 0x40 */ - __u64 stamp_xsec; /* (NU) 0x48 */ - __u64 tb_update_count; /* (NU) Timebase atomicity ctr 0x50 */ - __u32 tz_minuteswest; /* (NU) Min. west of Greenwich 0x58 */ - __u32 tz_dsttime; /* (NU) Type of dst correction 0x5C */ - __u32 dcache_size; /* L1 d-cache size 0x60 */ - __u32 dcache_line_size; /* L1 d-cache line size 0x64 */ - __u32 icache_size; /* L1 i-cache size 0x68 */ - __u32 icache_line_size; /* L1 i-cache line size 0x6C */ - - /* those additional ones don't have to be located anywhere - * special as they were not part of the original systemcfg - */ + __u64 tb_ticks_per_sec; /* Timebase tics / sec */ __u32 dcache_block_size; /* L1 d-cache block size */ __u32 icache_block_size; /* L1 i-cache block size */ __u32 dcache_log_block_size; /* L1 d-cache log block size */ @@ -89,11 +35,8 @@ struct vdso_arch_data { #else /* CONFIG_PPC64 */ -/* - * And here is the simpler 32 bits version - */ struct vdso_arch_data { - __u64 tb_ticks_per_sec; /* Timebase tics / sec 0x38 */ + __u64 tb_ticks_per_sec; /* Timebase tics / sec */ __u32 syscall_map[SYSCALL_MAP_SIZE]; /* Map of syscalls */ __u32 compat_syscall_map[0]; /* No compat syscalls on PPC32 */ struct vdso_rng_data rng_data; diff --git a/arch/powerpc/include/asm/vga.h b/arch/powerpc/include/asm/vga.h index fcf721682a71..f2dc40e1c52a 100644 --- a/arch/powerpc/include/asm/vga.h +++ b/arch/powerpc/include/asm/vga.h @@ -40,11 +40,6 @@ static inline void scr_memsetw(u16 *s, u16 v, unsigned int n) memset16(s, cpu_to_le16(v), n / 2); } -#define VT_BUF_HAVE_MEMCPYW -#define VT_BUF_HAVE_MEMMOVEW -#define scr_memcpyw memcpy -#define scr_memmovew memmove - #endif /* !CONFIG_VGA_CONSOLE && !CONFIG_MDA_CONSOLE */ #ifdef __powerpc64__ diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index 2086fa6cdc25..103b6605dd68 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -13,7 +13,7 @@ #include <linux/io.h> #include <linux/memblock.h> #include <linux/of.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/kdump.h> #include <asm/firmware.h> #include <linux/uio.h> diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index af4263594eb2..1bee15c013e7 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -867,7 +867,7 @@ bool __init dt_cpu_ftrs_init(void *fdt) using_dt_cpu_ftrs = false; /* Setup and verify the FDT, if it fails we just bail */ - if (!early_init_dt_verify(fdt)) + if (!early_init_dt_verify(fdt, __pa(fdt))) return false; if (!of_scan_flat_dt(fdt_find_cpu_features, NULL)) diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c index d4b8aff20815..247ab2acaccc 100644 --- a/arch/powerpc/kernel/epapr_paravirt.c +++ b/arch/powerpc/kernel/epapr_paravirt.c @@ -9,7 +9,7 @@ #include <linux/of_fdt.h> #include <asm/epapr_hcalls.h> #include <asm/cacheflush.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/machdep.h> #include <asm/inst.h> diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 811a7130505c..56c5ebe21b99 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -494,6 +494,7 @@ FixupDAR:/* Entry point for dcbx workaround. */ bctr /* jump into table */ 152: mfdar r11 + mtdar r10 mtctr r11 /* restore ctr reg from DAR */ mfspr r11, SPRN_SPRG_THREAD stw r10, DAR(r11) diff --git a/arch/powerpc/kernel/jump_label.c b/arch/powerpc/kernel/jump_label.c index 5277cf582c16..2659e1ac8604 100644 --- a/arch/powerpc/kernel/jump_label.c +++ b/arch/powerpc/kernel/jump_label.c @@ -5,7 +5,7 @@ #include <linux/kernel.h> #include <linux/jump_label.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/inst.h> void arch_jump_label_transform(struct jump_entry *entry, diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 7a8bc03a00af..5081334b7bd2 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -21,7 +21,7 @@ #include <asm/processor.h> #include <asm/machdep.h> #include <asm/debug.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <linux/slab.h> #include <asm/inst.h> diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index bf382c459e1f..c0d9f12cb441 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -21,7 +21,7 @@ #include <linux/slab.h> #include <linux/set_memory.h> #include <linux/execmem.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/cacheflush.h> #include <asm/sstep.h> #include <asm/sections.h> diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c index 816a63fd71fb..f930e3395a7f 100644 --- a/arch/powerpc/kernel/module_32.c +++ b/arch/powerpc/kernel/module_32.c @@ -18,7 +18,7 @@ #include <linux/bug.h> #include <linux/sort.h> #include <asm/setup.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> /* Count how many different relocations (different symbol, different addend) */ diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 6816e9967cab..45dac7b46aa3 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -17,7 +17,7 @@ #include <linux/kernel.h> #include <asm/module.h> #include <asm/firmware.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <linux/sort.h> #include <asm/setup.h> #include <asm/sections.h> diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c index c0b351d61058..2e83702bf9ba 100644 --- a/arch/powerpc/kernel/optprobes.c +++ b/arch/powerpc/kernel/optprobes.c @@ -13,7 +13,7 @@ #include <asm/kprobes.h> #include <asm/ptrace.h> #include <asm/cacheflush.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/sstep.h> #include <asm/ppc-opcode.h> #include <asm/inst.h> diff --git a/arch/powerpc/kernel/proc_powerpc.c b/arch/powerpc/kernel/proc_powerpc.c index b109cd7b5d01..3816a2bf2b84 100644 --- a/arch/powerpc/kernel/proc_powerpc.c +++ b/arch/powerpc/kernel/proc_powerpc.c @@ -4,6 +4,7 @@ */ #include <linux/init.h> +#include <linux/memblock.h> #include <linux/mm.h> #include <linux/proc_fs.h> #include <linux/kernel.h> @@ -12,9 +13,10 @@ #include <asm/machdep.h> #include <asm/vdso_datapage.h> #include <asm/rtas.h> +#include <asm/systemcfg.h> #include <linux/uaccess.h> -#ifdef CONFIG_PPC64 +#ifdef CONFIG_PPC64_PROC_SYSTEMCFG static loff_t page_map_seek(struct file *file, loff_t off, int whence) { @@ -33,10 +35,9 @@ static int page_map_mmap( struct file *file, struct vm_area_struct *vma ) if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) return -EINVAL; - remap_pfn_range(vma, vma->vm_start, - __pa(pde_data(file_inode(file))) >> PAGE_SHIFT, - PAGE_SIZE, vma->vm_page_prot); - return 0; + return remap_pfn_range(vma, vma->vm_start, + __pa(pde_data(file_inode(file))) >> PAGE_SHIFT, + PAGE_SIZE, vma->vm_page_prot); } static const struct proc_ops page_map_proc_ops = { @@ -45,13 +46,35 @@ static const struct proc_ops page_map_proc_ops = { .proc_mmap = page_map_mmap, }; +static union { + struct systemcfg data; + u8 page[PAGE_SIZE]; +} systemcfg_data_store __page_aligned_data; +struct systemcfg *systemcfg = &systemcfg_data_store.data; static int __init proc_ppc64_init(void) { struct proc_dir_entry *pde; + strcpy((char *)systemcfg->eye_catcher, "SYSTEMCFG:PPC64"); + systemcfg->version.major = SYSTEMCFG_MAJOR; + systemcfg->version.minor = SYSTEMCFG_MINOR; + systemcfg->processor = mfspr(SPRN_PVR); + /* + * Fake the old platform number for pSeries and add + * in LPAR bit if necessary + */ + systemcfg->platform = 0x100; + if (firmware_has_feature(FW_FEATURE_LPAR)) + systemcfg->platform |= 1; + systemcfg->physicalMemorySize = memblock_phys_mem_size(); + systemcfg->dcache_size = ppc64_caches.l1d.size; + systemcfg->dcache_line_size = ppc64_caches.l1d.line_size; + systemcfg->icache_size = ppc64_caches.l1i.size; + systemcfg->icache_line_size = ppc64_caches.l1i.line_size; + pde = proc_create_data("powerpc/systemcfg", S_IFREG | 0444, NULL, - &page_map_proc_ops, vdso_data); + &page_map_proc_ops, systemcfg); if (!pde) return 1; proc_set_size(pde, PAGE_SIZE); @@ -60,7 +83,7 @@ static int __init proc_ppc64_init(void) } __initcall(proc_ppc64_init); -#endif /* CONFIG_PPC64 */ +#endif /* CONFIG_PPC64_PROC_SYSTEMCFG */ /* * Create the ppc64 and ppc64/rtas directories early. This allows us to diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index ff61a3e7984c..7b739b9a91ab 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -54,7 +54,7 @@ #include <asm/firmware.h> #include <asm/hw_irq.h> #endif -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/exec.h> #include <asm/livepatch.h> #include <asm/cpu_has_feature.h> diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 47db1b1aef25..e0059842a1c6 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -791,7 +791,7 @@ void __init early_init_devtree(void *params) DBG(" -> early_init_devtree(%px)\n", params); /* Too early to BUG_ON(), do it by hand */ - if (!early_init_dt_verify(params)) + if (!early_init_dt_verify(params, __pa(params))) panic("BUG: Failed verifying flat device tree, bad version?"); of_scan_flat_dt(early_init_dt_scan_model, NULL); diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index f7e86e09c49f..d31c9799cab2 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -1390,21 +1390,14 @@ bool __ref rtas_busy_delay(int status) */ ms = clamp(ms, 1U, 1000U); /* - * The delay hint is an order-of-magnitude suggestion, not - * a minimum. It is fine, possibly even advantageous, for - * us to pause for less time than hinted. For small values, - * use usleep_range() to ensure we don't sleep much longer - * than actually needed. - * - * See Documentation/timers/timers-howto.rst for - * explanation of the threshold used here. In effect we use - * usleep_range() for 9900 and 9901, msleep() for - * 9902-9905. + * The delay hint is an order-of-magnitude suggestion, not a + * minimum. It is fine, possibly even advantageous, for us to + * pause for less time than hinted. To make sure pause time will + * not be way longer than requested independent of HZ + * configuration, use fsleep(). See fsleep() for details of + * used sleeping functions. */ - if (ms <= 20) - usleep_range(ms * 100, ms * 1000); - else - msleep(ms); + fsleep(ms * 1000); break; case RTAS_BUSY: ret = true; diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index 4856e1a5161c..fbb7ebd8aa08 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -14,7 +14,7 @@ #include <linux/debugfs.h> #include <asm/asm-prototypes.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/security_features.h> #include <asm/sections.h> #include <asm/setup.h> diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index b6b01502e504..6fa179448c33 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -67,6 +67,7 @@ #include <asm/cpu_has_feature.h> #include <asm/kasan.h> #include <asm/mce.h> +#include <asm/systemcfg.h> #include "setup.h" @@ -560,7 +561,9 @@ void __init smp_setup_cpu_maps(void) out: of_node_put(dn); } - vdso_data->processorCount = num_present_cpus(); +#endif +#ifdef CONFIG_PPC64_PROC_SYSTEMCFG + systemcfg->processorCount = num_present_cpus(); #endif /* CONFIG_PPC64 */ /* Initialize CPU <=> thread mapping/ diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index e515c1f7d8d3..75dbf3e0d9c4 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -40,7 +40,7 @@ #include <asm/time.h> #include <asm/serial.h> #include <asm/udbg.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/cpu_has_feature.h> #include <asm/asm-prototypes.h> #include <asm/kdump.h> diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 1edc7cd68c10..e67f3048611f 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -60,7 +60,7 @@ #include <asm/xmon.h> #include <asm/udbg.h> #include <asm/kexec.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/ftrace.h> #include <asm/opal.h> #include <asm/cputhreads.h> diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 4ab9b8cee77a..5ac7084eebc0 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -61,6 +61,7 @@ #include <asm/ftrace.h> #include <asm/kup.h> #include <asm/fadump.h> +#include <asm/systemcfg.h> #include <trace/events/ipi.h> @@ -1186,8 +1187,8 @@ int generic_cpu_disable(void) return -EBUSY; set_cpu_online(cpu, false); -#ifdef CONFIG_PPC64 - vdso_data->processorCount--; +#ifdef CONFIG_PPC64_PROC_SYSTEMCFG + systemcfg->processorCount--; #endif /* Update affinity of all IRQs previously aimed at this CPU */ irq_migrate_all_off_this_cpu(); @@ -1642,10 +1643,12 @@ void start_secondary(void *unused) secondary_cpu_time_init(); -#ifdef CONFIG_PPC64 +#ifdef CONFIG_PPC64_PROC_SYSTEMCFG if (system_state == SYSTEM_RUNNING) - vdso_data->processorCount++; + systemcfg->processorCount++; +#endif +#ifdef CONFIG_PPC64 vdso_getcpu_init(); #endif set_numa_node(numa_cpu_lookup_table[cpu]); diff --git a/arch/powerpc/kernel/static_call.c b/arch/powerpc/kernel/static_call.c index 1502b7e439ca..7cfd0710e757 100644 --- a/arch/powerpc/kernel/static_call.c +++ b/arch/powerpc/kernel/static_call.c @@ -2,7 +2,7 @@ #include <linux/memory.h> #include <linux/static_call.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> void arch_static_call_transform(void *site, void *tramp, void *func, bool tail) { diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index ebae8415dfbb..d8b4ab78bef0 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -553,3 +553,7 @@ 460 common lsm_set_self_attr sys_lsm_set_self_attr 461 common lsm_list_modules sys_lsm_list_modules 462 common mseal sys_mseal +463 common setxattrat sys_setxattrat +464 common getxattrat sys_getxattrat +465 common listxattrat sys_listxattrat +466 common removexattrat sys_removexattrat diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 0ff9f038e800..0727332ad86f 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -71,11 +71,11 @@ #include <asm/vdso_datapage.h> #include <asm/firmware.h> #include <asm/mce.h> +#include <asm/systemcfg.h> /* powerpc clocksource/clockevent code */ #include <linux/clockchips.h> -#include <linux/timekeeper_internal.h> static u64 timebase_read(struct clocksource *); static struct clocksource clocksource_timebase = { @@ -951,6 +951,9 @@ void __init time_init(void) } vdso_data->tb_ticks_per_sec = tb_ticks_per_sec; +#ifdef CONFIG_PPC64_PROC_SYSTEMCFG + systemcfg->tb_ticks_per_sec = tb_ticks_per_sec; +#endif /* initialise and enable the large decrementer (if we have one) */ set_decrementer_max(); diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 051f3db14606..5ccd791761e8 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -23,7 +23,7 @@ #include <linux/list.h> #include <asm/cacheflush.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/ftrace.h> #include <asm/syscall.h> #include <asm/inst.h> @@ -657,7 +657,7 @@ int __init ftrace_dyn_arch_init(void) void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs) { - unsigned long sp = fregs->regs.gpr[1]; + unsigned long sp = arch_ftrace_regs(fregs)->regs.gpr[1]; int bit; if (unlikely(ftrace_graph_is_dead())) @@ -675,6 +675,6 @@ void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, ftrace_test_recursion_unlock(bit); out: - fregs->regs.link = parent_ip; + arch_ftrace_regs(fregs)->regs.link = parent_ip; } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.c b/arch/powerpc/kernel/trace/ftrace_64_pg.c index 8a551dfca3d0..98787376eb87 100644 --- a/arch/powerpc/kernel/trace/ftrace_64_pg.c +++ b/arch/powerpc/kernel/trace/ftrace_64_pg.c @@ -23,7 +23,7 @@ #include <linux/list.h> #include <asm/cacheflush.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/ftrace.h> #include <asm/syscall.h> #include <asm/inst.h> @@ -816,7 +816,7 @@ out: void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs) { - fregs->regs.link = __prepare_ftrace_return(parent_ip, ip, fregs->regs.gpr[1]); + arch_ftrace_regs(fregs)->regs.link = __prepare_ftrace_return(parent_ip, ip, arch_ftrace_regs(fregs)->regs.gpr[1]); } #else unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip, diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 6166c1862c06..43379365ce1b 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -16,7 +16,6 @@ #include <linux/user.h> #include <linux/elf.h> #include <linux/security.h> -#include <linux/memblock.h> #include <linux/syscalls.h> #include <linux/time_namespace.h> #include <vdso/datapage.h> @@ -353,25 +352,6 @@ static struct page ** __init vdso_setup_pages(void *start, void *end) static int __init vdso_init(void) { #ifdef CONFIG_PPC64 - /* - * Fill up the "systemcfg" stuff for backward compatibility - */ - strcpy((char *)vdso_data->eye_catcher, "SYSTEMCFG:PPC64"); - vdso_data->version.major = SYSTEMCFG_MAJOR; - vdso_data->version.minor = SYSTEMCFG_MINOR; - vdso_data->processor = mfspr(SPRN_PVR); - /* - * Fake the old platform number for pSeries and add - * in LPAR bit if necessary - */ - vdso_data->platform = 0x100; - if (firmware_has_feature(FW_FEATURE_LPAR)) - vdso_data->platform |= 1; - vdso_data->physicalMemorySize = memblock_phys_mem_size(); - vdso_data->dcache_size = ppc64_caches.l1d.size; - vdso_data->dcache_line_size = ppc64_caches.l1d.line_size; - vdso_data->icache_size = ppc64_caches.l1i.size; - vdso_data->icache_line_size = ppc64_caches.l1i.line_size; vdso_data->dcache_block_size = ppc64_caches.l1d.block_size; vdso_data->icache_block_size = ppc64_caches.l1i.block_size; vdso_data->dcache_log_block_size = ppc64_caches.l1d.log_block_size; diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 34c0adb9fdbf..742aa58a7c7e 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -115,10 +115,9 @@ long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, struct iommu_table_group *table_group; long i; struct kvmppc_spapr_tce_iommu_table *stit; - struct fd f; + CLASS(fd, f)(tablefd); - f = fdget(tablefd); - if (!fd_file(f)) + if (fd_empty(f)) return -EBADF; rcu_read_lock(); @@ -130,16 +129,12 @@ long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, } rcu_read_unlock(); - if (!found) { - fdput(f); + if (!found) return -EINVAL; - } table_group = iommu_group_get_iommudata(grp); - if (WARN_ON(!table_group)) { - fdput(f); + if (WARN_ON(!table_group)) return -EFAULT; - } for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { struct iommu_table *tbltmp = table_group->tables[i]; @@ -160,10 +155,8 @@ long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, break; } } - if (!tbl) { - fdput(f); + if (!tbl) return -EINVAL; - } rcu_read_lock(); list_for_each_entry_rcu(stit, &stt->iommu_tables, next) { @@ -174,7 +167,6 @@ long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, /* stit is being destroyed */ iommu_tce_table_put(tbl); rcu_read_unlock(); - fdput(f); return -ENOTTY; } /* @@ -182,7 +174,6 @@ long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, * its KVM reference counter and can return. */ rcu_read_unlock(); - fdput(f); return 0; } rcu_read_unlock(); @@ -190,7 +181,6 @@ long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, stit = kzalloc(sizeof(*stit), GFP_KERNEL); if (!stit) { iommu_tce_table_put(tbl); - fdput(f); return -ENOMEM; } @@ -199,7 +189,6 @@ long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, list_add_rcu(&stit->next, &stt->iommu_tables); - fdput(f); return 0; } diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index d8938fdf1936..25429905ae90 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -4950,6 +4950,18 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, BOOK3S_INTERRUPT_EXTERNAL, 0); else lpcr |= LPCR_MER; + } else { + /* + * L1's copy of L2's LPCR (vcpu->arch.vcore->lpcr) can get its MER bit + * unexpectedly set - for e.g. during NMI handling when all register + * states are synchronized from L0 to L1. L1 needs to inform L0 about + * MER=1 only when there are pending external interrupts. + * In the above if check, MER bit is set if there are pending + * external interrupts. Hence, explicity mask off MER bit + * here as otherwise it may generate spurious interrupts in L2 KVM + * causing an endless loop, which results in L2 guest getting hung. + */ + lpcr &= ~LPCR_MER; } } else if (vcpu->arch.pending_exceptions || xive_interrupt_pending(vcpu)) { diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index f14329989e9a..b3b37ea77849 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -1933,12 +1933,11 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, #endif #ifdef CONFIG_KVM_MPIC case KVM_CAP_IRQ_MPIC: { - struct fd f; + CLASS(fd, f)(cap->args[0]); struct kvm_device *dev; r = -EBADF; - f = fdget(cap->args[0]); - if (!fd_file(f)) + if (fd_empty(f)) break; r = -EPERM; @@ -1946,18 +1945,16 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, if (dev) r = kvmppc_mpic_connect_vcpu(dev, vcpu, cap->args[1]); - fdput(f); break; } #endif #ifdef CONFIG_KVM_XICS case KVM_CAP_IRQ_XICS: { - struct fd f; + CLASS(fd, f)(cap->args[0]); struct kvm_device *dev; r = -EBADF; - f = fdget(cap->args[0]); - if (!fd_file(f)) + if (fd_empty(f)) break; r = -EPERM; @@ -1968,34 +1965,27 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, else r = kvmppc_xics_connect_vcpu(dev, vcpu, cap->args[1]); } - - fdput(f); break; } #endif /* CONFIG_KVM_XICS */ #ifdef CONFIG_KVM_XIVE case KVM_CAP_PPC_IRQ_XIVE: { - struct fd f; + CLASS(fd, f)(cap->args[0]); struct kvm_device *dev; r = -EBADF; - f = fdget(cap->args[0]); - if (!fd_file(f)) + if (fd_empty(f)) break; r = -ENXIO; - if (!xive_enabled()) { - fdput(f); + if (!xive_enabled()) break; - } r = -EPERM; dev = kvm_device_from_filp(fd_file(f)); if (dev) r = kvmppc_xive_native_connect_vcpu(dev, vcpu, cap->args[1]); - - fdput(f); break; } #endif /* CONFIG_KVM_XIVE */ diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index acdab294b340..af97fbb3c257 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -17,7 +17,7 @@ #include <asm/tlb.h> #include <asm/tlbflush.h> #include <asm/page.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/inst.h> static int __patch_mem(void *exec_addr, unsigned long val, void *patch_addr, bool is_dword) diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index b7201ba50b2e..587c8cf1230f 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -16,7 +16,7 @@ #include <linux/sched/mm.h> #include <linux/stop_machine.h> #include <asm/cputable.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/interrupt.h> #include <asm/page.h> #include <asm/sections.h> diff --git a/arch/powerpc/lib/test-code-patching.c b/arch/powerpc/lib/test-code-patching.c index 8cd3b32f805b..1440d99630b3 100644 --- a/arch/powerpc/lib/test-code-patching.c +++ b/arch/powerpc/lib/test-code-patching.c @@ -6,7 +6,7 @@ #include <linux/vmalloc.h> #include <linux/init.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> static int __init instr_is_branch_to_addr(const u32 *instr, unsigned long addr) { diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c index 23c7805fb7b3..66b5b4fa1686 100644 --- a/arch/powerpc/lib/test_emulate_step.c +++ b/arch/powerpc/lib/test_emulate_step.c @@ -11,7 +11,7 @@ #include <asm/cpu_has_feature.h> #include <asm/sstep.h> #include <asm/ppc-opcode.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/inst.h> #define MAX_SUBTESTS 16 diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 2db167f4233f..6978344edcb4 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -25,7 +25,7 @@ #include <asm/mmu.h> #include <asm/machdep.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/sections.h> #include <mm/mmu_decl.h> diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index db87c2cc2fb6..c8b4fa71d4a7 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -58,7 +58,7 @@ #include <asm/sections.h> #include <asm/copro.h> #include <asm/udbg.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/fadump.h> #include <asm/firmware.h> #include <asm/tm.h> diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c index f2708c8629a5..6b783552403c 100644 --- a/arch/powerpc/mm/book3s64/slb.c +++ b/arch/powerpc/mm/book3s64/slb.c @@ -24,7 +24,7 @@ #include <linux/pgtable.h> #include <asm/udbg.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include "internal.h" diff --git a/arch/powerpc/mm/book3s64/slice.c b/arch/powerpc/mm/book3s64/slice.c index 87307d0fc3b8..bc9a39821d1c 100644 --- a/arch/powerpc/mm/book3s64/slice.c +++ b/arch/powerpc/mm/book3s64/slice.c @@ -633,6 +633,20 @@ return_addr: } EXPORT_SYMBOL_GPL(slice_get_unmapped_area); +#ifdef CONFIG_HUGETLB_PAGE +static int file_to_psize(struct file *file) +{ + struct hstate *hstate = hstate_file(file); + + return shift_to_mmu_psize(huge_page_shift(hstate)); +} +#else +static int file_to_psize(struct file *file) +{ + return 0; +} +#endif + unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, @@ -640,11 +654,17 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long flags, vm_flags_t vm_flags) { + unsigned int psize; + if (radix_enabled()) return generic_get_unmapped_area(filp, addr, len, pgoff, flags, vm_flags); - return slice_get_unmapped_area(addr, len, flags, - mm_ctx_user_psize(¤t->mm->context), 0); + if (filp && is_file_hugepages(filp)) + psize = file_to_psize(filp); + else + psize = mm_ctx_user_psize(¤t->mm->context); + + return slice_get_unmapped_area(addr, len, flags, psize, 0); } unsigned long arch_get_unmapped_area_topdown(struct file *filp, @@ -654,11 +674,17 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp, const unsigned long flags, vm_flags_t vm_flags) { + unsigned int psize; + if (radix_enabled()) return generic_get_unmapped_area_topdown(filp, addr0, len, pgoff, flags, vm_flags); - return slice_get_unmapped_area(addr0, len, flags, - mm_ctx_user_psize(¤t->mm->context), 1); + if (filp && is_file_hugepages(filp)) + psize = file_to_psize(filp); + else + psize = mm_ctx_user_psize(¤t->mm->context); + + return slice_get_unmapped_area(addr0, len, flags, psize, 1); } unsigned int notrace get_slice_psize(struct mm_struct *mm, unsigned long addr) @@ -788,20 +814,4 @@ unsigned long vma_mmu_pagesize(struct vm_area_struct *vma) return 1UL << mmu_psize_to_shift(get_slice_psize(vma->vm_mm, vma->vm_start)); } - -static int file_to_psize(struct file *file) -{ - struct hstate *hstate = hstate_file(file); - return shift_to_mmu_psize(huge_page_shift(hstate)); -} - -unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, - unsigned long len, unsigned long pgoff, - unsigned long flags) -{ - if (radix_enabled()) - return generic_hugetlb_get_unmapped_area(file, addr, len, pgoff, flags); - - return slice_get_unmapped_area(addr, len, flags, file_to_psize(file), 1); -} #endif diff --git a/arch/powerpc/mm/kasan/init_32.c b/arch/powerpc/mm/kasan/init_32.c index aa9aa11927b2..03666d790a53 100644 --- a/arch/powerpc/mm/kasan/init_32.c +++ b/arch/powerpc/mm/kasan/init_32.c @@ -7,7 +7,7 @@ #include <linux/memblock.h> #include <linux/sched/task.h> #include <asm/pgalloc.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <mm/mmu_decl.h> static pgprot_t __init kasan_prot_ro(void) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 1221c561b43a..c7708c8fad29 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -26,7 +26,7 @@ #include <asm/svm.h> #include <asm/mmzone.h> #include <asm/ftrace.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/setup.h> #include <asm/fixmap.h> diff --git a/arch/powerpc/mm/nohash/44x.c b/arch/powerpc/mm/nohash/44x.c index 1beae802bb1c..6d10c6d8be71 100644 --- a/arch/powerpc/mm/nohash/44x.c +++ b/arch/powerpc/mm/nohash/44x.c @@ -24,7 +24,7 @@ #include <asm/mmu.h> #include <asm/page.h> #include <asm/cacheflush.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/smp.h> #include <mm/mmu_decl.h> diff --git a/arch/powerpc/mm/nohash/book3e_pgtable.c b/arch/powerpc/mm/nohash/book3e_pgtable.c index ad2a7c26f2a0..062e8785c1bb 100644 --- a/arch/powerpc/mm/nohash/book3e_pgtable.c +++ b/arch/powerpc/mm/nohash/book3e_pgtable.c @@ -10,7 +10,7 @@ #include <asm/pgalloc.h> #include <asm/tlb.h> #include <asm/dma.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <mm/mmu_decl.h> diff --git a/arch/powerpc/mm/nohash/tlb.c b/arch/powerpc/mm/nohash/tlb.c index b653a7be4cb1..0a650742f3a0 100644 --- a/arch/powerpc/mm/nohash/tlb.c +++ b/arch/powerpc/mm/nohash/tlb.c @@ -37,7 +37,7 @@ #include <asm/pgalloc.h> #include <asm/tlbflush.h> #include <asm/tlb.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/cputhreads.h> #include <asm/hugetlb.h> #include <asm/paca.h> diff --git a/arch/powerpc/mm/nohash/tlb_64e.c b/arch/powerpc/mm/nohash/tlb_64e.c index d26656b07b72..4f925adf2695 100644 --- a/arch/powerpc/mm/nohash/tlb_64e.c +++ b/arch/powerpc/mm/nohash/tlb_64e.c @@ -24,7 +24,7 @@ #include <asm/pgalloc.h> #include <asm/tlbflush.h> #include <asm/tlb.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/cputhreads.h> #include <mm/mmu_decl.h> diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index 7316396e452d..61df5aed7989 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -398,7 +398,7 @@ void assert_pte_locked(struct mm_struct *mm, unsigned long addr) */ if (pmd_none(*pmd)) return; - pte = pte_offset_map_nolock(mm, pmd, addr, &ptl); + pte = pte_offset_map_ro_nolock(mm, pmd, addr, &ptl); BUG_ON(!pte); assert_spin_locked(ptl); pte_unmap(pte); diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 28e2fd8b7900..2991bb171a9b 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -18,7 +18,7 @@ #include <linux/bpf.h> #include <asm/kprobes.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include "bpf_jit.h" diff --git a/arch/powerpc/perf/8xx-pmu.c b/arch/powerpc/perf/8xx-pmu.c index 308a2e40d7be..1d2972229e3a 100644 --- a/arch/powerpc/perf/8xx-pmu.c +++ b/arch/powerpc/perf/8xx-pmu.c @@ -14,7 +14,7 @@ #include <asm/machdep.h> #include <asm/firmware.h> #include <asm/ptrace.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/inst.h> #define PERF_8xx_ID_CPU_CYCLES 1 diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c index 6b4434dd0ff3..26aa26482c9a 100644 --- a/arch/powerpc/perf/callchain.c +++ b/arch/powerpc/perf/callchain.c @@ -51,7 +51,7 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re lr = regs->link; sp = regs->gpr[1]; - perf_callchain_store(entry, perf_instruction_pointer(regs)); + perf_callchain_store(entry, perf_arch_instruction_pointer(regs)); if (!validate_sp(sp, current)) return; diff --git a/arch/powerpc/perf/callchain_32.c b/arch/powerpc/perf/callchain_32.c index ea8cfe3806dc..ddcc2d8aa64a 100644 --- a/arch/powerpc/perf/callchain_32.c +++ b/arch/powerpc/perf/callchain_32.c @@ -139,7 +139,7 @@ void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry, long level = 0; unsigned int __user *fp, *uregs; - next_ip = perf_instruction_pointer(regs); + next_ip = perf_arch_instruction_pointer(regs); lr = regs->link; sp = regs->gpr[1]; perf_callchain_store(entry, next_ip); diff --git a/arch/powerpc/perf/callchain_64.c b/arch/powerpc/perf/callchain_64.c index 488e8a21a11e..115d1c105e8a 100644 --- a/arch/powerpc/perf/callchain_64.c +++ b/arch/powerpc/perf/callchain_64.c @@ -74,7 +74,7 @@ void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry, struct signal_frame_64 __user *sigframe; unsigned long __user *fp, *uregs; - next_ip = perf_instruction_pointer(regs); + next_ip = perf_arch_instruction_pointer(regs); lr = regs->link; sp = regs->gpr[1]; perf_callchain_store(entry, next_ip); diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 42867469752d..2b79171ee185 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -16,7 +16,7 @@ #include <asm/machdep.h> #include <asm/firmware.h> #include <asm/ptrace.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/hw_irq.h> #include <asm/interrupt.h> @@ -2332,7 +2332,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val, * Called from generic code to get the misc flags (i.e. processor mode) * for an event_id. */ -unsigned long perf_misc_flags(struct pt_regs *regs) +unsigned long perf_arch_misc_flags(struct pt_regs *regs) { u32 flags = perf_get_misc_flags(regs); @@ -2346,7 +2346,7 @@ unsigned long perf_misc_flags(struct pt_regs *regs) * Called from generic code to get the instruction pointer * for an event_id. */ -unsigned long perf_instruction_pointer(struct pt_regs *regs) +unsigned long perf_arch_instruction_pointer(struct pt_regs *regs) { unsigned long siar = mfspr(SPRN_SIAR); diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index e52b848b64b7..32fa5fb557c0 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -23,7 +23,7 @@ #include <asm/mpic.h> #include <asm/cacheflush.h> #include <asm/dbell.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/cputhreads.h> #include <asm/fsl_pm.h> diff --git a/arch/powerpc/platforms/86xx/mpc86xx_smp.c b/arch/powerpc/platforms/86xx/mpc86xx_smp.c index 8a7e55acf090..9be33e41af6d 100644 --- a/arch/powerpc/platforms/86xx/mpc86xx_smp.c +++ b/arch/powerpc/platforms/86xx/mpc86xx_smp.c @@ -12,7 +12,7 @@ #include <linux/delay.h> #include <linux/pgtable.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/page.h> #include <asm/pci-bridge.h> #include <asm/mpic.h> diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c index 28dc86744cac..d243f7fd8982 100644 --- a/arch/powerpc/platforms/cell/axon_msi.c +++ b/arch/powerpc/platforms/cell/axon_msi.c @@ -112,7 +112,7 @@ static void axon_msi_cascade(struct irq_desc *desc) pr_devel("axon_msi: woff %x roff %x msi %x\n", write_offset, msic->read_offset, msi); - if (msi < nr_irqs && irq_get_chip_data(msi) == msic) { + if (msi < irq_get_nr_irqs() && irq_get_chip_data(msi) == msic) { generic_handle_irq(msi); msic->fifo_virt[idx] = cpu_to_le32(0xffffffff); } else { diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c index fee638fd8970..0e8f20ecca08 100644 --- a/arch/powerpc/platforms/cell/smp.c +++ b/arch/powerpc/platforms/cell/smp.c @@ -35,7 +35,7 @@ #include <asm/firmware.h> #include <asm/rtas.h> #include <asm/cputhreads.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include "interrupt.h" #include <asm/udbg.h> diff --git a/arch/powerpc/platforms/cell/spu_syscalls.c b/arch/powerpc/platforms/cell/spu_syscalls.c index cd7d42fc12a6..000894e07b02 100644 --- a/arch/powerpc/platforms/cell/spu_syscalls.c +++ b/arch/powerpc/platforms/cell/spu_syscalls.c @@ -36,6 +36,9 @@ static inline struct spufs_calls *spufs_calls_get(void) static inline void spufs_calls_put(struct spufs_calls *calls) { + if (!calls) + return; + BUG_ON(calls != spufs_calls); /* we don't need to rcu this, as we hold a reference to the module */ @@ -53,82 +56,55 @@ static inline void spufs_calls_put(struct spufs_calls *calls) { } #endif /* CONFIG_SPU_FS_MODULE */ +DEFINE_CLASS(spufs_calls, struct spufs_calls *, spufs_calls_put(_T), spufs_calls_get(), void) + SYSCALL_DEFINE4(spu_create, const char __user *, name, unsigned int, flags, umode_t, mode, int, neighbor_fd) { - long ret; - struct spufs_calls *calls; - - calls = spufs_calls_get(); + CLASS(spufs_calls, calls)(); if (!calls) return -ENOSYS; if (flags & SPU_CREATE_AFFINITY_SPU) { - struct fd neighbor = fdget(neighbor_fd); - ret = -EBADF; - if (fd_file(neighbor)) { - ret = calls->create_thread(name, flags, mode, fd_file(neighbor)); - fdput(neighbor); - } - } else - ret = calls->create_thread(name, flags, mode, NULL); - - spufs_calls_put(calls); - return ret; + CLASS(fd, neighbor)(neighbor_fd); + if (fd_empty(neighbor)) + return -EBADF; + return calls->create_thread(name, flags, mode, fd_file(neighbor)); + } else { + return calls->create_thread(name, flags, mode, NULL); + } } SYSCALL_DEFINE3(spu_run,int, fd, __u32 __user *, unpc, __u32 __user *, ustatus) { - long ret; - struct fd arg; - struct spufs_calls *calls; - - calls = spufs_calls_get(); + CLASS(spufs_calls, calls)(); if (!calls) return -ENOSYS; - ret = -EBADF; - arg = fdget(fd); - if (fd_file(arg)) { - ret = calls->spu_run(fd_file(arg), unpc, ustatus); - fdput(arg); - } + CLASS(fd, arg)(fd); + if (fd_empty(arg)) + return -EBADF; - spufs_calls_put(calls); - return ret; + return calls->spu_run(fd_file(arg), unpc, ustatus); } #ifdef CONFIG_COREDUMP int elf_coredump_extra_notes_size(void) { - struct spufs_calls *calls; - int ret; - - calls = spufs_calls_get(); + CLASS(spufs_calls, calls)(); if (!calls) return 0; - ret = calls->coredump_extra_notes_size(); - - spufs_calls_put(calls); - - return ret; + return calls->coredump_extra_notes_size(); } int elf_coredump_extra_notes_write(struct coredump_params *cprm) { - struct spufs_calls *calls; - int ret; - - calls = spufs_calls_get(); + CLASS(spufs_calls, calls)(); if (!calls) return 0; - ret = calls->coredump_extra_notes_write(cprm); - - spufs_calls_put(calls); - - return ret; + return calls->coredump_extra_notes_write(cprm); } #endif diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c index 18daafbe2e65..301ee7d8b7df 100644 --- a/arch/powerpc/platforms/cell/spufs/coredump.c +++ b/arch/powerpc/platforms/cell/spufs/coredump.c @@ -73,9 +73,7 @@ static struct spu_context *coredump_next_context(int *fd) return NULL; *fd = n - 1; - rcu_read_lock(); - file = lookup_fdget_rcu(*fd); - rcu_read_unlock(); + file = fget_raw(*fd); if (file) { ctx = SPUFS_I(file_inode(file))->i_ctx; get_spu_context(ctx); diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index d21b681f52fb..09e7fe24fac1 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -35,7 +35,7 @@ #include <asm/ptrace.h> #include <linux/atomic.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/irq.h> #include <asm/page.h> #include <asm/sections.h> diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index ad41dffe4d92..d98b933e4984 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -18,7 +18,7 @@ #include <asm/opal.h> #include <asm/cputhreads.h> #include <asm/cpuidle.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/smp.h> #include <asm/runlatch.h> #include <asm/dbell.h> diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c index 56a1f7ce78d2..d92759c21fae 100644 --- a/arch/powerpc/platforms/powernv/opal-irqchip.c +++ b/arch/powerpc/platforms/powernv/opal-irqchip.c @@ -282,6 +282,7 @@ int __init opal_event_init(void) name, NULL); if (rc) { pr_warn("Error %d requesting OPAL irq %d\n", rc, (int)r->start); + kfree(name); continue; } } diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index 8f14f0581a21..8f41ef364fc6 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -28,7 +28,7 @@ #include <asm/xive.h> #include <asm/opal.h> #include <asm/runlatch.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/dbell.h> #include <asm/kvm_ppc.h> #include <asm/ppc-opcode.h> @@ -36,6 +36,7 @@ #include <asm/kexec.h> #include <asm/reg.h> #include <asm/powernv.h> +#include <asm/systemcfg.h> #include "powernv.h" @@ -136,7 +137,9 @@ static int pnv_smp_cpu_disable(void) * the generic fixup_irqs. --BenH. */ set_cpu_online(cpu, false); - vdso_data->processorCount--; +#ifdef CONFIG_PPC64_PROC_SYSTEMCFG + systemcfg->processorCount--; +#endif if (cpu == boot_cpuid) boot_cpuid = cpumask_any(cpu_online_mask); if (xive_enabled()) diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index 6838a0fcda29..bc6926dbf148 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -33,6 +33,7 @@ #include <asm/xive.h> #include <asm/plpar_wrappers.h> #include <asm/topology.h> +#include <asm/systemcfg.h> #include "pseries.h" @@ -83,7 +84,9 @@ static int pseries_cpu_disable(void) int cpu = smp_processor_id(); set_cpu_online(cpu, false); - vdso_data->processorCount--; +#ifdef CONFIG_PPC64_PROC_SYSTEMCFG + systemcfg->processorCount--; +#endif /*fix boot_cpuid here*/ if (cpu == boot_cpuid) diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c index 62da20f9700a..cc22924f159f 100644 --- a/arch/powerpc/platforms/pseries/lparcfg.c +++ b/arch/powerpc/platforms/pseries/lparcfg.c @@ -29,7 +29,6 @@ #include <asm/firmware.h> #include <asm/rtas.h> #include <asm/time.h> -#include <asm/vdso_datapage.h> #include <asm/vio.h> #include <asm/mmu.h> #include <asm/machdep.h> @@ -530,7 +529,7 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v) lrdrp = of_get_property(rtas_node, "ibm,lrdr-capacity", NULL); if (lrdrp == NULL) { - partition_potential_processors = vdso_data->processorCount; + partition_potential_processors = num_possible_cpus(); } else { partition_potential_processors = be32_to_cpup(lrdrp + 4); } @@ -553,7 +552,7 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v) } else { /* non SPLPAR case */ seq_printf(m, "system_active_processors=%d\n", - partition_potential_processors); + partition_active_processors); seq_printf(m, "system_potential_processors=%d\n", partition_potential_processors); diff --git a/arch/powerpc/platforms/pseries/plpks.c b/arch/powerpc/platforms/pseries/plpks.c index 4a595493d28a..b1667ed05f98 100644 --- a/arch/powerpc/platforms/pseries/plpks.c +++ b/arch/powerpc/platforms/pseries/plpks.c @@ -683,7 +683,7 @@ void __init plpks_early_init_devtree(void) out: fdt_nop_property(fdt, chosen_node, "ibm,plpks-pw"); // Since we've cleared the password, we must update the FDT checksum - early_init_dt_verify(fdt); + early_init_dt_verify(fdt, __pa(fdt)); } static __init int pseries_plpks_init(void) diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index c597711ef20a..db99725e752b 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c @@ -39,7 +39,7 @@ #include <asm/xive.h> #include <asm/dbell.h> #include <asm/plpar_wrappers.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/svm.h> #include <asm/kvm_guest.h> diff --git a/arch/powerpc/platforms/pseries/svm.c b/arch/powerpc/platforms/pseries/svm.c index 10b8eb6bff39..c5d0f92c7969 100644 --- a/arch/powerpc/platforms/pseries/svm.c +++ b/arch/powerpc/platforms/pseries/svm.c @@ -8,6 +8,7 @@ #include <linux/mm.h> #include <linux/memblock.h> +#include <linux/mem_encrypt.h> #include <linux/cc_platform.h> #include <linux/mem_encrypt.h> #include <asm/machdep.h> diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 22b8b5cc4df0..f4e841a36458 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -50,7 +50,7 @@ #include <asm/xive.h> #include <asm/opal.h> #include <asm/firmware.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/sections.h> #include <asm/inst.h> #include <asm/interrupt.h> |