Diffstat (limited to 'arch')
262 files changed, 3224 insertions, 1718 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index 6682b2a53e34..b8a4ff365582 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1526,6 +1526,14 @@ config HAVE_ARCH_COMPILER_H
 	  linux/compiler-*.h in order to override macro definitions that those
 	  headers generally provide.
 
+config HAVE_ARCH_LIBGCC_H
+	bool
+	help
+	  An architecture can select this if it provides an
+	  asm/libgcc.h header that should be included after
+	  linux/libgcc.h in order to override macro definitions that
+	  header generally provides.
+
 config HAVE_ARCH_PREL32_RELOCATIONS
 	bool
 	help
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 5b2488142041..4f2eeda907ec 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -6,6 +6,7 @@ config ARC
 	def_bool y
 	select ARC_TIMERS
+	select ARCH_HAS_CPU_CACHE_ALIASING
 	select ARCH_HAS_CACHE_LINE_SIZE
 	select ARCH_HAS_DEBUG_VM_PGTABLE
 	select ARCH_HAS_DMA_PREP_COHERENT
@@ -297,7 +298,6 @@ config ARC_PAGE_SIZE_16K
 config ARC_PAGE_SIZE_4K
 	bool "4KB"
 	select HAVE_PAGE_SIZE_4KB
-	depends on ARC_MMU_V3 || ARC_MMU_V4
 
 endchoice
 
@@ -474,7 +474,8 @@ config HIGHMEM
 config ARC_HAS_PAE40
 	bool "Support for the 40-bit Physical Address Extension"
-	depends on ISA_ARCV2
+	depends on ARC_MMU_V4
+	depends on !ARC_PAGE_SIZE_4K
 	select HIGHMEM
 	select PHYS_ADDR_T_64BIT
 	help
diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index 2390dd042e36..fb98478ed1ab 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -6,7 +6,7 @@
 KBUILD_DEFCONFIG := haps_hs_smp_defconfig
 
 ifeq ($(CROSS_COMPILE),)
-CROSS_COMPILE := $(call cc-cross-prefix, arc-linux- arceb-linux-)
+CROSS_COMPILE := $(call cc-cross-prefix, arc-linux- arceb-linux- arc-linux-gnu-)
 endif
 
 cflags-y	+= -fno-common -pipe -fno-builtin -mmedium-calls -D__linux__
diff --git a/arch/arc/boot/dts/axc001.dtsi b/arch/arc/boot/dts/axc001.dtsi
index 2a151607b080..88bcc7ab6f5a 100644
--- a/arch/arc/boot/dts/axc001.dtsi
+++ b/arch/arc/boot/dts/axc001.dtsi
@@ -54,7 +54,7 @@
 			compatible = "snps,dw-apb-gpio-port";
 			gpio-controller;
 			#gpio-cells = <2>;
-			snps,nr-gpios = <30>;
+			ngpios = <30>;
 			reg = <0>;
 			interrupt-controller;
 			#interrupt-cells = <2>;
diff --git a/arch/arc/boot/dts/axc003.dtsi b/arch/arc/boot/dts/axc003.dtsi
index c0a812674ce9..9a2dc39a5cff 100644
--- a/arch/arc/boot/dts/axc003.dtsi
+++ b/arch/arc/boot/dts/axc003.dtsi
@@ -62,7 +62,7 @@
 			compatible = "snps,dw-apb-gpio-port";
 			gpio-controller;
 			#gpio-cells = <2>;
-			snps,nr-gpios = <30>;
+			ngpios = <30>;
 			reg = <0>;
 			interrupt-controller;
 			#interrupt-cells = <2>;
diff --git a/arch/arc/boot/dts/axc003_idu.dtsi b/arch/arc/boot/dts/axc003_idu.dtsi
index 67556f4b7057..f31382cb8be4 100644
--- a/arch/arc/boot/dts/axc003_idu.dtsi
+++ b/arch/arc/boot/dts/axc003_idu.dtsi
@@ -69,7 +69,7 @@
 			compatible = "snps,dw-apb-gpio-port";
 			gpio-controller;
 			#gpio-cells = <2>;
-			snps,nr-gpios = <30>;
+			ngpios = <30>;
 			reg = <0>;
 			interrupt-controller;
 			#interrupt-cells = <2>;
diff --git a/arch/arc/boot/dts/axs10x_mb.dtsi b/arch/arc/boot/dts/axs10x_mb.dtsi
index b64435385304..3add2fe257f8 100644
--- a/arch/arc/boot/dts/axs10x_mb.dtsi
+++ b/arch/arc/boot/dts/axs10x_mb.dtsi
@@ -250,7 +250,7 @@
 				compatible = "snps,dw-apb-gpio-port";
 				gpio-controller;
 				#gpio-cells = <2>;
-				snps,nr-gpios = <32>;
+				ngpios = <32>;
 				reg = <0>;
 			};
 
@@ -258,7 +258,7 @@
 				compatible = "snps,dw-apb-gpio-port";
 				gpio-controller;
 				#gpio-cells = <2>;
-				snps,nr-gpios = <8>;
+				ngpios = <8>;
 				reg = <1>;
 			};
 
@@ -266,7 +266,7 @@
 				compatible = "snps,dw-apb-gpio-port";
 				gpio-controller;
 				#gpio-cells = <2>;
-				snps,nr-gpios = <8>;
+				ngpios = <8>;
 				reg = <2>;
 			};
 		};
 
@@ -281,7 +281,7 @@
 				compatible = "snps,dw-apb-gpio-port";
 				gpio-controller;
 				#gpio-cells = <2>;
-				snps,nr-gpios = <30>;
+				ngpios = <30>;
 				reg = <0>;
 			};
 
@@ -289,7 +289,7 @@
 				compatible = "snps,dw-apb-gpio-port";
 				gpio-controller;
 				#gpio-cells = <2>;
-				snps,nr-gpios = <10>;
+				ngpios = <10>;
 				reg = <1>;
 			};
 
@@ -297,7 +297,7 @@
 				compatible = "snps,dw-apb-gpio-port";
 				gpio-controller;
 				#gpio-cells = <2>;
-				snps,nr-gpios = <8>;
+				ngpios = <8>;
 				reg = <2>;
 			};
 		};
diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts
index 41b980df862b..98bb850722a4 100644
--- a/arch/arc/boot/dts/hsdk.dts
+++ b/arch/arc/boot/dts/hsdk.dts
@@ -308,7 +308,7 @@
 			compatible = "snps,dw-apb-gpio-port";
 			gpio-controller;
 			#gpio-cells = <2>;
-			snps,nr-gpios = <24>;
+			ngpios = <24>;
 			reg = <0>;
 		};
 	};
diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h
index 4b13f60fe7ca..005d9e4d187a 100644
--- a/arch/arc/include/asm/arcregs.h
+++ b/arch/arc/include/asm/arcregs.h
@@ -146,7 +146,7 @@
 
 #ifndef __ASSEMBLY__
 
-#include <soc/arc/aux.h>
+#include <soc/arc/arc_aux.h>
 
 /* Helpers */
 #define TO_KB(bytes)	((bytes) >> 10)
diff --git a/arch/arc/include/asm/cachetype.h b/arch/arc/include/asm/cachetype.h
new file mode 100644
index 000000000000..acd3b6cb4bf5
--- /dev/null
+++ b/arch/arc/include/asm/cachetype.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_ARC_CACHETYPE_H
+#define __ASM_ARC_CACHETYPE_H
+
+#define cpu_dcache_is_aliasing()	false
+#define cpu_icache_is_aliasing()	true
+
+#endif
diff --git a/arch/arc/include/asm/cmpxchg.h b/arch/arc/include/asm/cmpxchg.h
index 58045c898340..76f43db0890f 100644
--- a/arch/arc/include/asm/cmpxchg.h
+++ b/arch/arc/include/asm/cmpxchg.h
@@ -48,7 +48,7 @@
 									\
 	switch(sizeof((_p_))) {						\
 	case 1:								\
-		_prev_ = (__typeof__(*(ptr)))cmpxchg_emu_u8((volatile u8 *)_p_, (uintptr_t)_o_, (uintptr_t)_n_);	\
+		_prev_ = (__typeof__(*(ptr)))cmpxchg_emu_u8((volatile u8 *__force)_p_, (uintptr_t)_o_, (uintptr_t)_n_);	\
 		break;							\
 	case 4:								\
 		_prev_ = __cmpxchg(_p_, _o_, _n_);			\
diff --git a/arch/arc/include/asm/mmu-arcv2.h b/arch/arc/include/asm/mmu-arcv2.h
index d85dc0721907..41412642f279 100644
--- a/arch/arc/include/asm/mmu-arcv2.h
+++ b/arch/arc/include/asm/mmu-arcv2.h
@@ -9,7 +9,7 @@
 #ifndef _ASM_ARC_MMU_ARCV2_H
 #define _ASM_ARC_MMU_ARCV2_H
 
-#include <soc/arc/aux.h>
+#include <soc/arc/arc_aux.h>
 
 /*
  * TLB Management regs
diff --git a/arch/arc/net/bpf_jit_arcv2.c b/arch/arc/net/bpf_jit_arcv2.c
index 4458e409ca0a..6d989b6d88c6 100644
--- a/arch/arc/net/bpf_jit_arcv2.c
+++ b/arch/arc/net/bpf_jit_arcv2.c
@@ -2916,7 +2916,7 @@ bool check_jmp_32(u32 curr_off, u32 targ_off, u8 cond)
 	addendum = (cond == ARC_CC_AL) ? 0 : INSN_len_normal;
 	disp = get_displacement(curr_off + addendum, targ_off);
 
-	if (ARC_CC_AL)
+	if (cond == ARC_CC_AL)
 		return is_valid_far_disp(disp);
 	else
 		return is_valid_near_disp(disp);
diff --git a/arch/arm/boot/dts/nxp/imx/imxrt1050.dtsi b/arch/arm/boot/dts/nxp/imx/imxrt1050.dtsi
index dd714d235d5f..b0bad0d1ba36 100644
--- a/arch/arm/boot/dts/nxp/imx/imxrt1050.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imxrt1050.dtsi
@@ -87,7 +87,7 @@
 			reg = <0x402c0000 0x4000>;
 			interrupts = <110>;
 			clocks = <&clks IMXRT1050_CLK_IPG_PDOF>,
-				 <&clks IMXRT1050_CLK_OSC>,
+				 <&clks IMXRT1050_CLK_AHB_PODF>,
 				 <&clks IMXRT1050_CLK_USDHC1>;
 			clock-names = "ipg", "ahb", "per";
 			bus-width = <4>;
diff --git a/arch/arm/common/locomo.c b/arch/arm/common/locomo.c
index 06b0e5fd54a6..cb6ef449b987 100644
--- a/arch/arm/common/locomo.c
+++ b/arch/arm/common/locomo.c
@@ -516,7 +516,7 @@ static void locomo_remove(struct platform_device *dev)
  */
 static struct platform_driver locomo_device_driver = {
 	.probe		= locomo_probe,
-	.remove_new	= locomo_remove,
+	.remove		= locomo_remove,
 #ifdef CONFIG_PM
 	.suspend	= locomo_suspend,
 	.resume		= locomo_resume,
diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c
index 550978dc3c50..9846f30990f7 100644
--- a/arch/arm/common/sa1111.c
+++ b/arch/arm/common/sa1111.c
@@ -1154,7 +1154,7 @@ static struct dev_pm_ops sa1111_pm_ops = {
  */
 static struct platform_driver sa1111_device_driver = {
 	.probe		= sa1111_probe,
-	.remove_new	= sa1111_remove,
+	.remove		= sa1111_remove,
 	.driver		= {
 		.name	= "sa1111",
 		.pm	= &sa1111_pm_ops,
diff --git a/arch/arm/common/scoop.c b/arch/arm/common/scoop.c
index 9018c7240166..0b08b6621878 100644
--- a/arch/arm/common/scoop.c
+++ b/arch/arm/common/scoop.c
@@ -250,7 +250,7 @@ static void scoop_remove(struct platform_device *pdev)
 
 static struct platform_driver scoop_driver = {
 	.probe		= scoop_probe,
-	.remove_new	= scoop_remove,
+	.remove		= scoop_remove,
 	.suspend	= scoop_suspend,
 	.resume		= scoop_resume,
 	.driver		= {
diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig
index 0beecdde55f5..f25eadcba5e6 100644
--- a/arch/arm/configs/imx_v6_v7_defconfig
+++ b/arch/arm/configs/imx_v6_v7_defconfig
@@ -323,6 +323,7 @@ CONFIG_SND_SOC_IMX_SGTL5000=y
 CONFIG_SND_SOC_FSL_ASOC_CARD=y
 CONFIG_SND_SOC_AC97_CODEC=y
 CONFIG_SND_SOC_CS42XX8_I2C=y
+CONFIG_SND_SOC_SPDIF=y
 CONFIG_SND_SOC_TLV320AIC3X_I2C=y
 CONFIG_SND_SOC_WM8960=y
 CONFIG_SND_SOC_WM8962=y
diff --git a/arch/arm/mach-imx/Kconfig b/arch/arm/mach-imx/Kconfig
index e4fe059cd861..dc47b2312127 100644
--- a/arch/arm/mach-imx/Kconfig
+++ b/arch/arm/mach-imx/Kconfig
@@ -6,6 +6,7 @@ menuconfig ARCH_MXC
 	select CLKSRC_IMX_GPT
 	select GENERIC_IRQ_CHIP
 	select GPIOLIB
+	select PINCTRL
 	select PM_OPP if PM
 	select SOC_BUS
 	select SRAM
diff --git a/arch/arm/mach-imx/mmdc.c b/arch/arm/mach-imx/mmdc.c
index b68cb86dbe4c..e898f7c2733e 100644
--- a/arch/arm/mach-imx/mmdc.c
+++ b/arch/arm/mach-imx/mmdc.c
@@ -596,7 +596,7 @@ static struct platform_driver imx_mmdc_driver = {
 		.of_match_table = imx_mmdc_dt_ids,
 	},
 	.probe		= imx_mmdc_probe,
-	.remove_new	= imx_mmdc_remove,
+	.remove		= imx_mmdc_remove,
 };
 
 static int __init imx_mmdc_init(void)
diff --git a/arch/arm/mach-omap1/omap-dma.c b/arch/arm/mach-omap1/omap-dma.c
index f091f78631d0..aebe5e55ff60 100644
--- a/arch/arm/mach-omap1/omap-dma.c
+++ b/arch/arm/mach-omap1/omap-dma.c
@@ -832,7 +832,7 @@ static void omap_system_dma_remove(struct platform_device *pdev)
 
 static struct platform_driver omap_system_dma_driver = {
 	.probe		= omap_system_dma_probe,
-	.remove_new	= omap_system_dma_remove,
+	.remove		= omap_system_dma_remove,
 	.driver		= {
 		.name	= "omap_dma_system"
 	},
diff --git a/arch/arm/mach-pxa/sharpsl_pm.c b/arch/arm/mach-pxa/sharpsl_pm.c
index 72fa2e3fd353..0c8d9000df5a 100644
--- a/arch/arm/mach-pxa/sharpsl_pm.c
+++ b/arch/arm/mach-pxa/sharpsl_pm.c
@@ -919,7 +919,7 @@ static void sharpsl_pm_remove(struct platform_device *pdev)
 
 static struct platform_driver sharpsl_pm_driver = {
 	.probe		= sharpsl_pm_probe,
-	.remove_new	= sharpsl_pm_remove,
+	.remove		= sharpsl_pm_remove,
 	.suspend	= sharpsl_pm_suspend,
 	.resume		= sharpsl_pm_resume,
 	.driver		= {
diff --git a/arch/arm/mach-sa1100/jornada720_ssp.c b/arch/arm/mach-sa1100/jornada720_ssp.c
index 1956b095e699..d94810217095 100644
--- a/arch/arm/mach-sa1100/jornada720_ssp.c
+++ b/arch/arm/mach-sa1100/jornada720_ssp.c
@@ -188,7 +188,7 @@ static void jornada_ssp_remove(struct platform_device *dev)
 
 struct platform_driver jornadassp_driver = {
 	.probe	= jornada_ssp_probe,
-	.remove_new = jornada_ssp_remove,
+	.remove	= jornada_ssp_remove,
 	.driver	= {
 		.name	= "jornada_ssp",
 	},
diff --git a/arch/arm/mach-sa1100/neponset.c b/arch/arm/mach-sa1100/neponset.c
index 0ef0ebbf31ac..88fe79f0a4ed 100644
--- a/arch/arm/mach-sa1100/neponset.c
+++ b/arch/arm/mach-sa1100/neponset.c
@@ -423,7 +423,7 @@ static const struct dev_pm_ops neponset_pm_ops = {
 
 static struct platform_driver neponset_device_driver = {
 	.probe		= neponset_probe,
-	.remove_new	= neponset_remove,
+	.remove		= neponset_remove,
 	.driver		= {
 		.name	= "neponset",
 		.pm	= PM_OPS,
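[Note] The .remove_new -> .remove conversions above (locomo, sa1111, scoop, imx_mmdc, omap_system_dma, sharpsl_pm, jornada_ssp, neponset) track the platform bus completing its migration to a void-returning remove callback: .remove_new was the transitional name for the new prototype, and .remove now carries it directly. A minimal sketch of the resulting driver shape, using a hypothetical "foo" device that is not from this diff:

	#include <linux/module.h>
	#include <linux/platform_device.h>

	static int foo_probe(struct platform_device *pdev)
	{
		return 0;			/* acquire resources */
	}

	static void foo_remove(struct platform_device *pdev)
	{
		/* release resources; nothing to return, so no error path to mishandle */
	}

	static struct platform_driver foo_driver = {
		.probe	= foo_probe,
		.remove	= foo_remove,		/* formerly spelled .remove_new */
		.driver	= {
			.name = "foo",
		},
	};
	module_platform_driver(foo_driver);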
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 100570a048c5..213f42d5ca27 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -113,7 +113,7 @@ config ARM64
 	select ARCH_WANT_FRAME_POINTERS
 	select ARCH_WANT_HUGE_PMD_SHARE if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36)
 	select ARCH_WANT_LD_ORPHAN_WARN
-	select ARCH_WANTS_EXECMEM_LATE if EXECMEM
+	select ARCH_WANTS_EXECMEM_LATE
 	select ARCH_WANTS_NO_INSTR
 	select ARCH_WANTS_THP_SWAP if ARM64_4K_PAGES
 	select ARCH_HAS_UBSAN
@@ -1379,7 +1379,6 @@ config ARM64_VA_BITS_48
 
 config ARM64_VA_BITS_52
 	bool "52-bit"
-	depends on ARM64_PAN || !ARM64_SW_TTBR0_PAN
 	help
 	  Enable 52-bit virtual addressing for userspace when explicitly
 	  requested via a hint to mmap(). The kernel will also use 52-bit
@@ -1431,7 +1430,6 @@ config ARM64_PA_BITS_48
 config ARM64_PA_BITS_52
 	bool "52-bit"
 	depends on ARM64_64K_PAGES || ARM64_VA_BITS_52
-	depends on ARM64_PAN || !ARM64_SW_TTBR0_PAN
 	help
 	  Enable support for a 52-bit physical address space, introduced as
 	  part of the ARMv8.2-LPA extension.
@@ -1681,6 +1679,7 @@ config RODATA_FULL_DEFAULT_ENABLED
 config ARM64_SW_TTBR0_PAN
 	bool "Emulate Privileged Access Never using TTBR0_EL1 switching"
 	depends on !KCSAN
+	select ARM64_PAN
 	help
 	  Enabling this option prevents the kernel from accessing
 	  user-space memory directly by pointing TTBR0_EL1 to a reserved
@@ -1937,7 +1936,6 @@ config ARM64_RAS_EXTN
 config ARM64_CNP
 	bool "Enable support for Common Not Private (CNP) translations"
 	default y
-	depends on ARM64_PAN || !ARM64_SW_TTBR0_PAN
 	help
 	  Common Not Private (CNP) allows translation table entries to be
 	  shared between different PEs in the same inner shareable
@@ -2132,7 +2130,7 @@ config ARM64_MTE
 	depends on AS_HAS_ARMV8_5
 	depends on AS_HAS_LSE_ATOMICS
 	# Required for tag checking in the uaccess routines
-	depends on ARM64_PAN
+	select ARM64_PAN
 	select ARCH_HAS_SUBPAGE_FAULTS
 	select ARCH_USES_HIGH_VMA_FLAGS
 	select ARCH_USES_PG_ARCH_2
diff --git a/arch/arm64/boot/dts/arm/fvp-base-revc.dts b/arch/arm64/boot/dts/arm/fvp-base-revc.dts
index 19973ab4ea6b..9e10d7a6b5a2 100644
--- a/arch/arm64/boot/dts/arm/fvp-base-revc.dts
+++ b/arch/arm64/boot/dts/arm/fvp-base-revc.dts
@@ -233,7 +233,7 @@
 		#interrupt-cells = <0x1>;
 		compatible = "pci-host-ecam-generic";
 		device_type = "pci";
-		bus-range = <0x0 0x1>;
+		bus-range = <0x0 0xff>;
 		reg = <0x0 0x40000000 0x0 0x10000000>;
 		ranges = <0x2000000 0x0 0x50000000 0x0 0x50000000 0x0 0x10000000>;
 		interrupt-map = <0 0 0 1 &gic 0 0 GIC_SPI 168 IRQ_TYPE_LEVEL_HIGH>,
diff --git a/arch/arm64/boot/dts/broadcom/bcm2712.dtsi b/arch/arm64/boot/dts/broadcom/bcm2712.dtsi
index 6e5a984c1d4e..26a29e5e5078 100644
--- a/arch/arm64/boot/dts/broadcom/bcm2712.dtsi
+++ b/arch/arm64/boot/dts/broadcom/bcm2712.dtsi
@@ -67,7 +67,7 @@
 			l2_cache_l0: l2-cache-l0 {
 				compatible = "cache";
 				cache-size = <0x80000>;
-				cache-line-size = <128>;
+				cache-line-size = <64>;
 				cache-sets = <1024>; //512KiB(size)/64(line-size)=8192ways/8-way set
 				cache-level = <2>;
 				cache-unified;
@@ -91,7 +91,7 @@
 			l2_cache_l1: l2-cache-l1 {
 				compatible = "cache";
 				cache-size = <0x80000>;
-				cache-line-size = <128>;
+				cache-line-size = <64>;
 				cache-sets = <1024>; //512KiB(size)/64(line-size)=8192ways/8-way set
 				cache-level = <2>;
 				cache-unified;
@@ -115,7 +115,7 @@
 			l2_cache_l2: l2-cache-l2 {
 				compatible = "cache";
 				cache-size = <0x80000>;
-				cache-line-size = <128>;
+				cache-line-size = <64>;
 				cache-sets = <1024>; //512KiB(size)/64(line-size)=8192ways/8-way set
 				cache-level = <2>;
 				cache-unified;
@@ -139,7 +139,7 @@
 			l2_cache_l3: l2-cache-l3 {
 				compatible = "cache";
 				cache-size = <0x80000>;
-				cache-line-size = <128>;
+				cache-line-size = <64>;
 				cache-sets = <1024>; //512KiB(size)/64(line-size)=8192ways/8-way set
 				cache-level = <2>;
 				cache-unified;
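[Note] The bcm2712.dtsi cache-line-size fix can be cross-checked against the two neighbouring properties: cache-size 0x80000 is 512 KiB, and for an 8-way set-associative L2,

	sets = size / (line_size * ways) = 524288 / (64 * 8) = 1024

which matches cache-sets = <1024> (and the in-line //512KiB(size)/64(line-size) comment) only with a 64-byte line. The old value of 128 would imply 4096 lines and 512 sets, so the three properties were mutually inconsistent.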
diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-audio.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-audio.dtsi
index a60ebb718789..c32a6947ae9c 100644
--- a/arch/arm64/boot/dts/freescale/imx8-ss-audio.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8-ss-audio.dtsi
@@ -165,7 +165,7 @@ audio_subsys: bus@59000000 {
 	};
 
 	esai0: esai@59010000 {
-		compatible = "fsl,imx8qm-esai";
+		compatible = "fsl,imx8qm-esai", "fsl,imx6ull-esai";
 		reg = <0x59010000 0x10000>;
 		interrupts = <GIC_SPI 409 IRQ_TYPE_LEVEL_HIGH>;
 		clocks = <&esai0_lpcg IMX_LPCG_CLK_4>,
diff --git a/arch/arm64/boot/dts/freescale/imx8qm-ss-audio.dtsi b/arch/arm64/boot/dts/freescale/imx8qm-ss-audio.dtsi
index e24e639b98ee..c9b55f02497a 100644
--- a/arch/arm64/boot/dts/freescale/imx8qm-ss-audio.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8qm-ss-audio.dtsi
@@ -134,7 +134,7 @@
 	};
 
 	esai1: esai@59810000 {
-		compatible = "fsl,imx8qm-esai";
+		compatible = "fsl,imx8qm-esai", "fsl,imx6ull-esai";
 		reg = <0x59810000 0x10000>;
 		interrupts = <GIC_SPI 411 IRQ_TYPE_LEVEL_HIGH>;
 		clocks = <&esai1_lpcg IMX_LPCG_CLK_0>,
diff --git a/arch/arm64/boot/dts/freescale/imx95.dtsi b/arch/arm64/boot/dts/freescale/imx95.dtsi
index d10f62eacfe0..e9c7a8265d71 100644
--- a/arch/arm64/boot/dts/freescale/imx95.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx95.dtsi
@@ -1673,7 +1673,7 @@
 		netcmix_blk_ctrl: syscon@4c810000 {
 			compatible = "nxp,imx95-netcmix-blk-ctrl", "syscon";
-			reg = <0x0 0x4c810000 0x0 0x10000>;
+			reg = <0x0 0x4c810000 0x0 0x8>;
 			#clock-cells = <1>;
 			clocks = <&scmi_clk IMX95_CLK_BUSNETCMIX>;
 			assigned-clocks = <&scmi_clk IMX95_CLK_BUSNETCMIX>;
diff --git a/arch/arm64/boot/dts/qcom/sa8775p.dtsi b/arch/arm64/boot/dts/qcom/sa8775p.dtsi
index 9f315a51a7c1..9da62d7c4d27 100644
--- a/arch/arm64/boot/dts/qcom/sa8775p.dtsi
+++ b/arch/arm64/boot/dts/qcom/sa8775p.dtsi
@@ -2440,6 +2440,7 @@
 			qcom,cmb-element-bits = <32>;
 			qcom,cmb-msrs-num = <32>;
+			status = "disabled";
 
 			out-ports {
 				port {
@@ -6092,7 +6093,7 @@
 			      <0x0 0x40000000 0x0 0xf20>,
 			      <0x0 0x40000f20 0x0 0xa8>,
 			      <0x0 0x40001000 0x0 0x4000>,
-			      <0x0 0x40200000 0x0 0x100000>,
+			      <0x0 0x40200000 0x0 0x1fe00000>,
 			      <0x0 0x01c03000 0x0 0x1000>,
 			      <0x0 0x40005000 0x0 0x2000>;
 			reg-names = "parf", "dbi", "elbi", "atu", "addr_space",
@@ -6250,7 +6251,7 @@
 			      <0x0 0x60000000 0x0 0xf20>,
 			      <0x0 0x60000f20 0x0 0xa8>,
 			      <0x0 0x60001000 0x0 0x4000>,
-			      <0x0 0x60200000 0x0 0x100000>,
+			      <0x0 0x60200000 0x0 0x1fe00000>,
 			      <0x0 0x01c13000 0x0 0x1000>,
 			      <0x0 0x60005000 0x0 0x2000>;
 			reg-names = "parf", "dbi", "elbi", "atu", "addr_space",
diff --git a/arch/arm64/boot/dts/qcom/x1e78100-lenovo-thinkpad-t14s.dts b/arch/arm64/boot/dts/qcom/x1e78100-lenovo-thinkpad-t14s.dts
index 975550139e10..66513fc8e67a 100644
--- a/arch/arm64/boot/dts/qcom/x1e78100-lenovo-thinkpad-t14s.dts
+++ b/arch/arm64/boot/dts/qcom/x1e78100-lenovo-thinkpad-t14s.dts
@@ -773,6 +773,10 @@
 	status = "okay";
 };
 
+&usb_1_ss0_dwc3 {
+	dr_mode = "host";
+};
+
 &usb_1_ss0_dwc3_hs {
 	remote-endpoint = <&pmic_glink_ss0_hs_in>;
 };
@@ -801,6 +805,10 @@
 	status = "okay";
 };
 
+&usb_1_ss1_dwc3 {
+	dr_mode = "host";
+};
+
 &usb_1_ss1_dwc3_hs {
 	remote-endpoint = <&pmic_glink_ss1_hs_in>;
 };
diff --git a/arch/arm64/boot/dts/qcom/x1e80100-crd.dts b/arch/arm64/boot/dts/qcom/x1e80100-crd.dts
index 39f9d9cdc10d..d51a9bdcf67f 100644
--- a/arch/arm64/boot/dts/qcom/x1e80100-crd.dts
+++ b/arch/arm64/boot/dts/qcom/x1e80100-crd.dts
@@ -1197,6 +1197,10 @@
 	status = "okay";
 };
 
+&usb_1_ss0_dwc3 {
+	dr_mode = "host";
+};
+
 &usb_1_ss0_dwc3_hs {
 	remote-endpoint = <&pmic_glink_ss0_hs_in>;
 };
@@ -1225,6 +1229,10 @@
 	status = "okay";
 };
 
+&usb_1_ss1_dwc3 {
+	dr_mode = "host";
+};
+
 &usb_1_ss1_dwc3_hs {
 	remote-endpoint = <&pmic_glink_ss1_hs_in>;
 };
@@ -1253,6 +1261,10 @@
 	status = "okay";
 };
 
+&usb_1_ss2_dwc3 {
+	dr_mode = "host";
+};
+
 &usb_1_ss2_dwc3_hs {
 	remote-endpoint = <&pmic_glink_ss2_hs_in>;
 };
diff --git a/arch/arm64/boot/dts/qcom/x1e80100.dtsi b/arch/arm64/boot/dts/qcom/x1e80100.dtsi
index 88805629ed2b..7e4f46ad8edd 100644
--- a/arch/arm64/boot/dts/qcom/x1e80100.dtsi
+++ b/arch/arm64/boot/dts/qcom/x1e80100.dtsi
@@ -2924,7 +2924,7 @@
 			#address-cells = <3>;
 			#size-cells = <2>;
 			ranges = <0x01000000 0x0 0x00000000 0x0 0x70200000 0x0 0x100000>,
-				 <0x02000000 0x0 0x70300000 0x0 0x70300000 0x0 0x1d00000>;
+				 <0x02000000 0x0 0x70300000 0x0 0x70300000 0x0 0x3d00000>;
 			bus-range = <0x00 0xff>;
 
 			dma-coherent;
@@ -4066,8 +4066,6 @@
 			dma-coherent;
 
-			usb-role-switch;
-
 			ports {
 				#address-cells = <1>;
 				#size-cells = <0>;
@@ -4321,8 +4319,6 @@
 			dma-coherent;
 
-			usb-role-switch;
-
 			ports {
 				#address-cells = <1>;
 				#size-cells = <0>;
@@ -4421,8 +4417,6 @@
 			dma-coherent;
 
-			usb-role-switch;
-
 			ports {
 				#address-cells = <1>;
 				#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi
index 0597de415fe0..7d992c3c01ce 100644
--- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi
@@ -333,6 +333,7 @@
 		power-domain@RK3328_PD_HEVC {
 			reg = <RK3328_PD_HEVC>;
+			clocks = <&cru SCLK_VENC_CORE>;
 			#power-domain-cells = <0>;
 		};
 		power-domain@RK3328_PD_VIDEO {
diff --git a/arch/arm64/boot/dts/rockchip/rk3568.dtsi b/arch/arm64/boot/dts/rockchip/rk3568.dtsi
index ecaefe208e3e..695cccbdab0f 100644
--- a/arch/arm64/boot/dts/rockchip/rk3568.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3568.dtsi
@@ -350,6 +350,7 @@
 		assigned-clocks = <&pmucru CLK_PCIEPHY0_REF>;
 		assigned-clock-rates = <100000000>;
 		resets = <&cru SRST_PIPEPHY0>;
+		reset-names = "phy";
 		rockchip,pipe-grf = <&pipegrf>;
 		rockchip,pipe-phy-grf = <&pipe_phy_grf0>;
 		#phy-cells = <1>;
diff --git a/arch/arm64/boot/dts/rockchip/rk356x-base.dtsi b/arch/arm64/boot/dts/rockchip/rk356x-base.dtsi
index 62be06f3b863..e55390629114 100644
--- a/arch/arm64/boot/dts/rockchip/rk356x-base.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk356x-base.dtsi
@@ -1681,6 +1681,7 @@
 		assigned-clocks = <&pmucru CLK_PCIEPHY1_REF>;
 		assigned-clock-rates = <100000000>;
 		resets = <&cru SRST_PIPEPHY1>;
+		reset-names = "phy";
 		rockchip,pipe-grf = <&pipegrf>;
 		rockchip,pipe-phy-grf = <&pipe_phy_grf1>;
 		#phy-cells = <1>;
@@ -1697,6 +1698,7 @@
 		assigned-clocks = <&pmucru CLK_PCIEPHY2_REF>;
 		assigned-clock-rates = <100000000>;
 		resets = <&cru SRST_PIPEPHY2>;
+		reset-names = "phy";
 		rockchip,pipe-grf = <&pipegrf>;
 		rockchip,pipe-phy-grf = <&pipe_phy_grf2>;
 		#phy-cells = <1>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
index c44d001da169..d597112f1d5b 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
@@ -72,7 +72,7 @@
 	rfkill {
 		compatible = "rfkill-gpio";
-		label = "rfkill-pcie-wlan";
+		label = "rfkill-m2-wlan";
 		radio-type = "wlan";
 		shutdown-gpios = <&gpio4 RK_PA2 GPIO_ACTIVE_HIGH>;
 	};
diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-nanopi-r6.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s-nanopi-r6.dtsi
index 76a6e8e517e9..c9749cb50076 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588s-nanopi-r6.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588s-nanopi-r6.dtsi
@@ -434,6 +434,7 @@
 &sdmmc {
 	bus-width = <4>;
 	cap-sd-highspeed;
+	cd-gpios = <&gpio0 RK_PA4 GPIO_ACTIVE_LOW>;
 	disable-wp;
 	max-frequency = <150000000>;
 	no-mmc;
diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c
index a523b519700f..a2b5d6f20f4d 100644
--- a/arch/arm64/crypto/aes-ce-ccm-glue.c
+++ b/arch/arm64/crypto/aes-ce-ccm-glue.c
@@ -18,7 +18,7 @@
 
 #include "aes-ce-setkey.h"
 
-MODULE_IMPORT_NS(CRYPTO_INTERNAL);
+MODULE_IMPORT_NS("CRYPTO_INTERNAL");
 
 static int num_rounds(struct crypto_aes_ctx *ctx)
 {
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index a147e847a5a1..b0150999743f 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -1048,7 +1048,7 @@ unregister_ciphers:
 
 #ifdef USE_V8_CRYPTO_EXTENSIONS
 module_cpu_feature_match(AES, aes_init);
-EXPORT_SYMBOL_NS(ce_aes_mac_update, CRYPTO_INTERNAL);
+EXPORT_SYMBOL_NS(ce_aes_mac_update, "CRYPTO_INTERNAL");
 #else
 module_init(aes_init);
 EXPORT_SYMBOL(neon_aes_ecb_encrypt);
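[Note] The two crypto hunks above follow the kernel-wide conversion of symbol namespaces from bare preprocessor tokens to string literals. The shape of the change, sketched with a hypothetical symbol and namespace name:

	/* exporting side: publish a symbol into a named namespace */
	int my_helper(void);
	EXPORT_SYMBOL_NS(my_helper, "MY_NS");	/* was: EXPORT_SYMBOL_NS(my_helper, MY_NS) */

	/* importing side: a module must declare the namespaces it uses,
	 * otherwise modpost complains at build time */
	MODULE_IMPORT_NS("MY_NS");		/* was: MODULE_IMPORT_NS(MY_NS) */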
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 3d8d534a7a77..ad63457a05c5 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -343,6 +343,11 @@ alternative_cb_end
 	// Narrow PARange to fit the PS field in TCR_ELx
 	ubfx	\tmp0, \tmp0, #ID_AA64MMFR0_EL1_PARANGE_SHIFT, #3
 	mov	\tmp1, #ID_AA64MMFR0_EL1_PARANGE_MAX
+#ifdef CONFIG_ARM64_LPA2
+alternative_if_not ARM64_HAS_VA52
+	mov	\tmp1, #ID_AA64MMFR0_EL1_PARANGE_48
+alternative_else_nop_endif
+#endif
 	cmp	\tmp0, \tmp1
 	csel	\tmp0, \tmp1, \tmp0, hi
 	bfi	\tcr, \tmp0, \pos, #3
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index 201a46efd918..0b5ca6e0eb09 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -44,6 +44,10 @@ cpucap_is_possible(const unsigned int cap)
 		return IS_ENABLED(CONFIG_ARM64_TLB_RANGE);
 	case ARM64_HAS_S1POE:
 		return IS_ENABLED(CONFIG_ARM64_POE);
+	case ARM64_HAS_GCS:
+		return IS_ENABLED(CONFIG_ARM64_GCS);
+	case ARM64_HAFT:
+		return IS_ENABLED(CONFIG_ARM64_HAFT);
 	case ARM64_UNMAP_KERNEL_AT_EL0:
 		return IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0);
 	case ARM64_WORKAROUND_843419:
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index b64e49bd9d10..a4d0b77a68d9 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -847,14 +847,12 @@ static inline bool system_supports_poe(void)
 
 static inline bool system_supports_gcs(void)
 {
-	return IS_ENABLED(CONFIG_ARM64_GCS) &&
-		alternative_has_cap_unlikely(ARM64_HAS_GCS);
+	return alternative_has_cap_unlikely(ARM64_HAS_GCS);
 }
 
 static inline bool system_supports_haft(void)
 {
-	return IS_ENABLED(CONFIG_ARM64_HAFT) &&
-		cpus_have_final_cap(ARM64_HAFT);
+	return cpus_have_final_cap(ARM64_HAFT);
 }
 
 static __always_inline bool system_supports_mpam(void)
diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
index 85ef966c08cd..25e162651750 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -87,7 +87,7 @@
 		      1 << PMSCR_EL2_PA_SHIFT)
 	msr_s	SYS_PMSCR_EL2, x0		// addresses and physical counter
 .Lskip_spe_el2_\@:
-	mov	x0, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT)
+	mov	x0, #MDCR_EL2_E2PB_MASK
 	orr	x2, x2, x0			// If we don't have VHE, then
 						// use EL1&0 translation.
@@ -100,7 +100,7 @@
 	and	x0, x0, TRBIDR_EL1_P
 	cbnz	x0, .Lskip_trace_\@		// If TRBE is available at EL2
-	mov	x0, #(MDCR_EL2_E2TB_MASK << MDCR_EL2_E2TB_SHIFT)
+	mov	x0, #MDCR_EL2_E2TB_MASK
 	orr	x2, x2, x0			// allow the EL1&0 translation
 						// to own it.
@@ -154,7 +154,7 @@
 /* Coprocessor traps */
 .macro __init_el2_cptr
 	__check_hvhe .LnVHE_\@, x1
-	mov	x0, #CPACR_ELx_FPEN
+	mov	x0, #CPACR_EL1_FPEN
 	msr	cpacr_el1, x0
 	b	.Lskip_set_cptr_\@
 .LnVHE_\@:
@@ -332,7 +332,7 @@
 	// (h)VHE case
 	mrs	x0, cpacr_el1			// Disable SVE traps
-	orr	x0, x0, #CPACR_ELx_ZEN
+	orr	x0, x0, #CPACR_EL1_ZEN
 	msr	cpacr_el1, x0
 	b	.Lskip_set_cptr_\@
 
@@ -353,7 +353,7 @@
 	// (h)VHE case
 	mrs	x0, cpacr_el1			// Disable SME traps
-	orr	x0, x0, #CPACR_ELx_SMEN
+	orr	x0, x0, #CPACR_EL1_SMEN
 	msr	cpacr_el1, x0
 	b	.Lskip_set_cptr_sme_\@
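[Note] The el2_setup.h E2PB/E2TB hunks above (and the matching hyp-stub.S hunk further down) drop the explicit shifts at each use site, which only stays correct if the *_MASK constants are now defined pre-shifted into their register positions. A sketch of the two styles, assuming illustrative field positions (E2PB sits at bits [13:12] of MDCR_EL2):

	/* old style: mask covers the raw field value, shifted at every use */
	#define E2PB_SHIFT	12
	#define E2PB_MASK_OLD	0x3UL
	mdcr |= E2PB_MASK_OLD << E2PB_SHIFT;

	/* new style: GENMASK-like, already in place, usable directly */
	#define E2PB_MASK_NEW	(0x3UL << 12)
	mdcr |= E2PB_MASK_NEW;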
diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
index 2b6c61c608e2..1c3f9617d54f 100644
--- a/arch/arm64/include/asm/hwcap.h
+++ b/arch/arm64/include/asm/hwcap.h
@@ -93,6 +93,21 @@
 #define KERNEL_HWCAP_PACA		__khwcap_feature(PACA)
 #define KERNEL_HWCAP_PACG		__khwcap_feature(PACG)
 #define KERNEL_HWCAP_GCS		__khwcap_feature(GCS)
+#define KERNEL_HWCAP_CMPBR		__khwcap_feature(CMPBR)
+#define KERNEL_HWCAP_FPRCVT		__khwcap_feature(FPRCVT)
+#define KERNEL_HWCAP_F8MM8		__khwcap_feature(F8MM8)
+#define KERNEL_HWCAP_F8MM4		__khwcap_feature(F8MM4)
+#define KERNEL_HWCAP_SVE_F16MM		__khwcap_feature(SVE_F16MM)
+#define KERNEL_HWCAP_SVE_ELTPERM	__khwcap_feature(SVE_ELTPERM)
+#define KERNEL_HWCAP_SVE_AES2		__khwcap_feature(SVE_AES2)
+#define KERNEL_HWCAP_SVE_BFSCALE	__khwcap_feature(SVE_BFSCALE)
+#define KERNEL_HWCAP_SVE2P2		__khwcap_feature(SVE2P2)
+#define KERNEL_HWCAP_SME2P2		__khwcap_feature(SME2P2)
+#define KERNEL_HWCAP_SME_SBITPERM	__khwcap_feature(SME_SBITPERM)
+#define KERNEL_HWCAP_SME_AES		__khwcap_feature(SME_AES)
+#define KERNEL_HWCAP_SME_SFEXPA		__khwcap_feature(SME_SFEXPA)
+#define KERNEL_HWCAP_SME_STMOP		__khwcap_feature(SME_STMOP)
+#define KERNEL_HWCAP_SME_SMOP4		__khwcap_feature(SME_SMOP4)
 
 #define __khwcap2_feature(x)		(const_ilog2(HWCAP2_ ## x) + 64)
 #define KERNEL_HWCAP_DCPODP		__khwcap2_feature(DCPODP)
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 3e0f0de1d2da..43e365fbff0b 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -391,8 +391,6 @@
 	ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \
 	ECN(BKPT32), ECN(VECTOR32), ECN(BRK64), ECN(ERET)
 
-#define CPACR_EL1_TTA		(1 << 28)
-
 #define kvm_mode_names				\
 	{ PSR_MODE_EL0t,	"EL0t" },	\
 	{ PSR_MODE_EL1t,	"EL1t" },	\
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index cf811009a33c..4f1d99725f6b 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -556,13 +556,13 @@ static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu)
 	({								\
 		u64 cptr = 0;						\
 									\
-		if ((set) & CPACR_ELx_FPEN)				\
+		if ((set) & CPACR_EL1_FPEN)				\
 			cptr |= CPTR_EL2_TFP;				\
-		if ((set) & CPACR_ELx_ZEN)				\
+		if ((set) & CPACR_EL1_ZEN)				\
 			cptr |= CPTR_EL2_TZ;				\
-		if ((set) & CPACR_ELx_SMEN)				\
+		if ((set) & CPACR_EL1_SMEN)				\
 			cptr |= CPTR_EL2_TSM;				\
-		if ((clr) & CPACR_ELx_TTA)				\
+		if ((clr) & CPACR_EL1_TTA)				\
 			cptr |= CPTR_EL2_TTA;				\
 		if ((clr) & CPTR_EL2_TAM)				\
 			cptr |= CPTR_EL2_TAM;				\
@@ -576,13 +576,13 @@ static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu)
 	({								\
 		u64 cptr = 0;						\
 									\
-		if ((clr) & CPACR_ELx_FPEN)				\
+		if ((clr) & CPACR_EL1_FPEN)				\
 			cptr |= CPTR_EL2_TFP;				\
-		if ((clr) & CPACR_ELx_ZEN)				\
+		if ((clr) & CPACR_EL1_ZEN)				\
 			cptr |= CPTR_EL2_TZ;				\
-		if ((clr) & CPACR_ELx_SMEN)				\
+		if ((clr) & CPACR_EL1_SMEN)				\
 			cptr |= CPTR_EL2_TSM;				\
-		if ((set) & CPACR_ELx_TTA)				\
+		if ((set) & CPACR_EL1_TTA)				\
 			cptr |= CPTR_EL2_TTA;				\
 		if ((set) & CPTR_EL2_TAM)				\
 			cptr |= CPTR_EL2_TAM;				\
@@ -595,13 +595,13 @@ static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu)
 #define cpacr_clear_set(clr, set)					\
 	do {								\
 		BUILD_BUG_ON((set) & CPTR_VHE_EL2_RES0);		\
-		BUILD_BUG_ON((clr) & CPACR_ELx_E0POE);			\
-		__build_check_all_or_none((clr), CPACR_ELx_FPEN);	\
-		__build_check_all_or_none((set), CPACR_ELx_FPEN);	\
-		__build_check_all_or_none((clr), CPACR_ELx_ZEN);	\
-		__build_check_all_or_none((set), CPACR_ELx_ZEN);	\
-		__build_check_all_or_none((clr), CPACR_ELx_SMEN);	\
-		__build_check_all_or_none((set), CPACR_ELx_SMEN);	\
+		BUILD_BUG_ON((clr) & CPACR_EL1_E0POE);			\
+		__build_check_all_or_none((clr), CPACR_EL1_FPEN);	\
+		__build_check_all_or_none((set), CPACR_EL1_FPEN);	\
+		__build_check_all_or_none((clr), CPACR_EL1_ZEN);	\
+		__build_check_all_or_none((set), CPACR_EL1_ZEN);	\
+		__build_check_all_or_none((clr), CPACR_EL1_SMEN);	\
+		__build_check_all_or_none((set), CPACR_EL1_SMEN);	\
 									\
 		if (has_vhe() || has_hvhe())				\
 			sysreg_clear_set(cpacr_el1, clr, set);		\
@@ -624,16 +624,16 @@ static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu)
 	u64 val;
 
 	if (has_vhe()) {
-		val = (CPACR_ELx_FPEN | CPACR_EL1_ZEN_EL1EN);
+		val = (CPACR_EL1_FPEN | CPACR_EL1_ZEN_EL1EN);
 		if (cpus_have_final_cap(ARM64_SME))
 			val |= CPACR_EL1_SMEN_EL1EN;
 	} else if (has_hvhe()) {
-		val = CPACR_ELx_FPEN;
+		val = CPACR_EL1_FPEN;
 
 		if (!vcpu_has_sve(vcpu) || !guest_owns_fp_regs())
-			val |= CPACR_ELx_ZEN;
+			val |= CPACR_EL1_ZEN;
 		if (cpus_have_final_cap(ARM64_SME))
-			val |= CPACR_ELx_SMEN;
+			val |= CPACR_EL1_SMEN;
 	} else {
 		val = CPTR_NVHE_EL2_RES1;
 
@@ -685,7 +685,7 @@ static inline bool ____cptr_xen_trap_enabled(const struct kvm_vcpu *vcpu,
 #define __guest_hyp_cptr_xen_trap_enabled(vcpu, xen)			\
 	(!vcpu_has_nv(vcpu) ? false :					\
 		____cptr_xen_trap_enabled(vcpu,				\
-					  SYS_FIELD_GET(CPACR_ELx, xen,	\
+					  SYS_FIELD_GET(CPACR_EL1, xen,	\
 							vcpu_sanitised_cptr_el2(vcpu))))
 
 static inline bool guest_hyp_fpsimd_traps_enabled(const struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
index 233e65522716..6cd08198bf19 100644
--- a/arch/arm64/include/asm/kvm_nested.h
+++ b/arch/arm64/include/asm/kvm_nested.h
@@ -33,14 +33,14 @@ static inline u64 translate_tcr_el2_to_tcr_el1(u64 tcr)
 
 static inline u64 translate_cptr_el2_to_cpacr_el1(u64 cptr_el2)
 {
-	u64 cpacr_el1 = CPACR_ELx_RES1;
+	u64 cpacr_el1 = CPACR_EL1_RES1;
 
 	if (cptr_el2 & CPTR_EL2_TTA)
-		cpacr_el1 |= CPACR_ELx_TTA;
+		cpacr_el1 |= CPACR_EL1_TTA;
 	if (!(cptr_el2 & CPTR_EL2_TFP))
-		cpacr_el1 |= CPACR_ELx_FPEN;
+		cpacr_el1 |= CPACR_EL1_FPEN;
 	if (!(cptr_el2 & CPTR_EL2_TZ))
-		cpacr_el1 |= CPACR_ELx_ZEN;
+		cpacr_el1 |= CPACR_EL1_ZEN;
 
 	cpacr_el1 |= cptr_el2 & (CPTR_EL2_TCPAC | CPTR_EL2_TAM);
 
diff --git a/arch/arm64/include/asm/mman.h b/arch/arm64/include/asm/mman.h
index 1d53022fc7e1..21df8bbd2668 100644
--- a/arch/arm64/include/asm/mman.h
+++ b/arch/arm64/include/asm/mman.h
@@ -7,6 +7,7 @@
 #ifndef BUILD_VDSO
 #include <linux/compiler.h>
 #include <linux/fs.h>
+#include <linux/hugetlb.h>
 #include <linux/shmem_fs.h>
 #include <linux/types.h>
 
@@ -44,7 +45,7 @@ static inline unsigned long arch_calc_vm_flag_bits(struct file *file,
 	if (system_supports_mte()) {
 		if (flags & (MAP_ANONYMOUS | MAP_HUGETLB))
 			return VM_MTE_ALLOWED;
-		if (shmem_file(file))
+		if (shmem_file(file) || is_file_hugepages(file))
 			return VM_MTE_ALLOWED;
 	}
 
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index 2ec96d91acc6..662471cfc536 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -109,8 +109,5 @@ static inline bool kaslr_requires_kpti(void)
 	return true;
 }
 
-#define INIT_MM_CONTEXT(name)	\
-	.pgd = swapper_pg_dir,
-
 #endif	/* !__ASSEMBLY__ */
 #endif
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index c78a988cca93..a9136cc551cc 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -222,12 +222,6 @@
  */
 #define S1_TABLE_AP		(_AT(pmdval_t, 3) << 61)
 
-/*
- * Highest possible physical address supported.
- */
-#define PHYS_MASK_SHIFT		(CONFIG_ARM64_PA_BITS)
-#define PHYS_MASK		((UL(1) << PHYS_MASK_SHIFT) - 1)
-
 #define TTBR_CNP_BIT		(UL(1) << 0)
 
 /*
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index 9f9cf13bbd95..a95f1f77bb39 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -81,6 +81,7 @@ extern unsigned long prot_ns_shared;
 #define lpa2_is_enabled()	false
 #define PTE_MAYBE_SHARED	PTE_SHARED
 #define PMD_MAYBE_SHARED	PMD_SECT_S
+#define PHYS_MASK_SHIFT		(CONFIG_ARM64_PA_BITS)
 #else
 static inline bool __pure lpa2_is_enabled(void)
 {
@@ -89,9 +90,15 @@ static inline bool __pure lpa2_is_enabled(void)
 
 #define PTE_MAYBE_SHARED	(lpa2_is_enabled() ? 0 : PTE_SHARED)
 #define PMD_MAYBE_SHARED	(lpa2_is_enabled() ? 0 : PMD_SECT_S)
+#define PHYS_MASK_SHIFT		(lpa2_is_enabled() ? CONFIG_ARM64_PA_BITS : 48)
 #endif
 
 /*
+ * Highest possible physical address supported.
+ */
+#define PHYS_MASK		((UL(1) << PHYS_MASK_SHIFT) - 1)
+
+/*
  * If we have userspace only BTI we don't want to mark kernel pages
  * guarded even if the system does support BTI.
  */
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 6986345b537a..0b2a2ad1b9e8 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -273,7 +273,7 @@ static inline pte_t pte_mknoncont(pte_t pte)
 	return clear_pte_bit(pte, __pgprot(PTE_CONT));
 }
 
-static inline pte_t pte_mkpresent(pte_t pte)
+static inline pte_t pte_mkvalid(pte_t pte)
 {
 	return set_pte_bit(pte, __pgprot(PTE_VALID));
 }
@@ -896,7 +896,7 @@ static inline bool mm_pud_folded(const struct mm_struct *mm)
 	pr_err("%s:%d: bad pud %016llx.\n", __FILE__, __LINE__, pud_val(e))
 
 #define p4d_none(p4d)		(pgtable_l4_enabled() && !p4d_val(p4d))
-#define p4d_bad(p4d)		(pgtable_l4_enabled() && !(p4d_val(p4d) & 2))
+#define p4d_bad(p4d)		(pgtable_l4_enabled() && !(p4d_val(p4d) & P4D_TABLE_BIT))
 #define p4d_present(p4d)	(!p4d_none(p4d))
 
 static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
@@ -1023,7 +1023,7 @@ static inline bool mm_p4d_folded(const struct mm_struct *mm)
 	pr_err("%s:%d: bad p4d %016llx.\n", __FILE__, __LINE__, p4d_val(e))
 
 #define pgd_none(pgd)		(pgtable_l5_enabled() && !pgd_val(pgd))
-#define pgd_bad(pgd)		(pgtable_l5_enabled() && !(pgd_val(pgd) & 2))
+#define pgd_bad(pgd)		(pgtable_l5_enabled() && !(pgd_val(pgd) & PGD_TABLE_BIT))
 #define pgd_present(pgd)	(!pgd_none(pgd))
 
 static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
@@ -1345,7 +1345,7 @@ static inline void ___ptep_set_wrprotect(struct mm_struct *mm,
 }
 
 /*
- * __ptep_set_wrprotect - mark read-only while trasferring potential hardware
+ * __ptep_set_wrprotect - mark read-only while transferring potential hardware
  * dirty status (PTE_DBM && !PTE_RDONLY) to the software PTE_DIRTY bit.
 */
 static inline void __ptep_set_wrprotect(struct mm_struct *mm,
diff --git a/arch/arm64/include/asm/rsi.h b/arch/arm64/include/asm/rsi.h
index 188cbb9b23f5..b42aeac05340 100644
--- a/arch/arm64/include/asm/rsi.h
+++ b/arch/arm64/include/asm/rsi.h
@@ -10,6 +10,8 @@
 #include <linux/jump_label.h>
 #include <asm/rsi_cmds.h>
 
+#define RSI_PDEV_NAME "arm-cca-dev"
+
 DECLARE_STATIC_KEY_FALSE(rsi_present);
 
 void __init arm64_rsi_init(void);
diff --git a/arch/arm64/include/asm/seccomp.h b/arch/arm64/include/asm/seccomp.h
index b83975555314..bf6bf40bc5ab 100644
--- a/arch/arm64/include/asm/seccomp.h
+++ b/arch/arm64/include/asm/seccomp.h
@@ -23,7 +23,6 @@
 #define SECCOMP_ARCH_NATIVE_NR		NR_syscalls
 #define SECCOMP_ARCH_NATIVE_NAME	"aarch64"
 #ifdef CONFIG_COMPAT
-#include <asm/unistd_compat_32.h>
 # define SECCOMP_ARCH_COMPAT		AUDIT_ARCH_ARM
 # define SECCOMP_ARCH_COMPAT_NR		__NR_compat32_syscalls
 # define SECCOMP_ARCH_COMPAT_NAME	"arm"
diff --git a/arch/arm64/include/asm/sparsemem.h b/arch/arm64/include/asm/sparsemem.h
index 8a8acc220371..84783efdc9d1 100644
--- a/arch/arm64/include/asm/sparsemem.h
+++ b/arch/arm64/include/asm/sparsemem.h
@@ -5,7 +5,10 @@
 #ifndef __ASM_SPARSEMEM_H
 #define __ASM_SPARSEMEM_H
 
-#define MAX_PHYSMEM_BITS	CONFIG_ARM64_PA_BITS
+#include <asm/pgtable-prot.h>
+
+#define MAX_PHYSMEM_BITS		PHYS_MASK_SHIFT
+#define MAX_POSSIBLE_PHYSMEM_BITS	(52)
 
 /*
  * Section size must be at least 512MB for 64K base
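[Note] Net effect of the pgtable-hwdef.h, pgtable-prot.h and sparsemem.h hunks: PHYS_MASK_SHIFT moves next to the LPA2 machinery and becomes a runtime expression on LPA2-capable configurations. A worked example, with CONFIG_ARM64_PA_BITS=52 on 4K or 16K pages:

	lpa2 enabled:	PHYS_MASK_SHIFT = 52, PHYS_MASK = (1UL << 52) - 1
	lpa2 disabled:	PHYS_MASK_SHIFT = 48, PHYS_MASK = (1UL << 48) - 1

MAX_PHYSMEM_BITS follows the runtime value, while MAX_POSSIBLE_PHYSMEM_BITS stays at the 52-bit ceiling so compile-time consumers size for the worst case.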
diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
index 48d46b768eae..705a7afa8e58 100644
--- a/arch/arm64/include/uapi/asm/hwcap.h
+++ b/arch/arm64/include/uapi/asm/hwcap.h
@@ -56,6 +56,21 @@
 #define HWCAP_PACA		(1 << 30)
 #define HWCAP_PACG		(1UL << 31)
 #define HWCAP_GCS		(1UL << 32)
+#define HWCAP_CMPBR		(1UL << 33)
+#define HWCAP_FPRCVT		(1UL << 34)
+#define HWCAP_F8MM8		(1UL << 35)
+#define HWCAP_F8MM4		(1UL << 36)
+#define HWCAP_SVE_F16MM		(1UL << 37)
+#define HWCAP_SVE_ELTPERM	(1UL << 38)
+#define HWCAP_SVE_AES2		(1UL << 39)
+#define HWCAP_SVE_BFSCALE	(1UL << 40)
+#define HWCAP_SVE2P2		(1UL << 41)
+#define HWCAP_SME2P2		(1UL << 42)
+#define HWCAP_SME_SBITPERM	(1UL << 43)
+#define HWCAP_SME_AES		(1UL << 44)
+#define HWCAP_SME_SFEXPA	(1UL << 45)
+#define HWCAP_SME_STMOP		(1UL << 46)
+#define HWCAP_SME_SMOP4		(1UL << 47)
 
 /*
  * HWCAP2 flags - for AT_HWCAP2
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 6ce71f444ed8..d41128e37701 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -268,6 +268,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar2[] = {
 };
 
 static const struct arm64_ftr_bits ftr_id_aa64isar3[] = {
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR3_EL1_FPRCVT_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR3_EL1_FAMINMAX_SHIFT, 4, 0),
 	ARM64_FTR_END,
 };
@@ -318,6 +319,8 @@ static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = {
 	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
 		       FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_F32MM_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
+		       FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_F16MM_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
 		       FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_I8MM_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
 		       FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_SM4_SHIFT, 4, 0),
@@ -330,6 +333,8 @@ static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = {
 	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
 		       FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_BitPerm_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
+		       FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_EltPerm_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
 		       FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_AES_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
 		       FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_SVEver_SHIFT, 4, 0),
@@ -373,6 +378,16 @@ static const struct arm64_ftr_bits ftr_id_aa64smfr0[] = {
 		       FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SF8DP4_SHIFT, 1, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
 		       FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SF8DP2_SHIFT, 1, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+		       FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SBitPerm_SHIFT, 1, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+		       FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_AES_SHIFT, 1, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+		       FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SFEXPA_SHIFT, 1, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+		       FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_STMOP_SHIFT, 1, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+		       FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SMOP4_SHIFT, 1, 0),
 	ARM64_FTR_END,
 };
 
@@ -381,6 +396,8 @@ static const struct arm64_ftr_bits ftr_id_aa64fpfr0[] = {
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8FMA_SHIFT, 1, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8DP4_SHIFT, 1, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8DP2_SHIFT, 1, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8MM8_SHIFT, 1, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8MM4_SHIFT, 1, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8E4M3_SHIFT, 1, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8E5M2_SHIFT, 1, 0),
 	ARM64_FTR_END,
@@ -1004,17 +1021,16 @@ static void init_cpu_ftr_reg(u32 sys_reg, u64 new)
 				/* Override was valid */
 				ftr_new = tmp;
 				str = "forced";
-			} else if (ftr_ovr == tmp) {
+			} else {
 				/* Override was the safe value */
 				str = "already set";
 			}
 
-			if (str)
-				pr_warn("%s[%d:%d]: %s to %llx\n",
-					reg->name,
-					ftrp->shift + ftrp->width - 1,
-					ftrp->shift, str,
-					tmp & (BIT(ftrp->width) - 1));
+			pr_warn("%s[%d:%d]: %s to %llx\n",
+				reg->name,
+				ftrp->shift + ftrp->width - 1,
+				ftrp->shift, str,
+				tmp & (BIT(ftrp->width) - 1));
 		} else if ((ftr_mask & reg->override->val) == ftr_mask) {
 			reg->override->val &= ~ftr_mask;
 			pr_warn("%s[%d:%d]: impossible override, ignored\n",
@@ -1167,12 +1183,6 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
 	    id_aa64pfr1_sme(read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1))) {
 		unsigned long cpacr = cpacr_save_enable_kernel_sme();
 
-		/*
-		 * We mask out SMPS since even if the hardware
-		 * supports priorities the kernel does not at present
-		 * and we block access to them.
-		 */
-		info->reg_smidr = read_cpuid(SMIDR_EL1) & ~SMIDR_EL1_SMPS;
 		vec_init_vq_map(ARM64_VEC_SME);
 
 		cpacr_restore(cpacr);
@@ -1423,13 +1433,6 @@ void update_cpu_features(int cpu,
 	    id_aa64pfr1_sme(read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1))) {
 		unsigned long cpacr = cpacr_save_enable_kernel_sme();
 
-		/*
-		 * We mask out SMPS since even if the hardware
-		 * supports priorities the kernel does not at present
-		 * and we block access to them.
-		 */
-		info->reg_smidr = read_cpuid(SMIDR_EL1) & ~SMIDR_EL1_SMPS;
-
 		/* Probe vector lengths */
 		if (!system_capabilities_finalized())
 			vec_update_vq_map(ARM64_VEC_SME);
@@ -2376,8 +2379,8 @@ static void cpu_enable_mops(const struct arm64_cpu_capabilities *__unused)
 #ifdef CONFIG_ARM64_POE
 static void cpu_enable_poe(const struct arm64_cpu_capabilities *__unused)
 {
-	sysreg_clear_set(REG_TCR2_EL1, 0, TCR2_EL1x_E0POE);
-	sysreg_clear_set(CPACR_EL1, 0, CPACR_ELx_E0POE);
+	sysreg_clear_set(REG_TCR2_EL1, 0, TCR2_EL1_E0POE);
+	sysreg_clear_set(CPACR_EL1, 0, CPACR_EL1_E0POE);
 }
 #endif
@@ -3022,6 +3025,13 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.matches = match,					\
 	}
 
+#define HWCAP_CAP_MATCH_ID(match, reg, field, min_value, cap_type, cap)	\
+	{								\
+		__HWCAP_CAP(#cap, cap_type, cap)			\
+		HWCAP_CPUID_MATCH(reg, field, min_value)		\
+		.matches = match,					\
+	}
+
 #ifdef CONFIG_ARM64_PTR_AUTH
 static const struct arm64_cpu_capabilities ptr_auth_hwcap_addr_matches[] = {
 	{
@@ -3050,6 +3060,13 @@ static const struct arm64_cpu_capabilities ptr_auth_hwcap_gen_matches[] = {
 };
 #endif
 
+#ifdef CONFIG_ARM64_SVE
+static bool has_sve_feature(const struct arm64_cpu_capabilities *cap, int scope)
+{
+	return system_supports_sve() && has_user_cpuid_feature(cap, scope);
+}
+#endif
+
 static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
 	HWCAP_CAP(ID_AA64ISAR0_EL1, AES, PMULL, CAP_HWCAP, KERNEL_HWCAP_PMULL),
 	HWCAP_CAP(ID_AA64ISAR0_EL1, AES, AES, CAP_HWCAP, KERNEL_HWCAP_AES),
@@ -3092,19 +3109,24 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
 	HWCAP_CAP(ID_AA64MMFR2_EL1, AT, IMP, CAP_HWCAP, KERNEL_HWCAP_USCAT),
 #ifdef CONFIG_ARM64_SVE
 	HWCAP_CAP(ID_AA64PFR0_EL1, SVE, IMP, CAP_HWCAP, KERNEL_HWCAP_SVE),
-	HWCAP_CAP(ID_AA64ZFR0_EL1, SVEver, SVE2p1, CAP_HWCAP, KERNEL_HWCAP_SVE2P1),
-	HWCAP_CAP(ID_AA64ZFR0_EL1, SVEver, SVE2, CAP_HWCAP, KERNEL_HWCAP_SVE2),
-	HWCAP_CAP(ID_AA64ZFR0_EL1, AES, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEAES),
-	HWCAP_CAP(ID_AA64ZFR0_EL1, AES, PMULL128, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL),
-	HWCAP_CAP(ID_AA64ZFR0_EL1, BitPerm, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEBITPERM),
-	HWCAP_CAP(ID_AA64ZFR0_EL1, B16B16, IMP, CAP_HWCAP, KERNEL_HWCAP_SVE_B16B16),
-	HWCAP_CAP(ID_AA64ZFR0_EL1, BF16, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEBF16),
-	HWCAP_CAP(ID_AA64ZFR0_EL1, BF16, EBF16, CAP_HWCAP, KERNEL_HWCAP_SVE_EBF16),
-	HWCAP_CAP(ID_AA64ZFR0_EL1, SHA3, IMP, CAP_HWCAP, KERNEL_HWCAP_SVESHA3),
-	HWCAP_CAP(ID_AA64ZFR0_EL1, SM4, IMP, CAP_HWCAP, KERNEL_HWCAP_SVESM4),
-	HWCAP_CAP(ID_AA64ZFR0_EL1, I8MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEI8MM),
-	HWCAP_CAP(ID_AA64ZFR0_EL1, F32MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF32MM),
-	HWCAP_CAP(ID_AA64ZFR0_EL1, F64MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM),
+	HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, SVEver, SVE2p2, CAP_HWCAP, KERNEL_HWCAP_SVE2P2),
+	HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, SVEver, SVE2p1, CAP_HWCAP, KERNEL_HWCAP_SVE2P1),
+	HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, SVEver, SVE2, CAP_HWCAP, KERNEL_HWCAP_SVE2),
+	HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, AES, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEAES),
+	HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, AES, PMULL128, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL),
+	HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, AES, AES2, CAP_HWCAP, KERNEL_HWCAP_SVE_AES2),
+	HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, BitPerm, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEBITPERM),
+	HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, B16B16, IMP, CAP_HWCAP, KERNEL_HWCAP_SVE_B16B16),
+	HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, B16B16, BFSCALE, CAP_HWCAP, KERNEL_HWCAP_SVE_BFSCALE),
+	HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, BF16, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEBF16),
+	HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, BF16, EBF16, CAP_HWCAP, KERNEL_HWCAP_SVE_EBF16),
+	HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, SHA3, IMP, CAP_HWCAP, KERNEL_HWCAP_SVESHA3),
+	HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, SM4, IMP, CAP_HWCAP, KERNEL_HWCAP_SVESM4),
+	HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, I8MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEI8MM),
+	HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, F32MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF32MM),
+	HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, F64MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM),
+	HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, F16MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVE_F16MM),
+	HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, EltPerm, IMP, CAP_HWCAP, KERNEL_HWCAP_SVE_ELTPERM),
 #endif
 #ifdef CONFIG_ARM64_GCS
 	HWCAP_CAP(ID_AA64PFR1_EL1, GCS, IMP, CAP_HWCAP, KERNEL_HWCAP_GCS),
@@ -3124,6 +3146,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
 	HWCAP_CAP(ID_AA64MMFR0_EL1, ECV, IMP, CAP_HWCAP, KERNEL_HWCAP_ECV),
 	HWCAP_CAP(ID_AA64MMFR1_EL1, AFP, IMP, CAP_HWCAP, KERNEL_HWCAP_AFP),
 	HWCAP_CAP(ID_AA64ISAR2_EL1, CSSC, IMP, CAP_HWCAP, KERNEL_HWCAP_CSSC),
+	HWCAP_CAP(ID_AA64ISAR2_EL1, CSSC, CMPBR, CAP_HWCAP, KERNEL_HWCAP_CMPBR),
 	HWCAP_CAP(ID_AA64ISAR2_EL1, RPRFM, IMP, CAP_HWCAP, KERNEL_HWCAP_RPRFM),
 	HWCAP_CAP(ID_AA64ISAR2_EL1, RPRES, IMP, CAP_HWCAP, KERNEL_HWCAP_RPRES),
 	HWCAP_CAP(ID_AA64ISAR2_EL1, WFxT, IMP, CAP_HWCAP, KERNEL_HWCAP_WFXT),
@@ -3133,6 +3156,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
 	HWCAP_CAP(ID_AA64PFR1_EL1, SME, IMP, CAP_HWCAP, KERNEL_HWCAP_SME),
 	HWCAP_CAP(ID_AA64SMFR0_EL1, FA64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64),
 	HWCAP_CAP(ID_AA64SMFR0_EL1, LUTv2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_LUTV2),
+	HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2p2, CAP_HWCAP, KERNEL_HWCAP_SME2P2),
 	HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2p1, CAP_HWCAP, KERNEL_HWCAP_SME2P1),
 	HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2, CAP_HWCAP, KERNEL_HWCAP_SME2),
 	HWCAP_CAP(ID_AA64SMFR0_EL1, I16I64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64),
@@ -3150,6 +3174,13 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
 	HWCAP_CAP(ID_AA64SMFR0_EL1, SF8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8FMA),
 	HWCAP_CAP(ID_AA64SMFR0_EL1, SF8DP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP4),
 	HWCAP_CAP(ID_AA64SMFR0_EL1, SF8DP2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP2),
+	HWCAP_CAP(ID_AA64SMFR0_EL1, SF8MM8, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8MM8),
+	HWCAP_CAP(ID_AA64SMFR0_EL1, SF8MM4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8MM4),
+	HWCAP_CAP(ID_AA64SMFR0_EL1, SBitPerm, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SBITPERM),
+	HWCAP_CAP(ID_AA64SMFR0_EL1, AES, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_AES),
+	HWCAP_CAP(ID_AA64SMFR0_EL1, SFEXPA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SFEXPA),
+	HWCAP_CAP(ID_AA64SMFR0_EL1, STMOP, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_STMOP),
+	HWCAP_CAP(ID_AA64SMFR0_EL1, SMOP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SMOP4),
 #endif /* CONFIG_ARM64_SME */
 	HWCAP_CAP(ID_AA64FPFR0_EL1, F8CVT, IMP, CAP_HWCAP, KERNEL_HWCAP_F8CVT),
 	HWCAP_CAP(ID_AA64FPFR0_EL1, F8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_F8FMA),
@@ -3478,7 +3509,7 @@ static void verify_hyp_capabilities(void)
 		return;
 
 	safe_mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
-	mmfr0 = read_cpuid(ID_AA64MMFR0_EL1);
+	mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
 	mmfr1 = read_cpuid(ID_AA64MMFR1_EL1);
 
 	/* Verify VMID bits */
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index d79e88fccdfc..285d7d538342 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -145,6 +145,21 @@ static const char *const hwcap_str[] = {
 	[KERNEL_HWCAP_SME_SF8DP4]	= "smesf8dp4",
 	[KERNEL_HWCAP_SME_SF8DP2]	= "smesf8dp2",
 	[KERNEL_HWCAP_POE]		= "poe",
+	[KERNEL_HWCAP_CMPBR]		= "cmpbr",
+	[KERNEL_HWCAP_FPRCVT]		= "fprcvt",
+	[KERNEL_HWCAP_F8MM8]		= "f8mm8",
+	[KERNEL_HWCAP_F8MM4]		= "f8mm4",
+	[KERNEL_HWCAP_SVE_F16MM]	= "svef16mm",
+	[KERNEL_HWCAP_SVE_ELTPERM]	= "sveeltperm",
+	[KERNEL_HWCAP_SVE_AES2]		= "sveaes2",
+	[KERNEL_HWCAP_SVE_BFSCALE]	= "svebfscale",
+	[KERNEL_HWCAP_SVE2P2]		= "sve2p2",
+	[KERNEL_HWCAP_SME2P2]		= "sme2p2",
+	[KERNEL_HWCAP_SME_SBITPERM]	= "smesbitperm",
+	[KERNEL_HWCAP_SME_AES]		= "smeaes",
+	[KERNEL_HWCAP_SME_SFEXPA]	= "smesfexpa",
+	[KERNEL_HWCAP_SME_STMOP]	= "smestmop",
+	[KERNEL_HWCAP_SME_SMOP4]	= "smesmop4",
 };
 
 #ifdef CONFIG_COMPAT
@@ -482,6 +497,16 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
 	if (id_aa64pfr0_mpam(info->reg_id_aa64pfr0))
 		info->reg_mpamidr = read_cpuid(MPAMIDR_EL1);
 
+	if (IS_ENABLED(CONFIG_ARM64_SME) &&
+	    id_aa64pfr1_sme(info->reg_id_aa64pfr1)) {
+		/*
+		 * We mask out SMPS since even if the hardware
+		 * supports priorities the kernel does not at present
+		 * and we block access to them.
+		 */
+		info->reg_smidr = read_cpuid(SMIDR_EL1) & ~SMIDR_EL1_SMPS;
+	}
+
 	cpuinfo_detect_icache_policy(info);
 }
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index 65f76064c86b..ae990da1eae5 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -114,8 +114,8 @@ SYM_CODE_START_LOCAL(__finalise_el2)
 
 	// Use EL2 translations for SPE & TRBE and disable access from EL1
 	mrs	x0, mdcr_el2
-	bic	x0, x0, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT)
-	bic	x0, x0, #(MDCR_EL2_E2TB_MASK << MDCR_EL2_E2TB_SHIFT)
+	bic	x0, x0, #MDCR_EL2_E2PB_MASK
+	bic	x0, x0, #MDCR_EL2_E2TB_MASK
 	msr	mdcr_el2, x0
 
 	// Transfer the MM state from EL1 to EL2
diff --git a/arch/arm64/kernel/patching.c b/arch/arm64/kernel/patching.c
index 7f99723fbb8c..1041bc67a3ee 100644
--- a/arch/arm64/kernel/patching.c
+++ b/arch/arm64/kernel/patching.c
@@ -30,20 +30,17 @@ static bool is_image_text(unsigned long addr)
 
 static void __kprobes *patch_map(void *addr, int fixmap)
 {
-	unsigned long uintaddr = (uintptr_t) addr;
-	bool image = is_image_text(uintaddr);
-	struct page *page;
-
-	if (image)
-		page = phys_to_page(__pa_symbol(addr));
-	else if (IS_ENABLED(CONFIG_EXECMEM))
-		page = vmalloc_to_page(addr);
-	else
-		return addr;
-
-	BUG_ON(!page);
-	return (void *)set_fixmap_offset(fixmap, page_to_phys(page) +
-			(uintaddr & ~PAGE_MASK));
+	phys_addr_t phys;
+
+	if (is_image_text((unsigned long)addr)) {
+		phys = __pa_symbol(addr);
+	} else {
+		struct page *page = vmalloc_to_page(addr);
+		BUG_ON(!page);
+		phys = page_to_phys(page) + offset_in_page(addr);
+	}
+
+	return (void *)set_fixmap_offset(fixmap, phys);
 }
 
 static void __kprobes patch_unmap(int fixmap)
diff --git a/arch/arm64/kernel/pi/idreg-override.c b/arch/arm64/kernel/pi/idreg-override.c
index 22159251eb3a..c6b185b885f7 100644
--- a/arch/arm64/kernel/pi/idreg-override.c
+++ b/arch/arm64/kernel/pi/idreg-override.c
@@ -83,6 +83,15 @@ static bool __init mmfr2_varange_filter(u64 val)
 		id_aa64mmfr0_override.val |=
 			(ID_AA64MMFR0_EL1_TGRAN_LPA2 - 1) << ID_AA64MMFR0_EL1_TGRAN_SHIFT;
 		id_aa64mmfr0_override.mask |= 0xfU << ID_AA64MMFR0_EL1_TGRAN_SHIFT;
+
+		/*
+		 * Override PARange to 48 bits - the override will just be
+		 * ignored if the actual PARange is smaller, but this is
+		 * unlikely to be the case for LPA2 capable silicon.
+		 */
+		id_aa64mmfr0_override.val |=
+			ID_AA64MMFR0_EL1_PARANGE_48 << ID_AA64MMFR0_EL1_PARANGE_SHIFT;
+		id_aa64mmfr0_override.mask |= 0xfU << ID_AA64MMFR0_EL1_PARANGE_SHIFT;
 	}
 #endif
 	return true;
diff --git a/arch/arm64/kernel/pi/map_kernel.c b/arch/arm64/kernel/pi/map_kernel.c
index f374a3e5a5fe..e57b043f324b 100644
--- a/arch/arm64/kernel/pi/map_kernel.c
+++ b/arch/arm64/kernel/pi/map_kernel.c
@@ -136,6 +136,12 @@ static void noinline __section(".idmap.text") set_ttbr0_for_lpa2(u64 ttbr)
 {
 	u64 sctlr = read_sysreg(sctlr_el1);
 	u64 tcr = read_sysreg(tcr_el1) | TCR_DS;
+	u64 mmfr0 = read_sysreg(id_aa64mmfr0_el1);
+	u64 parange = cpuid_feature_extract_unsigned_field(mmfr0,
+					ID_AA64MMFR0_EL1_PARANGE_SHIFT);
+
+	tcr &= ~TCR_IPS_MASK;
+	tcr |= parange << TCR_IPS_SHIFT;
 
 	asm("	msr	sctlr_el1, %0	;"
 	    "	isb			;"
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index e4437f62a2cd..f79b0d5f71ac 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -720,6 +720,8 @@ static int fpmr_set(struct task_struct *target, const struct user_regset *regset
 	if (!system_supports_fpmr())
 		return -EINVAL;
 
+	fpmr = target->thread.uw.fpmr;
+
 	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &fpmr, 0, count);
 	if (ret)
 		return ret;
@@ -1427,7 +1429,7 @@ static int tagged_addr_ctrl_get(struct task_struct *target,
 {
 	long ctrl = get_tagged_addr_ctrl(target);
 
-	if (IS_ERR_VALUE(ctrl))
+	if (WARN_ON_ONCE(IS_ERR_VALUE(ctrl)))
 		return ctrl;
 
 	return membuf_write(&to, &ctrl, sizeof(ctrl));
@@ -1441,6 +1443,10 @@ static int tagged_addr_ctrl_set(struct task_struct *target, const struct
 	int ret;
 	long ctrl;
 
+	ctrl = get_tagged_addr_ctrl(target);
+	if (WARN_ON_ONCE(IS_ERR_VALUE(ctrl)))
+		return ctrl;
+
 	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl, 0, -1);
 	if (ret)
 		return ret;
@@ -1472,6 +1478,8 @@ static int poe_set(struct task_struct *target, const struct
 	if (!system_supports_poe())
 		return -EINVAL;
 
+	ctrl = target->thread.por_el0;
+
 	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl, 0, -1);
 	if (ret)
 		return ret;
@@ -1483,6 +1491,22 @@ static int poe_set(struct task_struct *target, const struct
 #endif
 
 #ifdef CONFIG_ARM64_GCS
+static void task_gcs_to_user(struct user_gcs *user_gcs,
+			     const struct task_struct *target)
+{
+	user_gcs->features_enabled = target->thread.gcs_el0_mode;
+	user_gcs->features_locked = target->thread.gcs_el0_locked;
+	user_gcs->gcspr_el0 = target->thread.gcspr_el0;
+}
+
+static void task_gcs_from_user(struct task_struct *target,
+			       const struct user_gcs *user_gcs)
+{
+	target->thread.gcs_el0_mode = user_gcs->features_enabled;
+	target->thread.gcs_el0_locked = user_gcs->features_locked;
+	target->thread.gcspr_el0 = user_gcs->gcspr_el0;
+}
+
 static int gcs_get(struct task_struct *target,
 		   const struct user_regset *regset,
 		   struct membuf to)
@@ -1495,9 +1519,7 @@ static int gcs_get(struct task_struct *target,
 	if (target == current)
 		gcs_preserve_current_state();
 
-	user_gcs.features_enabled = target->thread.gcs_el0_mode;
-	user_gcs.features_locked = target->thread.gcs_el0_locked;
-	user_gcs.gcspr_el0 = target->thread.gcspr_el0;
+	task_gcs_to_user(&user_gcs, target);
 
 	return membuf_write(&to, &user_gcs, sizeof(user_gcs));
 }
@@ -1513,6 +1535,8 @@ static int gcs_set(struct task_struct *target, const struct
 	if (!system_supports_gcs())
 		return -EINVAL;
 
+	task_gcs_to_user(&user_gcs, target);
+
 	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &user_gcs, 0, -1);
 	if (ret)
 		return ret;
@@ -1520,9 +1544,7 @@ static int gcs_set(struct task_struct *target, const struct
 	if (user_gcs.features_enabled & ~PR_SHADOW_STACK_SUPPORTED_STATUS_MASK)
 		return -EINVAL;
 
-	target->thread.gcs_el0_mode = user_gcs.features_enabled;
-	target->thread.gcs_el0_locked = user_gcs.features_locked;
-	target->thread.gcspr_el0 = user_gcs.gcspr_el0;
+	task_gcs_from_user(target, &user_gcs);
 
 	return 0;
 }
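[Note] The fpmr_set, tagged_addr_ctrl_set, poe_set and gcs_set hunks above all fix the same class of bug: user_regset_copyin() may legitimately copy fewer bytes than the kernel-side buffer holds, so the buffer must be seeded with the task's current state before the copy, otherwise a short write leaves the tail of the buffer as uninitialized stack data that then gets written back to the task. The corrected fpmr_set, reassembled here from the hunk for readability:

	static int fpmr_set(struct task_struct *target,
			    const struct user_regset *regset,
			    unsigned int pos, unsigned int count,
			    const void *kbuf, const void __user *ubuf)
	{
		u64 fpmr;
		int ret;

		if (!system_supports_fpmr())
			return -EINVAL;

		fpmr = target->thread.uw.fpmr;	/* seed before the copy-in */

		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &fpmr, 0, count);
		if (ret)
			return ret;

		target->thread.uw.fpmr = fpmr;	/* partial writes now merge, not clobber */
		return 0;
	}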
const struct if (!system_supports_gcs()) return -EINVAL; + task_gcs_to_user(&user_gcs, target); + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &user_gcs, 0, -1); if (ret) return ret; @@ -1520,9 +1544,7 @@ static int gcs_set(struct task_struct *target, const struct if (user_gcs.features_enabled & ~PR_SHADOW_STACK_SUPPORTED_STATUS_MASK) return -EINVAL; - target->thread.gcs_el0_mode = user_gcs.features_enabled; - target->thread.gcs_el0_locked = user_gcs.features_locked; - target->thread.gcspr_el0 = user_gcs.gcspr_el0; + task_gcs_from_user(target, &user_gcs); return 0; } diff --git a/arch/arm64/kernel/rsi.c b/arch/arm64/kernel/rsi.c index 3031f25c32ef..ce4778141ec7 100644 --- a/arch/arm64/kernel/rsi.c +++ b/arch/arm64/kernel/rsi.c @@ -8,6 +8,7 @@ #include <linux/psci.h> #include <linux/swiotlb.h> #include <linux/cc_platform.h> +#include <linux/platform_device.h> #include <asm/io.h> #include <asm/mem_encrypt.h> @@ -140,3 +141,17 @@ void __init arm64_rsi_init(void) static_branch_enable(&rsi_present); } +static struct platform_device rsi_dev = { + .name = RSI_PDEV_NAME, + .id = PLATFORM_DEVID_NONE +}; + +static int __init arm64_create_dummy_rsi_dev(void) +{ + if (is_realm_world() && + platform_device_register(&rsi_dev)) + pr_err("failed to register rsi platform device\n"); + return 0; +} + +arch_initcall(arm64_create_dummy_rsi_dev) diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 14ac6fdb872b..99ea26d400ff 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -36,15 +36,8 @@ #include <asm/traps.h> #include <asm/vdso.h> -#ifdef CONFIG_ARM64_GCS #define GCS_SIGNAL_CAP(addr) (((unsigned long)addr) & GCS_CAP_ADDR_MASK) -static bool gcs_signal_cap_valid(u64 addr, u64 val) -{ - return val == GCS_SIGNAL_CAP(addr); -} -#endif - /* * Do a signal return; undo the signal stack. These are aligned to 128-bit. */ @@ -1062,8 +1055,7 @@ static int restore_sigframe(struct pt_regs *regs, #ifdef CONFIG_ARM64_GCS static int gcs_restore_signal(void) { - unsigned long __user *gcspr_el0; - u64 cap; + u64 gcspr_el0, cap; int ret; if (!system_supports_gcs()) @@ -1072,7 +1064,7 @@ static int gcs_restore_signal(void) if (!(current->thread.gcs_el0_mode & PR_SHADOW_STACK_ENABLE)) return 0; - gcspr_el0 = (unsigned long __user *)read_sysreg_s(SYS_GCSPR_EL0); + gcspr_el0 = read_sysreg_s(SYS_GCSPR_EL0); /* * Ensure that any changes to the GCS done via GCS operations @@ -1087,22 +1079,23 @@ static int gcs_restore_signal(void) * then faults will be generated on GCS operations - the main * concern is to protect GCS pages. */ - ret = copy_from_user(&cap, gcspr_el0, sizeof(cap)); + ret = copy_from_user(&cap, (unsigned long __user *)gcspr_el0, + sizeof(cap)); if (ret) return -EFAULT; /* * Check that the cap is the actual GCS before replacing it. 
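A pattern repeats across the ptrace hunks above (fpmr_set(), tagged_addr_ctrl_set(), poe_set(), gcs_set()): seed the local buffer from the task's current state before calling user_regset_copyin(), so that a short write from userspace only overwrites the bytes it actually supplies. A minimal sketch of the idiom using FPMR; the function name is illustrative, kernel context assumed:

static int fpmr_set_sketch(struct task_struct *target,
			   const struct user_regset *regset,
			   unsigned int pos, unsigned int count,
			   const void *kbuf, const void __user *ubuf)
{
	u64 fpmr = target->thread.uw.fpmr;	/* seed with current value */
	int ret;

	/* A short write now leaves the trailing bytes of fpmr intact. */
	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &fpmr, 0, count);
	if (ret)
		return ret;

	target->thread.uw.fpmr = fpmr;
	return 0;
}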
*/ - if (!gcs_signal_cap_valid((u64)gcspr_el0, cap)) + if (cap != GCS_SIGNAL_CAP(gcspr_el0)) return -EINVAL; /* Invalidate the token to prevent reuse */ - put_user_gcs(0, (__user void*)gcspr_el0, &ret); + put_user_gcs(0, (unsigned long __user *)gcspr_el0, &ret); if (ret != 0) return -EFAULT; - write_sysreg_s(gcspr_el0 + 1, SYS_GCSPR_EL0); + write_sysreg_s(gcspr_el0 + 8, SYS_GCSPR_EL0); return 0; } @@ -1421,7 +1414,7 @@ static int get_sigframe(struct rt_sigframe_user_layout *user, static int gcs_signal_entry(__sigrestore_t sigtramp, struct ksignal *ksig) { - unsigned long __user *gcspr_el0; + u64 gcspr_el0; int ret = 0; if (!system_supports_gcs()) @@ -1434,18 +1427,20 @@ static int gcs_signal_entry(__sigrestore_t sigtramp, struct ksignal *ksig) * We are entering a signal handler, current register state is * active. */ - gcspr_el0 = (unsigned long __user *)read_sysreg_s(SYS_GCSPR_EL0); + gcspr_el0 = read_sysreg_s(SYS_GCSPR_EL0); /* * Push a cap and the GCS entry for the trampoline onto the GCS. */ - put_user_gcs((unsigned long)sigtramp, gcspr_el0 - 2, &ret); - put_user_gcs(GCS_SIGNAL_CAP(gcspr_el0 - 1), gcspr_el0 - 1, &ret); + put_user_gcs((unsigned long)sigtramp, + (unsigned long __user *)(gcspr_el0 - 16), &ret); + put_user_gcs(GCS_SIGNAL_CAP(gcspr_el0 - 8), + (unsigned long __user *)(gcspr_el0 - 8), &ret); if (ret != 0) return ret; - gcspr_el0 -= 2; - write_sysreg_s((unsigned long)gcspr_el0, SYS_GCSPR_EL0); + gcspr_el0 -= 16; + write_sysreg_s(gcspr_el0, SYS_GCSPR_EL0); return 0; } @@ -1462,10 +1457,33 @@ static int setup_return(struct pt_regs *regs, struct ksignal *ksig, struct rt_sigframe_user_layout *user, int usig) { __sigrestore_t sigtramp; + int err; + + if (ksig->ka.sa.sa_flags & SA_RESTORER) + sigtramp = ksig->ka.sa.sa_restorer; + else + sigtramp = VDSO_SYMBOL(current->mm->context.vdso, sigtramp); + + err = gcs_signal_entry(sigtramp, ksig); + if (err) + return err; + + /* + * We must not fail from this point onwards. We are going to update + * registers, including SP, in order to invoke the signal handler. If + * we failed and attempted to deliver a nested SIGSEGV to a handler + * after that point, the subsequent sigreturn would end up restoring + * the (partial) state for the original signal handler. 
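The GCS signal hunks above change gcspr_el0 from an unsigned long pointer to a plain u64, so the stack adjustments are written in bytes (-16, -8, +8) rather than in pointer units. A sketch of the resulting signal-entry layout and of the cap check done on sigreturn; the helper is illustrative, GCS_SIGNAL_CAP() is from the patch:

/*
 * On signal delivery the GCS grows down by two 8-byte slots:
 *
 *	gcspr_el0 - 16 : address of the sigreturn trampoline
 *	gcspr_el0 -  8 : cap token tied to that slot's own address
 *
 * so the old "gcspr_el0 - 2" on a u64 pointer and the new
 * "gcspr_el0 - 16" in bytes name the same slot.
 */
static bool gcs_cap_valid_sketch(u64 gcspr_el0, u64 cap)
{
	return cap == GCS_SIGNAL_CAP(gcspr_el0);
}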
+ */ regs->regs[0] = usig; + if (ksig->ka.sa.sa_flags & SA_SIGINFO) { + regs->regs[1] = (unsigned long)&user->sigframe->info; + regs->regs[2] = (unsigned long)&user->sigframe->uc; + } regs->sp = (unsigned long)user->sigframe; regs->regs[29] = (unsigned long)&user->next_frame->fp; + regs->regs[30] = (unsigned long)sigtramp; regs->pc = (unsigned long)ksig->ka.sa.sa_handler; /* @@ -1506,14 +1524,7 @@ static int setup_return(struct pt_regs *regs, struct ksignal *ksig, sme_smstop(); } - if (ksig->ka.sa.sa_flags & SA_RESTORER) - sigtramp = ksig->ka.sa.sa_restorer; - else - sigtramp = VDSO_SYMBOL(current->mm->context.vdso, sigtramp); - - regs->regs[30] = (unsigned long)sigtramp; - - return gcs_signal_entry(sigtramp, ksig); + return 0; } static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set, @@ -1537,14 +1548,16 @@ static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set, err |= __save_altstack(&frame->uc.uc_stack, regs->sp); err |= setup_sigframe(&user, regs, set, &ua_state); - if (err == 0) { + if (ksig->ka.sa.sa_flags & SA_SIGINFO) + err |= copy_siginfo_to_user(&frame->info, &ksig->info); + + if (err == 0) err = setup_return(regs, ksig, &user, usig); - if (ksig->ka.sa.sa_flags & SA_SIGINFO) { - err |= copy_siginfo_to_user(&frame->info, &ksig->info); - regs->regs[1] = (unsigned long)&frame->info; - regs->regs[2] = (unsigned long)&frame->uc; - } - } + + /* + * We must not fail if setup_return() succeeded - see comment at the + * beginning of setup_return(). + */ if (err == 0) set_handler_user_access_state(); diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index caef85462acb..1d9d51d7627f 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -26,7 +26,6 @@ enum kunwind_source { KUNWIND_SOURCE_CALLER, KUNWIND_SOURCE_TASK, KUNWIND_SOURCE_REGS_PC, - KUNWIND_SOURCE_REGS_LR, }; union unwind_flags { @@ -138,8 +137,10 @@ kunwind_recover_return_address(struct kunwind_state *state) orig_pc = ftrace_graph_ret_addr(state->task, &state->graph_idx, state->common.pc, (void *)state->common.fp); - if (WARN_ON_ONCE(state->common.pc == orig_pc)) + if (state->common.pc == orig_pc) { + WARN_ON_ONCE(state->task == current); return -EINVAL; + } state->common.pc = orig_pc; state->flags.fgraph = 1; } @@ -178,23 +179,8 @@ int kunwind_next_regs_pc(struct kunwind_state *state) state->regs = regs; state->common.pc = regs->pc; state->common.fp = regs->regs[29]; - state->source = KUNWIND_SOURCE_REGS_PC; - return 0; -} - -static __always_inline int -kunwind_next_regs_lr(struct kunwind_state *state) -{ - /* - * The stack for the regs was consumed by kunwind_next_regs_pc(), so we - * cannot consume that again here, but we know the regs are safe to - * access. 
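The restructured setup_return() above enforces an ordering invariant worth spelling out: everything that can fail (the GCS pushes in gcs_signal_entry(), and copy_siginfo_to_user() in the caller) runs before the first write to pt_regs, so a failed delivery can still fall back to a nested SIGSEGV without a half-updated register state. Schematically, as a sketch rather than the actual function:

static int setup_return_sketch(struct pt_regs *regs, struct ksignal *ksig,
			       __sigrestore_t sigtramp)
{
	int err;

	/* 1. All fallible work first. */
	err = gcs_signal_entry(sigtramp, ksig);
	if (err)
		return err;

	/* 2. Point of no return: only infallible register updates. */
	regs->pc = (unsigned long)ksig->ka.sa.sa_handler;
	regs->regs[30] = (unsigned long)sigtramp;
	return 0;
}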
- */ - state->common.pc = state->regs->regs[30]; - state->common.fp = state->regs->regs[29]; state->regs = NULL; - state->source = KUNWIND_SOURCE_REGS_LR; - + state->source = KUNWIND_SOURCE_REGS_PC; return 0; } @@ -215,12 +201,12 @@ kunwind_next_frame_record_meta(struct kunwind_state *state) case FRAME_META_TYPE_FINAL: if (meta == &task_pt_regs(tsk)->stackframe) return -ENOENT; - WARN_ON_ONCE(1); + WARN_ON_ONCE(tsk == current); return -EINVAL; case FRAME_META_TYPE_PT_REGS: return kunwind_next_regs_pc(state); default: - WARN_ON_ONCE(1); + WARN_ON_ONCE(tsk == current); return -EINVAL; } } @@ -274,11 +260,8 @@ kunwind_next(struct kunwind_state *state) case KUNWIND_SOURCE_FRAME: case KUNWIND_SOURCE_CALLER: case KUNWIND_SOURCE_TASK: - case KUNWIND_SOURCE_REGS_LR: - err = kunwind_next_frame_record(state); - break; case KUNWIND_SOURCE_REGS_PC: - err = kunwind_next_regs_lr(state); + err = kunwind_next_frame_record(state); break; default: err = -EINVAL; @@ -436,7 +419,6 @@ static const char *state_source_string(const struct kunwind_state *state) case KUNWIND_SOURCE_CALLER: return "C"; case KUNWIND_SOURCE_TASK: return "T"; case KUNWIND_SOURCE_REGS_PC: return "P"; - case KUNWIND_SOURCE_REGS_LR: return "L"; default: return "U"; } } diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index a102c3aebdbc..7b2735ad32e9 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1990,8 +1990,7 @@ static int kvm_init_vector_slots(void) static void __init cpu_prepare_hyp_mode(int cpu, u32 hyp_va_bits) { struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu); - u64 mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); - unsigned long tcr; + unsigned long tcr, ips; /* * Calculate the raw per-cpu offset without a translation from the @@ -2005,6 +2004,7 @@ static void __init cpu_prepare_hyp_mode(int cpu, u32 hyp_va_bits) params->mair_el2 = read_sysreg(mair_el1); tcr = read_sysreg(tcr_el1); + ips = FIELD_GET(TCR_IPS_MASK, tcr); if (cpus_have_final_cap(ARM64_KVM_HVHE)) { tcr |= TCR_EPD1_MASK; } else { @@ -2014,8 +2014,8 @@ static void __init cpu_prepare_hyp_mode(int cpu, u32 hyp_va_bits) tcr &= ~TCR_T0SZ_MASK; tcr |= TCR_T0SZ(hyp_va_bits); tcr &= ~TCR_EL2_PS_MASK; - tcr |= FIELD_PREP(TCR_EL2_PS_MASK, kvm_get_parange(mmfr0)); - if (kvm_lpa2_is_enabled()) + tcr |= FIELD_PREP(TCR_EL2_PS_MASK, ips); + if (lpa2_is_enabled()) tcr |= TCR_EL2_DS; params->tcr_el2 = tcr; diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index 8c5d7990e5b3..3a96c96816e9 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -111,7 +111,7 @@ static bool s1pie_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime) return vcpu_read_sys_reg(vcpu, TCR2_EL2) & TCR2_EL2_PIE; case TR_EL10: return (__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En) && - (__vcpu_sys_reg(vcpu, TCR2_EL1) & TCR2_EL1x_PIE); + (__vcpu_sys_reg(vcpu, TCR2_EL1) & TCR2_EL1_PIE); default: BUG(); } @@ -140,8 +140,8 @@ static void compute_s1poe(struct kvm_vcpu *vcpu, struct s1_walk_info *wi) } val = __vcpu_sys_reg(vcpu, TCR2_EL1); - wi->poe = val & TCR2_EL1x_POE; - wi->e0poe = val & TCR2_EL1x_E0POE; + wi->poe = val & TCR2_EL1_POE; + wi->e0poe = val & TCR2_EL1_E0POE; } } @@ -739,8 +739,15 @@ static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par, final_attr = s1_parattr; break; default: - /* MemAttr[2]=0, Device from S2 */ - final_attr = s2_memattr & GENMASK(1,0) << 2; + /* + * MemAttr[2]=0, Device from S2. 
+ * + * FWB does not influence the way that stage 1 + * memory types and attributes are combined + * with stage 2 Device type and attributes. + */ + final_attr = min(s2_memattr_to_attr(s2_memattr), + s1_parattr); } } else { /* Combination of R_HMNDG, R_TNHFM and R_GQFSF */ diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c index 1ffbfd1c3cf2..f1b7287e1f3c 100644 --- a/arch/arm64/kvm/emulate-nested.c +++ b/arch/arm64/kvm/emulate-nested.c @@ -494,7 +494,7 @@ static enum trap_behaviour check_cptr_tta(struct kvm_vcpu *vcpu) if (!vcpu_el2_e2h_is_set(vcpu)) val = translate_cptr_el2_to_cpacr_el1(val); - if (val & CPACR_ELx_TTA) + if (val & CPACR_EL1_TTA) return BEHAVE_FORWARD_RW; return BEHAVE_HANDLE_LOCALLY; diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index ea5484ce1f3b..98718bd65bf1 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -169,7 +169,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) if (has_vhe() && system_supports_sme()) { /* Also restore EL0 state seen on entry */ if (vcpu_get_flag(vcpu, HOST_SME_ENABLED)) - sysreg_clear_set(CPACR_EL1, 0, CPACR_ELx_SMEN); + sysreg_clear_set(CPACR_EL1, 0, CPACR_EL1_SMEN); else sysreg_clear_set(CPACR_EL1, CPACR_EL1_SMEN_EL0EN, diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 34f53707892d..abfa6ad92e91 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -419,9 +419,9 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) /* First disable enough traps to allow us to update the registers */ if (sve_guest || (is_protected_kvm_enabled() && system_supports_sve())) - cpacr_clear_set(0, CPACR_ELx_FPEN | CPACR_ELx_ZEN); + cpacr_clear_set(0, CPACR_EL1_FPEN | CPACR_EL1_ZEN); else - cpacr_clear_set(0, CPACR_ELx_FPEN); + cpacr_clear_set(0, CPACR_EL1_FPEN); isb(); /* Write out the host state if it's in the registers */ diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 6aa0b13d86e5..6c90ef6736d6 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -68,7 +68,7 @@ static void fpsimd_sve_sync(struct kvm_vcpu *vcpu) if (!guest_owns_fp_regs()) return; - cpacr_clear_set(0, CPACR_ELx_FPEN | CPACR_ELx_ZEN); + cpacr_clear_set(0, CPACR_EL1_FPEN | CPACR_EL1_ZEN); isb(); if (vcpu_has_sve(vcpu)) @@ -481,7 +481,7 @@ void handle_trap(struct kvm_cpu_context *host_ctxt) handle_host_smc(host_ctxt); break; case ESR_ELx_EC_SVE: - cpacr_clear_set(0, CPACR_ELx_ZEN); + cpacr_clear_set(0, CPACR_EL1_ZEN); isb(); sve_cond_update_zcr_vq(sve_vq_from_vl(kvm_host_sve_max_vl) - 1, SYS_ZCR_EL2); diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index caba3e4bd09e..e75374d682f4 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -783,9 +783,6 @@ static int hyp_ack_unshare(u64 addr, const struct pkvm_mem_transition *tx) if (tx->initiator.id == PKVM_ID_HOST && hyp_page_count((void *)addr)) return -EBUSY; - if (__hyp_ack_skip_pgtable_check(tx)) - return 0; - return __hyp_check_page_state_range(addr, size, PKVM_PAGE_SHARED_BORROWED); } diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 01616c39a810..73e319891327 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -68,7 +68,7 @@ static void pvm_init_traps_aa64pfr0(struct kvm_vcpu *vcpu) /* Trap SVE */ if 
(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_SVE), feature_ids)) { if (has_hvhe()) - cptr_clear |= CPACR_ELx_ZEN; + cptr_clear |= CPACR_EL1_ZEN; else cptr_set |= CPTR_EL2_TZ; } @@ -126,7 +126,7 @@ static void pvm_init_traps_aa64dfr0(struct kvm_vcpu *vcpu) /* Trap SPE */ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMSVer), feature_ids)) { mdcr_set |= MDCR_EL2_TPMS; - mdcr_clear |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT; + mdcr_clear |= MDCR_EL2_E2PB_MASK; } /* Trap Trace Filter */ @@ -143,7 +143,7 @@ static void pvm_init_traps_aa64dfr0(struct kvm_vcpu *vcpu) /* Trap External Trace */ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_ExtTrcBuff), feature_ids)) - mdcr_clear |= MDCR_EL2_E2TB_MASK << MDCR_EL2_E2TB_SHIFT; + mdcr_clear |= MDCR_EL2_E2TB_MASK; vcpu->arch.mdcr_el2 |= mdcr_set; vcpu->arch.mdcr_el2 &= ~mdcr_clear; diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index cc69106734ca..0f6b01b3da5c 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -48,14 +48,14 @@ static void __activate_traps(struct kvm_vcpu *vcpu) val |= has_hvhe() ? CPACR_EL1_TTA : CPTR_EL2_TTA; if (cpus_have_final_cap(ARM64_SME)) { if (has_hvhe()) - val &= ~CPACR_ELx_SMEN; + val &= ~CPACR_EL1_SMEN; else val |= CPTR_EL2_TSM; } if (!guest_owns_fp_regs()) { if (has_hvhe()) - val &= ~(CPACR_ELx_FPEN | CPACR_ELx_ZEN); + val &= ~(CPACR_EL1_FPEN | CPACR_EL1_ZEN); else val |= CPTR_EL2_TFP | CPTR_EL2_TZ; @@ -192,7 +192,7 @@ static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu) /* Re-enable SVE traps if not supported for the guest vcpu. */ if (!vcpu_has_sve(vcpu)) - cpacr_clear_set(CPACR_ELx_ZEN, 0); + cpacr_clear_set(CPACR_EL1_ZEN, 0); } else { __fpsimd_save_state(*host_data_ptr(fpsimd_state)); diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 40bd55966540..d2b6fa051d6b 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -35,14 +35,6 @@ static bool kvm_pgtable_walk_skip_cmo(const struct kvm_pgtable_visit_ctx *ctx) return unlikely(ctx->flags & KVM_PGTABLE_WALK_SKIP_CMO); } -static bool kvm_phys_is_valid(u64 phys) -{ - u64 parange_max = kvm_get_parange_max(); - u8 shift = id_aa64mmfr0_parange_to_phys_shift(parange_max); - - return phys < BIT(shift); -} - static bool kvm_block_mapping_supported(const struct kvm_pgtable_visit_ctx *ctx, u64 phys) { u64 granule = kvm_granule_size(ctx->level); @@ -53,7 +45,7 @@ static bool kvm_block_mapping_supported(const struct kvm_pgtable_visit_ctx *ctx, if (granule > (ctx->end - ctx->addr)) return false; - if (kvm_phys_is_valid(phys) && !IS_ALIGNED(phys, granule)) + if (!IS_ALIGNED(phys, granule)) return false; return IS_ALIGNED(ctx->addr, granule); @@ -587,6 +579,9 @@ struct stage2_map_data { /* Force mappings to page granularity */ bool force_pte; + + /* Walk should update owner_id only */ + bool annotation; }; u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift) @@ -885,18 +880,7 @@ static u64 stage2_map_walker_phys_addr(const struct kvm_pgtable_visit_ctx *ctx, { u64 phys = data->phys; - /* - * Stage-2 walks to update ownership data are communicated to the map - * walker using an invalid PA. Avoid offsetting an already invalid PA, - * which could overflow and make the address valid again. - */ - if (!kvm_phys_is_valid(phys)) - return phys; - - /* - * Otherwise, work out the correct PA based on how far the walk has - * gotten. 
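The stage-2 page-table hunks above drop the convention of encoding "ownership annotation only" walks as an invalid PA. The removed comment hints at why the sentinel was fragile; roughly:

/* Illustrative only: the failure mode the new annotation flag removes. */
u64 phys = KVM_PHYS_INVALID;	/* out-of-range sentinel		  */
phys += ctx->addr - ctx->start;	/* can wrap and pass kvm_phys_is_valid() */

With the explicit stage2_map_data.annotation flag there is no sentinel to protect, and stage2_map_walker_phys_addr() can offset the PA unconditionally.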
- */ + /* Work out the correct PA based on how far the walk has gotten */ return phys + (ctx->addr - ctx->start); } @@ -908,6 +892,9 @@ static bool stage2_leaf_mapping_allowed(const struct kvm_pgtable_visit_ctx *ctx, if (data->force_pte && ctx->level < KVM_PGTABLE_LAST_LEVEL) return false; + if (data->annotation) + return true; + return kvm_block_mapping_supported(ctx, phys); } @@ -923,7 +910,7 @@ static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx, if (!stage2_leaf_mapping_allowed(ctx, data)) return -E2BIG; - if (kvm_phys_is_valid(phys)) + if (!data->annotation) new = kvm_init_valid_leaf_pte(phys, data->attr, ctx->level); else new = kvm_init_invalid_leaf_owner(data->owner_id); @@ -1085,11 +1072,11 @@ int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size, { int ret; struct stage2_map_data map_data = { - .phys = KVM_PHYS_INVALID, .mmu = pgt->mmu, .memcache = mc, .owner_id = owner_id, .force_pte = true, + .annotation = true, }; struct kvm_pgtable_walker walker = { .cb = stage2_map_walker, diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 80581b1c3995..59d992455793 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -77,12 +77,12 @@ static void __activate_cptr_traps(struct kvm_vcpu *vcpu) * VHE (HCR.E2H == 1) which allows us to use here the CPTR_EL2.TAM * shift value for trapping the AMU accesses. */ - u64 val = CPACR_ELx_TTA | CPTR_EL2_TAM; + u64 val = CPACR_EL1_TTA | CPTR_EL2_TAM; if (guest_owns_fp_regs()) { - val |= CPACR_ELx_FPEN; + val |= CPACR_EL1_FPEN; if (vcpu_has_sve(vcpu)) - val |= CPACR_ELx_ZEN; + val |= CPACR_EL1_ZEN; } else { __activate_traps_fpsimd32(vcpu); } @@ -122,13 +122,13 @@ static void __activate_cptr_traps(struct kvm_vcpu *vcpu) * hypervisor has traps enabled to dispel any illusion of something more * complicated taking place. 
*/ - if (!(SYS_FIELD_GET(CPACR_ELx, FPEN, cptr) & BIT(0))) - val &= ~CPACR_ELx_FPEN; - if (!(SYS_FIELD_GET(CPACR_ELx, ZEN, cptr) & BIT(0))) - val &= ~CPACR_ELx_ZEN; + if (!(SYS_FIELD_GET(CPACR_EL1, FPEN, cptr) & BIT(0))) + val &= ~CPACR_EL1_FPEN; + if (!(SYS_FIELD_GET(CPACR_EL1, ZEN, cptr) & BIT(0))) + val &= ~CPACR_EL1_ZEN; if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, S2POE, IMP)) - val |= cptr & CPACR_ELx_E0POE; + val |= cptr & CPACR_EL1_E0POE; val |= cptr & CPTR_EL2_TCPAC; diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 456102bc0b55..6c5950b9ceac 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -24,6 +24,7 @@ static DEFINE_MUTEX(arm_pmus_lock); static void kvm_pmu_create_perf_event(struct kvm_pmc *pmc); static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc); +static bool kvm_pmu_counter_is_enabled(struct kvm_pmc *pmc); static struct kvm_vcpu *kvm_pmc_to_vcpu(const struct kvm_pmc *pmc) { @@ -327,48 +328,25 @@ u64 kvm_pmu_implemented_counter_mask(struct kvm_vcpu *vcpu) return GENMASK(val - 1, 0) | BIT(ARMV8_PMU_CYCLE_IDX); } -/** - * kvm_pmu_enable_counter_mask - enable selected PMU counters - * @vcpu: The vcpu pointer - * @val: the value guest writes to PMCNTENSET register - * - * Call perf_event_enable to start counting the perf event - */ -void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val) +static void kvm_pmc_enable_perf_event(struct kvm_pmc *pmc) { - int i; - if (!kvm_vcpu_has_pmu(vcpu)) - return; - - if (!(kvm_vcpu_read_pmcr(vcpu) & ARMV8_PMU_PMCR_E) || !val) + if (!pmc->perf_event) { + kvm_pmu_create_perf_event(pmc); return; + } - for (i = 0; i < KVM_ARMV8_PMU_MAX_COUNTERS; i++) { - struct kvm_pmc *pmc; - - if (!(val & BIT(i))) - continue; - - pmc = kvm_vcpu_idx_to_pmc(vcpu, i); + perf_event_enable(pmc->perf_event); + if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE) + kvm_debug("fail to enable perf event\n"); +} - if (!pmc->perf_event) { - kvm_pmu_create_perf_event(pmc); - } else { - perf_event_enable(pmc->perf_event); - if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE) - kvm_debug("fail to enable perf event\n"); - } - } +static void kvm_pmc_disable_perf_event(struct kvm_pmc *pmc) +{ + if (pmc->perf_event) + perf_event_disable(pmc->perf_event); } -/** - * kvm_pmu_disable_counter_mask - disable selected PMU counters - * @vcpu: The vcpu pointer - * @val: the value guest writes to PMCNTENCLR register - * - * Call perf_event_disable to stop counting the perf event - */ -void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val) +void kvm_pmu_reprogram_counter_mask(struct kvm_vcpu *vcpu, u64 val) { int i; @@ -376,16 +354,18 @@ void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val) return; for (i = 0; i < KVM_ARMV8_PMU_MAX_COUNTERS; i++) { - struct kvm_pmc *pmc; + struct kvm_pmc *pmc = kvm_vcpu_idx_to_pmc(vcpu, i); if (!(val & BIT(i))) continue; - pmc = kvm_vcpu_idx_to_pmc(vcpu, i); - - if (pmc->perf_event) - perf_event_disable(pmc->perf_event); + if (kvm_pmu_counter_is_enabled(pmc)) + kvm_pmc_enable_perf_event(pmc); + else + kvm_pmc_disable_perf_event(pmc); } + + kvm_vcpu_pmu_restore_guest(vcpu); } /* @@ -626,27 +606,28 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) if (!kvm_has_feat(vcpu->kvm, ID_AA64DFR0_EL1, PMUVer, V3P5)) val &= ~ARMV8_PMU_PMCR_LP; + /* Request a reload of the PMU to enable/disable affected counters */ + if ((__vcpu_sys_reg(vcpu, PMCR_EL0) ^ val) & ARMV8_PMU_PMCR_E) + kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu); + /* The reset bits don't indicate any state, 
and shouldn't be saved. */ __vcpu_sys_reg(vcpu, PMCR_EL0) = val & ~(ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_P); - if (val & ARMV8_PMU_PMCR_E) { - kvm_pmu_enable_counter_mask(vcpu, - __vcpu_sys_reg(vcpu, PMCNTENSET_EL0)); - } else { - kvm_pmu_disable_counter_mask(vcpu, - __vcpu_sys_reg(vcpu, PMCNTENSET_EL0)); - } - if (val & ARMV8_PMU_PMCR_C) kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0); if (val & ARMV8_PMU_PMCR_P) { - unsigned long mask = kvm_pmu_accessible_counter_mask(vcpu); - mask &= ~BIT(ARMV8_PMU_CYCLE_IDX); + /* + * Unlike other PMU sysregs, the controls in PMCR_EL0 always apply + * to the 'guest' range of counters and never the 'hyp' range. + */ + unsigned long mask = kvm_pmu_implemented_counter_mask(vcpu) & + ~kvm_pmu_hyp_counter_mask(vcpu) & + ~BIT(ARMV8_PMU_CYCLE_IDX); + for_each_set_bit(i, &mask, 32) kvm_pmu_set_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, i), 0, true); } - kvm_vcpu_pmu_restore_guest(vcpu); } static bool kvm_pmu_counter_is_enabled(struct kvm_pmc *pmc) @@ -910,11 +891,11 @@ void kvm_vcpu_reload_pmu(struct kvm_vcpu *vcpu) { u64 mask = kvm_pmu_implemented_counter_mask(vcpu); - kvm_pmu_handle_pmcr(vcpu, kvm_vcpu_read_pmcr(vcpu)); - __vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= mask; __vcpu_sys_reg(vcpu, PMINTENSET_EL1) &= mask; __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= mask; + + kvm_pmu_reprogram_counter_mask(vcpu, mask); } int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 83c6b4a07ef5..e4749ecbcd79 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1208,16 +1208,14 @@ static bool access_pmcnten(struct kvm_vcpu *vcpu, struct sys_reg_params *p, mask = kvm_pmu_accessible_counter_mask(vcpu); if (p->is_write) { val = p->regval & mask; - if (r->Op2 & 0x1) { + if (r->Op2 & 0x1) /* accessing PMCNTENSET_EL0 */ __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) |= val; - kvm_pmu_enable_counter_mask(vcpu, val); - kvm_vcpu_pmu_restore_guest(vcpu); - } else { + else /* accessing PMCNTENCLR_EL0 */ __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= ~val; - kvm_pmu_disable_counter_mask(vcpu, val); - } + + kvm_pmu_reprogram_counter_mask(vcpu, val); } else { p->regval = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0); } @@ -1604,6 +1602,9 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu, if (!cpus_have_final_cap(ARM64_HAS_WFXT)) val &= ~ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_WFxT); break; + case SYS_ID_AA64ISAR3_EL1: + val &= ID_AA64ISAR3_EL1_FPRCVT | ID_AA64ISAR3_EL1_FAMINMAX; + break; case SYS_ID_AA64MMFR2_EL1: val &= ~ID_AA64MMFR2_EL1_CCIDX_MASK; break; @@ -2450,6 +2451,26 @@ static unsigned int s1pie_el2_visibility(const struct kvm_vcpu *vcpu, return __el2_visibility(vcpu, rd, s1pie_visibility); } +static bool access_mdcr(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + u64 old = __vcpu_sys_reg(vcpu, MDCR_EL2); + + if (!access_rw(vcpu, p, r)) + return false; + + /* + * Request a reload of the PMU to enable/disable the counters affected + * by HPME. + */ + if ((old ^ __vcpu_sys_reg(vcpu, MDCR_EL2)) & MDCR_EL2_HPME) + kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu); + + return true; +} + + /* * Architected system registers. 
* Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2 @@ -2608,7 +2629,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { ID_WRITABLE(ID_AA64ISAR2_EL1, ~(ID_AA64ISAR2_EL1_RES0 | ID_AA64ISAR2_EL1_APA3 | ID_AA64ISAR2_EL1_GPA3)), - ID_UNALLOCATED(6,3), + ID_WRITABLE(ID_AA64ISAR3_EL1, (ID_AA64ISAR3_EL1_FPRCVT | + ID_AA64ISAR3_EL1_FAMINMAX)), ID_UNALLOCATED(6,4), ID_UNALLOCATED(6,5), ID_UNALLOCATED(6,6), @@ -2618,7 +2640,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { ID_WRITABLE(ID_AA64MMFR0_EL1, ~(ID_AA64MMFR0_EL1_RES0 | ID_AA64MMFR0_EL1_TGRAN4_2 | ID_AA64MMFR0_EL1_TGRAN64_2 | - ID_AA64MMFR0_EL1_TGRAN16_2)), + ID_AA64MMFR0_EL1_TGRAN16_2 | + ID_AA64MMFR0_EL1_ASIDBITS)), ID_WRITABLE(ID_AA64MMFR1_EL1, ~(ID_AA64MMFR1_EL1_RES0 | ID_AA64MMFR1_EL1_HCX | ID_AA64MMFR1_EL1_TWED | @@ -2982,7 +3005,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { EL2_REG(SCTLR_EL2, access_rw, reset_val, SCTLR_EL2_RES1), EL2_REG(ACTLR_EL2, access_rw, reset_val, 0), EL2_REG_VNCR(HCR_EL2, reset_hcr, 0), - EL2_REG(MDCR_EL2, access_rw, reset_val, 0), + EL2_REG(MDCR_EL2, access_mdcr, reset_val, 0), EL2_REG(CPTR_EL2, access_rw, reset_val, CPTR_NVHE_EL2_RES1), EL2_REG_VNCR(HSTR_EL2, reset_val, 0), EL2_REG_VNCR(HFGRTR_EL2, reset_val, 0), diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index f4c4494645c3..fb96802799c6 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -608,12 +608,22 @@ static void vgic_its_cache_translation(struct kvm *kvm, struct vgic_its *its, lockdep_assert_held(&its->its_lock); vgic_get_irq_kref(irq); + old = xa_store(&its->translation_cache, cache_key, irq, GFP_KERNEL_ACCOUNT); + + /* + * Put the reference taken on @irq if the store fails. Intentionally do + * not return the error as the translation cache is best effort. + */ + if (xa_is_err(old)) { + vgic_put_irq(kvm, irq); + return; + } + /* * We could have raced with another CPU caching the same * translation behind our back, ensure we don't leak a * reference if that is the case. */ - old = xa_store(&its->translation_cache, cache_key, irq, GFP_KERNEL_ACCOUNT); if (old) vgic_put_irq(kvm, old); } diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index 188197590fc9..b2ac06246327 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -32,9 +32,9 @@ static unsigned long nr_pinned_asids; static unsigned long *pinned_asid_map; #define ASID_MASK (~GENMASK(asid_bits - 1, 0)) -#define ASID_FIRST_VERSION (1UL << asid_bits) +#define ASID_FIRST_VERSION (1UL << 16) -#define NUM_USER_ASIDS ASID_FIRST_VERSION +#define NUM_USER_ASIDS (1UL << asid_bits) #define ctxid2asid(asid) ((asid) & ~ASID_MASK) #define asid2ctxid(asid, genid) ((asid) | (genid)) diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c index 87b3f1a25535..a86c897017df 100644 --- a/arch/arm64/mm/copypage.c +++ b/arch/arm64/mm/copypage.c @@ -30,11 +30,13 @@ void copy_highpage(struct page *to, struct page *from) if (!system_supports_mte()) return; - if (folio_test_hugetlb(src) && - folio_test_hugetlb_mte_tagged(src)) { - if (!folio_try_hugetlb_mte_tagging(dst)) + if (folio_test_hugetlb(src)) { + if (!folio_test_hugetlb_mte_tagged(src) || + from != folio_page(src, 0)) return; + WARN_ON_ONCE(!folio_try_hugetlb_mte_tagging(dst)); + /* * Populate tags for all subpages. 
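The vgic-its fix above is an instance of a general xarray idiom: xa_store() returns either the entry it displaced or an error encoded via xa_err(), and both outcomes carry a reference that must be dropped. Distilled, with put_ref() standing in for vgic_put_irq():

old = xa_store(&cache, key, new, GFP_KERNEL_ACCOUNT);
if (xa_is_err(old)) {
	put_ref(new);	/* store failed: drop the reference just taken */
	return;		/* cache is best effort, error intentionally dropped */
}
if (old)
	put_ref(old);	/* raced with another CPU caching the same key */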
* diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 3215adf48a1b..98a2a0e64e25 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -519,6 +519,18 @@ pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, static int __init hugetlbpage_init(void) { + /* + * HugeTLB pages are supported on maximum four page table + * levels (PUD, CONT PMD, PMD, CONT PTE) for a given base + * page size, corresponding to hugetlb_add_hstate() calls + * here. + * + * HUGE_MAX_HSTATE should at least match maximum supported + * HugeTLB page sizes on the platform. Any new addition to + * supported HugeTLB page sizes will also require changing + * HUGE_MAX_HSTATE as well. + */ + BUILD_BUG_ON(HUGE_MAX_HSTATE < 4); if (pud_sect_supported()) hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index d21f67d67cf5..9c0b8d9558fc 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -117,15 +117,6 @@ static void __init arch_reserve_crashkernel(void) static phys_addr_t __init max_zone_phys(phys_addr_t zone_limit) { - /** - * Information we get from firmware (e.g. DT dma-ranges) describe DMA - * bus constraints. Devices using DMA might have their own limitations. - * Some of them rely on DMA zone in low 32-bit memory. Keep low RAM - * DMA zone on platforms that have RAM there. - */ - if (memblock_start_of_DRAM() < U32_MAX) - zone_limit = min(zone_limit, U32_MAX); - return min(zone_limit, memblock_end_of_DRAM() - 1) + 1; } @@ -141,6 +132,14 @@ static void __init zone_sizes_init(void) acpi_zone_dma_limit = acpi_iort_dma_get_max_cpu_address(); dt_zone_dma_limit = of_dma_get_max_cpu_address(NULL); zone_dma_limit = min(dt_zone_dma_limit, acpi_zone_dma_limit); + /* + * Information we get from firmware (e.g. DT dma-ranges) describe DMA + * bus constraints. Devices using DMA might have their own limitations. + * Some of them rely on DMA zone in low 32-bit memory. Keep low RAM + * DMA zone on platforms that have RAM there. + */ + if (memblock_start_of_DRAM() < U32_MAX) + zone_dma_limit = min(zone_dma_limit, U32_MAX); arm64_dma_phys_limit = max_zone_phys(zone_dma_limit); max_zone_pfns[ZONE_DMA] = PFN_DOWN(arm64_dma_phys_limit); #endif @@ -280,7 +279,12 @@ void __init arm64_memblock_init(void) if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) { extern u16 memstart_offset_seed; - u64 mmfr0 = read_cpuid(ID_AA64MMFR0_EL1); + + /* + * Use the sanitised version of id_aa64mmfr0_el1 so that linear + * map randomization can be enabled by shrinking the IPA space. 
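To make the ZONE_DMA change above concrete: the 32-bit clamp now applies once to zone_dma_limit, after the firmware-reported limits are merged, instead of inside max_zone_phys(). A worked example with invented numbers, assuming DT advertises a 36-bit DMA bus limit, ACPI reports none, and DRAM starts below 4 GiB:

u64 dt_limit   = (1ULL << 36) - 1;
u64 acpi_limit = ~0ULL;				/* no constraint reported */
u64 dma_limit  = min(dt_limit, acpi_limit);	/* 36-bit so far	  */

if (memblock_start_of_DRAM() < U32_MAX)
	dma_limit = min_t(u64, dma_limit, U32_MAX);	/* final: 32-bit  */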
+ */ + u64 mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); int parange = cpuid_feature_extract_unsigned_field( mmfr0, ID_AA64MMFR0_EL1_PARANGE_SHIFT); s64 range = linear_region_size - diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index e2739b69e11b..b4df5bc5b1b8 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1169,7 +1169,8 @@ int __meminit vmemmap_check_pmd(pmd_t *pmdp, int node, unsigned long addr, unsigned long next) { vmemmap_verify((pte_t *)pmdp, node, addr, next); - return 1; + + return pmd_sect(READ_ONCE(*pmdp)); } int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index b8edc5765441..fb30c8804f87 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -501,7 +501,7 @@ alternative_else_nop_endif #ifdef CONFIG_ARM64_HAFT cmp x9, ID_AA64MMFR1_EL1_HAFDBS_HAFT b.lt 1f - orr tcr2, tcr2, TCR2_EL1x_HAFT + orr tcr2, tcr2, TCR2_EL1_HAFT #endif /* CONFIG_ARM64_HAFT */ 1: #endif /* CONFIG_ARM64_HW_AFDBM */ @@ -532,7 +532,8 @@ alternative_else_nop_endif #undef PTE_MAYBE_NG #undef PTE_MAYBE_SHARED - orr tcr2, tcr2, TCR2_EL1x_PIE + orr tcr2, tcr2, TCR2_EL1_PIE + msr REG_TCR2_EL1, x0 .Lskip_indirection: diff --git a/arch/arm64/mm/trans_pgd.c b/arch/arm64/mm/trans_pgd.c index 0f7b484cb2ff..19c67ed1a21f 100644 --- a/arch/arm64/mm/trans_pgd.c +++ b/arch/arm64/mm/trans_pgd.c @@ -57,7 +57,7 @@ static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr) */ BUG_ON(!pfn_valid(pte_pfn(pte))); - __set_pte(dst_ptep, pte_mkpresent(pte_mkwrite_novma(pte))); + __set_pte(dst_ptep, pte_mkvalid(pte_mkwrite_novma(pte))); } } diff --git a/arch/arm64/tools/gen-sysreg.awk b/arch/arm64/tools/gen-sysreg.awk index d1254a056114..1a2afc9fdd42 100755 --- a/arch/arm64/tools/gen-sysreg.awk +++ b/arch/arm64/tools/gen-sysreg.awk @@ -206,7 +206,7 @@ END { # Currently this is effectivey a comment, in future we may want to emit # defines for the fields. -/^Fields/ && block_current() == "Sysreg" { +(/^Fields/ || /^Mapping/) && block_current() == "Sysreg" { expect_fields(2) if (next_bit != 63) diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index b081b54d6d22..94ca9cdb0b16 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -24,8 +24,16 @@ # ... 
# EndEnum -# Alternatively if multiple registers share the same layout then -# a SysregFields block can be used to describe the shared layout +# For VHE aliases (*_EL12, *_EL02) of system registers, a Mapping +# entry describes the register the alias actually accesses: + +# Sysreg <name_EL12> <op0> <op1> <crn> <crm> <op2> +# Mapping <name_EL1> +# EndSysreg + +# Where multiple system registers are not VHE aliases but share a +# common layout, a SysregFields block can be used to describe the +# shared layout: # SysregFields <fieldsname> # <field> @@ -1010,7 +1018,12 @@ UnsignedEnum 35:32 FPMR 0b0000 NI 0b0001 IMP EndEnum -Res0 31:12 +Res0 31:20 +UnsignedEnum 19:16 UINJ + 0b0000 NI + 0b0001 IMP +EndEnum +Res0 15:12 UnsignedEnum 11:8 MTEFAR 0b0000 NI 0b0001 IMP @@ -1035,7 +1048,10 @@ UnsignedEnum 55:52 F32MM 0b0000 NI 0b0001 IMP EndEnum -Res0 51:48 +UnsignedEnum 51:48 F16MM + 0b0000 NI + 0b0001 IMP +EndEnum UnsignedEnum 47:44 I8MM 0b0000 NI 0b0001 IMP @@ -1053,6 +1069,7 @@ Res0 31:28 UnsignedEnum 27:24 B16B16 0b0000 NI 0b0001 IMP + 0b0010 BFSCALE EndEnum UnsignedEnum 23:20 BF16 0b0000 NI @@ -1063,16 +1080,22 @@ UnsignedEnum 19:16 BitPerm 0b0000 NI 0b0001 IMP EndEnum -Res0 15:8 +UnsignedEnum 15:12 EltPerm + 0b0000 NI + 0b0001 IMP +EndEnum +Res0 11:8 UnsignedEnum 7:4 AES 0b0000 NI 0b0001 IMP 0b0010 PMULL128 + 0b0011 AES2 EndEnum UnsignedEnum 3:0 SVEver 0b0000 IMP 0b0001 SVE2 0b0010 SVE2p1 + 0b0011 SVE2p2 EndEnum EndSysreg @@ -1090,7 +1113,7 @@ UnsignedEnum 59:56 SMEver 0b0000 SME 0b0001 SME2 0b0010 SME2p1 - 0b0000 IMP + 0b0011 SME2p2 EndEnum UnsignedEnum 55:52 I16I64 0b0000 NI @@ -1154,7 +1177,29 @@ UnsignedEnum 28 SF8DP2 0b0 NI 0b1 IMP EndEnum -Res0 27:0 +Res0 27:26 +UnsignedEnum 25 SBitPerm + 0b0 NI + 0b1 IMP +EndEnum +UnsignedEnum 24 AES + 0b0 NI + 0b1 IMP +EndEnum +UnsignedEnum 23 SFEXPA + 0b0 NI + 0b1 IMP +EndEnum +Res0 22:17 +UnsignedEnum 16 STMOP + 0b0 NI + 0b1 IMP +EndEnum +Res0 15:1 +UnsignedEnum 0 SMOP4 + 0b0 NI + 0b1 IMP +EndEnum EndSysreg Sysreg ID_AA64FPFR0_EL1 3 0 0 4 7 @@ -1175,7 +1220,15 @@ UnsignedEnum 28 F8DP2 0b0 NI 0b1 IMP EndEnum -Res0 27:2 +UnsignedEnum 27 F8MM8 + 0b0 NI + 0b1 IMP +EndEnum +UnsignedEnum 26 F8MM4 + 0b0 NI + 0b1 IMP +EndEnum +Res0 25:2 UnsignedEnum 1 F8E4M3 0b0 NI 0b1 IMP @@ -1503,12 +1556,16 @@ EndEnum UnsignedEnum 55:52 CSSC 0b0000 NI 0b0001 IMP + 0b0010 CMPBR EndEnum UnsignedEnum 51:48 RPRFM 0b0000 NI 0b0001 IMP EndEnum -Res0 47:44 +UnsignedEnum 47:44 PCDPHINT + 0b0000 NI + 0b0001 IMP +EndEnum UnsignedEnum 43:40 PRFMSLC 0b0000 NI 0b0001 IMP @@ -1561,7 +1618,23 @@ EndEnum EndSysreg Sysreg ID_AA64ISAR3_EL1 3 0 0 6 3 -Res0 63:16 +Res0 63:32 +UnsignedEnum 31:28 FPRCVT + 0b0000 NI + 0b0001 IMP +EndEnum +UnsignedEnum 27:24 LSUI + 0b0000 NI + 0b0001 IMP +EndEnum +UnsignedEnum 23:20 OCCMO + 0b0000 NI + 0b0001 IMP +EndEnum +UnsignedEnum 19:16 LSFE + 0b0000 NI + 0b0001 IMP +EndEnum UnsignedEnum 15:12 PACM 0b0000 NI 0b0001 TRIVIAL_IMP @@ -1978,7 +2051,7 @@ Field 1 A Field 0 M EndSysreg -SysregFields CPACR_ELx +Sysreg CPACR_EL1 3 0 1 0 2 Res0 63:30 Field 29 E0POE Field 28 TTA @@ -1989,10 +2062,6 @@ Field 21:20 FPEN Res0 19:18 Field 17:16 ZEN Res0 15:0 -EndSysregFields - -Sysreg CPACR_EL1 3 0 1 0 2 -Fields CPACR_ELx EndSysreg Sysreg SMPRI_EL1 3 0 1 2 4 @@ -2947,23 +3016,23 @@ Field 63:0 PhysicalOffset EndSysreg Sysreg CPACR_EL12 3 5 1 0 2 -Fields CPACR_ELx +Mapping CPACR_EL1 EndSysreg Sysreg ZCR_EL12 3 5 1 2 0 -Fields ZCR_ELx +Mapping ZCR_EL1 EndSysreg Sysreg SMCR_EL12 3 5 1 2 6 -Fields SMCR_ELx +Mapping SMCR_EL1 EndSysreg Sysreg GCSCR_EL12 3 5 2 5 0 -Fields GCSCR_ELx 
+Mapping GCSCR_EL1 EndSysreg Sysreg GCSPR_EL12 3 5 2 5 1 -Fields GCSPR_ELx +Mapping GCSPR_EL1 EndSysreg Sysreg FAR_EL12 3 5 6 0 0 @@ -2975,7 +3044,7 @@ Fields MPAM1_ELx EndSysreg Sysreg CONTEXTIDR_EL12 3 5 13 0 1 -Fields CONTEXTIDR_ELx +Mapping CONTEXTIDR_EL1 EndSysreg SysregFields TTBRx_EL1 @@ -2992,7 +3061,7 @@ Sysreg TTBR1_EL1 3 0 2 0 1 Fields TTBRx_EL1 EndSysreg -SysregFields TCR2_EL1x +Sysreg TCR2_EL1 3 0 2 0 3 Res0 63:16 Field 15 DisCH1 Field 14 DisCH0 @@ -3006,14 +3075,10 @@ Field 3 POE Field 2 E0POE Field 1 PIE Field 0 PnCH -EndSysregFields - -Sysreg TCR2_EL1 3 0 2 0 3 -Fields TCR2_EL1x EndSysreg Sysreg TCR2_EL12 3 5 2 0 3 -Fields TCR2_EL1x +Mapping TCR2_EL1 EndSysreg Sysreg TCR2_EL2 3 4 2 0 3 @@ -3084,7 +3149,7 @@ Fields PIRx_ELx EndSysreg Sysreg PIRE0_EL12 3 5 10 2 2 -Fields PIRx_ELx +Mapping PIRE0_EL1 EndSysreg Sysreg PIRE0_EL2 3 4 10 2 2 @@ -3096,7 +3161,7 @@ Fields PIRx_ELx EndSysreg Sysreg PIR_EL12 3 5 10 2 3 -Fields PIRx_ELx +Mapping PIR_EL1 EndSysreg Sysreg PIR_EL2 3 4 10 2 3 @@ -3116,7 +3181,7 @@ Fields PIRx_ELx EndSysreg Sysreg POR_EL12 3 5 10 2 4 -Fields PIRx_ELx +Mapping POR_EL1 EndSysreg Sysreg S2POR_EL1 3 0 10 2 5 diff --git a/arch/hexagon/Makefile b/arch/hexagon/Makefile index 92d005958dfb..ff172cbe5881 100644 --- a/arch/hexagon/Makefile +++ b/arch/hexagon/Makefile @@ -32,3 +32,9 @@ KBUILD_LDFLAGS += $(ldflags-y) TIR_NAME := r19 KBUILD_CFLAGS += -ffixed-$(TIR_NAME) -DTHREADINFO_REG=$(TIR_NAME) -D__linux__ KBUILD_AFLAGS += -DTHREADINFO_REG=$(TIR_NAME) + +# Disable HexagonConstExtenders pass for LLVM versions prior to 19.1.0 +# https://github.com/llvm/llvm-project/issues/99714 +ifneq ($(call clang-min-version, 190100),y) +KBUILD_CFLAGS += -mllvm -hexagon-cext=false +endif diff --git a/arch/loongarch/include/asm/hugetlb.h b/arch/loongarch/include/asm/hugetlb.h index b837c65a4894..c8e4057734d0 100644 --- a/arch/loongarch/include/asm/hugetlb.h +++ b/arch/loongarch/include/asm/hugetlb.h @@ -24,6 +24,16 @@ static inline int prepare_hugepage_range(struct file *file, return 0; } +#define __HAVE_ARCH_HUGE_PTE_CLEAR +static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned long sz) +{ + pte_t clear; + + pte_val(clear) = (unsigned long)invalid_pte_table; + set_pte_at(mm, addr, ptep, clear); +} + #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h index 944482063f14..3089785ca97e 100644 --- a/arch/loongarch/include/asm/inst.h +++ b/arch/loongarch/include/asm/inst.h @@ -683,7 +683,17 @@ DEF_EMIT_REG2I16_FORMAT(blt, blt_op) DEF_EMIT_REG2I16_FORMAT(bge, bge_op) DEF_EMIT_REG2I16_FORMAT(bltu, bltu_op) DEF_EMIT_REG2I16_FORMAT(bgeu, bgeu_op) -DEF_EMIT_REG2I16_FORMAT(jirl, jirl_op) + +static inline void emit_jirl(union loongarch_instruction *insn, + enum loongarch_gpr rd, + enum loongarch_gpr rj, + int offset) +{ + insn->reg2i16_format.opcode = jirl_op; + insn->reg2i16_format.immediate = offset; + insn->reg2i16_format.rd = rd; + insn->reg2i16_format.rj = rj; +} #define DEF_EMIT_REG2BSTRD_FORMAT(NAME, OP) \ static inline void emit_##NAME(union loongarch_instruction *insn, \ diff --git a/arch/loongarch/kernel/efi.c b/arch/loongarch/kernel/efi.c index 2bf86aeda874..de21e72759ee 100644 --- a/arch/loongarch/kernel/efi.c +++ b/arch/loongarch/kernel/efi.c @@ -95,7 +95,7 @@ static void __init init_screen_info(void) memset(si, 0, sizeof(*si)); early_memunmap(si, sizeof(*si)); - 
memblock_reserve(screen_info.lfb_base, screen_info.lfb_size); + memblock_reserve(__screen_info_lfb_base(&screen_info), screen_info.lfb_size); } void __init efi_init(void) diff --git a/arch/loongarch/kernel/inst.c b/arch/loongarch/kernel/inst.c index 3050329556d1..14d7d700bcb9 100644 --- a/arch/loongarch/kernel/inst.c +++ b/arch/loongarch/kernel/inst.c @@ -332,7 +332,7 @@ u32 larch_insn_gen_jirl(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm) return INSN_BREAK; } - emit_jirl(&insn, rj, rd, imm >> 2); + emit_jirl(&insn, rd, rj, imm >> 2); return insn.word; } diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index 5d59e9ce2772..fbf747447f13 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -82,7 +82,7 @@ void show_ipi_list(struct seq_file *p, int prec) for (i = 0; i < NR_IPI; i++) { seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i, prec >= 4 ? " " : ""); for_each_online_cpu(cpu) - seq_printf(p, "%10u ", per_cpu(irq_stat, cpu).ipi_irqs[i]); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, cpu).ipi_irqs[i], 10); seq_printf(p, " LoongArch %d %s\n", i + 1, ipi_types[i]); } } diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c index 69f3e3782cc9..a7893bd01e73 100644 --- a/arch/loongarch/kvm/exit.c +++ b/arch/loongarch/kvm/exit.c @@ -156,7 +156,7 @@ static int kvm_handle_csr(struct kvm_vcpu *vcpu, larch_inst inst) int kvm_emu_iocsr(larch_inst inst, struct kvm_run *run, struct kvm_vcpu *vcpu) { - int ret; + int idx, ret; unsigned long *val; u32 addr, rd, rj, opcode; @@ -167,7 +167,6 @@ int kvm_emu_iocsr(larch_inst inst, struct kvm_run *run, struct kvm_vcpu *vcpu) rj = inst.reg2_format.rj; opcode = inst.reg2_format.opcode; addr = vcpu->arch.gprs[rj]; - ret = EMULATE_DO_IOCSR; run->iocsr_io.phys_addr = addr; run->iocsr_io.is_write = 0; val = &vcpu->arch.gprs[rd]; @@ -207,20 +206,28 @@ int kvm_emu_iocsr(larch_inst inst, struct kvm_run *run, struct kvm_vcpu *vcpu) } if (run->iocsr_io.is_write) { - if (!kvm_io_bus_write(vcpu, KVM_IOCSR_BUS, addr, run->iocsr_io.len, val)) + idx = srcu_read_lock(&vcpu->kvm->srcu); + ret = kvm_io_bus_write(vcpu, KVM_IOCSR_BUS, addr, run->iocsr_io.len, val); + srcu_read_unlock(&vcpu->kvm->srcu, idx); + if (ret == 0) ret = EMULATE_DONE; - else + else { + ret = EMULATE_DO_IOCSR; /* Save data and let user space to write it */ memcpy(run->iocsr_io.data, val, run->iocsr_io.len); - + } trace_kvm_iocsr(KVM_TRACE_IOCSR_WRITE, run->iocsr_io.len, addr, val); } else { - if (!kvm_io_bus_read(vcpu, KVM_IOCSR_BUS, addr, run->iocsr_io.len, val)) + idx = srcu_read_lock(&vcpu->kvm->srcu); + ret = kvm_io_bus_read(vcpu, KVM_IOCSR_BUS, addr, run->iocsr_io.len, val); + srcu_read_unlock(&vcpu->kvm->srcu, idx); + if (ret == 0) ret = EMULATE_DONE; - else + else { + ret = EMULATE_DO_IOCSR; /* Save register id for iocsr read completion */ vcpu->arch.io_gpr = rd; - + } trace_kvm_iocsr(KVM_TRACE_IOCSR_READ, run->iocsr_io.len, addr, NULL); } @@ -359,7 +366,7 @@ static int kvm_handle_gspr(struct kvm_vcpu *vcpu) int kvm_emu_mmio_read(struct kvm_vcpu *vcpu, larch_inst inst) { - int ret; + int idx, ret; unsigned int op8, opcode, rd; struct kvm_run *run = vcpu->run; @@ -464,8 +471,10 @@ int kvm_emu_mmio_read(struct kvm_vcpu *vcpu, larch_inst inst) * it need not return to user space to handle the mmio * exception. 
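The LoongArch KVM hunks above all apply one fix: kvm_io_bus_read()/kvm_io_bus_write() walk an SRCU-protected bus table, so any call made outside the vcpu run loop's own critical section needs explicit SRCU bracketing. The pattern, distilled from the hunks:

idx = srcu_read_lock(&vcpu->kvm->srcu);
ret = kvm_io_bus_write(vcpu, KVM_IOCSR_BUS, addr, len, &val);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
if (ret == 0)
	ret = EMULATE_DONE;	/* handled in kernel, no exit to userspace */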
*/ + idx = srcu_read_lock(&vcpu->kvm->srcu); ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, vcpu->arch.badv, run->mmio.len, &vcpu->arch.gprs[rd]); + srcu_read_unlock(&vcpu->kvm->srcu, idx); if (!ret) { update_pc(&vcpu->arch); vcpu->mmio_needed = 0; @@ -531,7 +540,7 @@ int kvm_complete_mmio_read(struct kvm_vcpu *vcpu, struct kvm_run *run) int kvm_emu_mmio_write(struct kvm_vcpu *vcpu, larch_inst inst) { - int ret; + int idx, ret; unsigned int rd, op8, opcode; unsigned long curr_pc, rd_val = 0; struct kvm_run *run = vcpu->run; @@ -631,7 +640,9 @@ int kvm_emu_mmio_write(struct kvm_vcpu *vcpu, larch_inst inst) * it need not return to user space to handle the mmio * exception. */ + idx = srcu_read_lock(&vcpu->kvm->srcu); ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, vcpu->arch.badv, run->mmio.len, data); + srcu_read_unlock(&vcpu->kvm->srcu, idx); if (!ret) return EMULATE_DONE; diff --git a/arch/loongarch/kvm/intc/ipi.c b/arch/loongarch/kvm/intc/ipi.c index a233a323e295..93f4acd44523 100644 --- a/arch/loongarch/kvm/intc/ipi.c +++ b/arch/loongarch/kvm/intc/ipi.c @@ -98,7 +98,7 @@ static void write_mailbox(struct kvm_vcpu *vcpu, int offset, uint64_t data, int static int send_ipi_data(struct kvm_vcpu *vcpu, gpa_t addr, uint64_t data) { - int i, ret; + int i, idx, ret; uint32_t val = 0, mask = 0; /* @@ -107,7 +107,9 @@ static int send_ipi_data(struct kvm_vcpu *vcpu, gpa_t addr, uint64_t data) */ if ((data >> 27) & 0xf) { /* Read the old val */ + idx = srcu_read_lock(&vcpu->kvm->srcu); ret = kvm_io_bus_read(vcpu, KVM_IOCSR_BUS, addr, sizeof(val), &val); + srcu_read_unlock(&vcpu->kvm->srcu, idx); if (unlikely(ret)) { kvm_err("%s: : read date from addr %llx failed\n", __func__, addr); return ret; @@ -121,7 +123,9 @@ static int send_ipi_data(struct kvm_vcpu *vcpu, gpa_t addr, uint64_t data) val &= mask; } val |= ((uint32_t)(data >> 32) & ~mask); + idx = srcu_read_lock(&vcpu->kvm->srcu); ret = kvm_io_bus_write(vcpu, KVM_IOCSR_BUS, addr, sizeof(val), &val); + srcu_read_unlock(&vcpu->kvm->srcu, idx); if (unlikely(ret)) kvm_err("%s: : write date to addr %llx failed\n", __func__, addr); diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index cab1818be68d..d18a4a270415 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -240,7 +240,7 @@ static void kvm_late_check_requests(struct kvm_vcpu *vcpu) */ static int kvm_enter_guest_check(struct kvm_vcpu *vcpu) { - int ret; + int idx, ret; /* * Check conditions before entering the guest @@ -249,7 +249,9 @@ static int kvm_enter_guest_check(struct kvm_vcpu *vcpu) if (ret < 0) return ret; + idx = srcu_read_lock(&vcpu->kvm->srcu); ret = kvm_check_requests(vcpu); + srcu_read_unlock(&vcpu->kvm->srcu, idx); return ret; } diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index dd350cba1252..ea357a3edc09 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -181,13 +181,13 @@ static void __build_epilogue(struct jit_ctx *ctx, bool is_tail_call) /* Set return value */ emit_insn(ctx, addiw, LOONGARCH_GPR_A0, regmap[BPF_REG_0], 0); /* Return to the caller */ - emit_insn(ctx, jirl, LOONGARCH_GPR_RA, LOONGARCH_GPR_ZERO, 0); + emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_RA, 0); } else { /* * Call the next bpf prog and skip the first instruction * of TCC initialization. 
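Background for the jirl operand-order fixes above: jirl writes the link address into rd and jumps to rj plus a scaled 16-bit offset, i.e. GR[rd] = PC + 4; PC = GR[rj] + (SignExtend(offs16) << 2). With the new emit_jirl(&insn, rd, rj, offset) ordering the two common forms read naturally; t1 here stands for whatever scratch register holds the call target:

emit_jirl(&insn, LOONGARCH_GPR_RA, t1, 0);	/* call: ra = pc + 4, pc = t1 */
emit_jirl(&insn, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_RA, 0);	/* return */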
*/ - emit_insn(ctx, jirl, LOONGARCH_GPR_T3, LOONGARCH_GPR_ZERO, 1); + emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_T3, 1); } } @@ -904,7 +904,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext return ret; move_addr(ctx, t1, func_addr); - emit_insn(ctx, jirl, t1, LOONGARCH_GPR_RA, 0); + emit_insn(ctx, jirl, LOONGARCH_GPR_RA, t1, 0); move_reg(ctx, regmap[BPF_REG_0], LOONGARCH_GPR_A0); break; diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 7c4f7bcc89d7..b2ed0308c0ea 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -23,7 +23,9 @@ config M68K select GENERIC_LIB_ASHLDI3 select GENERIC_LIB_ASHRDI3 select GENERIC_LIB_LSHRDI3 + select GENERIC_LIB_MULDI3 select HAS_IOPORT if PCI || ISA || ATARI_ROM_ISA + select HAVE_ARCH_LIBGCC_H select HAVE_ARCH_SECCOMP select HAVE_ARCH_SECCOMP_FILTER select HAVE_ASM_MODVERSIONS diff --git a/arch/m68k/atari/nvram.c b/arch/m68k/atari/nvram.c index 7000d2443aa3..ededc31375c3 100644 --- a/arch/m68k/atari/nvram.c +++ b/arch/m68k/atari/nvram.c @@ -16,7 +16,9 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/spinlock.h> +#include <linux/string_choices.h> #include <linux/types.h> + #include <asm/atarihw.h> #include <asm/atariints.h> @@ -198,7 +200,7 @@ static void atari_nvram_proc_read(unsigned char *nvram, struct seq_file *seq, seq_printf(seq, "0x%02x (undefined)\n", nvram[1]); seq_printf(seq, "SCSI arbitration : %s\n", - (nvram[16] & 0x80) ? "on" : "off"); + str_on_off(nvram[16] & 0x80)); seq_puts(seq, "SCSI host ID : "); if (nvram[16] & 0x80) seq_printf(seq, "%d\n", nvram[16] & 7); @@ -236,7 +238,7 @@ static void atari_nvram_proc_read(unsigned char *nvram, struct seq_file *seq, vmode & 16 ? "VGA" : "TV", vmode & 32 ? "PAL" : "NTSC"); seq_printf(seq, " %soverscan, compat. mode %s%s\n", - vmode & 64 ? "" : "no ", vmode & 128 ? "on" : "off", + vmode & 64 ? "" : "no ", str_on_off(vmode & 128), vmode & 256 ? (vmode & 16 ? ", line doubling" : ", half screen") : ""); } diff --git a/arch/m68k/include/asm/libgcc.h b/arch/m68k/include/asm/libgcc.h new file mode 100644 index 000000000000..27e17195bd7b --- /dev/null +++ b/arch/m68k/include/asm/libgcc.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_M68K_LIBGCC_H +#define __ASM_M68K_LIBGCC_H + +#ifndef CONFIG_CPU_HAS_NO_MULDIV64 +/* + * For those 68K CPUs that support 64bit multiply define umul_ppm() + * for the common muldi3 libgcc helper function (in lib/muldi3.c). + * CPUs that don't have it (like the original 68000 and ColdFire) + * will fallback to using the C-coded version of umul_ppmm(). 
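The comment above notes that CPUs with a 64-bit multiply supply umul_ppmm() to the generic lib/muldi3.c. The decomposition that helper plugs into is short enough to sketch in C (illustrative function, linux/types.h assumed):

static u64 muldi3_sketch(u64 u, u64 v)
{
	u32 ul = u, vl = v, uh = u >> 32, vh = v >> 32;
	u64 low = (u64)ul * vl;			/* umul_ppmm(w1, w0, ul, vl) */
	u32 cross = (u64)ul * vh + (u64)uh * vl; /* truncated, as intended  */

	return low + ((u64)cross << 32);	/* uh*vh shifts out of range */
}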
+ */ +#define umul_ppmm(w1, w0, u, v) \ + do { \ + unsigned long __u = (u), __v = (v); \ + unsigned long __w0, __w1; \ + \ + __asm__ ("mulu%.l %3,%1:%0" \ + : "=d" (__w0), \ + "=d" (__w1) \ + : "%0" (__u), \ + "dmi" (__v)); \ + \ + (w0) = __w0; (w1) = __w1; \ + } while (0) +#endif /* !CONFIG_CPU_HAS_NO_MULDIV64 */ + +#endif /* __ASM_M68K_LIBGCC_H */ diff --git a/arch/m68k/include/asm/vga.h b/arch/m68k/include/asm/vga.h index 4742e6bc3ab8..cdd414fa8710 100644 --- a/arch/m68k/include/asm/vga.h +++ b/arch/m68k/include/asm/vga.h @@ -9,7 +9,7 @@ */ #ifndef CONFIG_PCI -#include <asm/raw_io.h> +#include <asm/io.h> #include <asm/kmap.h> /* @@ -29,9 +29,9 @@ #define inw_p(port) 0 #define outb_p(port, val) do { } while (0) #define outw(port, val) do { } while (0) -#define readb raw_inb -#define writeb raw_outb -#define writew raw_outw +#define readb __raw_readb +#define writeb __raw_writeb +#define writew __raw_writew #endif /* CONFIG_PCI */ #endif /* _ASM_M68K_VGA_H */ diff --git a/arch/m68k/lib/Makefile b/arch/m68k/lib/Makefile index 9158688e6cc6..15278a95259e 100644 --- a/arch/m68k/lib/Makefile +++ b/arch/m68k/lib/Makefile @@ -4,7 +4,7 @@ # Makefile for m68k-specific library files.. # -lib-y := muldi3.o memcpy.o memset.o memmove.o +lib-y := memcpy.o memset.o memmove.o lib-$(CONFIG_MMU) += uaccess.o lib-$(CONFIG_CPU_HAS_NO_MULDIV64) += mulsi3.o divsi3.o udivsi3.o diff --git a/arch/m68k/lib/muldi3.c b/arch/m68k/lib/muldi3.c deleted file mode 100644 index 5012a9b218c7..000000000000 --- a/arch/m68k/lib/muldi3.c +++ /dev/null @@ -1,97 +0,0 @@ -/* muldi3.c extracted from gcc-2.7.2.3/libgcc2.c and - gcc-2.7.2.3/longlong.h which is: */ -/* Copyright (C) 1989, 1992, 1993, 1994, 1995 Free Software Foundation, Inc. - -This file is part of GNU CC. - -GNU CC is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2, or (at your option) -any later version. - -GNU CC is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. */ - -#include <linux/compiler.h> -#include <linux/export.h> -#include <linux/libgcc.h> - -#ifdef CONFIG_CPU_HAS_NO_MULDIV64 - -#define SI_TYPE_SIZE 32 -#define __BITS4 (SI_TYPE_SIZE / 4) -#define __ll_B (1L << (SI_TYPE_SIZE / 2)) -#define __ll_lowpart(t) ((USItype) (t) % __ll_B) -#define __ll_highpart(t) ((USItype) (t) / __ll_B) - -#define umul_ppmm(w1, w0, u, v) \ - do { \ - USItype __x0, __x1, __x2, __x3; \ - USItype __ul, __vl, __uh, __vh; \ - \ - __ul = __ll_lowpart (u); \ - __uh = __ll_highpart (u); \ - __vl = __ll_lowpart (v); \ - __vh = __ll_highpart (v); \ - \ - __x0 = (USItype) __ul * __vl; \ - __x1 = (USItype) __ul * __vh; \ - __x2 = (USItype) __uh * __vl; \ - __x3 = (USItype) __uh * __vh; \ - \ - __x1 += __ll_highpart (__x0);/* this can't give carry */ \ - __x1 += __x2; /* but this indeed can */ \ - if (__x1 < __x2) /* did we get it? */ \ - __x3 += __ll_B; /* yes, add it in the proper pos. 
*/ \ - \ - (w1) = __x3 + __ll_highpart (__x1); \ - (w0) = __ll_lowpart (__x1) * __ll_B + __ll_lowpart (__x0); \ - } while (0) - -#else - -#define umul_ppmm(w1, w0, u, v) \ - __asm__ ("mulu%.l %3,%1:%0" \ - : "=d" ((USItype)(w0)), \ - "=d" ((USItype)(w1)) \ - : "%0" ((USItype)(u)), \ - "dmi" ((USItype)(v))) - -#endif - -#define __umulsidi3(u, v) \ - ({DIunion __w; \ - umul_ppmm (__w.s.high, __w.s.low, u, v); \ - __w.ll; }) - -typedef int SItype __mode(SI); -typedef unsigned int USItype __mode(SI); -typedef int DItype __mode(DI); -typedef int word_type __mode(__word__); - -struct DIstruct {SItype high, low;}; - -typedef union -{ - struct DIstruct s; - DItype ll; -} DIunion; - -DItype -__muldi3 (DItype u, DItype v) -{ - DIunion w; - DIunion uu, vv; - - uu.ll = u; - vv.ll = v; - - w.ll = __umulsidi3 (uu.s.low, vv.s.low); - w.s.high += ((USItype) uu.s.low * (USItype) vv.s.high - + (USItype) uu.s.high * (USItype) vv.s.low); - - return w.ll; -} -EXPORT_SYMBOL(__muldi3); diff --git a/arch/mips/pci/pci-xtalk-bridge.c b/arch/mips/pci/pci-xtalk-bridge.c index 45ddbaa6c123..dae856fb3e5b 100644 --- a/arch/mips/pci/pci-xtalk-bridge.c +++ b/arch/mips/pci/pci-xtalk-bridge.c @@ -749,7 +749,7 @@ static void bridge_remove(struct platform_device *pdev) static struct platform_driver bridge_driver = { .probe = bridge_probe, - .remove_new = bridge_remove, + .remove = bridge_remove, .driver = { .name = "xtalk-bridge", } diff --git a/arch/nios2/kernel/cpuinfo.c b/arch/nios2/kernel/cpuinfo.c index 338849c430a5..7b1e8f9128e9 100644 --- a/arch/nios2/kernel/cpuinfo.c +++ b/arch/nios2/kernel/cpuinfo.c @@ -143,11 +143,11 @@ static int show_cpuinfo(struct seq_file *m, void *v) " DIV:\t\t%s\n" " BMX:\t\t%s\n" " CDX:\t\t%s\n", - cpuinfo.has_mul ? "yes" : "no", - cpuinfo.has_mulx ? "yes" : "no", - cpuinfo.has_div ? "yes" : "no", - cpuinfo.has_bmx ? "yes" : "no", - cpuinfo.has_cdx ? 
"yes" : "no"); + str_yes_no(cpuinfo.has_mul), + str_yes_no(cpuinfo.has_mulx), + str_yes_no(cpuinfo.has_div), + str_yes_no(cpuinfo.has_bmx), + str_yes_no(cpuinfo.has_cdx)); seq_printf(m, "Icache:\t\t%ukB, line length: %u\n", diff --git a/arch/openrisc/kernel/entry.S b/arch/openrisc/kernel/entry.S index 440711d7bf40..ce6f2b08a35e 100644 --- a/arch/openrisc/kernel/entry.S +++ b/arch/openrisc/kernel/entry.S @@ -239,6 +239,8 @@ handler: ;\ /* =====================================================[ exceptions] === */ + __REF + /* ---[ 0x100: RESET exception ]----------------------------------------- */ EXCEPTION_ENTRY(_tng_kernel_start) diff --git a/arch/openrisc/kernel/head.S b/arch/openrisc/kernel/head.S index 439e00f81e5d..bd760066f1cd 100644 --- a/arch/openrisc/kernel/head.S +++ b/arch/openrisc/kernel/head.S @@ -26,15 +26,15 @@ #include <asm/asm-offsets.h> #include <linux/of_fdt.h> -#define tophys(rd,rs) \ - l.movhi rd,hi(-KERNELBASE) ;\ +#define tophys(rd,rs) \ + l.movhi rd,hi(-KERNELBASE) ;\ l.add rd,rd,rs -#define CLEAR_GPR(gpr) \ +#define CLEAR_GPR(gpr) \ l.movhi gpr,0x0 -#define LOAD_SYMBOL_2_GPR(gpr,symbol) \ - l.movhi gpr,hi(symbol) ;\ +#define LOAD_SYMBOL_2_GPR(gpr,symbol) \ + l.movhi gpr,hi(symbol) ;\ l.ori gpr,gpr,lo(symbol) @@ -326,21 +326,21 @@ l.addi r1,r1,-(INT_FRAME_SIZE) ;\ /* r1 is KSP, r30 is __pa(KSP) */ ;\ tophys (r30,r1) ;\ - l.sw PT_GPR12(r30),r12 ;\ + l.sw PT_GPR12(r30),r12 ;\ l.mfspr r12,r0,SPR_EPCR_BASE ;\ l.sw PT_PC(r30),r12 ;\ l.mfspr r12,r0,SPR_ESR_BASE ;\ l.sw PT_SR(r30),r12 ;\ /* save r31 */ ;\ EXCEPTION_T_LOAD_GPR30(r12) ;\ - l.sw PT_GPR30(r30),r12 ;\ + l.sw PT_GPR30(r30),r12 ;\ /* save r10 as was prior to exception */ ;\ EXCEPTION_T_LOAD_GPR10(r12) ;\ - l.sw PT_GPR10(r30),r12 ;\ - /* save PT_SP as was prior to exception */ ;\ + l.sw PT_GPR10(r30),r12 ;\ + /* save PT_SP as was prior to exception */ ;\ EXCEPTION_T_LOAD_SP(r12) ;\ l.sw PT_SP(r30),r12 ;\ - l.sw PT_GPR13(r30),r13 ;\ + l.sw PT_GPR13(r30),r13 ;\ /* --> */ ;\ /* save exception r4, set r4 = EA */ ;\ l.sw PT_GPR4(r30),r4 ;\ @@ -357,6 +357,8 @@ /* =====================================================[ exceptions] === */ + __HEAD + /* ---[ 0x100: RESET exception ]----------------------------------------- */ .org 0x100 /* Jump to .init code at _start which lives in the .head section @@ -394,7 +396,7 @@ _dispatch_do_ipage_fault: .org 0x500 EXCEPTION_HANDLE(_timer_handler) -/* ---[ 0x600: Alignment exception ]-------------------------------------- */ +/* ---[ 0x600: Alignment exception ]------------------------------------- */ .org 0x600 EXCEPTION_HANDLE(_alignment_handler) @@ -424,7 +426,7 @@ _dispatch_do_ipage_fault: .org 0xc00 EXCEPTION_HANDLE(_sys_call_handler) -/* ---[ 0xd00: Floating point exception ]--------------------------------- */ +/* ---[ 0xd00: Floating point exception ]-------------------------------- */ .org 0xd00 EXCEPTION_HANDLE(_fpe_trap_handler) @@ -506,10 +508,10 @@ _dispatch_do_ipage_fault: /* .text*/ -/* This early stuff belongs in HEAD, but some of the functions below definitely +/* This early stuff belongs in the .init.text section, but some of the functions below definitely * don't... 
*/ - __HEAD + __INIT .global _start _start: /* Init r0 to zero as per spec */ @@ -816,7 +818,7 @@ secondary_start: #endif -/* ========================================[ cache ]=== */ +/* ==========================================================[ cache ]=== */ /* alignment here so we don't change memory offsets with * memory controller defined diff --git a/arch/openrisc/kernel/vmlinux.lds.S b/arch/openrisc/kernel/vmlinux.lds.S index bc1306047837..049bff45f612 100644 --- a/arch/openrisc/kernel/vmlinux.lds.S +++ b/arch/openrisc/kernel/vmlinux.lds.S @@ -50,6 +50,7 @@ SECTIONS .text : AT(ADDR(.text) - LOAD_OFFSET) { _stext = .; + HEAD_TEXT TEXT_TEXT SCHED_TEXT LOCK_TEXT @@ -83,8 +84,6 @@ SECTIONS . = ALIGN(PAGE_SIZE); __init_begin = .; - HEAD_TEXT_SECTION - /* Page aligned */ INIT_TEXT_SECTION(PAGE_SIZE) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index a0ce777f9706..db9f7b2d07bf 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -145,6 +145,7 @@ config PPC select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE select ARCH_HAS_PHYS_TO_DMA select ARCH_HAS_PMEM_API + select ARCH_HAS_PREEMPT_LAZY select ARCH_HAS_PTE_DEVMAP if PPC_BOOK3S_64 select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC_BOOK3S_64 diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig index 57ded82c2840..e8b3f67bf3f5 100644 --- a/arch/powerpc/configs/pmac32_defconfig +++ b/arch/powerpc/configs/pmac32_defconfig @@ -208,6 +208,7 @@ CONFIG_FB_ATY=y CONFIG_FB_ATY_CT=y CONFIG_FB_ATY_GX=y CONFIG_FB_3DFX=y +CONFIG_BACKLIGHT_CLASS_DEVICE=y # CONFIG_VGA_CONSOLE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index 4d77e17541e9..ca0c90e95837 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -716,6 +716,7 @@ CONFIG_FB_TRIDENT=m CONFIG_FB_SM501=m CONFIG_FB_IBM_GXT4500=y CONFIG_LCD_PLATFORM=m +CONFIG_BACKLIGHT_CLASS_DEVICE=y CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y CONFIG_LOGO=y diff --git a/arch/powerpc/crypto/vmx.c b/arch/powerpc/crypto/vmx.c index 7eb713cc87c8..0b725e826388 100644 --- a/arch/powerpc/crypto/vmx.c +++ b/arch/powerpc/crypto/vmx.c @@ -74,4 +74,4 @@ MODULE_DESCRIPTION("IBM VMX cryptographic acceleration instructions " "support on Power 8"); MODULE_LICENSE("GPL"); MODULE_VERSION("1.0.0"); -MODULE_IMPORT_NS(CRYPTO_INTERNAL); +MODULE_IMPORT_NS("CRYPTO_INTERNAL"); diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index 18a3028ac3b6..dad2e7980f24 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -15,6 +15,15 @@ extern bool hugetlb_disabled; +static inline bool hugepages_supported(void) +{ + if (hugetlb_disabled) + return false; + + return HPAGE_SHIFT != 0; +} +#define hugepages_supported hugepages_supported + void __init hugetlbpage_init_defaultsize(void); int slice_is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 6ebca2996f18..2785c7462ebf 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -103,6 +103,7 @@ void arch_setup_new_exec(void); #define TIF_PATCH_PENDING 6 /* pending live patching update */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SINGLESTEP 8 /* singlestepping active */ +#define 
TIF_NEED_RESCHED_LAZY 9 /* Scheduler driven lazy preemption */ #define TIF_SECCOMP 10 /* secure computing */ #define TIF_RESTOREALL 11 /* Restore all regs (implies NOERROR) */ #define TIF_NOERROR 12 /* Force successful syscall return */ @@ -122,6 +123,7 @@ void arch_setup_new_exec(void); #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) #define _TIF_SIGPENDING (1<<TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED) +#define _TIF_NEED_RESCHED_LAZY (1<<TIF_NEED_RESCHED_LAZY) #define _TIF_NOTIFY_SIGNAL (1<<TIF_NOTIFY_SIGNAL) #define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG) #define _TIF_32BIT (1<<TIF_32BIT) @@ -142,9 +144,10 @@ void arch_setup_new_exec(void); _TIF_SYSCALL_EMU) #define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \ - _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ - _TIF_RESTORE_TM | _TIF_PATCH_PENDING | \ - _TIF_NOTIFY_SIGNAL) + _TIF_NEED_RESCHED_LAZY | _TIF_NOTIFY_RESUME | \ + _TIF_UPROBE | _TIF_RESTORE_TM | \ + _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL) + #define _TIF_PERSYSCALL_MASK (_TIF_RESTOREALL|_TIF_NOERROR) /* Bits in local_flags */ diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index 221c8f8ff89b..9bdd8080299b 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -86,7 +86,7 @@ static inline unsigned long tb_ticks_since(unsigned long tstamp) #define mulhdu(x,y) \ ({unsigned long z; asm ("mulhdu %0,%1,%2" : "=r" (z) : "r" (x), "r" (y)); z;}) #else -extern u64 mulhdu(u64, u64); +#define mulhdu(x, y) mul_u64_u64_shr(x, y, 64) #endif extern void div128_by_32(u64 dividend_high, u64 dividend_low, diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index af62ec974b97..8f4acc55407b 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -185,7 +185,7 @@ again: ti_flags = read_thread_flags(); while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) { local_irq_enable(); - if (ti_flags & _TIF_NEED_RESCHED) { + if (ti_flags & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)) { schedule(); } else { /* @@ -396,7 +396,7 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs) /* Returning to a kernel context with local irqs enabled. */ WARN_ON_ONCE(!(regs->msr & MSR_EE)); again: - if (IS_ENABLED(CONFIG_PREEMPT)) { + if (IS_ENABLED(CONFIG_PREEMPTION)) { /* Return to preemptible kernel context */ if (unlikely(read_thread_flags() & _TIF_NEED_RESCHED)) { if (preempt_count() == 0) diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 76381e14e800..0ebae6e4c19d 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -687,7 +687,7 @@ void iommu_table_clear(struct iommu_table *tbl) void iommu_table_reserve_pages(struct iommu_table *tbl, unsigned long res_start, unsigned long res_end) { - int i; + unsigned long i; WARN_ON_ONCE(res_end < res_start); /* diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 033cd00aa0fc..acb727f54e9d 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -28,32 +28,6 @@ .text /* - * This returns the high 64 bits of the product of two 64-bit numbers. 
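 * (callers now get this from the generic mul_u64_u64_shr(x, y, 64), per the time.h hunk above; e.g. 0xffffffffffffffff * 2 = 0x1fffffffffffffffe, whose high 64 bits are 0x1)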
- */ -_GLOBAL(mulhdu) - cmpwi r6,0 - cmpwi cr1,r3,0 - mr r10,r4 - mulhwu r4,r4,r5 - beq 1f - mulhwu r0,r10,r6 - mullw r7,r10,r5 - addc r7,r0,r7 - addze r4,r4 -1: beqlr cr1 /* all done if high part of A is 0 */ - mullw r9,r3,r5 - mulhwu r10,r3,r5 - beq 2f - mullw r0,r3,r6 - mulhwu r8,r3,r6 - addc r7,r0,r7 - adde r4,r4,r8 - addze r10,r10 -2: addc r4,r4,r9 - addze r3,r10 - blr - -/* * reloc_got2 runs through the .got2 section adding an offset * to each entry. */ diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 7b739b9a91ab..ef91f71e07c4 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1960,8 +1960,8 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) * address of _start and the second entry is the TOC * value we need to use. */ - __get_user(entry, (unsigned long __user *)start); - __get_user(toc, (unsigned long __user *)start+1); + get_user(entry, (unsigned long __user *)start); + get_user(toc, (unsigned long __user *)start+1); /* Check whether the e_entry function descriptor entries * need to be relocated before we can use them. diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 8e776ba39497..57082fac4668 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -2792,7 +2792,6 @@ static void __init flatten_device_tree(void) dt_struct_start, dt_struct_end); } -#ifdef CONFIG_PPC_CHRP /* * Pegasos and BriQ lacks the "ranges" property in the isa node * Pegasos needs decimal IRQ 14/15, not hexadecimal @@ -2843,11 +2842,7 @@ static void __init fixup_device_tree_chrp(void) } } } -#else -#define fixup_device_tree_chrp() -#endif -#if defined(CONFIG_PPC64) && defined(CONFIG_PPC_PMAC) static void __init fixup_device_tree_pmac64(void) { phandle u3, i2c, mpic; @@ -2887,11 +2882,7 @@ static void __init fixup_device_tree_pmac64(void) prom_setprop(i2c, "/u3@0,f8000000/i2c@f8001000", "interrupt-parent", &parent, sizeof(parent)); } -#else -#define fixup_device_tree_pmac64() -#endif -#ifdef CONFIG_PPC_PMAC static void __init fixup_device_tree_pmac(void) { __be32 val = 1; @@ -2911,11 +2902,7 @@ static void __init fixup_device_tree_pmac(void) prom_setprop(node, NULL, "#size-cells", &val, sizeof(val)); } } -#else -static inline void fixup_device_tree_pmac(void) { } -#endif -#ifdef CONFIG_PPC_EFIKA /* * The MPC5200 FEC driver requires an phy-handle property to tell it how * to talk to the phy. If the phy-handle property is missing, then this @@ -3047,11 +3034,7 @@ static void __init fixup_device_tree_efika(void) /* Make sure ethernet phy-handle property exists */ fixup_device_tree_efika_add_phy(); } -#else -#define fixup_device_tree_efika() -#endif -#ifdef CONFIG_PPC_PASEMI_NEMO /* * CFE supplied on Nemo is broken in several ways, biggest * problem is that it reassigns ISA interrupts to unused mpic ints. 
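 * (as with the other fixup helpers, the CONFIG_PPC_PASEMI_NEMO #ifdef wrapper goes away; fixup_device_tree() below gates the call with a runtime IS_ENABLED() check, and the compiler still discards the unused body)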
@@ -3127,17 +3110,23 @@ static void __init fixup_device_tree_pasemi(void) prom_setprop(iob, name, "device_type", "isa", sizeof("isa")); } -#else /* !CONFIG_PPC_PASEMI_NEMO */ -static inline void fixup_device_tree_pasemi(void) { } -#endif static void __init fixup_device_tree(void) { - fixup_device_tree_chrp(); - fixup_device_tree_pmac(); - fixup_device_tree_pmac64(); - fixup_device_tree_efika(); - fixup_device_tree_pasemi(); + if (IS_ENABLED(CONFIG_PPC_CHRP)) + fixup_device_tree_chrp(); + + if (IS_ENABLED(CONFIG_PPC_PMAC)) + fixup_device_tree_pmac(); + + if (IS_ENABLED(CONFIG_PPC_PMAC) && IS_ENABLED(CONFIG_PPC64)) + fixup_device_tree_pmac64(); + + if (IS_ENABLED(CONFIG_PPC_EFIKA)) + fixup_device_tree_efika(); + + if (IS_ENABLED(CONFIG_PPC_PASEMI_NEMO)) + fixup_device_tree_pasemi(); } static void __init prom_find_boot_cpu(void) diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 6fa179448c33..f7d7a93f07fc 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -834,8 +834,8 @@ static int __init check_cache_coherency(void) if (devtree_coherency != KERNEL_COHERENCY) { printk(KERN_ERR "kernel coherency:%s != device tree_coherency:%s\n", - KERNEL_COHERENCY ? "on" : "off", - devtree_coherency ? "on" : "off"); + str_on_off(KERNEL_COHERENCY), + str_on_off(devtree_coherency)); BUG(); } diff --git a/arch/powerpc/kernel/vdso32_wrapper.S b/arch/powerpc/kernel/vdso32_wrapper.S index 10f92f265d51..20bca3548b44 100644 --- a/arch/powerpc/kernel/vdso32_wrapper.S +++ b/arch/powerpc/kernel/vdso32_wrapper.S @@ -2,7 +2,7 @@ #include <linux/linkage.h> #include <asm/page.h> - __PAGE_ALIGNED_DATA + .section ".data..ro_after_init", "aw" .globl vdso32_start, vdso32_end .balign PAGE_SIZE diff --git a/arch/powerpc/kernel/vdso64_wrapper.S b/arch/powerpc/kernel/vdso64_wrapper.S index 839d1a61411d..1912936fa227 100644 --- a/arch/powerpc/kernel/vdso64_wrapper.S +++ b/arch/powerpc/kernel/vdso64_wrapper.S @@ -2,7 +2,7 @@ #include <linux/linkage.h> #include <asm/page.h> - __PAGE_ALIGNED_DATA + .section ".data..ro_after_init", "aw" .globl vdso64_start, vdso64_end .balign PAGE_SIZE diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h index 6d0d329cbb35..f9acf866c709 100644 --- a/arch/powerpc/kvm/e500.h +++ b/arch/powerpc/kvm/e500.h @@ -34,6 +34,8 @@ enum vcpu_ftr { #define E500_TLB_BITMAP (1 << 30) /* TLB1 entry is mapped by host TLB0 */ #define E500_TLB_TLB0 (1 << 29) +/* entry is writable on the host */ +#define E500_TLB_WRITABLE (1 << 28) /* bits [6-5] MAS2_X1 and MAS2_X0 and [4-0] bits for WIMGE */ #define E500_TLB_MAS2_ATTR (0x7f) diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index e5a145b578a4..06caf8bbbe2b 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -45,11 +45,14 @@ static inline unsigned int tlb1_max_shadow_size(void) return host_tlb_params[1].entries - tlbcam_index - 1; } -static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode) +static inline u32 e500_shadow_mas3_attrib(u32 mas3, bool writable, int usermode) { /* Mask off reserved bits. 
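 * If the host mapping is not writable, also clear the supervisor and user
 * write bits (MAS3_SW/MAS3_UW) so the guest cannot dirty a read-only host page.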
*/ mas3 &= MAS3_ATTRIB_MASK; + if (!writable) + mas3 &= ~(MAS3_UW|MAS3_SW); + #ifndef CONFIG_KVM_BOOKE_HV if (!usermode) { /* Guest is in supervisor mode, @@ -242,17 +245,18 @@ static inline int tlbe_is_writable(struct kvm_book3e_206_tlb_entry *tlbe) return tlbe->mas7_3 & (MAS3_SW|MAS3_UW); } -static inline bool kvmppc_e500_ref_setup(struct tlbe_ref *ref, +static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref, struct kvm_book3e_206_tlb_entry *gtlbe, - kvm_pfn_t pfn, unsigned int wimg) + kvm_pfn_t pfn, unsigned int wimg, + bool writable) { ref->pfn = pfn; ref->flags = E500_TLB_VALID; + if (writable) + ref->flags |= E500_TLB_WRITABLE; /* Use guest supplied MAS2_G and MAS2_E */ ref->flags |= (gtlbe->mas2 & MAS2_ATTRIB_MASK) | wimg; - - return tlbe_is_writable(gtlbe); } static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref) @@ -305,6 +309,7 @@ static void kvmppc_e500_setup_stlbe( { kvm_pfn_t pfn = ref->pfn; u32 pr = vcpu->arch.shared->msr & MSR_PR; + bool writable = !!(ref->flags & E500_TLB_WRITABLE); BUG_ON(!(ref->flags & E500_TLB_VALID)); @@ -312,7 +317,7 @@ static void kvmppc_e500_setup_stlbe( stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID; stlbe->mas2 = (gvaddr & MAS2_EPN) | (ref->flags & E500_TLB_MAS2_ATTR); stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) | - e500_shadow_mas3_attrib(gtlbe->mas7_3, pr); + e500_shadow_mas3_attrib(gtlbe->mas7_3, writable, pr); } static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, @@ -321,15 +326,14 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, struct tlbe_ref *ref) { struct kvm_memory_slot *slot; - unsigned long pfn = 0; /* silence GCC warning */ + unsigned int psize; + unsigned long pfn; struct page *page = NULL; unsigned long hva; - int pfnmap = 0; int tsize = BOOK3E_PAGESZ_4K; int ret = 0; unsigned long mmu_seq; struct kvm *kvm = vcpu_e500->vcpu.kvm; - unsigned long tsize_pages = 0; pte_t *ptep; unsigned int wimg = 0; pgd_t *pgdir; @@ -351,110 +355,12 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, slot = gfn_to_memslot(vcpu_e500->vcpu.kvm, gfn); hva = gfn_to_hva_memslot(slot, gfn); - if (tlbsel == 1) { - struct vm_area_struct *vma; - mmap_read_lock(kvm->mm); - - vma = find_vma(kvm->mm, hva); - if (vma && hva >= vma->vm_start && - (vma->vm_flags & VM_PFNMAP)) { - /* - * This VMA is a physically contiguous region (e.g. - * /dev/mem) that bypasses normal Linux page - * management. Find the overlap between the - * vma and the memslot. - */ - - unsigned long start, end; - unsigned long slot_start, slot_end; - - pfnmap = 1; - - start = vma->vm_pgoff; - end = start + - vma_pages(vma); - - pfn = start + ((hva - vma->vm_start) >> PAGE_SHIFT); - - slot_start = pfn - (gfn - slot->base_gfn); - slot_end = slot_start + slot->npages; - - if (start < slot_start) - start = slot_start; - if (end > slot_end) - end = slot_end; - - tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >> - MAS1_TSIZE_SHIFT; - - /* - * e500 doesn't implement the lowest tsize bit, - * or 1K pages. - */ - tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1); - - /* - * Now find the largest tsize (up to what the guest - * requested) that will cover gfn, stay within the - * range, and for which gfn and pfn are mutually - * aligned. 
- */ - - for (; tsize > BOOK3E_PAGESZ_4K; tsize -= 2) { - unsigned long gfn_start, gfn_end; - tsize_pages = 1UL << (tsize - 2); - - gfn_start = gfn & ~(tsize_pages - 1); - gfn_end = gfn_start + tsize_pages; - - if (gfn_start + pfn - gfn < start) - continue; - if (gfn_end + pfn - gfn > end) - continue; - if ((gfn & (tsize_pages - 1)) != - (pfn & (tsize_pages - 1))) - continue; - - gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1); - pfn &= ~(tsize_pages - 1); - break; - } - } else if (vma && hva >= vma->vm_start && - is_vm_hugetlb_page(vma)) { - unsigned long psize = vma_kernel_pagesize(vma); - - tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >> - MAS1_TSIZE_SHIFT; - - /* - * Take the largest page size that satisfies both host - * and guest mapping - */ - tsize = min(__ilog2(psize) - 10, tsize); - - /* - * e500 doesn't implement the lowest tsize bit, - * or 1K pages. - */ - tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1); - } - - mmap_read_unlock(kvm->mm); - } - - if (likely(!pfnmap)) { - tsize_pages = 1UL << (tsize + 10 - PAGE_SHIFT); - pfn = __kvm_faultin_pfn(slot, gfn, FOLL_WRITE, NULL, &page); - if (is_error_noslot_pfn(pfn)) { - if (printk_ratelimit()) - pr_err("%s: real page not found for gfn %lx\n", - __func__, (long)gfn); - return -EINVAL; - } - - /* Align guest and physical address to page map boundaries */ - pfn &= ~(tsize_pages - 1); - gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1); + pfn = __kvm_faultin_pfn(slot, gfn, FOLL_WRITE, &writable, &page); + if (is_error_noslot_pfn(pfn)) { + if (printk_ratelimit()) + pr_err("%s: real page not found for gfn %lx\n", + __func__, (long)gfn); + return -EINVAL; } spin_lock(&kvm->mmu_lock); @@ -472,14 +378,13 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, * can't run hence pfn won't change. */ local_irq_save(flags); - ptep = find_linux_pte(pgdir, hva, NULL, NULL); + ptep = find_linux_pte(pgdir, hva, NULL, &psize); if (ptep) { pte_t pte = READ_ONCE(*ptep); if (pte_present(pte)) { wimg = (pte_val(pte) >> PTE_WIMGE_SHIFT) & MAS2_WIMGE_MASK; - local_irq_restore(flags); } else { local_irq_restore(flags); pr_err_ratelimited("%s: pte not present: gfn %lx,pfn %lx\n", @@ -488,10 +393,72 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, goto out; } } - writable = kvmppc_e500_ref_setup(ref, gtlbe, pfn, wimg); + local_irq_restore(flags); + + if (psize && tlbsel == 1) { + unsigned long psize_pages, tsize_pages; + unsigned long start, end; + unsigned long slot_start, slot_end; + + psize_pages = 1UL << (psize - PAGE_SHIFT); + start = pfn & ~(psize_pages - 1); + end = start + psize_pages; + + slot_start = pfn - (gfn - slot->base_gfn); + slot_end = slot_start + slot->npages; + + if (start < slot_start) + start = slot_start; + if (end > slot_end) + end = slot_end; + + tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >> + MAS1_TSIZE_SHIFT; + + /* + * Any page size that doesn't satisfy the host mapping + * will fail the start and end tests. + */ + tsize = min(psize - PAGE_SHIFT + BOOK3E_PAGESZ_4K, tsize); + + /* + * e500 doesn't implement the lowest tsize bit, + * or 1K pages. + */ + tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1); + + /* + * Now find the largest tsize (up to what the guest + * requested) that will cover gfn, stay within the + * range, and for which gfn and pfn are mutually + * aligned. 
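 * ("mutually aligned" means gfn and pfn agree in their low log2(tsize_pages)
 * bits: a 4MB entry spans 1024 4KB pages, so the low 10 bits must match for a
 * single TLB1 entry to map guest and host congruently)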
+ */ + + for (; tsize > BOOK3E_PAGESZ_4K; tsize -= 2) { + unsigned long gfn_start, gfn_end; + tsize_pages = 1UL << (tsize - 2); + + gfn_start = gfn & ~(tsize_pages - 1); + gfn_end = gfn_start + tsize_pages; + + if (gfn_start + pfn - gfn < start) + continue; + if (gfn_end + pfn - gfn > end) + continue; + if ((gfn & (tsize_pages - 1)) != + (pfn & (tsize_pages - 1))) + continue; + + gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1); + pfn &= ~(tsize_pages - 1); + break; + } + } + kvmppc_e500_ref_setup(ref, gtlbe, pfn, wimg, writable); kvmppc_e500_setup_stlbe(&vcpu_e500->vcpu, gtlbe, tsize, ref, gvaddr, stlbe); + writable = tlbe_is_writable(stlbe); /* Clear i-cache for new pages */ kvmppc_mmu_flush_icache(pfn); diff --git a/arch/powerpc/lib/vmx-helper.c b/arch/powerpc/lib/vmx-helper.c index d491da8d1838..58ed6bd613a6 100644 --- a/arch/powerpc/lib/vmx-helper.c +++ b/arch/powerpc/lib/vmx-helper.c @@ -45,7 +45,7 @@ int exit_vmx_usercopy(void) * set and we are preemptible. The hack here is to schedule a * decrementer to fire here and reschedule for us if necessary. */ - if (IS_ENABLED(CONFIG_PREEMPT) && need_resched()) + if (IS_ENABLED(CONFIG_PREEMPTION) && need_resched()) set_dec(1); return 0; } diff --git a/arch/powerpc/platforms/8xx/cpm1.c b/arch/powerpc/platforms/8xx/cpm1.c index b24d4102fbf6..1dc095ad48fc 100644 --- a/arch/powerpc/platforms/8xx/cpm1.c +++ b/arch/powerpc/platforms/8xx/cpm1.c @@ -45,7 +45,7 @@ #include <sysdev/fsl_soc.h> #ifdef CONFIG_8xx_GPIO -#include <linux/gpio/legacy-of-mm-gpiochip.h> +#include <linux/gpio/driver.h> #endif #define CPM_MAP_SIZE (0x4000) @@ -376,7 +376,8 @@ int __init cpm1_clk_setup(enum cpm_clk_target target, int clock, int mode) #ifdef CONFIG_8xx_GPIO struct cpm1_gpio16_chip { - struct of_mm_gpio_chip mm_gc; + struct gpio_chip gc; + void __iomem *regs; spinlock_t lock; /* shadowed data register to clear/set bits safely */ @@ -386,19 +387,17 @@ struct cpm1_gpio16_chip { int irq[16]; }; -static void cpm1_gpio16_save_regs(struct of_mm_gpio_chip *mm_gc) +static void cpm1_gpio16_save_regs(struct cpm1_gpio16_chip *cpm1_gc) { - struct cpm1_gpio16_chip *cpm1_gc = - container_of(mm_gc, struct cpm1_gpio16_chip, mm_gc); - struct cpm_ioport16 __iomem *iop = mm_gc->regs; + struct cpm_ioport16 __iomem *iop = cpm1_gc->regs; cpm1_gc->cpdata = in_be16(&iop->dat); } static int cpm1_gpio16_get(struct gpio_chip *gc, unsigned int gpio) { - struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); - struct cpm_ioport16 __iomem *iop = mm_gc->regs; + struct cpm1_gpio16_chip *cpm1_gc = gpiochip_get_data(gc); + struct cpm_ioport16 __iomem *iop = cpm1_gc->regs; u16 pin_mask; pin_mask = 1 << (15 - gpio); @@ -406,11 +405,9 @@ static int cpm1_gpio16_get(struct gpio_chip *gc, unsigned int gpio) return !!(in_be16(&iop->dat) & pin_mask); } -static void __cpm1_gpio16_set(struct of_mm_gpio_chip *mm_gc, u16 pin_mask, - int value) +static void __cpm1_gpio16_set(struct cpm1_gpio16_chip *cpm1_gc, u16 pin_mask, int value) { - struct cpm1_gpio16_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc); - struct cpm_ioport16 __iomem *iop = mm_gc->regs; + struct cpm_ioport16 __iomem *iop = cpm1_gc->regs; if (value) cpm1_gc->cpdata |= pin_mask; @@ -422,38 +419,35 @@ static void __cpm1_gpio16_set(struct of_mm_gpio_chip *mm_gc, u16 pin_mask, static void cpm1_gpio16_set(struct gpio_chip *gc, unsigned int gpio, int value) { - struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); - struct cpm1_gpio16_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc); + struct cpm1_gpio16_chip *cpm1_gc = gpiochip_get_data(gc); unsigned long 
flags; u16 pin_mask = 1 << (15 - gpio); spin_lock_irqsave(&cpm1_gc->lock, flags); - __cpm1_gpio16_set(mm_gc, pin_mask, value); + __cpm1_gpio16_set(cpm1_gc, pin_mask, value); spin_unlock_irqrestore(&cpm1_gc->lock, flags); } static int cpm1_gpio16_to_irq(struct gpio_chip *gc, unsigned int gpio) { - struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); - struct cpm1_gpio16_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc); + struct cpm1_gpio16_chip *cpm1_gc = gpiochip_get_data(gc); return cpm1_gc->irq[gpio] ? : -ENXIO; } static int cpm1_gpio16_dir_out(struct gpio_chip *gc, unsigned int gpio, int val) { - struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); - struct cpm1_gpio16_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc); - struct cpm_ioport16 __iomem *iop = mm_gc->regs; + struct cpm1_gpio16_chip *cpm1_gc = gpiochip_get_data(gc); + struct cpm_ioport16 __iomem *iop = cpm1_gc->regs; unsigned long flags; u16 pin_mask = 1 << (15 - gpio); spin_lock_irqsave(&cpm1_gc->lock, flags); setbits16(&iop->dir, pin_mask); - __cpm1_gpio16_set(mm_gc, pin_mask, val); + __cpm1_gpio16_set(cpm1_gc, pin_mask, val); spin_unlock_irqrestore(&cpm1_gc->lock, flags); @@ -462,9 +456,8 @@ static int cpm1_gpio16_dir_out(struct gpio_chip *gc, unsigned int gpio, int val) static int cpm1_gpio16_dir_in(struct gpio_chip *gc, unsigned int gpio) { - struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); - struct cpm1_gpio16_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc); - struct cpm_ioport16 __iomem *iop = mm_gc->regs; + struct cpm1_gpio16_chip *cpm1_gc = gpiochip_get_data(gc); + struct cpm_ioport16 __iomem *iop = cpm1_gc->regs; unsigned long flags; u16 pin_mask = 1 << (15 - gpio); @@ -481,11 +474,10 @@ int cpm1_gpiochip_add16(struct device *dev) { struct device_node *np = dev->of_node; struct cpm1_gpio16_chip *cpm1_gc; - struct of_mm_gpio_chip *mm_gc; struct gpio_chip *gc; u16 mask; - cpm1_gc = kzalloc(sizeof(*cpm1_gc), GFP_KERNEL); + cpm1_gc = devm_kzalloc(dev, sizeof(*cpm1_gc), GFP_KERNEL); if (!cpm1_gc) return -ENOMEM; @@ -499,10 +491,8 @@ int cpm1_gpiochip_add16(struct device *dev) cpm1_gc->irq[i] = irq_of_parse_and_map(np, j++); } - mm_gc = &cpm1_gc->mm_gc; - gc = &mm_gc->gc; - - mm_gc->save_regs = cpm1_gpio16_save_regs; + gc = &cpm1_gc->gc; + gc->base = -1; gc->ngpio = 16; gc->direction_input = cpm1_gpio16_dir_in; gc->direction_output = cpm1_gpio16_dir_out; @@ -512,30 +502,39 @@ int cpm1_gpiochip_add16(struct device *dev) gc->parent = dev; gc->owner = THIS_MODULE; - return of_mm_gpiochip_add_data(np, mm_gc, cpm1_gc); + gc->label = devm_kasprintf(dev, GFP_KERNEL, "%pOF", np); + if (!gc->label) + return -ENOMEM; + + cpm1_gc->regs = devm_of_iomap(dev, np, 0, NULL); + if (IS_ERR(cpm1_gc->regs)) + return PTR_ERR(cpm1_gc->regs); + + cpm1_gpio16_save_regs(cpm1_gc); + + return devm_gpiochip_add_data(dev, gc, cpm1_gc); } struct cpm1_gpio32_chip { - struct of_mm_gpio_chip mm_gc; + struct gpio_chip gc; + void __iomem *regs; spinlock_t lock; /* shadowed data register to clear/set bits safely */ u32 cpdata; }; -static void cpm1_gpio32_save_regs(struct of_mm_gpio_chip *mm_gc) +static void cpm1_gpio32_save_regs(struct cpm1_gpio32_chip *cpm1_gc) { - struct cpm1_gpio32_chip *cpm1_gc = - container_of(mm_gc, struct cpm1_gpio32_chip, mm_gc); - struct cpm_ioport32b __iomem *iop = mm_gc->regs; + struct cpm_ioport32b __iomem *iop = cpm1_gc->regs; cpm1_gc->cpdata = in_be32(&iop->dat); } static int cpm1_gpio32_get(struct gpio_chip *gc, unsigned int gpio) { - struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); - struct cpm_ioport32b __iomem 
*iop = mm_gc->regs; + struct cpm1_gpio32_chip *cpm1_gc = gpiochip_get_data(gc); + struct cpm_ioport32b __iomem *iop = cpm1_gc->regs; u32 pin_mask; pin_mask = 1 << (31 - gpio); @@ -543,11 +542,9 @@ static int cpm1_gpio32_get(struct gpio_chip *gc, unsigned int gpio) return !!(in_be32(&iop->dat) & pin_mask); } -static void __cpm1_gpio32_set(struct of_mm_gpio_chip *mm_gc, u32 pin_mask, - int value) +static void __cpm1_gpio32_set(struct cpm1_gpio32_chip *cpm1_gc, u32 pin_mask, int value) { - struct cpm1_gpio32_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc); - struct cpm_ioport32b __iomem *iop = mm_gc->regs; + struct cpm_ioport32b __iomem *iop = cpm1_gc->regs; if (value) cpm1_gc->cpdata |= pin_mask; @@ -559,30 +556,28 @@ static void __cpm1_gpio32_set(struct of_mm_gpio_chip *mm_gc, u32 pin_mask, static void cpm1_gpio32_set(struct gpio_chip *gc, unsigned int gpio, int value) { - struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); - struct cpm1_gpio32_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc); + struct cpm1_gpio32_chip *cpm1_gc = gpiochip_get_data(gc); unsigned long flags; u32 pin_mask = 1 << (31 - gpio); spin_lock_irqsave(&cpm1_gc->lock, flags); - __cpm1_gpio32_set(mm_gc, pin_mask, value); + __cpm1_gpio32_set(cpm1_gc, pin_mask, value); spin_unlock_irqrestore(&cpm1_gc->lock, flags); } static int cpm1_gpio32_dir_out(struct gpio_chip *gc, unsigned int gpio, int val) { - struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); - struct cpm1_gpio32_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc); - struct cpm_ioport32b __iomem *iop = mm_gc->regs; + struct cpm1_gpio32_chip *cpm1_gc = gpiochip_get_data(gc); + struct cpm_ioport32b __iomem *iop = cpm1_gc->regs; unsigned long flags; u32 pin_mask = 1 << (31 - gpio); spin_lock_irqsave(&cpm1_gc->lock, flags); setbits32(&iop->dir, pin_mask); - __cpm1_gpio32_set(mm_gc, pin_mask, val); + __cpm1_gpio32_set(cpm1_gc, pin_mask, val); spin_unlock_irqrestore(&cpm1_gc->lock, flags); @@ -591,9 +586,8 @@ static int cpm1_gpio32_dir_out(struct gpio_chip *gc, unsigned int gpio, int val) static int cpm1_gpio32_dir_in(struct gpio_chip *gc, unsigned int gpio) { - struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); - struct cpm1_gpio32_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc); - struct cpm_ioport32b __iomem *iop = mm_gc->regs; + struct cpm1_gpio32_chip *cpm1_gc = gpiochip_get_data(gc); + struct cpm_ioport32b __iomem *iop = cpm1_gc->regs; unsigned long flags; u32 pin_mask = 1 << (31 - gpio); @@ -610,19 +604,16 @@ int cpm1_gpiochip_add32(struct device *dev) { struct device_node *np = dev->of_node; struct cpm1_gpio32_chip *cpm1_gc; - struct of_mm_gpio_chip *mm_gc; struct gpio_chip *gc; - cpm1_gc = kzalloc(sizeof(*cpm1_gc), GFP_KERNEL); + cpm1_gc = devm_kzalloc(dev, sizeof(*cpm1_gc), GFP_KERNEL); if (!cpm1_gc) return -ENOMEM; spin_lock_init(&cpm1_gc->lock); - mm_gc = &cpm1_gc->mm_gc; - gc = &mm_gc->gc; - - mm_gc->save_regs = cpm1_gpio32_save_regs; + gc = &cpm1_gc->gc; + gc->base = -1; gc->ngpio = 32; gc->direction_input = cpm1_gpio32_dir_in; gc->direction_output = cpm1_gpio32_dir_out; @@ -631,7 +622,17 @@ int cpm1_gpiochip_add32(struct device *dev) gc->parent = dev; gc->owner = THIS_MODULE; - return of_mm_gpiochip_add_data(np, mm_gc, cpm1_gc); + gc->label = devm_kasprintf(dev, GFP_KERNEL, "%pOF", np); + if (!gc->label) + return -ENOMEM; + + cpm1_gc->regs = devm_of_iomap(dev, np, 0, NULL); + if (IS_ERR(cpm1_gc->regs)) + return PTR_ERR(cpm1_gc->regs); + + cpm1_gpio32_save_regs(cpm1_gc); + + return devm_gpiochip_add_data(dev, gc, cpm1_gc); } #endif /* CONFIG_8xx_GPIO */ 
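A note on the cpm1_gpio16/32 rework above: it follows the usual recipe for retiring the legacy of_mm_gpio_chip helper. Embed a bare struct gpio_chip, iomap the registers with devm_of_iomap(), label the chip with devm_kasprintf(), and register through devm_gpiochip_add_data() so every failure path unwinds automatically. A minimal sketch of that shape, using hypothetical foo_* names rather than anything from this patch:

#include <linux/bits.h>
#include <linux/device.h>
#include <linux/err.h>
#include <linux/gpio/driver.h>
#include <linux/io.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/slab.h>

/* Hypothetical chip: one 32-bit data register at offset 0. */
struct foo_gpio_chip {
	struct gpio_chip gc;
	void __iomem *regs;
};

static int foo_gpio_get(struct gpio_chip *gc, unsigned int gpio)
{
	struct foo_gpio_chip *fg = gpiochip_get_data(gc);

	/* MSB-first pin numbering, as on the CPM1 ports */
	return !!(readl(fg->regs) & BIT(31 - gpio));
}

static int foo_gpiochip_add(struct device *dev)
{
	struct device_node *np = dev->of_node;
	struct foo_gpio_chip *fg;
	struct gpio_chip *gc;

	fg = devm_kzalloc(dev, sizeof(*fg), GFP_KERNEL);
	if (!fg)
		return -ENOMEM;

	gc = &fg->gc;
	gc->base = -1;		/* let gpiolib allocate the GPIO base */
	gc->ngpio = 32;
	gc->get = foo_gpio_get;
	gc->parent = dev;
	gc->owner = THIS_MODULE;

	gc->label = devm_kasprintf(dev, GFP_KERNEL, "%pOF", np);
	if (!gc->label)
		return -ENOMEM;

	fg->regs = devm_of_iomap(dev, np, 0, NULL);
	if (IS_ERR(fg->regs))
		return PTR_ERR(fg->regs);

	/* Managed registration: no explicit teardown or kfree() needed. */
	return devm_gpiochip_add_data(dev, gc, fg);
}

Because each resource is device-managed, the explicit kfree() calls and the of_mm save_regs hook plumbing in the old code simply disappear, which is most of the diff above.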
diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c index f381b177ea06..0b6365d85d11 100644 --- a/arch/powerpc/platforms/book3s/vas-api.c +++ b/arch/powerpc/platforms/book3s/vas-api.c @@ -464,7 +464,43 @@ static vm_fault_t vas_mmap_fault(struct vm_fault *vmf) return VM_FAULT_SIGBUS; } +/* + * During mmap() paste address, mapping VMA is saved in VAS window + * struct which is used to unmap during migration if the window is + * still open. But the user space can remove this mapping with + * munmap() before closing the window and the VMA address will + * be invalid. Set VAS window VMA to NULL in this function which + * is called before VMA free. + */ +static void vas_mmap_close(struct vm_area_struct *vma) +{ + struct file *fp = vma->vm_file; + struct coproc_instance *cp_inst = fp->private_data; + struct vas_window *txwin; + + /* Should not happen */ + if (!cp_inst || !cp_inst->txwin) { + pr_err("No attached VAS window for the paste address mmap\n"); + return; + } + + txwin = cp_inst->txwin; + /* + * task_ref.vma is set in coproc_mmap() during mmap paste + * address. So it has to be the same VMA that is getting freed. + */ + if (WARN_ON(txwin->task_ref.vma != vma)) { + pr_err("Invalid paste address mmaping\n"); + return; + } + + mutex_lock(&txwin->task_ref.mmap_mutex); + txwin->task_ref.vma = NULL; + mutex_unlock(&txwin->task_ref.mmap_mutex); +} + static const struct vm_operations_struct vas_vm_ops = { + .close = vas_mmap_close, .fault = vas_mmap_fault, }; diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 534cd159e9ab..29f1a0cc59cd 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -1650,7 +1650,8 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) iommu_table_setparms_common(newtbl, pci->phb->bus->number, create.liobn, dynamic_addr, dynamic_len, page_shift, NULL, &iommu_table_lpar_multi_ops); - iommu_init_table(newtbl, pci->phb->node, start, end); + iommu_init_table(newtbl, pci->phb->node, + start >> page_shift, end >> page_shift); pci->table_group->tables[default_win_removed ? 0 : 1] = newtbl; @@ -2065,7 +2066,9 @@ static long spapr_tce_create_table(struct iommu_table_group *table_group, int nu offset, 1UL << window_shift, IOMMU_PAGE_SHIFT_4K, NULL, &iommu_table_lpar_multi_ops); - iommu_init_table(tbl, pci->phb->node, start, end); + iommu_init_table(tbl, pci->phb->node, + start >> IOMMU_PAGE_SHIFT_4K, + end >> IOMMU_PAGE_SHIFT_4K); table_group->tables[0] = tbl; @@ -2136,7 +2139,7 @@ static long spapr_tce_create_table(struct iommu_table_group *table_group, int nu /* New table for using DDW instead of the default DMA window */ iommu_table_setparms_common(tbl, pci->phb->bus->number, create.liobn, win_addr, 1UL << len, page_shift, NULL, &iommu_table_lpar_multi_ops); - iommu_init_table(tbl, pci->phb->node, start, end); + iommu_init_table(tbl, pci->phb->node, start >> page_shift, end >> page_shift); pci->table_group->tables[num] = tbl; set_iommu_table_base(&pdev->dev, tbl); diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index f4e841a36458..268859e4df87 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -2623,9 +2623,9 @@ static void dump_one_paca(int cpu) printf("paca for cpu 0x%x @ %px:\n", cpu, p); - printf(" %-*s = %s\n", 25, "possible", cpu_possible(cpu) ? "yes" : "no"); - printf(" %-*s = %s\n", 25, "present", cpu_present(cpu) ? 
"yes" : "no"); - printf(" %-*s = %s\n", 25, "online", cpu_online(cpu) ? "yes" : "no"); + printf(" %-*s = %s\n", 25, "possible", str_yes_no(cpu_possible(cpu))); + printf(" %-*s = %s\n", 25, "present", str_yes_no(cpu_present(cpu))); + printf(" %-*s = %s\n", 25, "online", str_yes_no(cpu_online(cpu))); #define DUMP(paca, name, format) \ printf(" %-*s = "format"\t(0x%lx)\n", 25, #name, 18, paca->name, \ diff --git a/arch/riscv/include/asm/kfence.h b/arch/riscv/include/asm/kfence.h index 7388edd88986..d08bf7fb3aee 100644 --- a/arch/riscv/include/asm/kfence.h +++ b/arch/riscv/include/asm/kfence.h @@ -22,7 +22,9 @@ static inline bool kfence_protect_page(unsigned long addr, bool protect) else set_pte(pte, __pte(pte_val(ptep_get(pte)) | _PAGE_PRESENT)); - flush_tlb_kernel_range(addr, addr + PAGE_SIZE); + preempt_disable(); + local_flush_tlb_kernel_range(addr, addr + PAGE_SIZE); + preempt_enable(); return true; } diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index 71aabc5c6713..125f5ecd9565 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -122,6 +122,7 @@ struct kernel_mapping { extern struct kernel_mapping kernel_map; extern phys_addr_t phys_ram_base; +extern unsigned long vmemmap_start_pfn; #define is_kernel_mapping(x) \ ((x) >= kernel_map.virt_addr && (x) < (kernel_map.virt_addr + kernel_map.size)) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index d4e99eef90ac..050fdc49b5ad 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -87,7 +87,7 @@ * Define vmemmap for pfn_to_page & page_to_pfn calls. Needed if kernel * is configured with CONFIG_SPARSEMEM_VMEMMAP enabled. */ -#define vmemmap ((struct page *)VMEMMAP_START - (phys_ram_base >> PAGE_SHIFT)) +#define vmemmap ((struct page *)VMEMMAP_START - vmemmap_start_pfn) #define PCI_IO_SIZE SZ_16M #define PCI_IO_END VMEMMAP_START diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h index 6c82318065cf..3d250824178b 100644 --- a/arch/riscv/include/asm/sbi.h +++ b/arch/riscv/include/asm/sbi.h @@ -159,6 +159,7 @@ struct riscv_pmu_snapshot_data { }; #define RISCV_PMU_RAW_EVENT_MASK GENMASK_ULL(47, 0) +#define RISCV_PMU_PLAT_FW_EVENT_MASK GENMASK_ULL(61, 0) #define RISCV_PMU_RAW_EVENT_IDX 0x20000 #define RISCV_PLAT_FW_EVENT 0xFFFF diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h index e5121b89acea..52f11bfd0079 100644 --- a/arch/riscv/include/asm/spinlock.h +++ b/arch/riscv/include/asm/spinlock.h @@ -3,8 +3,11 @@ #ifndef __ASM_RISCV_SPINLOCK_H #define __ASM_RISCV_SPINLOCK_H -#ifdef CONFIG_RISCV_COMBO_SPINLOCKS +#ifdef CONFIG_QUEUED_SPINLOCKS #define _Q_PENDING_LOOPS (1 << 9) +#endif + +#ifdef CONFIG_RISCV_COMBO_SPINLOCKS #define __no_arch_spinlock_redefine #include <asm/ticket_spinlock.h> diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index c200d329d4bd..33a5a9f2a0d4 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -23,21 +23,21 @@ REG_S a0, TASK_TI_A0(tp) csrr a0, CSR_CAUSE /* Exclude IRQs */ - blt a0, zero, _new_vmalloc_restore_context_a0 + blt a0, zero, .Lnew_vmalloc_restore_context_a0 REG_S a1, TASK_TI_A1(tp) /* Only check new_vmalloc if we are in page/protection fault */ li a1, EXC_LOAD_PAGE_FAULT - beq a0, a1, _new_vmalloc_kernel_address + beq a0, a1, .Lnew_vmalloc_kernel_address li a1, EXC_STORE_PAGE_FAULT - beq a0, a1, _new_vmalloc_kernel_address + beq a0, a1, .Lnew_vmalloc_kernel_address li a1, EXC_INST_PAGE_FAULT - bne a0, 
a1, _new_vmalloc_restore_context_a1 + bne a0, a1, .Lnew_vmalloc_restore_context_a1 -_new_vmalloc_kernel_address: +.Lnew_vmalloc_kernel_address: /* Is it a kernel address? */ csrr a0, CSR_TVAL - bge a0, zero, _new_vmalloc_restore_context_a1 + bge a0, zero, .Lnew_vmalloc_restore_context_a1 /* Check if a new vmalloc mapping appeared that could explain the trap */ REG_S a2, TASK_TI_A2(tp) @@ -69,7 +69,7 @@ _new_vmalloc_kernel_address: /* Check the value of new_vmalloc for this cpu */ REG_L a2, 0(a0) and a2, a2, a1 - beq a2, zero, _new_vmalloc_restore_context + beq a2, zero, .Lnew_vmalloc_restore_context /* Atomically reset the current cpu bit in new_vmalloc */ amoxor.d a0, a1, (a0) @@ -83,11 +83,11 @@ _new_vmalloc_kernel_address: csrw CSR_SCRATCH, x0 sret -_new_vmalloc_restore_context: +.Lnew_vmalloc_restore_context: REG_L a2, TASK_TI_A2(tp) -_new_vmalloc_restore_context_a1: +.Lnew_vmalloc_restore_context_a1: REG_L a1, TASK_TI_A1(tp) -_new_vmalloc_restore_context_a0: +.Lnew_vmalloc_restore_context_a0: REG_L a0, TASK_TI_A0(tp) .endm @@ -278,6 +278,7 @@ SYM_CODE_START_NOALIGN(ret_from_exception) #else sret #endif +SYM_INNER_LABEL(ret_from_exception_end, SYM_L_GLOBAL) SYM_CODE_END(ret_from_exception) ASM_NOKPROBE(ret_from_exception) diff --git a/arch/riscv/kernel/jump_label.c b/arch/riscv/kernel/jump_label.c index 6eee6f736f68..654ed159c830 100644 --- a/arch/riscv/kernel/jump_label.c +++ b/arch/riscv/kernel/jump_label.c @@ -36,9 +36,15 @@ bool arch_jump_label_transform_queue(struct jump_entry *entry, insn = RISCV_INSN_NOP; } - mutex_lock(&text_mutex); - patch_insn_write(addr, &insn, sizeof(insn)); - mutex_unlock(&text_mutex); + if (early_boot_irqs_disabled) { + riscv_patch_in_stop_machine = 1; + patch_insn_write(addr, &insn, sizeof(insn)); + riscv_patch_in_stop_machine = 0; + } else { + mutex_lock(&text_mutex); + patch_insn_write(addr, &insn, sizeof(insn)); + mutex_unlock(&text_mutex); + } return true; } diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 1cd461f3d872..47d0ebeec93c 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -23,7 +23,7 @@ struct used_bucket { struct relocation_head { struct hlist_node node; - struct list_head *rel_entry; + struct list_head rel_entry; void *location; }; @@ -634,7 +634,7 @@ process_accumulated_relocations(struct module *me, location = rel_head_iter->location; list_for_each_entry_safe(rel_entry_iter, rel_entry_iter_tmp, - rel_head_iter->rel_entry, + &rel_head_iter->rel_entry, head) { curr_type = rel_entry_iter->type; reloc_handlers[curr_type].reloc_handler( @@ -704,16 +704,7 @@ static int add_relocation_to_accumulate(struct module *me, int type, return -ENOMEM; } - rel_head->rel_entry = - kmalloc(sizeof(struct list_head), GFP_KERNEL); - - if (!rel_head->rel_entry) { - kfree(entry); - kfree(rel_head); - return -ENOMEM; - } - - INIT_LIST_HEAD(rel_head->rel_entry); + INIT_LIST_HEAD(&rel_head->rel_entry); rel_head->location = location; INIT_HLIST_NODE(&rel_head->node); if (!current_head->first) { @@ -722,7 +713,6 @@ static int add_relocation_to_accumulate(struct module *me, int type, if (!bucket) { kfree(entry); - kfree(rel_head->rel_entry); kfree(rel_head); return -ENOMEM; } @@ -735,7 +725,7 @@ static int add_relocation_to_accumulate(struct module *me, int type, } /* Add relocation to head of discovered rel_head */ - list_add_tail(&entry->head, rel_head->rel_entry); + list_add_tail(&entry->head, &rel_head->rel_entry); return 0; } diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c 
index 380a0e8cecc0..c0738d6c6498 100644 --- a/arch/riscv/kernel/probes/kprobes.c +++ b/arch/riscv/kernel/probes/kprobes.c @@ -30,7 +30,7 @@ static void __kprobes arch_prepare_ss_slot(struct kprobe *p) p->ainsn.api.restore = (unsigned long)p->addr + len; patch_text_nosync(p->ainsn.api.insn, &p->opcode, len); - patch_text_nosync(p->ainsn.api.insn + len, &insn, GET_INSN_LENGTH(insn)); + patch_text_nosync((void *)p->ainsn.api.insn + len, &insn, GET_INSN_LENGTH(insn)); } static void __kprobes arch_prepare_simulate(struct kprobe *p) diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 016b48fcd6f2..45010e71df86 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -227,7 +227,7 @@ static void __init init_resources(void) static void __init parse_dtb(void) { /* Early scan of device tree from init memory */ - if (early_init_dt_scan(dtb_early_va, __pa(dtb_early_va))) { + if (early_init_dt_scan(dtb_early_va, dtb_early_pa)) { const char *name = of_flat_dt_get_machine_name(); if (name) { diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index 153a2db4c5fa..d4355c770c36 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -17,6 +17,7 @@ #ifdef CONFIG_FRAME_POINTER extern asmlinkage void handle_exception(void); +extern unsigned long ret_from_exception_end; static inline int fp_is_valid(unsigned long fp, unsigned long sp) { @@ -71,7 +72,8 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, fp = frame->fp; pc = ftrace_graph_ret_addr(current, &graph_idx, frame->ra, &frame->ra); - if (pc == (unsigned long)handle_exception) { + if (pc >= (unsigned long)handle_exception && + pc < (unsigned long)&ret_from_exception_end) { if (unlikely(!__kernel_text_address(pc) || !fn(arg, pc))) break; diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 51ebfd23e007..8ff8e8b36524 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -35,7 +35,7 @@ int show_unhandled_signals = 1; -static DEFINE_SPINLOCK(die_lock); +static DEFINE_RAW_SPINLOCK(die_lock); static int copy_code(struct pt_regs *regs, u16 *val, const u16 *insns) { @@ -81,7 +81,7 @@ void die(struct pt_regs *regs, const char *str) oops_enter(); - spin_lock_irqsave(&die_lock, flags); + raw_spin_lock_irqsave(&die_lock, flags); console_verbose(); bust_spinlocks(1); @@ -100,7 +100,7 @@ void die(struct pt_regs *regs, const char *str) bust_spinlocks(0); add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); - spin_unlock_irqrestore(&die_lock, flags); + raw_spin_unlock_irqrestore(&die_lock, flags); oops_exit(); if (in_interrupt()) diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c index dcced4db7fe8..19afd1f23537 100644 --- a/arch/riscv/kvm/aia.c +++ b/arch/riscv/kvm/aia.c @@ -590,7 +590,7 @@ void kvm_riscv_aia_enable(void) csr_set(CSR_HIE, BIT(IRQ_S_GEXT)); /* Enable IRQ filtering for overflow interrupt only if sscofpmf is present */ if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSCOFPMF)) - csr_write(CSR_HVIEN, BIT(IRQ_PMU_OVF)); + csr_set(CSR_HVIEN, BIT(IRQ_PMU_OVF)); } void kvm_riscv_aia_disable(void) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 0e8c20adcd98..8d167e09f1fe 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -33,6 +33,7 @@ #include <asm/pgtable.h> #include <asm/sections.h> #include <asm/soc.h> +#include <asm/sparsemem.h> #include <asm/tlbflush.h> #include "../kernel/head.h" @@ -62,6 +63,13 @@ EXPORT_SYMBOL(pgtable_l5_enabled); phys_addr_t phys_ram_base 
__ro_after_init; EXPORT_SYMBOL(phys_ram_base); +#ifdef CONFIG_SPARSEMEM_VMEMMAP +#define VMEMMAP_ADDR_ALIGN (1ULL << SECTION_SIZE_BITS) + +unsigned long vmemmap_start_pfn __ro_after_init; +EXPORT_SYMBOL(vmemmap_start_pfn); +#endif + unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; EXPORT_SYMBOL(empty_zero_page); @@ -240,8 +248,12 @@ static void __init setup_bootmem(void) * Make sure we align the start of the memory on a PMD boundary so that * at worst, we map the linear mapping with PMD mappings. */ - if (!IS_ENABLED(CONFIG_XIP_KERNEL)) + if (!IS_ENABLED(CONFIG_XIP_KERNEL)) { phys_ram_base = memblock_start_of_DRAM() & PMD_MASK; +#ifdef CONFIG_SPARSEMEM_VMEMMAP + vmemmap_start_pfn = round_down(phys_ram_base, VMEMMAP_ADDR_ALIGN) >> PAGE_SHIFT; +#endif + } /* * In 64-bit, any use of __va/__pa before this point is wrong as we @@ -1101,6 +1113,9 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) kernel_map.xiprom_sz = (uintptr_t)(&_exiprom) - (uintptr_t)(&_xiprom); phys_ram_base = CONFIG_PHYS_RAM_BASE; +#ifdef CONFIG_SPARSEMEM_VMEMMAP + vmemmap_start_pfn = round_down(phys_ram_base, VMEMMAP_ADDR_ALIGN) >> PAGE_SHIFT; +#endif kernel_map.phys_addr = (uintptr_t)CONFIG_PHYS_RAM_BASE; kernel_map.size = (uintptr_t)(&_end) - (uintptr_t)(&_start); @@ -1566,7 +1581,7 @@ static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd) pmd_clear(pmd); } -static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud) +static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud, bool is_vmemmap) { struct page *page = pud_page(*pud); struct ptdesc *ptdesc = page_ptdesc(page); @@ -1579,7 +1594,8 @@ static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud) return; } - pagetable_pmd_dtor(ptdesc); + if (!is_vmemmap) + pagetable_pmd_dtor(ptdesc); if (PageReserved(page)) free_reserved_page(page); else @@ -1703,7 +1719,7 @@ static void __meminit remove_pud_mapping(pud_t *pud_base, unsigned long addr, un remove_pmd_mapping(pmd_base, addr, next, is_vmemmap, altmap); if (pgtable_l4_enabled) - free_pmd_table(pmd_base, pudp); + free_pmd_table(pmd_base, pudp, is_vmemmap); } } diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 0077969170e8..e4931a32b896 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -233,6 +233,7 @@ config S390 select HAVE_VIRT_CPU_ACCOUNTING_IDLE select IOMMU_HELPER if PCI select IOMMU_SUPPORT if PCI + select KASAN_VMALLOC if KASAN select LOCK_MM_AND_FIND_VMA select MMU_GATHER_MERGE_VMAS select MMU_GATHER_NO_GATHER @@ -255,6 +256,7 @@ config S390 select USER_STACKTRACE_SUPPORT select VDSO_GETRANDOM select VIRT_CPU_ACCOUNTING + select VMAP_STACK select ZONE_DMA # Note: keep the above list sorted alphabetically @@ -688,32 +690,6 @@ config MAX_PHYSMEM_BITS Increasing the number of bits also increases the kernel image size. By default 46 bits (64TB) are supported. -config CHECK_STACK - def_bool y - depends on !VMAP_STACK - prompt "Detect kernel stack overflow" - help - This option enables the compiler option -mstack-guard and - -mstack-size if they are available. If the compiler supports them - it will emit additional code to each function prolog to trigger - an illegal operation if the kernel stack is about to overflow. - - Say N if you are unsure. - -config STACK_GUARD - int "Size of the guard area (128-1024)" - range 128 1024 - depends on CHECK_STACK - default "256" - help - This allows you to specify the size of the guard area at the lower - end of the kernel stack. 
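(Since s390 now selects VMAP_STACK unconditionally, stack overflows hit the vmalloc guard page and fault at the MMU level, making this compiler-inserted guard redundant.)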
If the kernel stack points into the guard - area on function entry an illegal operation is triggered. The size - needs to be a power of 2. Please keep in mind that the size of an - interrupt frame is 184 bytes for 31 bit and 328 bytes on 64 bit. - The minimum size for the stack guard should be 256 for 31 bit and - 512 for 64 bit. - endmenu menu "I/O subsystem" diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 7fd57398221e..3f25498dac65 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -72,15 +72,6 @@ cflags-$(CONFIG_FRAME_POINTER) += -fno-optimize-sibling-calls KBUILD_AFLAGS_DECOMPRESSOR += $(aflags-y) KBUILD_CFLAGS_DECOMPRESSOR += $(cflags-y) -ifneq ($(call cc-option,-mstack-size=8192 -mstack-guard=128),) - CC_FLAGS_CHECK_STACK := -mstack-size=$(STACK_SIZE) - ifeq ($(call cc-option,-mstack-size=8192),) - CC_FLAGS_CHECK_STACK += -mstack-guard=$(CONFIG_STACK_GUARD) - endif - export CC_FLAGS_CHECK_STACK - cflags-$(CONFIG_CHECK_STACK) += $(CC_FLAGS_CHECK_STACK) -endif - ifdef CONFIG_EXPOLINE ifdef CONFIG_EXPOLINE_EXTERN CC_FLAGS_EXPOLINE := -mindirect-branch=thunk-extern diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h index 7521a9d75fa2..56244fe78182 100644 --- a/arch/s390/boot/boot.h +++ b/arch/s390/boot/boot.h @@ -13,7 +13,6 @@ struct machine_info { unsigned char has_edat1 : 1; unsigned char has_edat2 : 1; - unsigned char has_nx : 1; }; struct vmlinux_info { diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index abe6e6c0ab98..e6b06692ddc8 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -30,6 +30,9 @@ unsigned long __bootdata_preserved(vmemmap_size); unsigned long __bootdata_preserved(MODULES_VADDR); unsigned long __bootdata_preserved(MODULES_END); unsigned long __bootdata_preserved(max_mappable); +unsigned long __bootdata_preserved(page_noexec_mask); +unsigned long __bootdata_preserved(segment_noexec_mask); +unsigned long __bootdata_preserved(region_noexec_mask); int __bootdata_preserved(relocate_lowcore); u64 __bootdata_preserved(stfle_fac_list[16]); @@ -51,8 +54,14 @@ static void detect_facilities(void) } if (test_facility(78)) machine.has_edat2 = 1; - if (test_facility(130)) - machine.has_nx = 1; + page_noexec_mask = -1UL; + segment_noexec_mask = -1UL; + region_noexec_mask = -1UL; + if (!test_facility(130)) { + page_noexec_mask &= ~_PAGE_NOEXEC; + segment_noexec_mask &= ~_SEGMENT_ENTRY_NOEXEC; + region_noexec_mask &= ~_REGION_ENTRY_NOEXEC; + } } static int cmma_test_essa(void) @@ -234,6 +243,8 @@ static unsigned long get_vmem_size(unsigned long identity_size, vsize = round_up(SZ_2G + max_mappable, rte_size) + round_up(vmemmap_size, rte_size) + FIXMAP_SIZE + MODULES_LEN + KASLR_LEN; + if (IS_ENABLED(CONFIG_KMSAN)) + vsize += MODULES_LEN * 2; return size_add(vsize, vmalloc_size); } diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index 145035f84a0e..881a1ece422f 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -63,13 +63,10 @@ static void kasan_populate_shadow(unsigned long kernel_start, unsigned long kern pud_t pud_z = __pud(__pa(kasan_early_shadow_pmd) | _REGION3_ENTRY); p4d_t p4d_z = __p4d(__pa(kasan_early_shadow_pud) | _REGION2_ENTRY); unsigned long memgap_start = 0; - unsigned long untracked_end; unsigned long start, end; int i; pte_z = __pte(__pa(kasan_early_shadow_page) | pgprot_val(PAGE_KERNEL_RO)); - if (!machine.has_nx) - pte_z = clear_pte_bit(pte_z, __pgprot(_PAGE_NOEXEC)); crst_table_init((unsigned long *)kasan_early_shadow_p4d, p4d_val(p4d_z)); crst_table_init((unsigned long 
*)kasan_early_shadow_pud, pud_val(pud_z)); crst_table_init((unsigned long *)kasan_early_shadow_pmd, pmd_val(pmd_z)); @@ -93,15 +90,10 @@ static void kasan_populate_shadow(unsigned long kernel_start, unsigned long kern kasan_populate(kernel_start + TEXT_OFFSET, kernel_end, POPULATE_KASAN_MAP_SHADOW); kasan_populate(0, (unsigned long)__identity_va(0), POPULATE_KASAN_ZERO_SHADOW); kasan_populate(AMODE31_START, AMODE31_END, POPULATE_KASAN_ZERO_SHADOW); - if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) { - untracked_end = VMALLOC_START; - /* shallowly populate kasan shadow for vmalloc and modules */ - kasan_populate(VMALLOC_START, MODULES_END, POPULATE_KASAN_SHALLOW); - } else { - untracked_end = MODULES_VADDR; - } + /* shallowly populate kasan shadow for vmalloc and modules */ + kasan_populate(VMALLOC_START, MODULES_END, POPULATE_KASAN_SHALLOW); /* populate kasan shadow for untracked memory */ - kasan_populate((unsigned long)__identity_va(ident_map_size), untracked_end, + kasan_populate((unsigned long)__identity_va(ident_map_size), VMALLOC_START, POPULATE_KASAN_ZERO_SHADOW); kasan_populate(kernel_end, _REGION1_SIZE, POPULATE_KASAN_ZERO_SHADOW); } @@ -300,13 +292,11 @@ static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long e continue; entry = __pte(_pa(addr, PAGE_SIZE, mode)); entry = set_pte_bit(entry, PAGE_KERNEL); - if (!machine.has_nx) - entry = clear_pte_bit(entry, __pgprot(_PAGE_NOEXEC)); set_pte(pte, entry); pages++; } } - if (mode == POPULATE_DIRECT) + if (mode == POPULATE_IDENTITY) update_page_count(PG_DIRECT_MAP_4K, pages); } @@ -326,8 +316,6 @@ static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long e if (can_large_pmd(pmd, addr, next, mode)) { entry = __pmd(_pa(addr, _SEGMENT_SIZE, mode)); entry = set_pmd_bit(entry, SEGMENT_KERNEL); - if (!machine.has_nx) - entry = clear_pmd_bit(entry, __pgprot(_SEGMENT_ENTRY_NOEXEC)); set_pmd(pmd, entry); pages++; continue; @@ -339,7 +327,7 @@ static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long e } pgtable_pte_populate(pmd, addr, next, mode); } - if (mode == POPULATE_DIRECT) + if (mode == POPULATE_IDENTITY) update_page_count(PG_DIRECT_MAP_1M, pages); } @@ -359,8 +347,6 @@ static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long e if (can_large_pud(pud, addr, next, mode)) { entry = __pud(_pa(addr, _REGION3_SIZE, mode)); entry = set_pud_bit(entry, REGION3_KERNEL); - if (!machine.has_nx) - entry = clear_pud_bit(entry, __pgprot(_REGION_ENTRY_NOEXEC)); set_pud(pud, entry); pages++; continue; @@ -372,7 +358,7 @@ static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long e } pgtable_pmd_populate(pud, addr, next, mode); } - if (mode == POPULATE_DIRECT) + if (mode == POPULATE_IDENTITY) update_page_count(PG_DIRECT_MAP_2G, pages); } diff --git a/arch/s390/configs/kasan.config b/arch/s390/configs/kasan.config index 84c2b551e992..cefbe2ba1228 100644 --- a/arch/s390/configs/kasan.config +++ b/arch/s390/configs/kasan.config @@ -1,4 +1,4 @@ # Help: Enable KASan for debugging CONFIG_KASAN=y CONFIG_KASAN_INLINE=y -CONFIG_KASAN_VMALLOC=y +CONFIG_KERNEL_IMAGE_BASE=0x7FFFE0000000 diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index 8cc02d6e0d0f..9c46b1b630b1 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -1168,4 +1168,4 @@ MODULE_ALIAS_CRYPTO("aes-all"); MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm"); MODULE_LICENSE("GPL"); -MODULE_IMPORT_NS(CRYPTO_INTERNAL); +MODULE_IMPORT_NS("CRYPTO_INTERNAL"); diff 
--git a/arch/s390/include/asm/abs_lowcore.h b/arch/s390/include/asm/abs_lowcore.h index d20df8c923fc..004d17ea05cf 100644 --- a/arch/s390/include/asm/abs_lowcore.h +++ b/arch/s390/include/asm/abs_lowcore.h @@ -2,7 +2,7 @@ #ifndef _ASM_S390_ABS_LOWCORE_H #define _ASM_S390_ABS_LOWCORE_H -#include <asm/sections.h> +#include <linux/smp.h> #include <asm/lowcore.h> #define ABS_LOWCORE_MAP_SIZE (NR_CPUS * sizeof(struct lowcore)) @@ -25,7 +25,7 @@ static inline void put_abs_lowcore(struct lowcore *lc) put_cpu(); } -extern int __bootdata_preserved(relocate_lowcore); +extern int relocate_lowcore; static inline int have_relocated_lowcore(void) { diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h index 6723fca64018..b36dd6a1d652 100644 --- a/arch/s390/include/asm/atomic.h +++ b/arch/s390/include/asm/atomic.h @@ -17,13 +17,13 @@ static __always_inline int arch_atomic_read(const atomic_t *v) { - return __atomic_read(v); + return __atomic_read(&v->counter); } #define arch_atomic_read arch_atomic_read static __always_inline void arch_atomic_set(atomic_t *v, int i) { - __atomic_set(v, i); + __atomic_set(&v->counter, i); } #define arch_atomic_set arch_atomic_set @@ -45,6 +45,36 @@ static __always_inline void arch_atomic_add(int i, atomic_t *v) } #define arch_atomic_add arch_atomic_add +static __always_inline void arch_atomic_inc(atomic_t *v) +{ + __atomic_add_const(1, &v->counter); +} +#define arch_atomic_inc arch_atomic_inc + +static __always_inline void arch_atomic_dec(atomic_t *v) +{ + __atomic_add_const(-1, &v->counter); +} +#define arch_atomic_dec arch_atomic_dec + +static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v) +{ + return __atomic_add_and_test_barrier(-i, &v->counter); +} +#define arch_atomic_sub_and_test arch_atomic_sub_and_test + +static __always_inline bool arch_atomic_dec_and_test(atomic_t *v) +{ + return __atomic_add_const_and_test_barrier(-1, &v->counter); +} +#define arch_atomic_dec_and_test arch_atomic_dec_and_test + +static __always_inline bool arch_atomic_inc_and_test(atomic_t *v) +{ + return __atomic_add_const_and_test_barrier(1, &v->counter); +} +#define arch_atomic_inc_and_test arch_atomic_inc_and_test + #define arch_atomic_sub(_i, _v) arch_atomic_add(-(int)(_i), _v) #define arch_atomic_sub_return(_i, _v) arch_atomic_add_return(-(int)(_i), _v) #define arch_atomic_fetch_sub(_i, _v) arch_atomic_fetch_add(-(int)(_i), _v) @@ -94,13 +124,13 @@ static __always_inline bool arch_atomic_try_cmpxchg(atomic_t *v, int *old, int n static __always_inline s64 arch_atomic64_read(const atomic64_t *v) { - return __atomic64_read(v); + return __atomic64_read((long *)&v->counter); } #define arch_atomic64_read arch_atomic64_read static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i) { - __atomic64_set(v, i); + __atomic64_set((long *)&v->counter, i); } #define arch_atomic64_set arch_atomic64_set @@ -122,6 +152,36 @@ static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v) } #define arch_atomic64_add arch_atomic64_add +static __always_inline void arch_atomic64_inc(atomic64_t *v) +{ + __atomic64_add_const(1, (long *)&v->counter); +} +#define arch_atomic64_inc arch_atomic64_inc + +static __always_inline void arch_atomic64_dec(atomic64_t *v) +{ + __atomic64_add_const(-1, (long *)&v->counter); +} +#define arch_atomic64_dec arch_atomic64_dec + +static __always_inline bool arch_atomic64_sub_and_test(s64 i, atomic64_t *v) +{ + return __atomic64_add_and_test_barrier(-i, (long *)&v->counter); +} +#define arch_atomic64_sub_and_test 
arch_atomic64_sub_and_test + +static __always_inline bool arch_atomic64_dec_and_test(atomic64_t *v) +{ + return __atomic64_add_const_and_test_barrier(-1, (long *)&v->counter); +} +#define arch_atomic64_dec_and_test arch_atomic64_dec_and_test + +static __always_inline bool arch_atomic64_inc_and_test(atomic64_t *v) +{ + return __atomic64_add_const_and_test_barrier(1, (long *)&v->counter); +} +#define arch_atomic64_inc_and_test arch_atomic64_inc_and_test + static __always_inline s64 arch_atomic64_xchg(atomic64_t *v, s64 new) { return arch_xchg(&v->counter, new); diff --git a/arch/s390/include/asm/atomic_ops.h b/arch/s390/include/asm/atomic_ops.h index 1d6b2056fad8..585678bbcd7a 100644 --- a/arch/s390/include/asm/atomic_ops.h +++ b/arch/s390/include/asm/atomic_ops.h @@ -10,50 +10,51 @@ #include <linux/limits.h> #include <asm/march.h> +#include <asm/asm.h> -static __always_inline int __atomic_read(const atomic_t *v) +static __always_inline int __atomic_read(const int *ptr) { - int c; + int val; asm volatile( - " l %[c],%[counter]\n" - : [c] "=d" (c) : [counter] "R" (v->counter)); - return c; + " l %[val],%[ptr]\n" + : [val] "=d" (val) : [ptr] "R" (*ptr)); + return val; } -static __always_inline void __atomic_set(atomic_t *v, int i) +static __always_inline void __atomic_set(int *ptr, int val) { - if (__builtin_constant_p(i) && i >= S16_MIN && i <= S16_MAX) { + if (__builtin_constant_p(val) && val >= S16_MIN && val <= S16_MAX) { asm volatile( - " mvhi %[counter], %[i]\n" - : [counter] "=Q" (v->counter) : [i] "K" (i)); + " mvhi %[ptr],%[val]\n" + : [ptr] "=Q" (*ptr) : [val] "K" (val)); } else { asm volatile( - " st %[i],%[counter]\n" - : [counter] "=R" (v->counter) : [i] "d" (i)); + " st %[val],%[ptr]\n" + : [ptr] "=R" (*ptr) : [val] "d" (val)); } } -static __always_inline s64 __atomic64_read(const atomic64_t *v) +static __always_inline long __atomic64_read(const long *ptr) { - s64 c; + long val; asm volatile( - " lg %[c],%[counter]\n" - : [c] "=d" (c) : [counter] "RT" (v->counter)); - return c; + " lg %[val],%[ptr]\n" + : [val] "=d" (val) : [ptr] "RT" (*ptr)); + return val; } -static __always_inline void __atomic64_set(atomic64_t *v, s64 i) +static __always_inline void __atomic64_set(long *ptr, long val) { - if (__builtin_constant_p(i) && i >= S16_MIN && i <= S16_MAX) { + if (__builtin_constant_p(val) && val >= S16_MIN && val <= S16_MAX) { asm volatile( - " mvghi %[counter], %[i]\n" - : [counter] "=Q" (v->counter) : [i] "K" (i)); + " mvghi %[ptr],%[val]\n" + : [ptr] "=Q" (*ptr) : [val] "K" (val)); } else { asm volatile( - " stg %[i],%[counter]\n" - : [counter] "=RT" (v->counter) : [i] "d" (i)); + " stg %[val],%[ptr]\n" + : [ptr] "=RT" (*ptr) : [val] "d" (val)); } } @@ -73,7 +74,7 @@ static __always_inline op_type op_name(op_type val, op_type *ptr) \ } \ #define __ATOMIC_OPS(op_name, op_type, op_string) \ - __ATOMIC_OP(op_name, op_type, op_string, "\n") \ + __ATOMIC_OP(op_name, op_type, op_string, "") \ __ATOMIC_OP(op_name##_barrier, op_type, op_string, "bcr 14,0\n") __ATOMIC_OPS(__atomic_add, int, "laa") @@ -99,7 +100,7 @@ static __always_inline void op_name(op_type val, op_type *ptr) \ } #define __ATOMIC_CONST_OPS(op_name, op_type, op_string) \ - __ATOMIC_CONST_OP(op_name, op_type, op_string, "\n") \ + __ATOMIC_CONST_OP(op_name, op_type, op_string, "") \ __ATOMIC_CONST_OP(op_name##_barrier, op_type, op_string, "bcr 14,0\n") __ATOMIC_CONST_OPS(__atomic_add_const, int, "asi") @@ -169,4 +170,76 @@ __ATOMIC64_OPS(__atomic64_xor, "xgr") #endif /* MARCH_HAS_Z196_FEATURES */ +#if 
defined(MARCH_HAS_Z196_FEATURES) && defined(__HAVE_ASM_FLAG_OUTPUTS__) + +#define __ATOMIC_TEST_OP(op_name, op_type, op_string, op_barrier) \ +static __always_inline bool op_name(op_type val, op_type *ptr) \ +{ \ + op_type tmp; \ + int cc; \ + \ + asm volatile( \ + op_string " %[tmp],%[val],%[ptr]\n" \ + op_barrier \ + : "=@cc" (cc), [tmp] "=d" (tmp), [ptr] "+QS" (*ptr) \ + : [val] "d" (val) \ + : "memory"); \ + return (cc == 0) || (cc == 2); \ +} \ + +#define __ATOMIC_TEST_OPS(op_name, op_type, op_string) \ + __ATOMIC_TEST_OP(op_name, op_type, op_string, "") \ + __ATOMIC_TEST_OP(op_name##_barrier, op_type, op_string, "bcr 14,0\n") + +__ATOMIC_TEST_OPS(__atomic_add_and_test, int, "laal") +__ATOMIC_TEST_OPS(__atomic64_add_and_test, long, "laalg") + +#undef __ATOMIC_TEST_OPS +#undef __ATOMIC_TEST_OP + +#define __ATOMIC_CONST_TEST_OP(op_name, op_type, op_string, op_barrier) \ +static __always_inline bool op_name(op_type val, op_type *ptr) \ +{ \ + int cc; \ + \ + asm volatile( \ + op_string " %[ptr],%[val]\n" \ + op_barrier \ + : "=@cc" (cc), [ptr] "+QS" (*ptr) \ + : [val] "i" (val) \ + : "memory"); \ + return (cc == 0) || (cc == 2); \ +} + +#define __ATOMIC_CONST_TEST_OPS(op_name, op_type, op_string) \ + __ATOMIC_CONST_TEST_OP(op_name, op_type, op_string, "") \ + __ATOMIC_CONST_TEST_OP(op_name##_barrier, op_type, op_string, "bcr 14,0\n") + +__ATOMIC_CONST_TEST_OPS(__atomic_add_const_and_test, int, "alsi") +__ATOMIC_CONST_TEST_OPS(__atomic64_add_const_and_test, long, "algsi") + +#undef __ATOMIC_CONST_TEST_OPS +#undef __ATOMIC_CONST_TEST_OP + +#else /* defined(MARCH_HAS_Z196_FEATURES) && defined(__HAVE_ASM_FLAG_OUTPUTS__) */ + +#define __ATOMIC_TEST_OP(op_name, op_func, op_type) \ +static __always_inline bool op_name(op_type val, op_type *ptr) \ +{ \ + return op_func(val, ptr) == -val; \ +} + +__ATOMIC_TEST_OP(__atomic_add_and_test, __atomic_add, int) +__ATOMIC_TEST_OP(__atomic_add_and_test_barrier, __atomic_add_barrier, int) +__ATOMIC_TEST_OP(__atomic_add_const_and_test, __atomic_add, int) +__ATOMIC_TEST_OP(__atomic_add_const_and_test_barrier, __atomic_add_barrier, int) +__ATOMIC_TEST_OP(__atomic64_add_and_test, __atomic64_add, long) +__ATOMIC_TEST_OP(__atomic64_add_and_test_barrier, __atomic64_add_barrier, long) +__ATOMIC_TEST_OP(__atomic64_add_const_and_test, __atomic64_add, long) +__ATOMIC_TEST_OP(__atomic64_add_const_and_test_barrier, __atomic64_add_barrier, long) + +#undef __ATOMIC_TEST_OP + +#endif /* defined(MARCH_HAS_Z196_FEATURES) && defined(__HAVE_ASM_FLAG_OUTPUTS__) */ + #endif /* __ARCH_S390_ATOMIC_OPS__ */ diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h index 54a079cd39ed..15aa64e3020e 100644 --- a/arch/s390/include/asm/bitops.h +++ b/arch/s390/include/asm/bitops.h @@ -36,184 +36,41 @@ #include <linux/typecheck.h> #include <linux/compiler.h> #include <linux/types.h> -#include <asm/atomic_ops.h> -#include <asm/barrier.h> - -#define __BITOPS_WORDS(bits) (((bits) + BITS_PER_LONG - 1) / BITS_PER_LONG) - -static inline unsigned long * -__bitops_word(unsigned long nr, const volatile unsigned long *ptr) -{ - unsigned long addr; - - addr = (unsigned long)ptr + ((nr ^ (nr & (BITS_PER_LONG - 1))) >> 3); - return (unsigned long *)addr; -} - -static inline unsigned long __bitops_mask(unsigned long nr) -{ - return 1UL << (nr & (BITS_PER_LONG - 1)); -} - -static __always_inline void arch_set_bit(unsigned long nr, volatile unsigned long *ptr) -{ - unsigned long *addr = __bitops_word(nr, ptr); - unsigned long mask = __bitops_mask(nr); - - __atomic64_or(mask, (long 
*)addr); -} - -static __always_inline void arch_clear_bit(unsigned long nr, volatile unsigned long *ptr) -{ - unsigned long *addr = __bitops_word(nr, ptr); - unsigned long mask = __bitops_mask(nr); - - __atomic64_and(~mask, (long *)addr); -} - -static __always_inline void arch_change_bit(unsigned long nr, - volatile unsigned long *ptr) -{ - unsigned long *addr = __bitops_word(nr, ptr); - unsigned long mask = __bitops_mask(nr); - - __atomic64_xor(mask, (long *)addr); -} - -static inline bool arch_test_and_set_bit(unsigned long nr, - volatile unsigned long *ptr) -{ - unsigned long *addr = __bitops_word(nr, ptr); - unsigned long mask = __bitops_mask(nr); - unsigned long old; - - old = __atomic64_or_barrier(mask, (long *)addr); - return old & mask; -} - -static inline bool arch_test_and_clear_bit(unsigned long nr, - volatile unsigned long *ptr) -{ - unsigned long *addr = __bitops_word(nr, ptr); - unsigned long mask = __bitops_mask(nr); - unsigned long old; - - old = __atomic64_and_barrier(~mask, (long *)addr); - return old & mask; -} - -static inline bool arch_test_and_change_bit(unsigned long nr, - volatile unsigned long *ptr) -{ - unsigned long *addr = __bitops_word(nr, ptr); - unsigned long mask = __bitops_mask(nr); - unsigned long old; - - old = __atomic64_xor_barrier(mask, (long *)addr); - return old & mask; -} - -static __always_inline void -arch___set_bit(unsigned long nr, volatile unsigned long *addr) -{ - unsigned long *p = __bitops_word(nr, addr); - unsigned long mask = __bitops_mask(nr); - - *p |= mask; -} - -static __always_inline void -arch___clear_bit(unsigned long nr, volatile unsigned long *addr) -{ - unsigned long *p = __bitops_word(nr, addr); - unsigned long mask = __bitops_mask(nr); - - *p &= ~mask; -} - -static __always_inline void -arch___change_bit(unsigned long nr, volatile unsigned long *addr) -{ - unsigned long *p = __bitops_word(nr, addr); - unsigned long mask = __bitops_mask(nr); - - *p ^= mask; -} - -static __always_inline bool -arch___test_and_set_bit(unsigned long nr, volatile unsigned long *addr) -{ - unsigned long *p = __bitops_word(nr, addr); - unsigned long mask = __bitops_mask(nr); - unsigned long old; - - old = *p; - *p |= mask; - return old & mask; -} - -static __always_inline bool -arch___test_and_clear_bit(unsigned long nr, volatile unsigned long *addr) -{ - unsigned long *p = __bitops_word(nr, addr); - unsigned long mask = __bitops_mask(nr); - unsigned long old; - - old = *p; - *p &= ~mask; - return old & mask; -} - -static __always_inline bool -arch___test_and_change_bit(unsigned long nr, volatile unsigned long *addr) -{ - unsigned long *p = __bitops_word(nr, addr); - unsigned long mask = __bitops_mask(nr); - unsigned long old; - - old = *p; - *p ^= mask; - return old & mask; -} - -#define arch_test_bit generic_test_bit -#define arch_test_bit_acquire generic_test_bit_acquire - -static inline bool arch_test_and_set_bit_lock(unsigned long nr, - volatile unsigned long *ptr) -{ - if (arch_test_bit(nr, ptr)) - return true; - return arch_test_and_set_bit(nr, ptr); -} - -static inline void arch_clear_bit_unlock(unsigned long nr, - volatile unsigned long *ptr) -{ - smp_mb__before_atomic(); - arch_clear_bit(nr, ptr); -} - -static inline void arch___clear_bit_unlock(unsigned long nr, - volatile unsigned long *ptr) -{ - smp_mb(); - arch___clear_bit(nr, ptr); -} - -static inline bool arch_xor_unlock_is_negative_byte(unsigned long mask, - volatile unsigned long *ptr) -{ - unsigned long old; - - old = __atomic64_xor_barrier(mask, (long *)ptr); - return old & BIT(7); 
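The add-and-test helpers introduced in atomic_ops.h above lean on a property of the interlocked add instructions (laa/laal, asi/alsi and their 64-bit forms): the condition code they set already encodes whether the result is zero, so no separate compare is needed. For the logical adds used here, cc 0 and cc 2 both mean "result is zero" (without and with carry), which is exactly the (cc == 0) || (cc == 2) test, and the "=@cc" flag-output constraint lets the compiler branch on the cc directly. A minimal portable model of the contract, assuming only C11 atomics (the names are illustrative, not the kernel API):

#include <stdatomic.h>
#include <stdbool.h>

/* True iff adding val brings the counter to exactly zero. */
static inline bool add_and_test_model(int val, _Atomic int *ptr)
{
	/* fetch_add returns the old value: old + val == 0  <=>  old == -val */
	return atomic_fetch_add(ptr, val) == -val;
}

This model is also the shape of the fallback path above for compilers without __HAVE_ASM_FLAG_OUTPUTS__, where op_func(val, ptr) == -val recovers the same answer from the returned old value at the cost of an extra compare.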
+#include <asm/asm.h> + +#define arch___set_bit generic___set_bit +#define arch___clear_bit generic___clear_bit +#define arch___change_bit generic___change_bit +#define arch___test_and_set_bit generic___test_and_set_bit +#define arch___test_and_clear_bit generic___test_and_clear_bit +#define arch___test_and_change_bit generic___test_and_change_bit +#define arch_test_bit_acquire generic_test_bit_acquire + +static __always_inline bool arch_test_bit(unsigned long nr, const volatile unsigned long *ptr) +{ +#ifdef __HAVE_ASM_FLAG_OUTPUTS__ + const volatile unsigned char *addr; + unsigned long mask; + int cc; + + if (__builtin_constant_p(nr)) { + addr = (const volatile unsigned char *)ptr; + addr += (nr ^ (BITS_PER_LONG - BITS_PER_BYTE)) / BITS_PER_BYTE; + mask = 1UL << (nr & (BITS_PER_BYTE - 1)); + asm volatile( + " tm %[addr],%[mask]\n" + : "=@cc" (cc) + : [addr] "R" (*addr), [mask] "I" (mask) + ); + return cc == 3; + } +#endif + return generic_test_bit(nr, ptr); } -#define arch_xor_unlock_is_negative_byte arch_xor_unlock_is_negative_byte -#include <asm-generic/bitops/instrumented-atomic.h> -#include <asm-generic/bitops/instrumented-non-atomic.h> -#include <asm-generic/bitops/instrumented-lock.h> +#include <asm-generic/bitops/atomic.h> +#include <asm-generic/bitops/non-instrumented-non-atomic.h> +#include <asm-generic/bitops/lock.h> /* * Functions which use MSB0 bit numbering. diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h index 46f5c9660616..d86dea5900e7 100644 --- a/arch/s390/include/asm/checksum.h +++ b/arch/s390/include/asm/checksum.h @@ -25,7 +25,7 @@ static inline __wsum cksm(const void *buff, int len, __wsum sum) instrument_read(buff, len); kmsan_check_memory(buff, len); - asm volatile("\n" + asm volatile( "0: cksm %[sum],%[rp]\n" " jo 0b\n" : [sum] "+&d" (sum), [rp] "+&d" (rp.pair) : : "cc", "memory"); diff --git a/arch/s390/include/asm/css_chars.h b/arch/s390/include/asm/css_chars.h index 638137d46c85..a03f64033760 100644 --- a/arch/s390/include/asm/css_chars.h +++ b/arch/s390/include/asm/css_chars.h @@ -25,7 +25,7 @@ struct css_general_char { u64 : 2; u64 : 3; - u64 aif_osa : 1; /* bit 67 */ + u64 aif_qdio : 1;/* bit 67 */ u64 : 12; u64 eadm_rf : 1; /* bit 80 */ u64 : 1; diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h index a7f7bdc9e19c..6375276d94ea 100644 --- a/arch/s390/include/asm/debug.h +++ b/arch/s390/include/asm/debug.h @@ -85,6 +85,10 @@ int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view, int area, debug_entry_t *entry, char *out_buf, size_t out_buf_size); +#define DEBUG_SPRINTF_MAX_ARGS 10 +int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, + char *out_buf, size_t out_buf_size, + const char *inbuf); struct debug_view { char name[DEBUG_MAX_NAME_LEN]; debug_prolog_proc_t *prolog_proc; @@ -114,6 +118,9 @@ debug_info_t *debug_register_mode(const char *name, int pages, int nr_areas, int buf_size, umode_t mode, uid_t uid, gid_t gid); +ssize_t debug_dump(debug_info_t *id, struct debug_view *view, + char *buf, size_t buf_size, bool reverse); + void debug_unregister(debug_info_t *id); void debug_set_level(debug_info_t *id, int new_level); diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h index e1316e181230..5790630e31f0 100644 --- a/arch/s390/include/asm/diag.h +++ b/arch/s390/include/asm/diag.h @@ -36,8 +36,10 @@ enum diag_stat_enum { DIAG_STAT_X2FC, DIAG_STAT_X304, DIAG_STAT_X308, + DIAG_STAT_X310, DIAG_STAT_X318, DIAG_STAT_X320, + DIAG_STAT_X324, DIAG_STAT_X49C, 
DIAG_STAT_X500, NR_DIAG_STAT diff --git a/arch/s390/include/asm/ebcdic.h b/arch/s390/include/asm/ebcdic.h index efb50fc6866c..7164cb658435 100644 --- a/arch/s390/include/asm/ebcdic.h +++ b/arch/s390/include/asm/ebcdic.h @@ -22,18 +22,18 @@ extern __u8 _ebc_toupper[256]; /* EBCDIC -> uppercase */ static inline void codepage_convert(const __u8 *codepage, volatile char *addr, unsigned long nr) { - if (nr-- <= 0) + if (!nr--) return; asm volatile( - " bras 1,1f\n" - " tr 0(1,%0),0(%2)\n" - "0: tr 0(256,%0),0(%2)\n" + " j 2f\n" + "0: tr 0(1,%0),0(%2)\n" + "1: tr 0(256,%0),0(%2)\n" " la %0,256(%0)\n" - "1: ahi %1,-256\n" - " jnm 0b\n" - " ex %1,0(1)" + "2: aghi %1,-256\n" + " jnm 1b\n" + " exrl %1,0b" : "+&a" (addr), "+&a" (nr) - : "a" (codepage) : "cc", "memory", "1"); + : "a" (codepage) : "cc", "memory"); } #define ASCEBC(addr,nr) codepage_convert(_ascebc, addr, nr) diff --git a/arch/s390/include/asm/fpu-insn.h b/arch/s390/include/asm/fpu-insn.h index c1e2e521d9af..de510c9f6efa 100644 --- a/arch/s390/include/asm/fpu-insn.h +++ b/arch/s390/include/asm/fpu-insn.h @@ -103,7 +103,7 @@ static inline void fpu_lfpc_safe(unsigned int *fpc) u32 tmp; instrument_read(fpc, sizeof(*fpc)); - asm volatile("\n" + asm_inline volatile( "0: lfpc %[fpc]\n" "1: nopr %%r7\n" ".pushsection .fixup, \"ax\"\n" @@ -188,7 +188,7 @@ static __always_inline void fpu_vgfmg(u8 v1, u8 v2, u8 v3) static __always_inline void fpu_vl(u8 v1, const void *vxr) { instrument_read(vxr, sizeof(__vector128)); - asm volatile("\n" + asm volatile( " la 1,%[vxr]\n" " VL %[v1],0,,1\n" : @@ -246,7 +246,7 @@ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr) size = min(index + 1, sizeof(__vector128)); instrument_read(vxr, size); - asm volatile("\n" + asm volatile( " la 1,%[vxr]\n" " VLL %[v1],%[index],0,1\n" : @@ -284,7 +284,7 @@ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr) } *_v = (void *)(_vxrs); \ \ instrument_read(_v, size); \ - asm volatile("\n" \ + asm volatile( \ " la 1,%[vxrs]\n" \ " VLM %[v1],%[v3],0,1\n" \ : \ @@ -367,7 +367,7 @@ static __always_inline void fpu_vsrlb(u8 v1, u8 v2, u8 v3) static __always_inline void fpu_vst(u8 v1, const void *vxr) { instrument_write(vxr, sizeof(__vector128)); - asm volatile("\n" + asm volatile( " la 1,%[vxr]\n" " VST %[v1],0,,1\n" : [vxr] "=R" (*(__vector128 *)vxr) @@ -396,7 +396,7 @@ static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr) size = min(index + 1, sizeof(__vector128)); instrument_write(vxr, size); - asm volatile("\n" + asm volatile( " la 1,%[vxr]\n" " VSTL %[v1],%[index],0,1\n" : [vxr] "=R" (*(u8 *)vxr) @@ -430,7 +430,7 @@ static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr) } *_v = (void *)(_vxrs); \ \ instrument_write(_v, size); \ - asm volatile("\n" \ + asm volatile( \ " la 1,%[vxrs]\n" \ " VSTM %[v1],%[v3],0,1\n" \ : [vxrs] "=R" (*_v) \ diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h index eaeaeb3ff0be..752a2310f0d6 100644 --- a/arch/s390/include/asm/futex.h +++ b/arch/s390/include/asm/futex.h @@ -44,7 +44,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, break; case FUTEX_OP_ANDN: __futex_atomic_op("lr %2,%1\nnr %2,%5\n", - ret, oldval, newval, uaddr, oparg); + ret, oldval, newval, uaddr, ~oparg); break; case FUTEX_OP_XOR: __futex_atomic_op("lr %2,%1\nxr %2,%5\n", diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index a40664b236e9..7c52acaf9f82 100644 --- a/arch/s390/include/asm/hugetlb.h +++ 
b/arch/s390/include/asm/hugetlb.h @@ -20,12 +20,13 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte, unsigned long sz); void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte); + pte_t *ptep, pte_t pte); + #define __HAVE_ARCH_HUGE_PTEP_GET -extern pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep); +pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep); + #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR -extern pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep); +pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep); static inline void arch_clear_hugetlb_flags(struct folio *folio) { @@ -56,6 +57,7 @@ static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, pte_t pte, int dirty) { int changed = !pte_same(huge_ptep_get(vma->vm_mm, addr, ptep), pte); + if (changed) { huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); __set_huge_pte_at(vma->vm_mm, addr, ptep, pte); @@ -68,19 +70,8 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { pte_t pte = huge_ptep_get_and_clear(mm, addr, ptep); - __set_huge_pte_at(mm, addr, ptep, pte_wrprotect(pte)); -} -#define __HAVE_ARCH_HUGE_PTE_NONE -static inline int huge_pte_none(pte_t pte) -{ - return pte_none(pte); -} - -#define __HAVE_ARCH_HUGE_PTE_NONE_MOSTLY -static inline int huge_pte_none_mostly(pte_t pte) -{ - return huge_pte_none(pte) || is_pte_marker(pte); + __set_huge_pte_at(mm, addr, ptep, pte_wrprotect(pte)); } #define __HAVE_ARCH_HUGE_PTE_MKUFFD_WP diff --git a/arch/s390/include/asm/page-states.h b/arch/s390/include/asm/page-states.h index 08fcbd628120..794fdb21500a 100644 --- a/arch/s390/include/asm/page-states.h +++ b/arch/s390/include/asm/page-states.h @@ -7,7 +7,6 @@ #ifndef PAGE_STATES_H #define PAGE_STATES_H -#include <asm/sections.h> #include <asm/page.h> #define ESSA_GET_STATE 0 @@ -21,7 +20,7 @@ #define ESSA_MAX ESSA_SET_STABLE_NODAT -extern int __bootdata_preserved(cmma_flag); +extern int cmma_flag; static __always_inline unsigned long essa(unsigned long paddr, unsigned char cmd) { diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 48268095b0a3..a3b51056a177 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -17,7 +17,6 @@ #include <linux/page-flags.h> #include <linux/radix-tree.h> #include <linux/atomic.h> -#include <asm/sections.h> #include <asm/ctlreg.h> #include <asm/bug.h> #include <asm/page.h> @@ -35,7 +34,7 @@ enum { PG_DIRECT_MAP_MAX }; -extern atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]); +extern atomic_long_t direct_pages_count[PG_DIRECT_MAP_MAX]; static inline void update_page_count(int level, long count) { @@ -85,14 +84,14 @@ extern unsigned long zero_page_mask; * happen without trampolines and in addition the placement within a * 2GB frame is branch prediction unit friendly. 
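The header changes around here all follow one pattern: the __bootdata_preserved() annotation disappears from extern declarations (so the headers no longer need asm/sections.h) and remains only at the definition site, as the abs_lowcore.c hunk further down illustrates. A sketch of the resulting split, hedged in that the annotation's exact section-name expansion is simplified away here:

/* header: a plain declaration, no asm/sections.h required */
extern int cmma_flag;

/* C file: the definition keeps the annotation, which places the variable
 * in the .boot.preserved.data section so that the value set up by the
 * boot/decompressor stage survives into the decompressed kernel */
int __bootdata_preserved(cmma_flag);

Declaration and definition still name the same object; the section placement is purely a property of the definition.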
*/ -extern unsigned long __bootdata_preserved(VMALLOC_START); -extern unsigned long __bootdata_preserved(VMALLOC_END); +extern unsigned long VMALLOC_START; +extern unsigned long VMALLOC_END; #define VMALLOC_DEFAULT_SIZE ((512UL << 30) - MODULES_LEN) -extern struct page *__bootdata_preserved(vmemmap); -extern unsigned long __bootdata_preserved(vmemmap_size); +extern struct page *vmemmap; +extern unsigned long vmemmap_size; -extern unsigned long __bootdata_preserved(MODULES_VADDR); -extern unsigned long __bootdata_preserved(MODULES_END); +extern unsigned long MODULES_VADDR; +extern unsigned long MODULES_END; #define MODULES_VADDR MODULES_VADDR #define MODULES_END MODULES_END #define MODULES_LEN (1UL << 31) @@ -125,6 +124,8 @@ static inline int is_module_addr(void *addr) #define KASLR_LEN 0UL #endif +void setup_protection_map(void); + /* * A 64 bit pagetable entry of S390 has following format: * | PFRA |0IPC| OS | @@ -443,98 +444,107 @@ static inline int is_module_addr(void *addr) /* * Page protection definitions. */ -#define PAGE_NONE __pgprot(_PAGE_PRESENT | _PAGE_INVALID | _PAGE_PROTECT) -#define PAGE_RO __pgprot(_PAGE_PRESENT | _PAGE_READ | \ +#define __PAGE_NONE (_PAGE_PRESENT | _PAGE_INVALID | _PAGE_PROTECT) +#define __PAGE_RO (_PAGE_PRESENT | _PAGE_READ | \ _PAGE_NOEXEC | _PAGE_INVALID | _PAGE_PROTECT) -#define PAGE_RX __pgprot(_PAGE_PRESENT | _PAGE_READ | \ +#define __PAGE_RX (_PAGE_PRESENT | _PAGE_READ | \ _PAGE_INVALID | _PAGE_PROTECT) -#define PAGE_RW __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ +#define __PAGE_RW (_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ _PAGE_NOEXEC | _PAGE_INVALID | _PAGE_PROTECT) -#define PAGE_RWX __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ +#define __PAGE_RWX (_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ _PAGE_INVALID | _PAGE_PROTECT) - -#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ +#define __PAGE_SHARED (_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ _PAGE_YOUNG | _PAGE_DIRTY | _PAGE_NOEXEC) -#define PAGE_KERNEL __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ +#define __PAGE_KERNEL (_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ _PAGE_YOUNG | _PAGE_DIRTY | _PAGE_NOEXEC) -#define PAGE_KERNEL_RO __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_YOUNG | \ +#define __PAGE_KERNEL_RO (_PAGE_PRESENT | _PAGE_READ | _PAGE_YOUNG | \ _PAGE_PROTECT | _PAGE_NOEXEC) -#define PAGE_KERNEL_EXEC __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ - _PAGE_YOUNG | _PAGE_DIRTY) -/* - * On s390 the page table entry has an invalid bit and a read-only bit. - * Read permission implies execute permission and write permission - * implies read permission. - */ - /*xwr*/ +extern unsigned long page_noexec_mask; + +#define __pgprot_page_mask(x) __pgprot((x) & page_noexec_mask) + +#define PAGE_NONE __pgprot_page_mask(__PAGE_NONE) +#define PAGE_RO __pgprot_page_mask(__PAGE_RO) +#define PAGE_RX __pgprot_page_mask(__PAGE_RX) +#define PAGE_RW __pgprot_page_mask(__PAGE_RW) +#define PAGE_RWX __pgprot_page_mask(__PAGE_RWX) +#define PAGE_SHARED __pgprot_page_mask(__PAGE_SHARED) +#define PAGE_KERNEL __pgprot_page_mask(__PAGE_KERNEL) +#define PAGE_KERNEL_RO __pgprot_page_mask(__PAGE_KERNEL_RO) /* * Segment entry (large page) protection definitions. 
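With page_noexec_mask and __pgprot_page_mask() above, the NX decision that used to be made per entry (the removed "if (!machine.has_nx) clear_pte_bit(...)" branches elsewhere in this diff) is made once at boot: the protection values keep their NOEXEC bits as compile-time constants and a machine without the NX facility filters them out through the mask at every use. The segment- and region-level masks below follow the same pattern. A sketch of the one-time initialization this implies, assuming it lives behind the setup_protection_map() hook declared just above (the actual init code is not part of this excerpt):

unsigned long page_noexec_mask = -1UL;
unsigned long segment_noexec_mask = -1UL;
unsigned long region_noexec_mask = -1UL;

static void __init setup_noexec_masks(void)	/* hypothetical helper */
{
	if (!machine.has_nx) {	/* no NX facility: strip the NOEXEC bits */
		page_noexec_mask = ~_PAGE_NOEXEC;
		segment_noexec_mask = ~_SEGMENT_ENTRY_NOEXEC;
		region_noexec_mask = ~_REGION_ENTRY_NOEXEC;
	}
}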
*/ -#define SEGMENT_NONE __pgprot(_SEGMENT_ENTRY_PRESENT | \ +#define __SEGMENT_NONE (_SEGMENT_ENTRY_PRESENT | \ _SEGMENT_ENTRY_INVALID | \ _SEGMENT_ENTRY_PROTECT) -#define SEGMENT_RO __pgprot(_SEGMENT_ENTRY_PRESENT | \ +#define __SEGMENT_RO (_SEGMENT_ENTRY_PRESENT | \ _SEGMENT_ENTRY_PROTECT | \ _SEGMENT_ENTRY_READ | \ _SEGMENT_ENTRY_NOEXEC) -#define SEGMENT_RX __pgprot(_SEGMENT_ENTRY_PRESENT | \ +#define __SEGMENT_RX (_SEGMENT_ENTRY_PRESENT | \ _SEGMENT_ENTRY_PROTECT | \ _SEGMENT_ENTRY_READ) -#define SEGMENT_RW __pgprot(_SEGMENT_ENTRY_PRESENT | \ +#define __SEGMENT_RW (_SEGMENT_ENTRY_PRESENT | \ _SEGMENT_ENTRY_READ | \ _SEGMENT_ENTRY_WRITE | \ _SEGMENT_ENTRY_NOEXEC) -#define SEGMENT_RWX __pgprot(_SEGMENT_ENTRY_PRESENT | \ +#define __SEGMENT_RWX (_SEGMENT_ENTRY_PRESENT | \ _SEGMENT_ENTRY_READ | \ _SEGMENT_ENTRY_WRITE) -#define SEGMENT_KERNEL __pgprot(_SEGMENT_ENTRY | \ +#define __SEGMENT_KERNEL (_SEGMENT_ENTRY | \ _SEGMENT_ENTRY_LARGE | \ _SEGMENT_ENTRY_READ | \ _SEGMENT_ENTRY_WRITE | \ _SEGMENT_ENTRY_YOUNG | \ _SEGMENT_ENTRY_DIRTY | \ _SEGMENT_ENTRY_NOEXEC) -#define SEGMENT_KERNEL_RO __pgprot(_SEGMENT_ENTRY | \ +#define __SEGMENT_KERNEL_RO (_SEGMENT_ENTRY | \ _SEGMENT_ENTRY_LARGE | \ _SEGMENT_ENTRY_READ | \ _SEGMENT_ENTRY_YOUNG | \ _SEGMENT_ENTRY_PROTECT | \ _SEGMENT_ENTRY_NOEXEC) -#define SEGMENT_KERNEL_EXEC __pgprot(_SEGMENT_ENTRY | \ - _SEGMENT_ENTRY_LARGE | \ - _SEGMENT_ENTRY_READ | \ - _SEGMENT_ENTRY_WRITE | \ - _SEGMENT_ENTRY_YOUNG | \ - _SEGMENT_ENTRY_DIRTY) + +extern unsigned long segment_noexec_mask; + +#define __pgprot_segment_mask(x) __pgprot((x) & segment_noexec_mask) + +#define SEGMENT_NONE __pgprot_segment_mask(__SEGMENT_NONE) +#define SEGMENT_RO __pgprot_segment_mask(__SEGMENT_RO) +#define SEGMENT_RX __pgprot_segment_mask(__SEGMENT_RX) +#define SEGMENT_RW __pgprot_segment_mask(__SEGMENT_RW) +#define SEGMENT_RWX __pgprot_segment_mask(__SEGMENT_RWX) +#define SEGMENT_KERNEL __pgprot_segment_mask(__SEGMENT_KERNEL) +#define SEGMENT_KERNEL_RO __pgprot_segment_mask(__SEGMENT_KERNEL_RO) /* * Region3 entry (large page) protection definitions. 
*/ -#define REGION3_KERNEL __pgprot(_REGION_ENTRY_TYPE_R3 | \ +#define __REGION3_KERNEL (_REGION_ENTRY_TYPE_R3 | \ _REGION3_ENTRY_PRESENT | \ - _REGION3_ENTRY_LARGE | \ - _REGION3_ENTRY_READ | \ - _REGION3_ENTRY_WRITE | \ - _REGION3_ENTRY_YOUNG | \ + _REGION3_ENTRY_LARGE | \ + _REGION3_ENTRY_READ | \ + _REGION3_ENTRY_WRITE | \ + _REGION3_ENTRY_YOUNG | \ _REGION3_ENTRY_DIRTY | \ _REGION_ENTRY_NOEXEC) -#define REGION3_KERNEL_RO __pgprot(_REGION_ENTRY_TYPE_R3 | \ - _REGION3_ENTRY_PRESENT | \ - _REGION3_ENTRY_LARGE | \ - _REGION3_ENTRY_READ | \ - _REGION3_ENTRY_YOUNG | \ - _REGION_ENTRY_PROTECT | \ - _REGION_ENTRY_NOEXEC) -#define REGION3_KERNEL_EXEC __pgprot(_REGION_ENTRY_TYPE_R3 | \ +#define __REGION3_KERNEL_RO (_REGION_ENTRY_TYPE_R3 | \ _REGION3_ENTRY_PRESENT | \ - _REGION3_ENTRY_LARGE | \ - _REGION3_ENTRY_READ | \ - _REGION3_ENTRY_WRITE | \ - _REGION3_ENTRY_YOUNG | \ - _REGION3_ENTRY_DIRTY) + _REGION3_ENTRY_LARGE | \ + _REGION3_ENTRY_READ | \ + _REGION3_ENTRY_YOUNG | \ + _REGION_ENTRY_PROTECT | \ + _REGION_ENTRY_NOEXEC) + +extern unsigned long region_noexec_mask; + +#define __pgprot_region_mask(x) __pgprot((x) & region_noexec_mask) + +#define REGION3_KERNEL __pgprot_region_mask(__REGION3_KERNEL) +#define REGION3_KERNEL_RO __pgprot_region_mask(__REGION3_KERNEL_RO) static inline bool mm_p4d_folded(struct mm_struct *mm) { @@ -1435,8 +1445,6 @@ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot) pte_t __pte; __pte = __pte(physpage | pgprot_val(pgprot)); - if (!MACHINE_HAS_NX) - __pte = clear_pte_bit(__pte, __pgprot(_PAGE_NOEXEC)); return pte_mkyoung(__pte); } @@ -1804,8 +1812,6 @@ static inline int pmdp_clear_flush_young(struct vm_area_struct *vma, static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t entry) { - if (!MACHINE_HAS_NX) - entry = clear_pmd_bit(entry, __pgprot(_SEGMENT_ENTRY_NOEXEC)); set_pmd(pmdp, entry); } diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h index 2c29bdf12127..6ccd033acfe5 100644 --- a/arch/s390/include/asm/preempt.h +++ b/arch/s390/include/asm/preempt.h @@ -8,12 +8,19 @@ #include <asm/cmpxchg.h> #include <asm/march.h> -#ifdef MARCH_HAS_Z196_FEATURES - /* We use the MSB mostly because its available */ #define PREEMPT_NEED_RESCHED 0x80000000 + +/* + * We use the PREEMPT_NEED_RESCHED bit as an inverted NEED_RESCHED such + * that a decrement hitting 0 means we can and should reschedule. + */ #define PREEMPT_ENABLED (0 + PREEMPT_NEED_RESCHED) +/* + * We mask the PREEMPT_NEED_RESCHED bit so as not to confuse all current users + * that think a non-zero value indicates we cannot preempt. + */ static __always_inline int preempt_count(void) { return READ_ONCE(get_lowcore()->preempt_count) & ~PREEMPT_NEED_RESCHED; @@ -29,6 +36,15 @@ static __always_inline void preempt_count_set(int pc) } while (!arch_try_cmpxchg(&get_lowcore()->preempt_count, &old, new)); } +/* + * We fold the NEED_RESCHED bit into the preempt count such that + * preempt_enable() can decrement and test for needing to reschedule with a + * short instruction sequence. + * + * We invert the actual bit, so that when the decrement hits 0 we know we both + * need to resched (the bit is cleared) and can resched (no preempt count). 
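The folding described in the comments above is easy to sanity-check with a small model; this is illustrative only, the real counter lives in lowcore and is updated with the interlocked ops from atomic_ops.h:

#include <stdbool.h>
#include <stdint.h>

#define NEED_RESCHED_BIT 0x80000000u	/* PREEMPT_NEED_RESCHED, the MSB */

static uint32_t count = NEED_RESCHED_BIT;	/* PREEMPT_ENABLED: count 0, bit set */

static void model_set_need_resched(void)   { count &= ~NEED_RESCHED_BIT; }	/* inverted */
static void model_clear_need_resched(void) { count |= NEED_RESCHED_BIT; }

/* preempt_enable()'s fast path: one decrement answers both questions */
static bool model_dec_and_test(void)
{
	return --count == 0;	/* zero <=> count was 1 AND resched pending */
}

While the bit is set (no reschedule needed) the decrement can never reach zero, so a single alsi-plus-branch sequence replaces the old "decrement, then separately check tif_need_resched()" logic of the removed pre-z196 fallback a few lines below.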
+ */ + static __always_inline void set_preempt_need_resched(void) { __atomic_and(~PREEMPT_NEED_RESCHED, &get_lowcore()->preempt_count); @@ -64,67 +80,24 @@ static __always_inline void __preempt_count_sub(int val) __preempt_count_add(-val); } +/* + * Because we keep PREEMPT_NEED_RESCHED set when we do _not_ need to reschedule + * a decrement which hits zero means we have no preempt_count and should + * reschedule. + */ static __always_inline bool __preempt_count_dec_and_test(void) { - return __atomic_add(-1, &get_lowcore()->preempt_count) == 1; -} - -static __always_inline bool should_resched(int preempt_offset) -{ - return unlikely(READ_ONCE(get_lowcore()->preempt_count) == - preempt_offset); -} - -#else /* MARCH_HAS_Z196_FEATURES */ - -#define PREEMPT_ENABLED (0) - -static __always_inline int preempt_count(void) -{ - return READ_ONCE(get_lowcore()->preempt_count); -} - -static __always_inline void preempt_count_set(int pc) -{ - get_lowcore()->preempt_count = pc; -} - -static __always_inline void set_preempt_need_resched(void) -{ -} - -static __always_inline void clear_preempt_need_resched(void) -{ -} - -static __always_inline bool test_preempt_need_resched(void) -{ - return false; -} - -static __always_inline void __preempt_count_add(int val) -{ - get_lowcore()->preempt_count += val; -} - -static __always_inline void __preempt_count_sub(int val) -{ - get_lowcore()->preempt_count -= val; -} - -static __always_inline bool __preempt_count_dec_and_test(void) -{ - return !--get_lowcore()->preempt_count && tif_need_resched(); + return __atomic_add_const_and_test(-1, &get_lowcore()->preempt_count); } +/* + * Returns true when we need to resched and can (barring IRQ state). + */ static __always_inline bool should_resched(int preempt_offset) { - return unlikely(preempt_count() == preempt_offset && - tif_need_resched()); + return unlikely(READ_ONCE(get_lowcore()->preempt_count) == preempt_offset); } -#endif /* MARCH_HAS_Z196_FEATURES */ - #define init_task_preempt_count(p) do { } while (0) /* Deferred to CPU bringup time */ #define init_idle_preempt_count(p, cpu) do { } while (0) diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 8761fd01a9f0..4f8d5592c298 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -163,8 +163,7 @@ static __always_inline void __stackleak_poison(unsigned long erase_low, " la %[addr],256(%[addr])\n" " brctg %[tmp],0b\n" "1: stg %[poison],0(%[addr])\n" - " larl %[tmp],3f\n" - " ex %[count],0(%[tmp])\n" + " exrl %[count],3f\n" " j 4f\n" "2: stg %[poison],0(%[addr])\n" " j 4f\n" diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index eb00fa1771da..4da3b2956285 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -16,6 +16,11 @@ /* 24 + 16 * SCLP_MAX_CORES */ #define EXT_SCCB_READ_CPU (3 * PAGE_SIZE) +#define SCLP_ERRNOTIFY_AQ_RESET 0 +#define SCLP_ERRNOTIFY_AQ_REPAIR 1 +#define SCLP_ERRNOTIFY_AQ_INFO_LOG 2 +#define SCLP_ERRNOTIFY_AQ_OPTICS_DATA 3 + #ifndef __ASSEMBLY__ #include <linux/uio.h> #include <asm/chpid.h> @@ -87,8 +92,10 @@ struct sclp_info { unsigned char has_kss : 1; unsigned char has_diag204_bif : 1; unsigned char has_gisaf : 1; + unsigned char has_diag310 : 1; unsigned char has_diag318 : 1; unsigned char has_diag320 : 1; + unsigned char has_diag324 : 1; unsigned char has_sipl : 1; unsigned char has_sipl_eckd : 1; unsigned char has_dirq : 1; @@ -111,6 +118,34 @@ struct sclp_info { }; extern struct sclp_info sclp; +struct sccb_header { + u16 
length; + u8 function_code; + u8 control_mask[3]; + u16 response_code; +} __packed; + +struct evbuf_header { + u16 length; + u8 type; + u8 flags; + u16 _reserved; +} __packed; + +struct err_notify_evbuf { + struct evbuf_header header; + u8 action; + u8 atype; + u32 fh; + u32 fid; + u8 data[]; +} __packed; + +struct err_notify_sccb { + struct sccb_header header; + struct err_notify_evbuf evbuf; +} __packed; + struct zpci_report_error_header { u8 version; /* Interface version byte */ u8 action; /* Action qualifier byte diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index e95b2c8081eb..ea150ea83e57 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -140,11 +140,11 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, { if (mm_pud_folded(tlb->mm)) return; + __tlb_adjust_range(tlb, address, PAGE_SIZE); tlb->mm->context.flush_mm = 1; tlb->freed_tables = 1; tlb->cleared_p4ds = 1; tlb_remove_ptdesc(tlb, pud); } - #endif /* _S390_TLB_H */ diff --git a/arch/s390/include/uapi/asm/diag.h b/arch/s390/include/uapi/asm/diag.h new file mode 100644 index 000000000000..b7e6ccb4ff6e --- /dev/null +++ b/arch/s390/include/uapi/asm/diag.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Diag ioctls and its associated structures definitions. + * + * Copyright IBM Corp. 2024 + */ + +#ifndef __S390_UAPI_ASM_DIAG_H +#define __S390_UAPI_ASM_DIAG_H + +#include <linux/types.h> + +#define DIAG_MAGIC_STR 'D' + +struct diag324_pib { + __u64 address; + __u64 sequence; +}; + +struct diag310_memtop { + __u64 address; + __u64 nesting_lvl; +}; + +/* Diag ioctl definitions */ +#define DIAG324_GET_PIBBUF _IOWR(DIAG_MAGIC_STR, 0x77, struct diag324_pib) +#define DIAG324_GET_PIBLEN _IOR(DIAG_MAGIC_STR, 0x78, size_t) +#define DIAG310_GET_STRIDE _IOR(DIAG_MAGIC_STR, 0x79, size_t) +#define DIAG310_GET_MEMTOPLEN _IOWR(DIAG_MAGIC_STR, 0x7a, size_t) +#define DIAG310_GET_MEMTOPBUF _IOWR(DIAG_MAGIC_STR, 0x7b, struct diag310_memtop) + +#endif /* __S390_UAPI_ASM_DIAG_H */ diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 48caae8c7e10..db5f3a3faefb 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -38,12 +38,13 @@ CFLAGS_unwind_bc.o += -fno-optimize-sibling-calls obj-y := head64.o traps.o time.o process.o early.o setup.o idle.o vtime.o obj-y += processor.o syscall.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o -obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o cpufeature.o +obj-y += debug.o irq.o ipl.o dis.o vdso.o cpufeature.o obj-y += sysinfo.o lgr.o os_info.o ctlreg.o obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o obj-y += entry.o reipl.o kdebugfs.o alternative.o obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o facility.o uv.o wti.o +obj-y += diag/ extra-y += vmlinux.lds diff --git a/arch/s390/kernel/abs_lowcore.c b/arch/s390/kernel/abs_lowcore.c index 09cd24cbe74e..88f0b91d7a73 100644 --- a/arch/s390/kernel/abs_lowcore.c +++ b/arch/s390/kernel/abs_lowcore.c @@ -2,6 +2,7 @@ #include <linux/pgtable.h> #include <asm/abs_lowcore.h> +#include <asm/sections.h> unsigned long __bootdata_preserved(__abs_lowcore); int __bootdata_preserved(relocate_lowcore); diff --git a/arch/s390/kernel/cpacf.c b/arch/s390/kernel/cpacf.c index c8575dbc890d..4b9b34f95d72 100644 --- a/arch/s390/kernel/cpacf.c +++ b/arch/s390/kernel/cpacf.c @@ -14,7 +14,7 @@ #define CPACF_QUERY(name, instruction) \ static 
ssize_t name##_query_raw_read(struct file *fp, \ struct kobject *kobj, \ - struct bin_attribute *attr, \ + const struct bin_attribute *attr, \ char *buf, loff_t offs, \ size_t count) \ { \ @@ -24,7 +24,7 @@ static ssize_t name##_query_raw_read(struct file *fp, \ return -EOPNOTSUPP; \ return memory_read_from_buffer(buf, count, &offs, &mask, sizeof(mask)); \ } \ -static BIN_ATTR_RO(name##_query_raw, sizeof(cpacf_mask_t)) +static const BIN_ATTR_RO(name##_query_raw, sizeof(cpacf_mask_t)) CPACF_QUERY(km, KM); CPACF_QUERY(kmc, KMC); @@ -40,20 +40,20 @@ CPACF_QUERY(prno, PRNO); CPACF_QUERY(kma, KMA); CPACF_QUERY(kdsa, KDSA); -#define CPACF_QAI(name, instruction) \ -static ssize_t name##_query_auth_info_raw_read( \ - struct file *fp, struct kobject *kobj, \ - struct bin_attribute *attr, char *buf, loff_t offs, \ - size_t count) \ -{ \ - cpacf_qai_t qai; \ - \ - if (!cpacf_qai(CPACF_##instruction, &qai)) \ - return -EOPNOTSUPP; \ - return memory_read_from_buffer(buf, count, &offs, &qai, \ - sizeof(qai)); \ -} \ -static BIN_ATTR_RO(name##_query_auth_info_raw, sizeof(cpacf_qai_t)) +#define CPACF_QAI(name, instruction) \ +static ssize_t name##_query_auth_info_raw_read( \ + struct file *fp, struct kobject *kobj, \ + const struct bin_attribute *attr, char *buf, loff_t offs, \ + size_t count) \ +{ \ + cpacf_qai_t qai; \ + \ + if (!cpacf_qai(CPACF_##instruction, &qai)) \ + return -EOPNOTSUPP; \ + return memory_read_from_buffer(buf, count, &offs, &qai, \ + sizeof(qai)); \ +} \ +static const BIN_ATTR_RO(name##_query_auth_info_raw, sizeof(cpacf_qai_t)) CPACF_QAI(km, KM); CPACF_QAI(kmc, KMC); @@ -69,7 +69,7 @@ CPACF_QAI(prno, PRNO); CPACF_QAI(kma, KMA); CPACF_QAI(kdsa, KDSA); -static struct bin_attribute *cpacf_attrs[] = { +static const struct bin_attribute *const cpacf_attrs[] = { &bin_attr_km_query_raw, &bin_attr_kmc_query_raw, &bin_attr_kimd_query_raw, @@ -101,7 +101,7 @@ static struct bin_attribute *cpacf_attrs[] = { static const struct attribute_group cpacf_attr_grp = { .name = "cpacf", - .bin_attrs = cpacf_attrs, + .bin_attrs_new = cpacf_attrs, }; static int __init cpacf_init(void) diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index de19fd8a6a95..ba6b7329a10e 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -24,6 +24,7 @@ #include <linux/export.h> #include <linux/init.h> #include <linux/fs.h> +#include <linux/math.h> #include <linux/minmax.h> #include <linux/debugfs.h> @@ -94,9 +95,6 @@ static int debug_input_flush_fn(debug_info_t *id, struct debug_view *view, static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view, char *out_buf, size_t out_buf_size, const char *in_buf); -static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, - char *out_buf, size_t out_buf_size, - const char *inbuf); static void debug_areas_swap(debug_info_t *a, debug_info_t *b); static void debug_events_append(debug_info_t *dest, debug_info_t *src); @@ -354,7 +352,10 @@ static debug_info_t *debug_info_copy(debug_info_t *in, int mode) for (i = 0; i < in->nr_areas; i++) { for (j = 0; j < in->pages_per_area; j++) memcpy(rc->areas[i][j], in->areas[i][j], PAGE_SIZE); + rc->active_pages[i] = in->active_pages[i]; + rc->active_entries[i] = in->active_entries[i]; } + rc->active_area = in->active_area; out: spin_unlock_irqrestore(&in->lock, flags); return rc; @@ -422,11 +423,17 @@ out: return len; } -/* - * debug_next_entry: - * - goto next entry in p_info +/** + * debug_next_entry - Go to the next entry + * @p_info: Private info that is manipulated + * + * 
Sets the current position in @p_info to the next entry. If no further entry + * exists, the current position is set to one after the end and the return value + * indicates that no further entries exist. + * + * Return: True if there are more following entries, false otherwise + */ -static inline int debug_next_entry(file_private_info_t *p_info) +static inline bool debug_next_entry(file_private_info_t *p_info) { debug_info_t *id; @@ -434,10 +441,10 @@ static inline int debug_next_entry(file_private_info_t *p_info) if (p_info->act_entry == DEBUG_PROLOG_ENTRY) { p_info->act_entry = 0; p_info->act_page = 0; - goto out; + return true; } if (!id->areas) - return 1; + return false; p_info->act_entry += id->entry_size; /* switch to next page, if we reached the end of the page */ if (p_info->act_entry > (PAGE_SIZE - id->entry_size)) { @@ -450,10 +457,87 @@ static inline int debug_next_entry(file_private_info_t *p_info) p_info->act_page = 0; } if (p_info->act_area >= id->nr_areas) - return 1; + return false; } -out: - return 0; + return true; +} + +/** + * debug_to_act_entry - Go to the currently active entry + * @p_info: Private info that is manipulated + * + * Sets the current position in @p_info to the currently active + * entry of @p_info->debug_info_snap. + */ +static void debug_to_act_entry(file_private_info_t *p_info) +{ + debug_info_t *snap_id; + + snap_id = p_info->debug_info_snap; + p_info->act_area = snap_id->active_area; + p_info->act_page = snap_id->active_pages[snap_id->active_area]; + p_info->act_entry = snap_id->active_entries[snap_id->active_area]; +} + +/** + * debug_prev_entry - Go to the previous entry + * @p_info: Private info that is manipulated + * + * Sets the current position in @p_info to the previous entry. If no previous entry + * exists, the current position is left at DEBUG_PROLOG_ENTRY and the return value + * indicates that no previous entries exist. + * + * Return: True if there are more previous entries, false otherwise + */ + +static inline bool debug_prev_entry(file_private_info_t *p_info) +{ + debug_info_t *id; + + id = p_info->debug_info_snap; + if (p_info->act_entry == DEBUG_PROLOG_ENTRY) + debug_to_act_entry(p_info); + if (!id->areas) + return false; + p_info->act_entry -= id->entry_size; + /* switch to prev page, if we reached the beginning of the page */ + if (p_info->act_entry < 0) { + /* end of previous page */ + p_info->act_entry = rounddown(PAGE_SIZE, id->entry_size) - id->entry_size; + p_info->act_page--; + if (p_info->act_page < 0) { + /* previous area */ + p_info->act_area--; + p_info->act_page = id->pages_per_area - 1; + } + if (p_info->act_area < 0) + p_info->act_area = (id->nr_areas - 1) % id->nr_areas; + } + /* check full circle */ + if (id->active_area == p_info->act_area && + id->active_pages[id->active_area] == p_info->act_page && + id->active_entries[id->active_area] == p_info->act_entry) + return false; + return true; +} + +/** + * debug_move_entry - Go to the next entry in either the forward or backward direction + * @p_info: Private info that is manipulated + * @reverse: If true, go to the next entry in reverse, i.e. the previous one + * + * Sets the current position in @p_info to the next (@reverse == false) or + * previous (@reverse == true) entry. + * + * Return: True if there are further entries in that direction, + * false otherwise.
+ */ +static bool debug_move_entry(file_private_info_t *p_info, bool reverse) +{ + if (reverse) + return debug_prev_entry(p_info); + else + return debug_next_entry(p_info); } /* @@ -495,7 +579,7 @@ static ssize_t debug_output(struct file *file, /* file descriptor */ } if (copy_size == formatted_line_residue) { entry_offset = 0; - if (debug_next_entry(p_info)) + if (!debug_next_entry(p_info)) goto out; } } @@ -530,6 +614,42 @@ static ssize_t debug_input(struct file *file, const char __user *user_buf, return rc; /* number of input characters */ } +static file_private_info_t *debug_file_private_alloc(debug_info_t *debug_info, + struct debug_view *view) +{ + debug_info_t *debug_info_snapshot; + file_private_info_t *p_info; + + /* + * Make snapshot of current debug areas to get it consistent. + * To copy all the areas is only needed, if we have a view which + * formats the debug areas. + */ + if (!view->format_proc && !view->header_proc) + debug_info_snapshot = debug_info_copy(debug_info, NO_AREAS); + else + debug_info_snapshot = debug_info_copy(debug_info, ALL_AREAS); + + if (!debug_info_snapshot) + return NULL; + p_info = kmalloc(sizeof(file_private_info_t), GFP_KERNEL); + if (!p_info) { + debug_info_free(debug_info_snapshot); + return NULL; + } + p_info->offset = 0; + p_info->debug_info_snap = debug_info_snapshot; + p_info->debug_info_org = debug_info; + p_info->view = view; + p_info->act_area = 0; + p_info->act_page = 0; + p_info->act_entry = DEBUG_PROLOG_ENTRY; + p_info->act_entry_offset = 0; + debug_info_get(debug_info); + + return p_info; +} + /* * debug_open: * - called for user open() @@ -538,7 +658,7 @@ static ssize_t debug_input(struct file *file, const char __user *user_buf, */ static int debug_open(struct inode *inode, struct file *file) { - debug_info_t *debug_info, *debug_info_snapshot; + debug_info_t *debug_info; file_private_info_t *p_info; int i, rc = 0; @@ -556,42 +676,26 @@ static int debug_open(struct inode *inode, struct file *file) goto out; found: - - /* Make snapshot of current debug areas to get it consistent. */ - /* To copy all the areas is only needed, if we have a view which */ - /* formats the debug areas. 
*/ - - if (!debug_info->views[i]->format_proc && !debug_info->views[i]->header_proc) - debug_info_snapshot = debug_info_copy(debug_info, NO_AREAS); - else - debug_info_snapshot = debug_info_copy(debug_info, ALL_AREAS); - - if (!debug_info_snapshot) { - rc = -ENOMEM; - goto out; - } - p_info = kmalloc(sizeof(file_private_info_t), GFP_KERNEL); + p_info = debug_file_private_alloc(debug_info, debug_info->views[i]); if (!p_info) { - debug_info_free(debug_info_snapshot); rc = -ENOMEM; goto out; } - p_info->offset = 0; - p_info->debug_info_snap = debug_info_snapshot; - p_info->debug_info_org = debug_info; - p_info->view = debug_info->views[i]; - p_info->act_area = 0; - p_info->act_page = 0; - p_info->act_entry = DEBUG_PROLOG_ENTRY; - p_info->act_entry_offset = 0; file->private_data = p_info; - debug_info_get(debug_info); nonseekable_open(inode, file); out: mutex_unlock(&debug_mutex); return rc; } +static void debug_file_private_free(file_private_info_t *p_info) +{ + if (p_info->debug_info_snap) + debug_info_free(p_info->debug_info_snap); + debug_info_put(p_info->debug_info_org); + kfree(p_info); +} + /* * debug_close: * - called for user close() @@ -602,13 +706,59 @@ static int debug_close(struct inode *inode, struct file *file) file_private_info_t *p_info; p_info = (file_private_info_t *) file->private_data; - if (p_info->debug_info_snap) - debug_info_free(p_info->debug_info_snap); - debug_info_put(p_info->debug_info_org); - kfree(file->private_data); + debug_file_private_free(p_info); + file->private_data = NULL; return 0; /* success */ } +/** + * debug_dump - Get a textual representation of debug info, or as much as fits + * @id: Debug information to use + * @view: View with which to dump the debug information + * @buf: Buffer the textual debug data representation is written to + * @buf_size: Size of the buffer, including the trailing '\0' byte + * @reverse: Go backwards from the last written entry + * + * This function may be used whenever a textual representation of the debug + * information is required without using an s390dbf file. + * + * Note: It is the caller's responsibility to supply a view that is compatible + * with the debug information data. + * + * Return: On success returns the number of bytes written to the buffer not + * including the trailing '\0' byte. If @buf_size == 0 the function returns 0. + * On failure an error code less than 0 is returned. + */ +ssize_t debug_dump(debug_info_t *id, struct debug_view *view, + char *buf, size_t buf_size, bool reverse) +{ + file_private_info_t *p_info; + size_t size, offset = 0; + + /* Need space for '\0' byte */ + if (buf_size < 1) + return 0; + buf_size--; + + p_info = debug_file_private_alloc(id, view); + if (!p_info) + return -ENOMEM; + + /* There is always at least the DEBUG_PROLOG_ENTRY */ + do { + size = debug_format_entry(p_info); + size = min(size, buf_size - offset); + memcpy(buf + offset, p_info->temp_buf, size); + offset += size; + if (offset >= buf_size) + break; + } while (debug_move_entry(p_info, reverse)); + debug_file_private_free(p_info); + buf[offset] = '\0'; + + return offset; +} +
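A hedged usage sketch of the new debug_dump() entry point: it renders an s390dbf log into a caller-supplied buffer without going through debugfs, for example to embed the most recent trace entries in an error report. Pairing it with the existing debug_sprintf_view matches the debug_sprintf_format_fn export below; error handling is kept minimal:

#include <linux/printk.h>
#include <asm/debug.h>

/* Dump the newest entries of an existing debug area into a text buffer. */
static void dump_recent_entries(debug_info_t *dbf)
{
	char buf[1024];
	ssize_t len;

	/* reverse == true: start at the last written entry and walk backwards */
	len = debug_dump(dbf, &debug_sprintf_view, buf, sizeof(buf), true);
	if (len < 0)
		pr_warn("debug_dump failed: %zd\n", len);
	else
		pr_info("%s\n", buf);	/* len bytes, '\0'-terminated */
}

As the kernel-doc notes, the caller must pick a view that matches how the area was written, here an area filled via debug_sprintf_event().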
/* Create debugfs entries and add to internal list. */ static void _debug_register(debug_info_t *id) { @@ -1532,8 +1682,8 @@ EXPORT_SYMBOL(debug_dflt_header_fn); #define DEBUG_SPRINTF_MAX_ARGS 10 -static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, - char *out_buf, size_t out_buf_size, const char *inbuf) +int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, + char *out_buf, size_t out_buf_size, const char *inbuf) { debug_sprintf_entry_t *curr_event = (debug_sprintf_entry_t *)inbuf; int num_longs, num_used_args = 0, i, rc = 0; @@ -1570,6 +1720,7 @@ static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, out: return rc; } +EXPORT_SYMBOL(debug_sprintf_format_fn); /* * debug_init: diff --git a/arch/s390/kernel/diag/Makefile b/arch/s390/kernel/diag/Makefile new file mode 100644 index 000000000000..956aee6c4090 --- /dev/null +++ b/arch/s390/kernel/diag/Makefile @@ -0,0 +1 @@ +obj-y := diag_misc.o diag324.o diag.o diag310.o diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag/diag.c index cdd6e31344fa..e15b8dee3228 100644 --- a/arch/s390/kernel/diag.c +++ b/arch/s390/kernel/diag/diag.c @@ -17,7 +17,7 @@ #include <asm/trace/diag.h> #include <asm/sections.h> #include <asm/asm.h> -#include "entry.h" +#include "../entry.h" struct diag_stat { unsigned int counter[NR_DIAG_STAT]; @@ -51,8 +51,10 @@ static const struct diag_desc diag_map[NR_DIAG_STAT] = { [DIAG_STAT_X2FC] = { .code = 0x2fc, .name = "Guest Performance Data" }, [DIAG_STAT_X304] = { .code = 0x304, .name = "Partition-Resource Service" }, [DIAG_STAT_X308] = { .code = 0x308, .name = "List-Directed IPL" }, + [DIAG_STAT_X310] = { .code = 0x310, .name = "Memory Topology Information" }, [DIAG_STAT_X318] = { .code = 0x318, .name = "CP Name and Version Codes" }, [DIAG_STAT_X320] = { .code = 0x320, .name = "Certificate Store" }, + [DIAG_STAT_X324] = { .code = 0x324, .name = "Power Information Block" }, [DIAG_STAT_X49C] = { .code = 0x49c, .name = "Warning-Track Interruption" }, [DIAG_STAT_X500] = { .code = 0x500, .name = "Virtio Service" }, }; diff --git a/arch/s390/kernel/diag/diag310.c b/arch/s390/kernel/diag/diag310.c new file mode 100644 index 000000000000..d6a34454aa5a --- /dev/null +++ b/arch/s390/kernel/diag/diag310.c @@ -0,0 +1,276 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Request memory topology information via diag 0x310. + * + * Copyright IBM Corp.
2025 + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/uaccess.h> +#include <linux/vmalloc.h> +#include <asm/diag.h> +#include <asm/sclp.h> +#include <uapi/asm/diag.h> +#include "diag_ioctl.h" + +#define DIAG310_LEVELMIN 1 +#define DIAG310_LEVELMAX 6 + +enum diag310_sc { + DIAG310_SUBC_0 = 0, + DIAG310_SUBC_1 = 1, + DIAG310_SUBC_4 = 4, + DIAG310_SUBC_5 = 5 +}; + +enum diag310_retcode { + DIAG310_RET_SUCCESS = 0x0001, + DIAG310_RET_BUSY = 0x0101, + DIAG310_RET_OPNOTSUPP = 0x0102, + DIAG310_RET_SC4_INVAL = 0x0401, + DIAG310_RET_SC4_NODATA = 0x0402, + DIAG310_RET_SC5_INVAL = 0x0501, + DIAG310_RET_SC5_NODATA = 0x0502, + DIAG310_RET_SC5_ESIZE = 0x0503 +}; + +union diag310_response { + u64 response; + struct { + u64 result : 32; + u64 : 16; + u64 rc : 16; + }; +}; + +union diag310_req_subcode { + u64 subcode; + struct { + u64 : 48; + u64 st : 8; + u64 sc : 8; + }; +}; + +union diag310_req_size { + u64 size; + struct { + u64 page_count : 32; + u64 : 32; + }; +}; + +static inline unsigned long diag310(unsigned long subcode, unsigned long size, void *addr) +{ + union register_pair rp = { .even = (unsigned long)addr, .odd = size }; + + diag_stat_inc(DIAG_STAT_X310); + asm volatile("diag %[rp],%[subcode],0x310\n" + : [rp] "+d" (rp.pair) + : [subcode] "d" (subcode) + : "memory"); + return rp.odd; +} + +static int diag310_result_to_errno(unsigned int result) +{ + switch (result) { + case DIAG310_RET_BUSY: + return -EBUSY; + case DIAG310_RET_OPNOTSUPP: + return -EOPNOTSUPP; + default: + return -EINVAL; + } +} + +static int diag310_get_subcode_mask(unsigned long *mask) +{ + union diag310_response res; + + res.response = diag310(DIAG310_SUBC_0, 0, NULL); + if (res.rc != DIAG310_RET_SUCCESS) + return diag310_result_to_errno(res.rc); + *mask = res.response; + return 0; +} + +static int diag310_get_memtop_stride(unsigned long *stride) +{ + union diag310_response res; + + res.response = diag310(DIAG310_SUBC_1, 0, NULL); + if (res.rc != DIAG310_RET_SUCCESS) + return diag310_result_to_errno(res.rc); + *stride = res.result; + return 0; +} + +static int diag310_get_memtop_size(unsigned long *pages, unsigned long level) +{ + union diag310_req_subcode req = { .sc = DIAG310_SUBC_4, .st = level }; + union diag310_response res; + + res.response = diag310(req.subcode, 0, NULL); + switch (res.rc) { + case DIAG310_RET_SUCCESS: + *pages = res.result; + return 0; + case DIAG310_RET_SC4_NODATA: + return -ENODATA; + case DIAG310_RET_SC4_INVAL: + return -EINVAL; + default: + return diag310_result_to_errno(res.rc); + } +} + +static int diag310_store_topology_map(void *buf, unsigned long pages, unsigned long level) +{ + union diag310_req_subcode req_sc = { .sc = DIAG310_SUBC_5, .st = level }; + union diag310_req_size req_size = { .page_count = pages }; + union diag310_response res; + + res.response = diag310(req_sc.subcode, req_size.size, buf); + switch (res.rc) { + case DIAG310_RET_SUCCESS: + return 0; + case DIAG310_RET_SC5_NODATA: + return -ENODATA; + case DIAG310_RET_SC5_ESIZE: + return -EOVERFLOW; + case DIAG310_RET_SC5_INVAL: + return -EINVAL; + default: + return diag310_result_to_errno(res.rc); + } +} + +static int diag310_check_features(void) +{ + static int features_available; + unsigned long mask; + int rc; + + if (READ_ONCE(features_available)) + return 0; + if (!sclp.has_diag310) + return -EOPNOTSUPP; + rc = diag310_get_subcode_mask(&mask); + if (rc) + return rc; + if (!test_bit_inv(DIAG310_SUBC_1, &mask)) + return -EOPNOTSUPP; + if (!test_bit_inv(DIAG310_SUBC_4, &mask)) + return 
-EOPNOTSUPP; + if (!test_bit_inv(DIAG310_SUBC_5, &mask)) + return -EOPNOTSUPP; + WRITE_ONCE(features_available, 1); + return 0; +} + +static int memtop_get_stride_len(unsigned long *res) +{ + static unsigned long memtop_stride; + unsigned long stride; + int rc; + + stride = READ_ONCE(memtop_stride); + if (!stride) { + rc = diag310_get_memtop_stride(&stride); + if (rc) + return rc; + WRITE_ONCE(memtop_stride, stride); + } + *res = stride; + return 0; +} + +static int memtop_get_page_count(unsigned long *res, unsigned long level) +{ + static unsigned long memtop_pages[DIAG310_LEVELMAX]; + unsigned long pages; + int rc; + + if (level > DIAG310_LEVELMAX || level < DIAG310_LEVELMIN) + return -EINVAL; + pages = READ_ONCE(memtop_pages[level - 1]); + if (!pages) { + rc = diag310_get_memtop_size(&pages, level); + if (rc) + return rc; + WRITE_ONCE(memtop_pages[level - 1], pages); + } + *res = pages; + return 0; +} + +long diag310_memtop_stride(unsigned long arg) +{ + size_t __user *argp = (void __user *)arg; + unsigned long stride; + int rc; + + rc = diag310_check_features(); + if (rc) + return rc; + rc = memtop_get_stride_len(&stride); + if (rc) + return rc; + if (put_user(stride, argp)) + return -EFAULT; + return 0; +} + +long diag310_memtop_len(unsigned long arg) +{ + size_t __user *argp = (void __user *)arg; + unsigned long pages, level; + int rc; + + rc = diag310_check_features(); + if (rc) + return rc; + if (get_user(level, argp)) + return -EFAULT; + rc = memtop_get_page_count(&pages, level); + if (rc) + return rc; + if (put_user(pages * PAGE_SIZE, argp)) + return -EFAULT; + return 0; +} + +long diag310_memtop_buf(unsigned long arg) +{ + struct diag310_memtop __user *udata = (struct diag310_memtop __user *)arg; + unsigned long level, pages, data_size; + u64 address; + void *buf; + int rc; + + rc = diag310_check_features(); + if (rc) + return rc; + if (get_user(level, &udata->nesting_lvl)) + return -EFAULT; + if (get_user(address, &udata->address)) + return -EFAULT; + rc = memtop_get_page_count(&pages, level); + if (rc) + return rc; + data_size = pages * PAGE_SIZE; + buf = __vmalloc_node(data_size, PAGE_SIZE, GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT, + NUMA_NO_NODE, __builtin_return_address(0)); + if (!buf) + return -ENOMEM; + rc = diag310_store_topology_map(buf, pages, level); + if (rc) + goto out; + if (copy_to_user((void __user *)address, buf, data_size)) + rc = -EFAULT; +out: + vfree(buf); + return rc; +} diff --git a/arch/s390/kernel/diag/diag324.c b/arch/s390/kernel/diag/diag324.c new file mode 100644 index 000000000000..7fa4c0b7eb6c --- /dev/null +++ b/arch/s390/kernel/diag/diag324.c @@ -0,0 +1,224 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Request power readings for resources in a computing environment via + * diag 0x324. diag 0x324 stores the power readings in the power information + * block (pib). + * + * Copyright IBM Corp. 
2024 + */ + +#define pr_fmt(fmt) "diag324: " fmt +#include <linux/fs.h> +#include <linux/gfp.h> +#include <linux/ioctl.h> +#include <linux/jiffies.h> +#include <linux/kernel.h> +#include <linux/ktime.h> +#include <linux/string.h> +#include <linux/slab.h> +#include <linux/timer.h> +#include <linux/types.h> +#include <linux/uaccess.h> +#include <linux/vmalloc.h> + +#include <asm/diag.h> +#include <asm/sclp.h> +#include <asm/timex.h> +#include <uapi/asm/diag.h> +#include "diag_ioctl.h" + +enum subcode { + DIAG324_SUBC_0 = 0, + DIAG324_SUBC_1 = 1, + DIAG324_SUBC_2 = 2, +}; + +enum retcode { + DIAG324_RET_SUCCESS = 0x0001, + DIAG324_RET_SUBC_NOTAVAIL = 0x0103, + DIAG324_RET_INSUFFICIENT_SIZE = 0x0104, + DIAG324_RET_READING_UNAVAILABLE = 0x0105, +}; + +union diag324_response { + u64 response; + struct { + u64 installed : 32; + u64 : 16; + u64 rc : 16; + } sc0; + struct { + u64 format : 16; + u64 : 16; + u64 pib_len : 16; + u64 rc : 16; + } sc1; + struct { + u64 : 48; + u64 rc : 16; + } sc2; +}; + +union diag324_request { + u64 request; + struct { + u64 : 32; + u64 allocated : 16; + u64 : 12; + u64 sc : 4; + } sc2; +}; + +struct pib { + u32 : 8; + u32 num : 8; + u32 len : 16; + u32 : 24; + u32 hlen : 8; + u64 : 64; + u64 intv; + u8 r[]; +} __packed; + +struct pibdata { + struct pib *pib; + ktime_t expire; + u64 sequence; + size_t len; + int rc; +}; + +static DEFINE_MUTEX(pibmutex); +static struct pibdata pibdata; + +#define PIBWORK_DELAY (5 * NSEC_PER_SEC) + +static void pibwork_handler(struct work_struct *work); +static DECLARE_DELAYED_WORK(pibwork, pibwork_handler); + +static unsigned long diag324(unsigned long subcode, void *addr) +{ + union register_pair rp = { .even = (unsigned long)addr }; + + diag_stat_inc(DIAG_STAT_X324); + asm volatile("diag %[rp],%[subcode],0x324\n" + : [rp] "+d" (rp.pair) + : [subcode] "d" (subcode) + : "memory"); + return rp.odd; +} + +static void pibwork_handler(struct work_struct *work) +{ + struct pibdata *data = &pibdata; + ktime_t timedout; + + mutex_lock(&pibmutex); + timedout = ktime_add_ns(data->expire, PIBWORK_DELAY); + if (ktime_before(ktime_get(), timedout)) { + mod_delayed_work(system_wq, &pibwork, nsecs_to_jiffies(PIBWORK_DELAY)); + goto out; + } + vfree(data->pib); + data->pib = NULL; +out: + mutex_unlock(&pibmutex); +} + +static void pib_update(struct pibdata *data) +{ + union diag324_request req = { .sc2.sc = DIAG324_SUBC_2, .sc2.allocated = data->len }; + union diag324_response res; + int rc; + + memset(data->pib, 0, data->len); + res.response = diag324(req.request, data->pib); + switch (res.sc2.rc) { + case DIAG324_RET_SUCCESS: + rc = 0; + break; + case DIAG324_RET_SUBC_NOTAVAIL: + rc = -ENOENT; + break; + case DIAG324_RET_INSUFFICIENT_SIZE: + rc = -EMSGSIZE; + break; + case DIAG324_RET_READING_UNAVAILABLE: + rc = -EBUSY; + break; + default: + rc = -EINVAL; + } + data->rc = rc; +} + +long diag324_pibbuf(unsigned long arg) +{ + struct diag324_pib __user *udata = (struct diag324_pib __user *)arg; + struct pibdata *data = &pibdata; + static bool first = true; + u64 address; + int rc; + + if (!data->len) + return -EOPNOTSUPP; + if (get_user(address, &udata->address)) + return -EFAULT; + mutex_lock(&pibmutex); + rc = -ENOMEM; + if (!data->pib) + data->pib = vmalloc(data->len); + if (!data->pib) + goto out; + if (first || ktime_after(ktime_get(), data->expire)) { + pib_update(data); + data->sequence++; + data->expire = ktime_add_ns(ktime_get(), tod_to_ns(data->pib->intv)); + mod_delayed_work(system_wq, &pibwork, nsecs_to_jiffies(PIBWORK_DELAY)); + first = 
false; + } + rc = data->rc; + if (rc != 0 && rc != -EBUSY) + goto out; + rc = copy_to_user((void __user *)address, data->pib, data->pib->len); + rc |= put_user(data->sequence, &udata->sequence); + if (rc) + rc = -EFAULT; +out: + mutex_unlock(&pibmutex); + return rc; +} + +long diag324_piblen(unsigned long arg) +{ + struct pibdata *data = &pibdata; + + if (!data->len) + return -EOPNOTSUPP; + if (put_user(data->len, (size_t __user *)arg)) + return -EFAULT; + return 0; +} + +static int __init diag324_init(void) +{ + union diag324_response res; + unsigned long installed; + + if (!sclp.has_diag324) + return -EOPNOTSUPP; + res.response = diag324(DIAG324_SUBC_0, NULL); + if (res.sc0.rc != DIAG324_RET_SUCCESS) + return -EOPNOTSUPP; + installed = res.response; + if (!test_bit_inv(DIAG324_SUBC_1, &installed)) + return -EOPNOTSUPP; + if (!test_bit_inv(DIAG324_SUBC_2, &installed)) + return -EOPNOTSUPP; + res.response = diag324(DIAG324_SUBC_1, NULL); + if (res.sc1.rc != DIAG324_RET_SUCCESS) + return -EOPNOTSUPP; + pibdata.len = res.sc1.pib_len; + return 0; +} +device_initcall(diag324_init); diff --git a/arch/s390/kernel/diag/diag_ioctl.h b/arch/s390/kernel/diag/diag_ioctl.h new file mode 100644 index 000000000000..7080be946785 --- /dev/null +++ b/arch/s390/kernel/diag/diag_ioctl.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _DIAG_IOCTL_H +#define _DIAG_IOCTL_H + +#include <linux/types.h> + +long diag324_pibbuf(unsigned long arg); +long diag324_piblen(unsigned long arg); + +long diag310_memtop_stride(unsigned long arg); +long diag310_memtop_len(unsigned long arg); +long diag310_memtop_buf(unsigned long arg); + +#endif /* _DIAG_IOCTL_H */ diff --git a/arch/s390/kernel/diag/diag_misc.c b/arch/s390/kernel/diag/diag_misc.c new file mode 100644 index 000000000000..efffe02ea02e --- /dev/null +++ b/arch/s390/kernel/diag/diag_misc.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Provide diagnose information via misc device /dev/diag. + * + * Copyright IBM Corp. 
2024 + */ + +#include <linux/fs.h> +#include <linux/init.h> +#include <linux/ioctl.h> +#include <linux/kernel.h> +#include <linux/miscdevice.h> +#include <linux/types.h> + +#include <uapi/asm/diag.h> +#include "diag_ioctl.h" + +static long diag_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + long rc; + + switch (cmd) { + case DIAG324_GET_PIBLEN: + rc = diag324_piblen(arg); + break; + case DIAG324_GET_PIBBUF: + rc = diag324_pibbuf(arg); + break; + case DIAG310_GET_STRIDE: + rc = diag310_memtop_stride(arg); + break; + case DIAG310_GET_MEMTOPLEN: + rc = diag310_memtop_len(arg); + break; + case DIAG310_GET_MEMTOPBUF: + rc = diag310_memtop_buf(arg); + break; + default: + rc = -ENOIOCTLCMD; + break; + } + return rc; +} + +static const struct file_operations fops = { + .owner = THIS_MODULE, + .open = nonseekable_open, + .unlocked_ioctl = diag_ioctl, +}; + +static struct miscdevice diagdev = { + .name = "diag", + .minor = MISC_DYNAMIC_MINOR, + .fops = &fops, + .mode = 0444, +}; + +static int diag_init(void) +{ + return misc_register(&diagdev); +} + +device_initcall(diag_init); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 960c08700cf6..4cc3408c4dac 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -52,16 +52,7 @@ _LPP_OFFSET = __LC_LPP ALT_FACILITY(193) .endm - .macro CHECK_STACK savearea, lowcore -#ifdef CONFIG_CHECK_STACK - tml %r15,THREAD_SIZE - CONFIG_STACK_GUARD - la %r14,\savearea(\lowcore) - jz stack_overflow -#endif - .endm - .macro CHECK_VMAP_STACK savearea, lowcore, oklabel -#ifdef CONFIG_VMAP_STACK lgr %r14,%r15 nill %r14,0x10000 - THREAD_SIZE oill %r14,STACK_INIT_OFFSET @@ -77,9 +68,6 @@ _LPP_OFFSET = __LC_LPP je \oklabel la %r14,\savearea(\lowcore) j stack_overflow -#else - j \oklabel -#endif .endm /* @@ -326,8 +314,7 @@ SYM_CODE_START(pgm_check_handler) jnz 2f # -> enabled, can't be a double fault tm __LC_PGM_ILC+3(%r13),0x80 # check for per exception jnz .Lpgm_svcper # -> single stepped svc -2: CHECK_STACK __LC_SAVE_AREA,%r13 - aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) +2: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) # CHECK_VMAP_STACK branches to stack_overflow or 4f CHECK_VMAP_STACK __LC_SAVE_AREA,%r13,4f 3: lg %r15,__LC_KERNEL_STACK(%r13) @@ -394,8 +381,7 @@ SYM_CODE_START(\name) BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST SIEEXIT __SF_SIE_CONTROL(%r15),%r13 #endif -0: CHECK_STACK __LC_SAVE_AREA,%r13 - aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) +0: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) j 2f 1: lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13) lg %r15,__LC_KERNEL_STACK(%r13) @@ -603,7 +589,6 @@ SYM_CODE_END(early_pgm_check_handler) .section .kprobes.text, "ax" -#if defined(CONFIG_CHECK_STACK) || defined(CONFIG_VMAP_STACK) /* * The synchronous or the asynchronous stack overflowed. We are dead. * No need to properly save the registers, we are going to panic anyway. 
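The /dev/diag dispatch above is the whole userspace surface for the new DIAG310/DIAG324 handlers. A minimal userspace sketch of the DIAG324 flow, assuming the uapi header <asm/diag.h> exports DIAG324_GET_PIBLEN and DIAG324_GET_PIBBUF and a struct diag324_pib whose address and sequence fields match their use in diag324_pibbuf(); the exact uapi layout is not shown in this patch:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <asm/diag.h>	/* assumed location of the ioctl/struct definitions */

int main(void)
{
	struct diag324_pib req = { 0 };
	size_t len;
	void *buf;
	int fd;

	fd = open("/dev/diag", O_RDONLY);
	if (fd < 0)
		return 1;
	/* diag324_piblen() stores the PIB size via put_user() */
	if (ioctl(fd, DIAG324_GET_PIBLEN, &len) < 0)
		return 1;
	buf = calloc(1, len);
	if (!buf)
		return 1;
	/* the kernel copies the PIB to the user buffer named by .address */
	req.address = (uint64_t)(uintptr_t)buf;
	if (ioctl(fd, DIAG324_GET_PIBBUF, &req) < 0)
		return 1;
	printf("PIB sequence %llu, %zu bytes\n",
	       (unsigned long long)req.sequence, len);
	free(buf);
	close(fd);
	return 0;
}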
@@ -621,7 +606,6 @@ SYM_CODE_START(stack_overflow) lgr %r2,%r11 # pass pointer to pt_regs jg kernel_stack_overflow SYM_CODE_END(stack_overflow) -#endif .section .data, "aw" .balign 4 diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index edbb52ce3f1e..69be2309cde0 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -270,7 +270,7 @@ static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \ if (len >= sizeof(_value)) \ return -E2BIG; \ len = strscpy(_value, buf, sizeof(_value)); \ - if (len < 0) \ + if ((ssize_t)len < 0) \ return len; \ strim(_value); \ return len; \ @@ -280,58 +280,58 @@ static struct kobj_attribute sys_##_prefix##_##_name##_attr = \ sys_##_prefix##_##_name##_show, \ sys_##_prefix##_##_name##_store) -#define IPL_ATTR_SCP_DATA_SHOW_FN(_prefix, _ipl_block) \ -static ssize_t sys_##_prefix##_scp_data_show(struct file *filp, \ - struct kobject *kobj, \ - struct bin_attribute *attr, \ - char *buf, loff_t off, \ - size_t count) \ -{ \ - size_t size = _ipl_block.scp_data_len; \ - void *scp_data = _ipl_block.scp_data; \ - \ - return memory_read_from_buffer(buf, count, &off, \ - scp_data, size); \ +#define IPL_ATTR_SCP_DATA_SHOW_FN(_prefix, _ipl_block) \ +static ssize_t sys_##_prefix##_scp_data_show(struct file *filp, \ + struct kobject *kobj, \ + const struct bin_attribute *attr, \ + char *buf, loff_t off, \ + size_t count) \ +{ \ + size_t size = _ipl_block.scp_data_len; \ + void *scp_data = _ipl_block.scp_data; \ + \ + return memory_read_from_buffer(buf, count, &off, \ + scp_data, size); \ } #define IPL_ATTR_SCP_DATA_STORE_FN(_prefix, _ipl_block_hdr, _ipl_block, _ipl_bp_len, _ipl_bp0_len)\ -static ssize_t sys_##_prefix##_scp_data_store(struct file *filp, \ - struct kobject *kobj, \ - struct bin_attribute *attr, \ - char *buf, loff_t off, \ - size_t count) \ -{ \ - size_t scpdata_len = count; \ - size_t padding; \ - \ - if (off) \ - return -EINVAL; \ - \ - memcpy(_ipl_block.scp_data, buf, count); \ - if (scpdata_len % 8) { \ - padding = 8 - (scpdata_len % 8); \ - memset(_ipl_block.scp_data + scpdata_len, \ - 0, padding); \ - scpdata_len += padding; \ - } \ - \ - _ipl_block_hdr.len = _ipl_bp_len + scpdata_len; \ - _ipl_block.len = _ipl_bp0_len + scpdata_len; \ - _ipl_block.scp_data_len = scpdata_len; \ - \ - return count; \ +static ssize_t sys_##_prefix##_scp_data_store(struct file *filp, \ + struct kobject *kobj, \ + const struct bin_attribute *attr, \ + char *buf, loff_t off, \ + size_t count) \ +{ \ + size_t scpdata_len = count; \ + size_t padding; \ + \ + if (off) \ + return -EINVAL; \ + \ + memcpy(_ipl_block.scp_data, buf, count); \ + if (scpdata_len % 8) { \ + padding = 8 - (scpdata_len % 8); \ + memset(_ipl_block.scp_data + scpdata_len, \ + 0, padding); \ + scpdata_len += padding; \ + } \ + \ + _ipl_block_hdr.len = _ipl_bp_len + scpdata_len; \ + _ipl_block.len = _ipl_bp0_len + scpdata_len; \ + _ipl_block.scp_data_len = scpdata_len; \ + \ + return count; \ } #define DEFINE_IPL_ATTR_SCP_DATA_RO(_prefix, _ipl_block, _size) \ IPL_ATTR_SCP_DATA_SHOW_FN(_prefix, _ipl_block) \ -static struct bin_attribute sys_##_prefix##_scp_data_attr = \ +static const struct bin_attribute sys_##_prefix##_scp_data_attr = \ __BIN_ATTR(scp_data, 0444, sys_##_prefix##_scp_data_show, \ NULL, _size) #define DEFINE_IPL_ATTR_SCP_DATA_RW(_prefix, _ipl_block_hdr, _ipl_block, _ipl_bp_len, _ipl_bp0_len, _size)\ IPL_ATTR_SCP_DATA_SHOW_FN(_prefix, _ipl_block) \ IPL_ATTR_SCP_DATA_STORE_FN(_prefix, _ipl_block_hdr, _ipl_block, _ipl_bp_len, _ipl_bp0_len)\ -static struct 
bin_attribute sys_##_prefix##_scp_data_attr = \ +static const struct bin_attribute sys_##_prefix##_scp_data_attr = \ __BIN_ATTR(scp_data, 0644, sys_##_prefix##_scp_data_show, \ sys_##_prefix##_scp_data_store, _size) @@ -434,19 +434,19 @@ static struct kobj_attribute sys_ipl_device_attr = __ATTR(device, 0444, sys_ipl_device_show, NULL); static ssize_t sys_ipl_parameter_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, char *buf, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { return memory_read_from_buffer(buf, count, &off, &ipl_block, ipl_block.hdr.len); } -static struct bin_attribute sys_ipl_parameter_attr = +static const struct bin_attribute sys_ipl_parameter_attr = __BIN_ATTR(binary_parameter, 0444, sys_ipl_parameter_read, NULL, PAGE_SIZE); DEFINE_IPL_ATTR_SCP_DATA_RO(ipl_fcp, ipl_block.fcp, PAGE_SIZE); -static struct bin_attribute *ipl_fcp_bin_attrs[] = { +static const struct bin_attribute *const ipl_fcp_bin_attrs[] = { &sys_ipl_parameter_attr, &sys_ipl_fcp_scp_data_attr, NULL, @@ -454,7 +454,7 @@ static struct bin_attribute *ipl_fcp_bin_attrs[] = { DEFINE_IPL_ATTR_SCP_DATA_RO(ipl_nvme, ipl_block.nvme, PAGE_SIZE); -static struct bin_attribute *ipl_nvme_bin_attrs[] = { +static const struct bin_attribute *const ipl_nvme_bin_attrs[] = { &sys_ipl_parameter_attr, &sys_ipl_nvme_scp_data_attr, NULL, @@ -462,7 +462,7 @@ static struct bin_attribute *ipl_nvme_bin_attrs[] = { DEFINE_IPL_ATTR_SCP_DATA_RO(ipl_eckd, ipl_block.eckd, PAGE_SIZE); -static struct bin_attribute *ipl_eckd_bin_attrs[] = { +static const struct bin_attribute *const ipl_eckd_bin_attrs[] = { &sys_ipl_parameter_attr, &sys_ipl_eckd_scp_data_attr, NULL, @@ -593,9 +593,9 @@ static struct attribute *ipl_fcp_attrs[] = { NULL, }; -static struct attribute_group ipl_fcp_attr_group = { +static const struct attribute_group ipl_fcp_attr_group = { .attrs = ipl_fcp_attrs, - .bin_attrs = ipl_fcp_bin_attrs, + .bin_attrs_new = ipl_fcp_bin_attrs, }; static struct attribute *ipl_nvme_attrs[] = { @@ -607,9 +607,9 @@ static struct attribute *ipl_nvme_attrs[] = { NULL, }; -static struct attribute_group ipl_nvme_attr_group = { +static const struct attribute_group ipl_nvme_attr_group = { .attrs = ipl_nvme_attrs, - .bin_attrs = ipl_nvme_bin_attrs, + .bin_attrs_new = ipl_nvme_bin_attrs, }; static struct attribute *ipl_eckd_attrs[] = { @@ -620,9 +620,9 @@ static struct attribute *ipl_eckd_attrs[] = { NULL, }; -static struct attribute_group ipl_eckd_attr_group = { +static const struct attribute_group ipl_eckd_attr_group = { .attrs = ipl_eckd_attrs, - .bin_attrs = ipl_eckd_bin_attrs, + .bin_attrs_new = ipl_eckd_bin_attrs, }; /* CCW ipl device attributes */ @@ -640,11 +640,11 @@ static struct attribute *ipl_ccw_attrs_lpar[] = { NULL, }; -static struct attribute_group ipl_ccw_attr_group_vm = { +static const struct attribute_group ipl_ccw_attr_group_vm = { .attrs = ipl_ccw_attrs_vm, }; -static struct attribute_group ipl_ccw_attr_group_lpar = { +static const struct attribute_group ipl_ccw_attr_group_lpar = { .attrs = ipl_ccw_attrs_lpar }; @@ -655,7 +655,7 @@ static struct attribute *ipl_common_attrs[] = { NULL, }; -static struct attribute_group ipl_common_attr_group = { +static const struct attribute_group ipl_common_attr_group = { .attrs = ipl_common_attrs, }; @@ -808,7 +808,7 @@ DEFINE_IPL_ATTR_SCP_DATA_RW(reipl_fcp, reipl_block_fcp->hdr, IPL_BP_FCP_LEN, IPL_BP0_FCP_LEN, DIAG308_SCPDATA_SIZE); -static struct bin_attribute *reipl_fcp_bin_attrs[] = { +static const struct bin_attribute *const 
reipl_fcp_bin_attrs[] = { &sys_reipl_fcp_scp_data_attr, NULL, }; @@ -917,9 +917,9 @@ static struct attribute *reipl_fcp_attrs[] = { NULL, }; -static struct attribute_group reipl_fcp_attr_group = { +static const struct attribute_group reipl_fcp_attr_group = { .attrs = reipl_fcp_attrs, - .bin_attrs = reipl_fcp_bin_attrs, + .bin_attrs_new = reipl_fcp_bin_attrs, }; static struct kobj_attribute sys_reipl_fcp_clear_attr = @@ -932,7 +932,7 @@ DEFINE_IPL_ATTR_SCP_DATA_RW(reipl_nvme, reipl_block_nvme->hdr, IPL_BP_NVME_LEN, IPL_BP0_NVME_LEN, DIAG308_SCPDATA_SIZE); -static struct bin_attribute *reipl_nvme_bin_attrs[] = { +static const struct bin_attribute *const reipl_nvme_bin_attrs[] = { &sys_reipl_nvme_scp_data_attr, NULL, }; @@ -955,9 +955,9 @@ static struct attribute *reipl_nvme_attrs[] = { NULL, }; -static struct attribute_group reipl_nvme_attr_group = { +static const struct attribute_group reipl_nvme_attr_group = { .attrs = reipl_nvme_attrs, - .bin_attrs = reipl_nvme_bin_attrs + .bin_attrs_new = reipl_nvme_bin_attrs }; static ssize_t reipl_nvme_clear_show(struct kobject *kobj, @@ -1031,7 +1031,7 @@ DEFINE_IPL_ATTR_SCP_DATA_RW(reipl_eckd, reipl_block_eckd->hdr, IPL_BP_ECKD_LEN, IPL_BP0_ECKD_LEN, DIAG308_SCPDATA_SIZE); -static struct bin_attribute *reipl_eckd_bin_attrs[] = { +static const struct bin_attribute *const reipl_eckd_bin_attrs[] = { &sys_reipl_eckd_scp_data_attr, NULL, }; @@ -1048,9 +1048,9 @@ static struct attribute *reipl_eckd_attrs[] = { NULL, }; -static struct attribute_group reipl_eckd_attr_group = { +static const struct attribute_group reipl_eckd_attr_group = { .attrs = reipl_eckd_attrs, - .bin_attrs = reipl_eckd_bin_attrs + .bin_attrs_new = reipl_eckd_bin_attrs }; static ssize_t reipl_eckd_clear_show(struct kobject *kobj, @@ -1587,15 +1587,15 @@ static struct attribute *dump_fcp_attrs[] = { NULL, }; -static struct bin_attribute *dump_fcp_bin_attrs[] = { +static const struct bin_attribute *const dump_fcp_bin_attrs[] = { &sys_dump_fcp_scp_data_attr, NULL, }; -static struct attribute_group dump_fcp_attr_group = { +static const struct attribute_group dump_fcp_attr_group = { .name = IPL_FCP_STR, .attrs = dump_fcp_attrs, - .bin_attrs = dump_fcp_bin_attrs, + .bin_attrs_new = dump_fcp_bin_attrs, }; /* NVME dump device attributes */ @@ -1621,15 +1621,15 @@ static struct attribute *dump_nvme_attrs[] = { NULL, }; -static struct bin_attribute *dump_nvme_bin_attrs[] = { +static const struct bin_attribute *const dump_nvme_bin_attrs[] = { &sys_dump_nvme_scp_data_attr, NULL, }; -static struct attribute_group dump_nvme_attr_group = { +static const struct attribute_group dump_nvme_attr_group = { .name = IPL_NVME_STR, .attrs = dump_nvme_attrs, - .bin_attrs = dump_nvme_bin_attrs, + .bin_attrs_new = dump_nvme_bin_attrs, }; /* ECKD dump device attributes */ @@ -1655,15 +1655,15 @@ static struct attribute *dump_eckd_attrs[] = { NULL, }; -static struct bin_attribute *dump_eckd_bin_attrs[] = { +static const struct bin_attribute *const dump_eckd_bin_attrs[] = { &sys_dump_eckd_scp_data_attr, NULL, }; -static struct attribute_group dump_eckd_attr_group = { +static const struct attribute_group dump_eckd_attr_group = { .name = IPL_ECKD_STR, .attrs = dump_eckd_attrs, - .bin_attrs = dump_eckd_bin_attrs, + .bin_attrs_new = dump_eckd_bin_attrs, }; /* CCW dump device attributes */ diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c index 29080d6d5d8d..c2a468986212 100644 --- a/arch/s390/kernel/os_info.c +++ b/arch/s390/kernel/os_info.c @@ -18,6 +18,7 @@ #include <asm/physmem_info.h> #include 
<asm/maccess.h> #include <asm/asm-offsets.h> +#include <asm/sections.h> #include <asm/ipl.h> /* diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index a3fea683b227..0ce550faf073 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -157,18 +157,18 @@ u64 __bootdata_preserved(stfle_fac_list[16]); EXPORT_SYMBOL(stfle_fac_list); struct oldmem_data __bootdata_preserved(oldmem_data); -unsigned long VMALLOC_START; +unsigned long __bootdata_preserved(VMALLOC_START); EXPORT_SYMBOL(VMALLOC_START); -unsigned long VMALLOC_END; +unsigned long __bootdata_preserved(VMALLOC_END); EXPORT_SYMBOL(VMALLOC_END); -struct page *vmemmap; +struct page *__bootdata_preserved(vmemmap); EXPORT_SYMBOL(vmemmap); -unsigned long vmemmap_size; +unsigned long __bootdata_preserved(vmemmap_size); -unsigned long MODULES_VADDR; -unsigned long MODULES_END; +unsigned long __bootdata_preserved(MODULES_VADDR); +unsigned long __bootdata_preserved(MODULES_END); /* An array with a pointer to the lowcore of every CPU. */ struct lowcore *lowcore_ptr[NR_CPUS]; @@ -359,25 +359,17 @@ void *restart_stack; unsigned long stack_alloc(void) { -#ifdef CONFIG_VMAP_STACK - void *ret; + void *stack; - ret = __vmalloc_node(THREAD_SIZE, THREAD_SIZE, THREADINFO_GFP, - NUMA_NO_NODE, __builtin_return_address(0)); - kmemleak_not_leak(ret); - return (unsigned long)ret; -#else - return __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER); -#endif + stack = __vmalloc_node(THREAD_SIZE, THREAD_SIZE, THREADINFO_GFP, + NUMA_NO_NODE, __builtin_return_address(0)); + kmemleak_not_leak(stack); + return (unsigned long)stack; } void stack_free(unsigned long stack) { -#ifdef CONFIG_VMAP_STACK - vfree((void *) stack); -#else - free_pages(stack, THREAD_SIZE_ORDER); -#endif + vfree((void *)stack); } static unsigned long __init stack_alloc_early(void) @@ -979,6 +971,7 @@ void __init setup_arch(char **cmdline_p) if (test_facility(193)) static_branch_enable(&cpu_has_bear); + setup_protection_map(); /* * Create kernel page tables. */ diff --git a/arch/s390/kernel/text_amode31.S b/arch/s390/kernel/text_amode31.S index c0a70efa2426..26f2981aa09e 100644 --- a/arch/s390/kernel/text_amode31.S +++ b/arch/s390/kernel/text_amode31.S @@ -18,8 +18,7 @@ * affects a few functions that are not performance-relevant. */ .macro BR_EX_AMODE31_r14 - larl %r1,0f - ex 0,0(%r1) + exrl 0,0f j . 
0: br %r14 .endm diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 4f9c301a705b..39f5ed21f31a 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -556,6 +556,16 @@ static void __init alloc_masks(struct sysinfo_15_1_x *info, } } +static int __init detect_polarization(union topology_entry *tle) +{ + struct topology_core *tl_core; + + while (tle->nl) + tle = next_tle(tle); + tl_core = (struct topology_core *)tle; + return tl_core->pp != POLARIZATION_HRZ; +} + void __init topology_init_early(void) { struct sysinfo_15_1_x *info; @@ -575,6 +585,7 @@ void __init topology_init_early(void) __func__, PAGE_SIZE, PAGE_SIZE); info = tl_info; store_topology(info); + cpu_management = detect_polarization(info->tle); pr_info("The CPU configuration topology of the machine is: %d %d %d %d %d %d / %d\n", info->mag[0], info->mag[1], info->mag[2], info->mag[3], info->mag[4], info->mag[5], info->mnest); diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index 37bb4b761229..ad206f2068d8 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -5,7 +5,7 @@ include $(srctree)/lib/vdso/Makefile obj-vdso64 = vdso_user_wrapper.o note.o vgetrandom-chacha.o obj-cvdso64 = vdso64_generic.o getcpu.o vgetrandom.o -VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) $(CC_FLAGS_CHECK_STACK) +VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) CFLAGS_REMOVE_getcpu.o = $(VDSO_CFLAGS_REMOVE) CFLAGS_REMOVE_vgetrandom.o = $(VDSO_CFLAGS_REMOVE) CFLAGS_REMOVE_vdso64_generic.o = $(VDSO_CFLAGS_REMOVE) diff --git a/arch/s390/kernel/vmcore_info.c b/arch/s390/kernel/vmcore_info.c index 23f7d7619a99..cc8933e04ff7 100644 --- a/arch/s390/kernel/vmcore_info.c +++ b/arch/s390/kernel/vmcore_info.c @@ -1,8 +1,9 @@ // SPDX-License-Identifier: GPL-2.0-only #include <linux/vmcore_info.h> -#include <asm/abs_lowcore.h> #include <linux/mm.h> +#include <asm/abs_lowcore.h> +#include <asm/sections.h> #include <asm/setup.h> void arch_crash_save_vmcoreinfo(void) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index ea8dce299954..d4f031e086fc 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -2678,9 +2678,13 @@ static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) kvm_s390_clear_float_irqs(dev->kvm); break; case KVM_DEV_FLIC_APF_ENABLE: + if (kvm_is_ucontrol(dev->kvm)) + return -EINVAL; dev->kvm->arch.gmap->pfault_enabled = 1; break; case KVM_DEV_FLIC_APF_DISABLE_WAIT: + if (kvm_is_ucontrol(dev->kvm)) + return -EINVAL; dev->kvm->arch.gmap->pfault_enabled = 0; /* * Make sure no async faults are in transition when @@ -2894,6 +2898,8 @@ int kvm_set_routing_entry(struct kvm *kvm, switch (ue->type) { /* we store the userspace addresses instead of the guest addresses */ case KVM_IRQ_ROUTING_S390_ADAPTER: + if (kvm_is_ucontrol(kvm)) + return -EINVAL; e->set = set_adapter_int; uaddr = gmap_translate(kvm->arch.gmap, ue->u.adapter.summary_addr); if (uaddr == -EFAULT) diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 150b9387860a..a687695d8f68 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -854,7 +854,7 @@ unpin: static void unpin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page, gpa_t gpa) { - hpa_t hpa = (hpa_t) vsie_page->scb_o; + hpa_t hpa = virt_to_phys(vsie_page->scb_o); if (hpa) unpin_guest_page(vcpu->kvm, gpa, hpa); diff --git a/arch/s390/lib/mem.S b/arch/s390/lib/mem.S index 08f60a42b9a6..d026debf250c 100644 --- 
a/arch/s390/lib/mem.S +++ b/arch/s390/lib/mem.S @@ -34,8 +34,7 @@ SYM_FUNC_START(__memmove) la %r3,256(%r3) brctg %r0,.Lmemmove_forward_loop .Lmemmove_forward_remainder: - larl %r5,.Lmemmove_mvc - ex %r4,0(%r5) + exrl %r4,.Lmemmove_mvc .Lmemmove_exit: BR_EX %r14 .Lmemmove_reverse: @@ -83,8 +82,7 @@ SYM_FUNC_START(__memset) la %r1,256(%r1) brctg %r3,.Lmemset_clear_loop .Lmemset_clear_remainder: - larl %r3,.Lmemset_xc - ex %r4,0(%r3) + exrl %r4,.Lmemset_xc .Lmemset_exit: BR_EX %r14 .Lmemset_fill: @@ -102,8 +100,7 @@ SYM_FUNC_START(__memset) brctg %r5,.Lmemset_fill_loop .Lmemset_fill_remainder: stc %r3,0(%r1) - larl %r5,.Lmemset_mvc - ex %r4,0(%r5) + exrl %r4,.Lmemset_mvc BR_EX %r14 .Lmemset_fill_exit: stc %r3,0(%r1) @@ -132,8 +129,7 @@ SYM_FUNC_START(__memcpy) lgr %r1,%r2 jnz .Lmemcpy_loop .Lmemcpy_remainder: - larl %r5,.Lmemcpy_mvc - ex %r4,0(%r5) + exrl %r4,.Lmemcpy_mvc .Lmemcpy_exit: BR_EX %r14 .Lmemcpy_loop: @@ -175,8 +171,7 @@ SYM_FUNC_START(__memset\bits) brctg %r5,.L__memset_loop\bits .L__memset_remainder\bits: \insn %r3,0(%r1) - larl %r5,.L__memset_mvc\bits - ex %r4,0(%r5) + exrl %r4,.L__memset_mvc\bits BR_EX %r14 .L__memset_store\bits: \insn %r3,0(%r2) diff --git a/arch/s390/lib/xor.c b/arch/s390/lib/xor.c index fb924a8041dc..ce7bcf7c0032 100644 --- a/arch/s390/lib/xor.c +++ b/arch/s390/lib/xor.c @@ -15,7 +15,6 @@ static void xor_xc_2(unsigned long bytes, unsigned long * __restrict p1, const unsigned long * __restrict p2) { asm volatile( - " larl 1,2f\n" " aghi %0,-1\n" " jm 3f\n" " srlg 0,%0,8\n" @@ -25,12 +24,12 @@ static void xor_xc_2(unsigned long bytes, unsigned long * __restrict p1, " la %1,256(%1)\n" " la %2,256(%2)\n" " brctg 0,0b\n" - "1: ex %0,0(1)\n" + "1: exrl %0,2f\n" " j 3f\n" "2: xc 0(1,%1),0(%2)\n" "3:\n" : : "d" (bytes), "a" (p1), "a" (p2) - : "0", "1", "cc", "memory"); + : "0", "cc", "memory"); } static void xor_xc_3(unsigned long bytes, unsigned long * __restrict p1, @@ -38,9 +37,8 @@ static void xor_xc_3(unsigned long bytes, unsigned long * __restrict p1, const unsigned long * __restrict p3) { asm volatile( - " larl 1,2f\n" " aghi %0,-1\n" - " jm 3f\n" + " jm 4f\n" " srlg 0,%0,8\n" " ltgr 0,0\n" " jz 1f\n" @@ -50,14 +48,14 @@ static void xor_xc_3(unsigned long bytes, unsigned long * __restrict p1, " la %2,256(%2)\n" " la %3,256(%3)\n" " brctg 0,0b\n" - "1: ex %0,0(1)\n" - " ex %0,6(1)\n" - " j 3f\n" + "1: exrl %0,2f\n" + " exrl %0,3f\n" + " j 4f\n" "2: xc 0(1,%1),0(%2)\n" - " xc 0(1,%1),0(%3)\n" - "3:\n" + "3: xc 0(1,%1),0(%3)\n" + "4:\n" : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3) - : : "0", "1", "cc", "memory"); + : : "0", "cc", "memory"); } static void xor_xc_4(unsigned long bytes, unsigned long * __restrict p1, @@ -66,9 +64,8 @@ static void xor_xc_4(unsigned long bytes, unsigned long * __restrict p1, const unsigned long * __restrict p4) { asm volatile( - " larl 1,2f\n" " aghi %0,-1\n" - " jm 3f\n" + " jm 5f\n" " srlg 0,%0,8\n" " ltgr 0,0\n" " jz 1f\n" @@ -80,16 +77,16 @@ static void xor_xc_4(unsigned long bytes, unsigned long * __restrict p1, " la %3,256(%3)\n" " la %4,256(%4)\n" " brctg 0,0b\n" - "1: ex %0,0(1)\n" - " ex %0,6(1)\n" - " ex %0,12(1)\n" - " j 3f\n" + "1: exrl %0,2f\n" + " exrl %0,3f\n" + " exrl %0,4f\n" + " j 5f\n" "2: xc 0(1,%1),0(%2)\n" - " xc 0(1,%1),0(%3)\n" - " xc 0(1,%1),0(%4)\n" - "3:\n" + "3: xc 0(1,%1),0(%3)\n" + "4: xc 0(1,%1),0(%4)\n" + "5:\n" : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4) - : : "0", "1", "cc", "memory"); + : : "0", "cc", "memory"); } static void xor_xc_5(unsigned long bytes, unsigned long * 
__restrict p1, @@ -101,7 +98,7 @@ static void xor_xc_5(unsigned long bytes, unsigned long * __restrict p1, asm volatile( " larl 1,2f\n" " aghi %0,-1\n" - " jm 3f\n" + " jm 6f\n" " srlg 0,%0,8\n" " ltgr 0,0\n" " jz 1f\n" @@ -115,19 +112,19 @@ static void xor_xc_5(unsigned long bytes, unsigned long * __restrict p1, " la %4,256(%4)\n" " la %5,256(%5)\n" " brctg 0,0b\n" - "1: ex %0,0(1)\n" - " ex %0,6(1)\n" - " ex %0,12(1)\n" - " ex %0,18(1)\n" - " j 3f\n" + "1: exrl %0,2f\n" + " exrl %0,3f\n" + " exrl %0,4f\n" + " exrl %0,5f\n" + " j 6f\n" "2: xc 0(1,%1),0(%2)\n" - " xc 0(1,%1),0(%3)\n" - " xc 0(1,%1),0(%4)\n" - " xc 0(1,%1),0(%5)\n" - "3:\n" + "3: xc 0(1,%1),0(%3)\n" + "4: xc 0(1,%1),0(%4)\n" + "5: xc 0(1,%1),0(%5)\n" + "6:\n" : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4), "+a" (p5) - : : "0", "1", "cc", "memory"); + : : "0", "cc", "memory"); } struct xor_block_template xor_block_xc = { diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 7a96623a9d2e..f2298f7a3f21 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -56,6 +56,15 @@ pgd_t invalid_pg_dir[PTRS_PER_PGD] __section(".bss..invalid_pg_dir"); struct ctlreg __bootdata_preserved(s390_invalid_asce); +unsigned long __bootdata_preserved(page_noexec_mask); +EXPORT_SYMBOL(page_noexec_mask); + +unsigned long __bootdata_preserved(segment_noexec_mask); +EXPORT_SYMBOL(segment_noexec_mask); + +unsigned long __bootdata_preserved(region_noexec_mask); +EXPORT_SYMBOL(region_noexec_mask); + unsigned long empty_zero_page, zero_page_mask; EXPORT_SYMBOL(empty_zero_page); EXPORT_SYMBOL(zero_page_mask); diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index 28a18c42ba99..44426e0f2944 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -17,6 +17,7 @@ #include <asm/asm-extable.h> #include <asm/abs_lowcore.h> #include <asm/stacktrace.h> +#include <asm/sections.h> #include <asm/maccess.h> #include <asm/ctlreg.h> diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index 33f3504be90b..76f376876e0d 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -196,22 +196,28 @@ void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack) } } -static const pgprot_t protection_map[16] = { - [VM_NONE] = PAGE_NONE, - [VM_READ] = PAGE_RO, - [VM_WRITE] = PAGE_RO, - [VM_WRITE | VM_READ] = PAGE_RO, - [VM_EXEC] = PAGE_RX, - [VM_EXEC | VM_READ] = PAGE_RX, - [VM_EXEC | VM_WRITE] = PAGE_RX, - [VM_EXEC | VM_WRITE | VM_READ] = PAGE_RX, - [VM_SHARED] = PAGE_NONE, - [VM_SHARED | VM_READ] = PAGE_RO, - [VM_SHARED | VM_WRITE] = PAGE_RW, - [VM_SHARED | VM_WRITE | VM_READ] = PAGE_RW, - [VM_SHARED | VM_EXEC] = PAGE_RX, - [VM_SHARED | VM_EXEC | VM_READ] = PAGE_RX, - [VM_SHARED | VM_EXEC | VM_WRITE] = PAGE_RWX, - [VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_RWX -}; +static pgprot_t protection_map[16] __ro_after_init; + +void __init setup_protection_map(void) +{ + pgprot_t *pm = protection_map; + + pm[VM_NONE] = PAGE_NONE; + pm[VM_READ] = PAGE_RO; + pm[VM_WRITE] = PAGE_RO; + pm[VM_WRITE | VM_READ] = PAGE_RO; + pm[VM_EXEC] = PAGE_RX; + pm[VM_EXEC | VM_READ] = PAGE_RX; + pm[VM_EXEC | VM_WRITE] = PAGE_RX; + pm[VM_EXEC | VM_WRITE | VM_READ] = PAGE_RX; + pm[VM_SHARED] = PAGE_NONE; + pm[VM_SHARED | VM_READ] = PAGE_RO; + pm[VM_SHARED | VM_WRITE] = PAGE_RW; + pm[VM_SHARED | VM_WRITE | VM_READ] = PAGE_RW; + pm[VM_SHARED | VM_EXEC] = PAGE_RX; + pm[VM_SHARED | VM_EXEC | VM_READ] = PAGE_RX; + pm[VM_SHARED | VM_EXEC | VM_WRITE] = PAGE_RWX; + pm[VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_RWX; +} + 
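The DECLARE_VM_GET_PAGE_PROT line that follows is the generic helper emitter; in include/linux/pgtable.h it expands to roughly the function below, so filling protection_map at boot rather than at compile time is what lets protection values that depend on boot-time facility checks, such as the noexec masks added in mm/init.c above, land in the table. Sketch of the generic expansion, not code from this patch:

pgprot_t vm_get_page_prot(unsigned long vm_flags)
{
	/* index the 16-entry table by the access-relevant vm_flags bits */
	return protection_map[vm_flags &
			      (VM_READ | VM_WRITE | VM_EXEC | VM_SHARED)];
}
EXPORT_SYMBOL(vm_get_page_prot);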
DECLARE_VM_GET_PAGE_PROT diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index 8f56a21a077f..eae97fb61712 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -109,8 +109,6 @@ static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end, } else if (flags & SET_MEMORY_DEF) { new = __pte(pte_val(new) & PAGE_MASK); new = set_pte_bit(new, PAGE_KERNEL); - if (!MACHINE_HAS_NX) - new = clear_pte_bit(new, __pgprot(_PAGE_NOEXEC)); } pgt_set((unsigned long *)ptep, pte_val(new), addr, CRDTE_DTT_PAGE); ptep++; @@ -167,8 +165,6 @@ static void modify_pmd_page(pmd_t *pmdp, unsigned long addr, } else if (flags & SET_MEMORY_DEF) { new = __pmd(pmd_val(new) & PMD_MASK); new = set_pmd_bit(new, SEGMENT_KERNEL); - if (!MACHINE_HAS_NX) - new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC)); } pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT); } @@ -256,8 +252,6 @@ static void modify_pud_page(pud_t *pudp, unsigned long addr, } else if (flags & SET_MEMORY_DEF) { new = __pud(pud_val(new) & PUD_MASK); new = set_pud_bit(new, REGION3_KERNEL); - if (!MACHINE_HAS_NX) - new = clear_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC)); } pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3); } diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index cea5dba80468..f05e62e037c2 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -360,8 +360,6 @@ void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pgste_t pgste; struct mm_struct *mm = vma->vm_mm; - if (!MACHINE_HAS_NX) - pte = clear_pte_bit(pte, __pgprot(_PAGE_NOEXEC)); if (mm_has_pgste(mm)) { pgste = pgste_get(ptep); pgste_set_key(ptep, pgste, pte, mm); diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 665b8228afeb..7c684c54e721 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -171,9 +171,6 @@ static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr, pte_t *pte; prot = pgprot_val(PAGE_KERNEL); - if (!MACHINE_HAS_NX) - prot &= ~_PAGE_NOEXEC; - pte = pte_offset_kernel(pmd, addr); for (; addr < end; addr += PAGE_SIZE, pte++) { if (!add) { @@ -230,9 +227,6 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr, pte_t *pte; prot = pgprot_val(SEGMENT_KERNEL); - if (!MACHINE_HAS_NX) - prot &= ~_SEGMENT_ENTRY_NOEXEC; - pmd = pmd_offset(pud, addr); for (; addr < end; addr = next, pmd++) { next = pmd_addr_end(addr, end); @@ -324,8 +318,6 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end, pmd_t *pmd; prot = pgprot_val(REGION3_KERNEL); - if (!MACHINE_HAS_NX) - prot &= ~_REGION_ENTRY_NOEXEC; pud = pud_offset(p4d, addr); for (; addr < end; addr = next, pud++) { next = pud_addr_end(addr, end); diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile index 2c21f0394c9a..df73c5182990 100644 --- a/arch/s390/pci/Makefile +++ b/arch/s390/pci/Makefile @@ -5,6 +5,6 @@ obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_clp.o \ pci_event.o pci_debug.o pci_insn.o pci_mmio.o \ - pci_bus.o pci_kvm_hook.o + pci_bus.o pci_kvm_hook.o pci_report.o obj-$(CONFIG_PCI_IOV) += pci_iov.o obj-$(CONFIG_SYSFS) += pci_sysfs.o diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index 7f7b732b3f3e..7bd7721c1239 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -16,6 +16,7 @@ #include <asm/sclp.h> #include "pci_bus.h" +#include "pci_report.h" /* Content Code Description for PCI Function Error */ struct zpci_ccdf_err { @@ -169,6 +170,8 @@ static pci_ers_result_t 
zpci_event_do_reset(struct pci_dev *pdev, static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev) { pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT; + struct zpci_dev *zdev = to_zpci(pdev); + char *status_str = "success"; struct pci_driver *driver; /* @@ -186,29 +189,37 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev) if (is_passed_through(pdev)) { pr_info("%s: Cannot be recovered in the host because it is a pass-through device\n", pci_name(pdev)); + status_str = "failed (pass-through)"; goto out_unlock; } driver = to_pci_driver(pdev->dev.driver); if (!is_driver_supported(driver)) { - if (!driver) + if (!driver) { pr_info("%s: Cannot be recovered because no driver is bound to the device\n", pci_name(pdev)); - else + status_str = "failed (no driver)"; + } else { pr_info("%s: The %s driver bound to the device does not support error recovery\n", pci_name(pdev), driver->name); + status_str = "failed (no driver support)"; + } goto out_unlock; } ers_res = zpci_event_notify_error_detected(pdev, driver); - if (ers_result_indicates_abort(ers_res)) + if (ers_result_indicates_abort(ers_res)) { + status_str = "failed (abort on detection)"; goto out_unlock; + } if (ers_res == PCI_ERS_RESULT_CAN_RECOVER) { ers_res = zpci_event_do_error_state_clear(pdev, driver); - if (ers_result_indicates_abort(ers_res)) + if (ers_result_indicates_abort(ers_res)) { + status_str = "failed (abort on MMIO enable)"; goto out_unlock; + } } if (ers_res == PCI_ERS_RESULT_NEED_RESET) @@ -217,6 +228,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev) if (ers_res != PCI_ERS_RESULT_RECOVERED) { pr_err("%s: Automatic recovery failed; operator intervention is required\n", pci_name(pdev)); + status_str = "failed (driver can't recover)"; goto out_unlock; } @@ -225,6 +237,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev) driver->err_handler->resume(pdev); out_unlock: pci_dev_unlock(pdev); + zpci_report_status(zdev, "recovery", status_str); return ers_res; } diff --git a/arch/s390/pci/pci_report.c b/arch/s390/pci/pci_report.c new file mode 100644 index 000000000000..1b494e5ecc4d --- /dev/null +++ b/arch/s390/pci/pci_report.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright IBM Corp. 
2024 + * + * Author(s): + * Niklas Schnelle <schnelle@linux.ibm.com> + * + */ + +#define KMSG_COMPONENT "zpci" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/kernel.h> +#include <linux/sprintf.h> +#include <linux/pci.h> + +#include <asm/sclp.h> +#include <asm/debug.h> +#include <asm/pci_debug.h> + +#include "pci_report.h" + +#define ZPCI_ERR_LOG_ID_KERNEL_REPORT 0x4714 + +struct zpci_report_error_data { + u64 timestamp; + u64 err_log_id; + char log_data[]; +} __packed; + +#define ZPCI_REPORT_SIZE (PAGE_SIZE - sizeof(struct err_notify_sccb)) +#define ZPCI_REPORT_DATA_SIZE (ZPCI_REPORT_SIZE - sizeof(struct zpci_report_error_data)) + +struct zpci_report_error { + struct zpci_report_error_header header; + struct zpci_report_error_data data; +} __packed; + +static const char *zpci_state_str(pci_channel_state_t state) +{ + switch (state) { + case pci_channel_io_normal: + return "normal"; + case pci_channel_io_frozen: + return "frozen"; + case pci_channel_io_perm_failure: + return "permanent-failure"; + default: + return "invalid"; + }; +} + +static int debug_log_header_fn(debug_info_t *id, struct debug_view *view, + int area, debug_entry_t *entry, char *out_buf, + size_t out_buf_size) +{ + unsigned long sec, usec; + unsigned int level; + char *except_str; + int rc = 0; + + level = entry->level; + sec = entry->clock; + usec = do_div(sec, USEC_PER_SEC); + + if (entry->exception) + except_str = "*"; + else + except_str = "-"; + rc += scnprintf(out_buf, out_buf_size, "%011ld:%06lu %1u %1s %04u ", + sec, usec, level, except_str, + entry->cpu); + return rc; +} + +static int debug_prolog_header(debug_info_t *id, struct debug_view *view, + char *out_buf, size_t out_buf_size) +{ + return scnprintf(out_buf, out_buf_size, "sec:usec level except cpu msg\n"); +} + +static struct debug_view debug_log_view = { + "pci_msg_log", + &debug_prolog_header, + &debug_log_header_fn, + &debug_sprintf_format_fn, + NULL, + NULL +}; + +/** + * zpci_report_status - Report the status of operations on a PCI device + * @zdev: The PCI device for which to report status + * @operation: A string representing the operation reported + * @status: A string representing the status of the operation + * + * This function creates a human-readable report about an operation such as + * PCI device recovery and forwards this to the platform using the SCLP Write + * Event Data mechanism. Besides the operation and status strings, the report + * also contains additional information about the device deemed useful for + * debugging, such as the currently bound device driver, if any, and error state. + * Additionally, a string representation of pci_debug_msg_id, or as much of it as + * fits, is included. + * + * Return: 0 on success, an error code < 0 otherwise. 
+ */ +int zpci_report_status(struct zpci_dev *zdev, const char *operation, const char *status) +{ + struct zpci_report_error *report; + struct pci_driver *driver = NULL; + struct pci_dev *pdev = NULL; + char *buf, *end; + int ret; + + if (!zdev || !zdev->zbus) + return -ENODEV; + + /* Protected virtualization hosts get nothing from us */ + if (prot_virt_guest) + return -ENODATA; + + report = (void *)get_zeroed_page(GFP_KERNEL); + if (!report) + return -ENOMEM; + if (zdev->zbus->bus) + pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn); + if (pdev) + driver = to_pci_driver(pdev->dev.driver); + + buf = report->data.log_data; + end = report->data.log_data + ZPCI_REPORT_DATA_SIZE; + buf += scnprintf(buf, end - buf, "report: %s\n", operation); + buf += scnprintf(buf, end - buf, "status: %s\n", status); + buf += scnprintf(buf, end - buf, "state: %s\n", + (pdev) ? zpci_state_str(pdev->error_state) : "n/a"); + buf += scnprintf(buf, end - buf, "driver: %s\n", (driver) ? driver->name : "n/a"); + ret = debug_dump(pci_debug_msg_id, &debug_log_view, buf, end - buf, true); + if (ret < 0) + pr_err("Reading PCI debug messages failed with code %d\n", ret); + else + buf += ret; + + report->header.version = 1; + report->header.action = SCLP_ERRNOTIFY_AQ_INFO_LOG; + report->header.length = buf - (char *)&report->data; + report->data.timestamp = ktime_get_clocktai_seconds(); + report->data.err_log_id = ZPCI_ERR_LOG_ID_KERNEL_REPORT; + + ret = sclp_pci_report(&report->header, zdev->fh, zdev->fid); + if (ret) + pr_err("Reporting PCI status failed with code %d\n", ret); + else + pr_info("Reported PCI device status\n"); + + free_page((unsigned long)report); + + return ret; +} diff --git a/arch/s390/pci/pci_report.h b/arch/s390/pci/pci_report.h new file mode 100644 index 000000000000..e08003d51a97 --- /dev/null +++ b/arch/s390/pci/pci_report.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright IBM Corp. 2024 + * + * Author(s): + * Niklas Schnelle <schnelle@linux.ibm.com> + * + */ +#ifndef __S390_PCI_REPORT_H +#define __S390_PCI_REPORT_H + +struct zpci_dev; + +int zpci_report_status(struct zpci_dev *zdev, const char *operation, const char *status); + +#endif /* __S390_PCI_REPORT_H */ diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c index 5f46ad58dcd1..2de1ea6c3a8c 100644 --- a/arch/s390/pci/pci_sysfs.c +++ b/arch/s390/pci/pci_sysfs.c @@ -135,7 +135,7 @@ out: static DEVICE_ATTR_WO(recover); static ssize_t util_string_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, char *buf, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct device *dev = kobj_to_dev(kobj); @@ -145,10 +145,10 @@ static ssize_t util_string_read(struct file *filp, struct kobject *kobj, return memory_read_from_buffer(buf, count, &off, zdev->util_str, sizeof(zdev->util_str)); } -static BIN_ATTR_RO(util_string, CLP_UTIL_STR_LEN); +static const BIN_ATTR_RO(util_string, CLP_UTIL_STR_LEN); static ssize_t report_error_write(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, char *buf, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct zpci_report_error_header *report = (void *) buf; @@ -164,7 +164,7 @@ static ssize_t report_error_write(struct file *filp, struct kobject *kobj, return ret ? 
ret : count; } -static BIN_ATTR(report_error, S_IWUSR, NULL, report_error_write, PAGE_SIZE); +static const BIN_ATTR(report_error, S_IWUSR, NULL, report_error_write, PAGE_SIZE); static ssize_t uid_is_unique_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -203,7 +203,7 @@ const struct attribute_group zpci_ident_attr_group = { .is_visible = zpci_index_is_visible, }; -static struct bin_attribute *zpci_bin_attrs[] = { +static const struct bin_attribute *const zpci_bin_attrs[] = { &bin_attr_util_string, &bin_attr_report_error, NULL, @@ -227,7 +227,7 @@ static struct attribute *zpci_dev_attrs[] = { const struct attribute_group zpci_attr_group = { .attrs = zpci_dev_attrs, - .bin_attrs = zpci_bin_attrs, + .bin_attrs_new = zpci_bin_attrs, }; static struct attribute *pfip_attrs[] = { diff --git a/arch/sh/drivers/push-switch.c b/arch/sh/drivers/push-switch.c index 1dea43381b5a..2b51ad9d5586 100644 --- a/arch/sh/drivers/push-switch.c +++ b/arch/sh/drivers/push-switch.c @@ -110,7 +110,7 @@ static void switch_drv_remove(struct platform_device *pdev) static struct platform_driver switch_driver = { .probe = switch_drv_probe, - .remove_new = switch_drv_remove, + .remove = switch_drv_remove, .driver = { .name = DRV_NAME, }, diff --git a/arch/sparc/include/asm/parport_64.h b/arch/sparc/include/asm/parport_64.h index 4f530a270760..3068809ef9ad 100644 --- a/arch/sparc/include/asm/parport_64.h +++ b/arch/sparc/include/asm/parport_64.h @@ -243,7 +243,7 @@ static struct platform_driver ecpp_driver = { .of_match_table = ecpp_match, }, .probe = ecpp_probe, - .remove_new = ecpp_remove, + .remove = ecpp_remove, }; static int parport_pc_find_nonpci_ports(int autoirq, int autodma) diff --git a/arch/sparc/kernel/chmc.c b/arch/sparc/kernel/chmc.c index e02074062001..d4c74d6b2e1b 100644 --- a/arch/sparc/kernel/chmc.c +++ b/arch/sparc/kernel/chmc.c @@ -814,7 +814,7 @@ static struct platform_driver us3mc_driver = { .of_match_table = us3mc_match, }, .probe = us3mc_probe, - .remove_new = us3mc_remove, + .remove = us3mc_remove, }; static inline bool us3mc_platform(void) diff --git a/arch/um/drivers/rtc_kern.c b/arch/um/drivers/rtc_kern.c index 3a1582219c4b..134a58f93c85 100644 --- a/arch/um/drivers/rtc_kern.c +++ b/arch/um/drivers/rtc_kern.c @@ -176,7 +176,7 @@ static void uml_rtc_remove(struct platform_device *pdev) static struct platform_driver uml_rtc_driver = { .probe = uml_rtc_probe, - .remove_new = uml_rtc_remove, + .remove = uml_rtc_remove, .driver = { .name = "uml-rtc", }, diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 66c1a8835e36..0b1e61f72fb3 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -865,7 +865,6 @@ static int ubd_add(int n, char **error_out) ubd_dev->tag_set.ops = &ubd_mq_ops; ubd_dev->tag_set.queue_depth = 64; ubd_dev->tag_set.numa_node = NUMA_NO_NODE; - ubd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; ubd_dev->tag_set.driver_data = ubd_dev; ubd_dev->tag_set.nr_hw_queues = 1; diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c index cc3be48a9d6e..65df43fa9be5 100644 --- a/arch/um/drivers/virtio_uml.c +++ b/arch/um/drivers/virtio_uml.c @@ -1465,7 +1465,7 @@ static int virtio_uml_resume(struct platform_device *pdev) static struct platform_driver virtio_uml_driver = { .probe = virtio_uml_probe, - .remove_new = virtio_uml_remove, + .remove = virtio_uml_remove, .driver = { .name = "virtio-uml", .of_match_table = virtio_uml_match, diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 9d7bd0ae48c4..ef6cfea9df73 100644 
--- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -83,7 +83,6 @@ config X86 select ARCH_HAS_DMA_OPS if GART_IOMMU || XEN select ARCH_HAS_EARLY_DEBUG if KGDB select ARCH_HAS_ELF_RANDOMIZE - select ARCH_HAS_EXECMEM_ROX if X86_64 select ARCH_HAS_FAST_MULTIPLIER select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index bb284aff7bfd..99c590da0ae2 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -429,6 +429,16 @@ static struct event_constraint intel_lnc_event_constraints[] = { EVENT_CONSTRAINT_END }; +static struct extra_reg intel_lnc_extra_regs[] __read_mostly = { + INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OFFCORE_RSP_0, 0xfffffffffffull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0xfffffffffffull, RSP_1), + INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), + INTEL_UEVENT_EXTRA_REG(0x02c6, MSR_PEBS_FRONTEND, 0x9, FE), + INTEL_UEVENT_EXTRA_REG(0x03c6, MSR_PEBS_FRONTEND, 0x7fff1f, FE), + INTEL_UEVENT_EXTRA_REG(0x40ad, MSR_PEBS_FRONTEND, 0xf, FE), + INTEL_UEVENT_EXTRA_REG(0x04c2, MSR_PEBS_FRONTEND, 0x8, FE), + EVENT_EXTRA_END +}; EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3"); EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3"); @@ -6422,7 +6432,7 @@ static __always_inline void intel_pmu_init_lnc(struct pmu *pmu) intel_pmu_init_glc(pmu); hybrid(pmu, event_constraints) = intel_lnc_event_constraints; hybrid(pmu, pebs_constraints) = intel_lnc_pebs_event_constraints; - hybrid(pmu, extra_regs) = intel_rwc_extra_regs; + hybrid(pmu, extra_regs) = intel_lnc_extra_regs; } static __always_inline void intel_pmu_init_skt(struct pmu *pmu) @@ -7135,6 +7145,7 @@ __init int intel_pmu_init(void) case INTEL_METEORLAKE: case INTEL_METEORLAKE_L: + case INTEL_ARROWLAKE_U: intel_pmu_init_hybrid(hybrid_big_small); x86_pmu.pebs_latency_data = cmt_latency_data; diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 8afc4ad3cd16..6ba6549f26fa 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1489,7 +1489,7 @@ void intel_pmu_pebs_enable(struct perf_event *event) * hence we need to drain when changing said * size. 
*/ - intel_pmu_drain_large_pebs(cpuc); + intel_pmu_drain_pebs_buffer(); adaptive_pebs_record_size_update(); wrmsrl(MSR_PEBS_DATA_CFG, pebs_data_cfg); cpuc->active_pebs_data_cfg = pebs_data_cfg; @@ -2517,6 +2517,7 @@ void __init intel_ds_init(void) x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME; break; + case 6: case 5: x86_pmu.pebs_ept = 1; fallthrough; diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index d98fac567684..e7aba7349231 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1910,6 +1910,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, &adl_uncore_init), X86_MATCH_VFM(INTEL_ATOM_CRESTMONT_X, &gnr_uncore_init), X86_MATCH_VFM(INTEL_ATOM_CRESTMONT, &gnr_uncore_init), + X86_MATCH_VFM(INTEL_ATOM_DARKMONT_X, &gnr_uncore_init), {}, }; MODULE_DEVICE_TABLE(x86cpu, intel_uncore_match); diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 17b6590748c0..645aa360628d 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -452,6 +452,7 @@ #define X86_FEATURE_SME_COHERENT (19*32+10) /* AMD hardware-enforced cache coherency */ #define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" AMD SEV-ES full debug state swap support */ #define X86_FEATURE_SVSM (19*32+28) /* "svsm" SVSM present */ +#define X86_FEATURE_HV_INUSE_WR_ALLOWED (19*32+30) /* Allow Write to in-use hypervisor-owned pages */ /* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */ #define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* No Nested Data Breakpoints */ diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 6f82e75b6149..4b804531b03c 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -36,10 +36,12 @@ #define _PAGE_BIT_DEVMAP _PAGE_BIT_SOFTW4 #ifdef CONFIG_X86_64 -#define _PAGE_BIT_SAVED_DIRTY _PAGE_BIT_SOFTW5 /* Saved Dirty bit */ +#define _PAGE_BIT_SAVED_DIRTY _PAGE_BIT_SOFTW5 /* Saved Dirty bit (leaf) */ +#define _PAGE_BIT_NOPTISHADOW _PAGE_BIT_SOFTW5 /* No PTI shadow (root PGD) */ #else /* Shared with _PAGE_BIT_UFFD_WP which is not supported on 32 bit */ -#define _PAGE_BIT_SAVED_DIRTY _PAGE_BIT_SOFTW2 /* Saved Dirty bit */ +#define _PAGE_BIT_SAVED_DIRTY _PAGE_BIT_SOFTW2 /* Saved Dirty bit (leaf) */ +#define _PAGE_BIT_NOPTISHADOW _PAGE_BIT_SOFTW2 /* No PTI shadow (root PGD) */ #endif /* If _PAGE_BIT_PRESENT is clear, we use these: */ @@ -139,6 +141,8 @@ #define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) +#define _PAGE_NOPTISHADOW (_AT(pteval_t, 1) << _PAGE_BIT_NOPTISHADOW) + /* * Set of bits not changed in pte_modify. 
The pte's * protection key is treated like _PAGE_RW, for diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index c0975815980c..20e6009381ed 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -230,6 +230,8 @@ static inline unsigned long long l1tf_pfn_limit(void) return BIT_ULL(boot_cpu_data.x86_cache_bits - 1 - PAGE_SHIFT); } +void init_cpu_devs(void); +void get_cpu_vendor(struct cpuinfo_x86 *c); extern void early_cpu_init(void); extern void identify_secondary_cpu(struct cpuinfo_x86 *); extern void print_cpu_info(struct cpuinfo_x86 *); diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h index bab490379c65..2de1e5a75c57 100644 --- a/arch/x86/include/asm/smap.h +++ b/arch/x86/include/asm/smap.h @@ -13,30 +13,26 @@ #include <asm/cpufeatures.h> #include <asm/alternative.h> -/* "Raw" instruction opcodes */ -#define __ASM_CLAC ".byte 0x0f,0x01,0xca" -#define __ASM_STAC ".byte 0x0f,0x01,0xcb" - #ifdef __ASSEMBLY__ #define ASM_CLAC \ - ALTERNATIVE "", __ASM_CLAC, X86_FEATURE_SMAP + ALTERNATIVE "", "clac", X86_FEATURE_SMAP #define ASM_STAC \ - ALTERNATIVE "", __ASM_STAC, X86_FEATURE_SMAP + ALTERNATIVE "", "stac", X86_FEATURE_SMAP #else /* __ASSEMBLY__ */ static __always_inline void clac(void) { /* Note: a barrier is implicit in alternative() */ - alternative("", __ASM_CLAC, X86_FEATURE_SMAP); + alternative("", "clac", X86_FEATURE_SMAP); } static __always_inline void stac(void) { /* Note: a barrier is implicit in alternative() */ - alternative("", __ASM_STAC, X86_FEATURE_SMAP); + alternative("", "stac", X86_FEATURE_SMAP); } static __always_inline unsigned long smap_save(void) @@ -44,7 +40,7 @@ static __always_inline unsigned long smap_save(void) unsigned long flags; asm volatile ("# smap_save\n\t" - ALTERNATIVE("", "pushf; pop %0; " __ASM_CLAC "\n\t", + ALTERNATIVE("", "pushf; pop %0; " "clac" "\n\t", X86_FEATURE_SMAP) : "=rm" (flags) : : "memory", "cc"); @@ -61,9 +57,9 @@ static __always_inline void smap_restore(unsigned long flags) /* These macros can be used in asm() statements */ #define ASM_CLAC \ - ALTERNATIVE("", __ASM_CLAC, X86_FEATURE_SMAP) + ALTERNATIVE("", "clac", X86_FEATURE_SMAP) #define ASM_STAC \ - ALTERNATIVE("", __ASM_STAC, X86_FEATURE_SMAP) + ALTERNATIVE("", "stac", X86_FEATURE_SMAP) #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index aec6e2d3aa1d..98bfc097389c 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -217,7 +217,7 @@ fail: #define nop() asm volatile ("nop") -static inline void serialize(void) +static __always_inline void serialize(void) { /* Instruction opcode for SERIALIZE; supported in binutils >= 2.35. 
*/ asm volatile(".byte 0xf, 0x1, 0xe8" ::: "memory"); diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h index 125c407e2abe..41502bd2afd6 100644 --- a/arch/x86/include/asm/static_call.h +++ b/arch/x86/include/asm/static_call.h @@ -65,4 +65,19 @@ extern bool __static_call_fixup(void *tramp, u8 op, void *dest); +extern void __static_call_update_early(void *tramp, void *func); + +#define static_call_update_early(name, _func) \ +({ \ + typeof(&STATIC_CALL_TRAMP(name)) __F = (_func); \ + if (static_call_initialized) { \ + __static_call_update(&STATIC_CALL_KEY(name), \ + STATIC_CALL_TRAMP_ADDR(name), __F);\ + } else { \ + WRITE_ONCE(STATIC_CALL_KEY(name).func, _func); \ + __static_call_update_early(STATIC_CALL_TRAMP_ADDR(name),\ + __F); \ + } \ +}) + #endif /* _ASM_STATIC_CALL_H */ diff --git a/arch/x86/include/asm/sync_core.h b/arch/x86/include/asm/sync_core.h index ab7382f92aff..96bda43538ee 100644 --- a/arch/x86/include/asm/sync_core.h +++ b/arch/x86/include/asm/sync_core.h @@ -8,7 +8,7 @@ #include <asm/special_insns.h> #ifdef CONFIG_X86_32 -static inline void iret_to_self(void) +static __always_inline void iret_to_self(void) { asm volatile ( "pushfl\n\t" @@ -19,7 +19,7 @@ static inline void iret_to_self(void) : ASM_CALL_CONSTRAINT : : "memory"); } #else -static inline void iret_to_self(void) +static __always_inline void iret_to_self(void) { unsigned int tmp; @@ -55,7 +55,7 @@ static inline void iret_to_self(void) * Like all of Linux's memory ordering operations, this is a * compiler barrier as well. */ -static inline void sync_core(void) +static __always_inline void sync_core(void) { /* * The SERIALIZE instruction is the most straightforward way to diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index b0a887209400..c52f0133425b 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -63,13 +63,13 @@ static inline unsigned long __untagged_addr_remote(struct mm_struct *mm, */ static inline void __user *mask_user_address(const void __user *ptr) { - unsigned long mask; + void __user *ret; asm("cmp %1,%0\n\t" - "sbb %0,%0" - :"=r" (mask) - :"r" (ptr), - "0" (runtime_const_ptr(USER_PTR_MAX))); - return (__force void __user *)(mask | (__force unsigned long)ptr); + "cmova %1,%0" + :"=r" (ret) + :"r" (runtime_const_ptr(USER_PTR_MAX)), + "0" (ptr)); + return ret; } #define masked_user_access_begin(x) ({ \ __auto_type __masked_ptr = (x); \ diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index a2dd24947eb8..97771b9d33af 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -39,9 +39,11 @@ #include <linux/string.h> #include <linux/types.h> #include <linux/pgtable.h> +#include <linux/instrumentation.h> #include <trace/events/xen.h> +#include <asm/alternative.h> #include <asm/page.h> #include <asm/smap.h> #include <asm/nospec-branch.h> @@ -86,11 +88,20 @@ struct xen_dm_op_buf; * there aren't more than 5 arguments...) 
*/ -extern struct { char _entry[32]; } hypercall_page[]; +void xen_hypercall_func(void); +DECLARE_STATIC_CALL(xen_hypercall, xen_hypercall_func); -#define __HYPERCALL "call hypercall_page+%c[offset]" -#define __HYPERCALL_ENTRY(x) \ - [offset] "i" (__HYPERVISOR_##x * sizeof(hypercall_page[0])) +#ifdef MODULE +#define __ADDRESSABLE_xen_hypercall +#else +#define __ADDRESSABLE_xen_hypercall __ADDRESSABLE_ASM_STR(__SCK__xen_hypercall) +#endif + +#define __HYPERCALL \ + __ADDRESSABLE_xen_hypercall \ + "call __SCT__xen_hypercall" + +#define __HYPERCALL_ENTRY(x) "a" (x) #ifdef CONFIG_X86_32 #define __HYPERCALL_RETREG "eax" @@ -148,7 +159,7 @@ extern struct { char _entry[32]; } hypercall_page[]; __HYPERCALL_0ARG(); \ asm volatile (__HYPERCALL \ : __HYPERCALL_0PARAM \ - : __HYPERCALL_ENTRY(name) \ + : __HYPERCALL_ENTRY(__HYPERVISOR_ ## name) \ : __HYPERCALL_CLOBBER0); \ (type)__res; \ }) @@ -159,7 +170,7 @@ extern struct { char _entry[32]; } hypercall_page[]; __HYPERCALL_1ARG(a1); \ asm volatile (__HYPERCALL \ : __HYPERCALL_1PARAM \ - : __HYPERCALL_ENTRY(name) \ + : __HYPERCALL_ENTRY(__HYPERVISOR_ ## name) \ : __HYPERCALL_CLOBBER1); \ (type)__res; \ }) @@ -170,7 +181,7 @@ extern struct { char _entry[32]; } hypercall_page[]; __HYPERCALL_2ARG(a1, a2); \ asm volatile (__HYPERCALL \ : __HYPERCALL_2PARAM \ - : __HYPERCALL_ENTRY(name) \ + : __HYPERCALL_ENTRY(__HYPERVISOR_ ## name) \ : __HYPERCALL_CLOBBER2); \ (type)__res; \ }) @@ -181,7 +192,7 @@ extern struct { char _entry[32]; } hypercall_page[]; __HYPERCALL_3ARG(a1, a2, a3); \ asm volatile (__HYPERCALL \ : __HYPERCALL_3PARAM \ - : __HYPERCALL_ENTRY(name) \ + : __HYPERCALL_ENTRY(__HYPERVISOR_ ## name) \ : __HYPERCALL_CLOBBER3); \ (type)__res; \ }) @@ -192,7 +203,7 @@ extern struct { char _entry[32]; } hypercall_page[]; __HYPERCALL_4ARG(a1, a2, a3, a4); \ asm volatile (__HYPERCALL \ : __HYPERCALL_4PARAM \ - : __HYPERCALL_ENTRY(name) \ + : __HYPERCALL_ENTRY(__HYPERVISOR_ ## name) \ : __HYPERCALL_CLOBBER4); \ (type)__res; \ }) @@ -206,12 +217,9 @@ xen_single_call(unsigned int call, __HYPERCALL_DECLS; __HYPERCALL_5ARG(a1, a2, a3, a4, a5); - if (call >= PAGE_SIZE / sizeof(hypercall_page[0])) - return -EINVAL; - - asm volatile(CALL_NOSPEC + asm volatile(__HYPERCALL : __HYPERCALL_5PARAM - : [thunk_target] "a" (&hypercall_page[call]) + : __HYPERCALL_ENTRY(call) : __HYPERCALL_CLOBBER5); return (long)__res; diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c index 465647456753..f17d16607882 100644 --- a/arch/x86/kernel/callthunks.c +++ b/arch/x86/kernel/callthunks.c @@ -143,11 +143,6 @@ static bool skip_addr(void *dest) dest < (void*)relocate_kernel + KEXEC_CONTROL_CODE_MAX_SIZE) return true; #endif -#ifdef CONFIG_XEN - if (dest >= (void *)hypercall_page && - dest < (void*)hypercall_page + PAGE_SIZE) - return true; -#endif return false; } diff --git a/arch/x86/kernel/cet.c b/arch/x86/kernel/cet.c index d2c732a34e5d..303bf74d175b 100644 --- a/arch/x86/kernel/cet.c +++ b/arch/x86/kernel/cet.c @@ -81,6 +81,34 @@ static void do_user_cp_fault(struct pt_regs *regs, unsigned long error_code) static __ro_after_init bool ibt_fatal = true; +/* + * By definition, all missing-ENDBRANCH #CPs are a result of WFE && !ENDBR. + * + * For the kernel IBT no ENDBR selftest where #CPs are deliberately triggered, + * the WFE state of the interrupted context needs to be cleared to let execution + * continue. 
Otherwise when the CPU resumes from the instruction that just + * caused the previous #CP, another missing-ENDBRANCH #CP is raised and the CPU + * enters a dead loop. + * + * This is not a problem with IDT because it doesn't preserve WFE and IRET doesn't + * set WFE. But FRED provides space on the entry stack (in an expanded CS area) + * to save and restore the WFE state, thus the WFE state is no longer clobbered, + * so software must clear it. + */ +static void ibt_clear_fred_wfe(struct pt_regs *regs) +{ + /* + * No need to do any FRED checks. + * + * For IDT event delivery, the high-order 48 bits of CS are pushed + * as 0s into the stack, and later IRET ignores these bits. + * + * For FRED, a test to check if fred_cs.wfe is set would be dropped + * by compilers. + */ + regs->fred_cs.wfe = 0; +} + static void do_kernel_cp_fault(struct pt_regs *regs, unsigned long error_code) { if ((error_code & CP_EC) != CP_ENDBR) { @@ -90,6 +118,7 @@ static void do_kernel_cp_fault(struct pt_regs *regs, unsigned long error_code) if (unlikely(regs->ip == (unsigned long)&ibt_selftest_noendbr)) { regs->ax = 0; + ibt_clear_fred_wfe(regs); return; } @@ -97,6 +126,7 @@ static void do_kernel_cp_fault(struct pt_regs *regs, unsigned long error_code) if (!ibt_fatal) { printk(KERN_DEFAULT CUT_HERE); __warn(__FILE__, __LINE__, (void *)regs->ip, TAINT_WARN, regs, NULL); + ibt_clear_fred_wfe(regs); return; } BUG(); diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index d8408aafeed9..79d2e17f6582 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -1065,7 +1065,7 @@ static void init_amd(struct cpuinfo_x86 *c) */ if (spectre_v2_in_eibrs_mode(spectre_v2_enabled) && cpu_has(c, X86_FEATURE_AUTOIBRS)) - WARN_ON_ONCE(msr_set_bit(MSR_EFER, _EFER_AUTOIBRS)); + WARN_ON_ONCE(msr_set_bit(MSR_EFER, _EFER_AUTOIBRS) < 0); /* AMD CPUs don't need fencing after x2APIC/TSC_DEADLINE MSR writes. */ clear_cpu_cap(c, X86_FEATURE_APIC_MSRS_FENCE); diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index 392d09c936d6..e6fa03ed9172 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -178,8 +178,6 @@ struct _cpuid4_info_regs { struct amd_northbridge *nb; }; -static unsigned short num_cache_leaves; - /* AMD doesn't have CPUID4. Emulate it here to report the same information to the user. This makes some assumptions about the machine: L2 not shared, no SMT etc. that is currently true on AMD CPUs. 
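The AMD path has no deterministic cache-parameters leaf, so the driver derives the emulated leaf count from the extended leaves instead: CPUID 0x8000001D when TOPOEXT is available, otherwise 3 or 4 depending on whether CPUID 0x80000006 reports an L3. A minimal userspace sketch of that 3-vs-4 decode, assuming a GCC/clang <cpuid.h> environment; the program is illustrative only, not kernel code:

    #include <stdio.h>
    #include <cpuid.h>

    int main(void)
    {
            unsigned int eax, ebx, ecx, edx;

            /* CPUID 0x80000006 reports L2 (ECX) and L3 (EDX) parameters on AMD. */
            if (!__get_cpuid(0x80000006, &eax, &ebx, &ecx, &edx))
                    return 1;

            /*
             * EDX[15:12] is the L3 associativity field; it reads as zero when
             * the part has no L3, which is what the 0xf000 test in the hunk
             * below keys off when choosing between three and four leaves.
             */
            int leaves = (edx & 0xf000) ? 4 : 3;

            printf("emulated cache leaves: %d (L1d, L1i, L2%s)\n",
                   leaves, (edx & 0xf000) ? ", L3" : "");
            return 0;
    }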
@@ -717,20 +715,23 @@ void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c) void init_amd_cacheinfo(struct cpuinfo_x86 *c) { + struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index); if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { - num_cache_leaves = find_num_cache_leaves(c); + ci->num_leaves = find_num_cache_leaves(c); } else if (c->extended_cpuid_level >= 0x80000006) { if (cpuid_edx(0x80000006) & 0xf000) - num_cache_leaves = 4; + ci->num_leaves = 4; else - num_cache_leaves = 3; + ci->num_leaves = 3; } } void init_hygon_cacheinfo(struct cpuinfo_x86 *c) { - num_cache_leaves = find_num_cache_leaves(c); + struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index); + + ci->num_leaves = find_num_cache_leaves(c); } void init_intel_cacheinfo(struct cpuinfo_x86 *c) @@ -740,21 +741,21 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c) unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb; + struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index); if (c->cpuid_level > 3) { - static int is_initialized; - - if (is_initialized == 0) { - /* Init num_cache_leaves from boot CPU */ - num_cache_leaves = find_num_cache_leaves(c); - is_initialized++; - } + /* + * There should be at least one leaf. A non-zero value means + * that the number of leaves has been initialized. + */ + if (!ci->num_leaves) + ci->num_leaves = find_num_cache_leaves(c); /* * Whenever possible use cpuid(4), deterministic cache * parameters cpuid leaf to find the cache details */ - for (i = 0; i < num_cache_leaves; i++) { + for (i = 0; i < ci->num_leaves; i++) { struct _cpuid4_info_regs this_leaf = {}; int retval; @@ -790,14 +791,14 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c) * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for * trace cache */ - if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) { + if ((!ci->num_leaves || c->x86 == 15) && c->cpuid_level > 1) { /* supports eax=2 call */ int j, n; unsigned int regs[4]; unsigned char *dp = (unsigned char *)regs; int only_trace = 0; - if (num_cache_leaves != 0 && c->x86 == 15) + if (ci->num_leaves && c->x86 == 15) only_trace = 1; /* Number of times to iterate */ @@ -991,14 +992,12 @@ static void ci_leaf_init(struct cacheinfo *this_leaf, int init_cache_level(unsigned int cpu) { - struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); + struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu); - if (!num_cache_leaves) + /* There should be at least one leaf. 
*/ + if (!ci->num_leaves) return -ENOENT; - if (!this_cpu_ci) - return -EINVAL; - this_cpu_ci->num_levels = 3; - this_cpu_ci->num_leaves = num_cache_leaves; + return 0; } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a5c28975c608..3e9037690814 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -867,7 +867,7 @@ static void cpu_detect_tlb(struct cpuinfo_x86 *c) tlb_lld_4m[ENTRIES], tlb_lld_1g[ENTRIES]); } -static void get_cpu_vendor(struct cpuinfo_x86 *c) +void get_cpu_vendor(struct cpuinfo_x86 *c) { char *v = c->x86_vendor_id; int i; @@ -1649,15 +1649,11 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) detect_nopl(); } -void __init early_cpu_init(void) +void __init init_cpu_devs(void) { const struct cpu_dev *const *cdev; int count = 0; -#ifdef CONFIG_PROCESSOR_SELECT - pr_info("KERNEL supported cpus:\n"); -#endif - for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) { const struct cpu_dev *cpudev = *cdev; if (count >= X86_VENDOR_NUM) break; cpu_devs[count] = cpudev; count++; + } +} +void __init early_cpu_init(void) +{ #ifdef CONFIG_PROCESSOR_SELECT - { - unsigned int j; - - for (j = 0; j < 2; j++) { - if (!cpudev->c_ident[j]) - continue; - pr_info(" %s %s\n", cpudev->c_vendor, - cpudev->c_ident[j]); - } - } + unsigned int i, j; + + pr_info("KERNEL supported cpus:\n"); #endif + + init_cpu_devs(); + +#ifdef CONFIG_PROCESSOR_SELECT + for (i = 0; i < X86_VENDOR_NUM && cpu_devs[i]; i++) { + for (j = 0; j < 2; j++) { + if (!cpu_devs[i]->c_ident[j]) + continue; + pr_info(" %s %s\n", cpu_devs[i]->c_vendor, + cpu_devs[i]->c_ident[j]); + } } +#endif + early_identify_cpu(&boot_cpu_data); } diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index d1de300af173..8ded9f859a3a 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -555,7 +555,9 @@ static void init_intel(struct cpuinfo_x86 *c) c->x86_vfm == INTEL_WESTMERE_EX)) set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR); - if (boot_cpu_has(X86_FEATURE_MWAIT) && c->x86_vfm == INTEL_ATOM_GOLDMONT) + if (boot_cpu_has(X86_FEATURE_MWAIT) && + (c->x86_vfm == INTEL_ATOM_GOLDMONT || + c->x86_vfm == INTEL_LUNARLAKE_M)) set_cpu_bug(c, X86_BUG_MONITOR); #ifdef CONFIG_X86_64 diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index d18078834ded..dc12fe5ef3ca 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -223,6 +223,63 @@ static void hv_machine_crash_shutdown(struct pt_regs *regs) hyperv_cleanup(); } #endif /* CONFIG_CRASH_DUMP */ + +static u64 hv_ref_counter_at_suspend; +static void (*old_save_sched_clock_state)(void); +static void (*old_restore_sched_clock_state)(void); + +/* + * Hyper-V clock counter resets during hibernation. Save and restore the + * clock offset across suspend/resume, accounting for the time that passed + * before suspend. This ensures that sched_clock, when backed by the hv tsc + * page clocksource, proceeds from where it left off at suspend and that + * kernel message timestamps are correct after resume. + */ +static void save_hv_clock_tsc_state(void) +{ + hv_ref_counter_at_suspend = hv_read_reference_counter(); +} + +static void restore_hv_clock_tsc_state(void) +{ + /* + * Adjust the offsets used by hv tsc clocksource to + * account for the time spent before hibernation. + * adjusted value = reference counter (time) at suspend + * - reference counter (time) now. */ + hv_adj_sched_clock_offset(hv_ref_counter_at_suspend - hv_read_reference_counter()); +}
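The offset bookkeeping above is plain counter arithmetic: on resume, the clocksource offset grows by exactly the amount the reference counter fell back across hibernation. A self-contained sketch of that adjustment, with a fake counter standing in for hv_read_reference_counter() and clk_offset standing in for what hv_adj_sched_clock_offset() modifies; all names and values here are illustrative:

    #include <stdio.h>
    #include <stdint.h>

    static uint64_t hw_count;   /* raw reference counter; resets across hibernation */
    static int64_t clk_offset;  /* correction folded into every readout */

    static uint64_t read_ref(void)
    {
            return hw_count + clk_offset;
    }

    int main(void)
    {
            hw_count = 5000;                        /* time passes, then we hibernate */
            uint64_t at_suspend = read_ref();       /* save_hv_clock_tsc_state() */

            hw_count = 100;                         /* counter restarted on resume */
            /* restore path: offset += (time at suspend - time now) */
            clk_offset += (int64_t)(at_suspend - read_ref());

            printf("clock resumes at %llu, matching %llu seen at suspend\n",
                   (unsigned long long)read_ref(), (unsigned long long)at_suspend);
            return 0;
    }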
+ +/* + * Functions to override save_sched_clock_state and restore_sched_clock_state + * functions of x86_platform. The Hyper-V clock counter is reset during + * suspend-resume and the offset used to measure time needs to be + * corrected after resume. + */ +static void hv_save_sched_clock_state(void) +{ + old_save_sched_clock_state(); + save_hv_clock_tsc_state(); +} + +static void hv_restore_sched_clock_state(void) +{ + restore_hv_clock_tsc_state(); + old_restore_sched_clock_state(); +} + +static void __init x86_setup_ops_for_tsc_pg_clock(void) +{ + if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE)) + return; + + old_save_sched_clock_state = x86_platform.save_sched_clock_state; + x86_platform.save_sched_clock_state = hv_save_sched_clock_state; + + old_restore_sched_clock_state = x86_platform.restore_sched_clock_state; + x86_platform.restore_sched_clock_state = hv_restore_sched_clock_state; +} #endif /* CONFIG_HYPERV */ static uint32_t __init ms_hyperv_platform(void) @@ -579,6 +636,7 @@ static void __init ms_hyperv_init_platform(void) /* Register Hyper-V specific clocksource */ hv_init_clocksource(); + x86_setup_ops_for_tsc_pg_clock(); hv_vtl_init_platform(); #endif /* diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 621a151ccf7d..b2e313ea17bf 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -428,8 +428,8 @@ void __init topology_apply_cmdline_limits_early(void) { unsigned int possible = nr_cpu_ids; - /* 'maxcpus=0' 'nosmp' 'nolapic' 'disableapic' 'noapic' */ - if (!setup_max_cpus || ioapic_is_disabled || apic_is_disabled) + /* 'maxcpus=0' 'nosmp' 'nolapic' 'disableapic' */ + if (!setup_max_cpus || apic_is_disabled) possible = 1; /* 'possible_cpus=N' */ @@ -443,7 +443,7 @@ void __init topology_apply_cmdline_limits_early(void) static __init bool restrict_to_up(void) { - if (!smp_found_config || ioapic_is_disabled) + if (!smp_found_config) return true; /* * XEN PV is special as it does not advertise the local APIC diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index 6bc1eb2a21bd..887b0b8e21e3 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -190,7 +190,8 @@ int ssp_get(struct task_struct *target, const struct user_regset *regset, struct fpu *fpu = &target->thread.fpu; struct cet_user_state *cetregs; - if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK)) + if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) || + !ssp_active(target, regset)) return -ENODEV; sync_fpstate(fpu); diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 1065ab995305..8f62e0666dea 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -64,16 +64,6 @@ setfx: } /* - * Update the value of PKRU register that was already pushed onto the signal frame. - */ -static inline int update_pkru_in_sigframe(struct xregs_state __user *buf, u32 pkru) -{ - if (unlikely(!cpu_feature_enabled(X86_FEATURE_OSPKE))) - return 0; - return __put_user(pkru, (unsigned int __user *)get_xsave_addr_user(buf, XFEATURE_PKRU)); -} - -/* * Signal frame handlers.
*/ static inline bool save_fsave_header(struct task_struct *tsk, void __user *buf) @@ -168,14 +158,8 @@ static inline bool save_xstate_epilog(void __user *buf, int ia32_frame, static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf, u32 pkru) { - int err = 0; - - if (use_xsave()) { - err = xsave_to_user_sigframe(buf); - if (!err) - err = update_pkru_in_sigframe(buf, pkru); - return err; - } + if (use_xsave()) + return xsave_to_user_sigframe(buf, pkru); if (use_fxsr()) return fxsave_to_user_sigframe((struct fxregs_state __user *) buf); diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h index 0b86a5002c84..aa16f1a1bbcf 100644 --- a/arch/x86/kernel/fpu/xstate.h +++ b/arch/x86/kernel/fpu/xstate.h @@ -69,6 +69,28 @@ static inline u64 xfeatures_mask_independent(void) return fpu_kernel_cfg.independent_features; } +/* + * Update the value of PKRU register that was already pushed onto the signal frame. + */ +static inline int update_pkru_in_sigframe(struct xregs_state __user *buf, u64 mask, u32 pkru) +{ + u64 xstate_bv; + int err; + + if (unlikely(!cpu_feature_enabled(X86_FEATURE_OSPKE))) + return 0; + + /* Mark PKRU as in-use so that it is restored correctly. */ + xstate_bv = (mask & xfeatures_in_use()) | XFEATURE_MASK_PKRU; + + err = __put_user(xstate_bv, &buf->header.xfeatures); + if (err) + return err; + + /* Update PKRU value in the userspace xsave buffer. */ + return __put_user(pkru, (unsigned int __user *)get_xsave_addr_user(buf, XFEATURE_PKRU)); +} + /* XSAVE/XRSTOR wrapper functions */ #ifdef CONFIG_X86_64 @@ -256,7 +278,7 @@ static inline u64 xfeatures_need_sigframe_write(void) * The caller has to zero buf::header before calling this because XSAVE* * does not touch the reserved fields in the header. */ -static inline int xsave_to_user_sigframe(struct xregs_state __user *buf) +static inline int xsave_to_user_sigframe(struct xregs_state __user *buf, u32 pkru) { /* * Include the features which are not xsaved/rstored by the kernel @@ -281,6 +303,9 @@ static inline int xsave_to_user_sigframe(struct xregs_state __user *buf) XSTATE_OP(XSAVE, buf, lmask, hmask, err); clac(); + if (!err) + err = update_pkru_in_sigframe(buf, mask, pkru); + return err; } diff --git a/arch/x86/kernel/fred.c b/arch/x86/kernel/fred.c index 8d32c3f48abc..5e2cd1004980 100644 --- a/arch/x86/kernel/fred.c +++ b/arch/x86/kernel/fred.c @@ -50,7 +50,13 @@ void cpu_init_fred_exceptions(void) FRED_CONFIG_ENTRYPOINT(asm_fred_entrypoint_user)); wrmsrl(MSR_IA32_FRED_STKLVLS, 0); - wrmsrl(MSR_IA32_FRED_RSP0, 0); + + /* + * After a CPU offline/online cycle, the FRED RSP0 MSR should be + * resynchronized with its per-CPU cache. + */ + wrmsrl(MSR_IA32_FRED_RSP0, __this_cpu_read(fred_rsp0)); + + wrmsrl(MSR_IA32_FRED_RSP1, 0); wrmsrl(MSR_IA32_FRED_RSP2, 0); wrmsrl(MSR_IA32_FRED_RSP3, 0); diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index e9e88c342f75..540443d699e3 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S @@ -13,6 +13,7 @@ #include <asm/pgtable_types.h> #include <asm/nospec-branch.h> #include <asm/unwind_hints.h> +#include <asm/asm-offsets.h> /* * Must be relocatable PIC code callable as a C function, in particular @@ -242,6 +243,13 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) movq CR0(%r8), %r8 movq %rax, %cr3 movq %r8, %cr0 + +#ifdef CONFIG_KEXEC_JUMP + /* Saved in save_processor_state.
*/ + movq $saved_context, %rax + lgdt saved_context_gdt_desc(%rax) +#endif + movq %rbp, %rax popf diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index 4eefaac64c6c..9e51242ed125 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -172,6 +172,14 @@ void arch_static_call_transform(void *site, void *tramp, void *func, bool tail) } EXPORT_SYMBOL_GPL(arch_static_call_transform); +noinstr void __static_call_update_early(void *tramp, void *func) +{ + BUG_ON(system_state != SYSTEM_BOOTING); + BUG_ON(static_call_initialized); + __text_gen_insn(tramp, JMP32_INSN_OPCODE, tramp, func, JMP32_INSN_SIZE); + sync_core(); +} + #ifdef CONFIG_MITIGATION_RETHUNK /* * This is called by apply_returns() to fix up static call trampolines, diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index fab3ac9a4574..6a17396c8174 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -519,14 +519,10 @@ INIT_PER_CPU(irq_stack_backing_store); * linker will never mark as relocatable. (Using just ABSOLUTE() is not * sufficient for that). */ -#ifdef CONFIG_XEN #ifdef CONFIG_XEN_PV xen_elfnote_entry_value = ABSOLUTE(xen_elfnote_entry) + ABSOLUTE(startup_xen); #endif -xen_elfnote_hypercall_page_value = - ABSOLUTE(xen_elfnote_hypercall_page) + ABSOLUTE(hypercall_page); -#endif #ifdef CONFIG_PVH xen_elfnote_phys32_entry_value = ABSOLUTE(xen_elfnote_phys32_entry) + ABSOLUTE(pvh_start_xen - LOAD_OFFSET); diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 097bdc022d0f..ae0b438a2c99 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -36,6 +36,26 @@ u32 kvm_cpu_caps[NR_KVM_CPU_CAPS] __read_mostly; EXPORT_SYMBOL_GPL(kvm_cpu_caps); +struct cpuid_xstate_sizes { + u32 eax; + u32 ebx; + u32 ecx; +}; + +static struct cpuid_xstate_sizes xstate_sizes[XFEATURE_MAX] __ro_after_init; + +void __init kvm_init_xstate_sizes(void) +{ + u32 ign; + int i; + + for (i = XFEATURE_YMM; i < ARRAY_SIZE(xstate_sizes); i++) { + struct cpuid_xstate_sizes *xs = &xstate_sizes[i]; + + cpuid_count(0xD, i, &xs->eax, &xs->ebx, &xs->ecx, &ign); + } +} + u32 xstate_required_size(u64 xstate_bv, bool compacted) { int feature_bit = 0; @@ -44,14 +64,15 @@ u32 xstate_required_size(u64 xstate_bv, bool compacted) xstate_bv &= XFEATURE_MASK_EXTEND; while (xstate_bv) { if (xstate_bv & 0x1) { - u32 eax, ebx, ecx, edx, offset; - cpuid_count(0xD, feature_bit, &eax, &ebx, &ecx, &edx); + struct cpuid_xstate_sizes *xs = &xstate_sizes[feature_bit]; + u32 offset; + /* ECX[1]: 64B alignment in compacted form */ if (compacted) - offset = (ecx & 0x2) ? ALIGN(ret, 64) : ret; + offset = (xs->ecx & 0x2) ? ALIGN(ret, 64) : ret; else - offset = ebx; - ret = max(ret, offset + eax); + offset = xs->ebx; + ret = max(ret, offset + xs->eax); } xstate_bv >>= 1;
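kvm_init_xstate_sizes() snapshots the CPUID(0xD) subleaves once at init so that xstate_required_size() no longer has to execute CPUID on every call. A userspace sketch of the same cache-then-compute pattern, assuming <cpuid.h> and XSAVE-capable hardware; the subleaf bound and the 576-byte base (512-byte legacy area plus 64-byte XSAVE header) follow the architectural layout, everything else is illustrative:

    #include <stdio.h>
    #include <cpuid.h>

    #define XFEATURE_YMM    2
    #define NR_SUBLEAVES    19      /* illustrative bound, mirroring XFEATURE_MAX */

    static struct { unsigned int eax, ebx, ecx; } xsz[NR_SUBLEAVES];

    /* One-time snapshot of CPUID(0xD), as kvm_init_xstate_sizes() does. */
    static void snapshot_xstate_sizes(void)
    {
            unsigned int edx;
            int i;

            for (i = XFEATURE_YMM; i < NR_SUBLEAVES; i++)
                    __get_cpuid_count(0xD, i, &xsz[i].eax, &xsz[i].ebx,
                                      &xsz[i].ecx, &edx);
    }

    /* Non-compacted format: end = per-feature offset (EBX) + size (EAX). */
    static unsigned int xstate_required_size(unsigned long long xstate_bv)
    {
            unsigned int ret = 512 + 64;    /* legacy area + XSAVE header */
            int i;

            for (i = XFEATURE_YMM; i < NR_SUBLEAVES; i++)
                    if (((xstate_bv >> i) & 1) && xsz[i].ebx + xsz[i].eax > ret)
                            ret = xsz[i].ebx + xsz[i].eax;
            return ret;
    }

    int main(void)
    {
            snapshot_xstate_sizes();
            printf("buffer size with AVX enabled: %u bytes\n",
                   xstate_required_size(1ULL << XFEATURE_YMM));
            return 0;
    }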
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index c8dc66eddefd..f16a7b2c2adc 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -31,6 +31,7 @@ int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, bool exact_only); +void __init kvm_init_xstate_sizes(void); u32 xstate_required_size(u64 xstate_bv, bool compacted); int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 22e7ad235123..2401606db260 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3364,18 +3364,6 @@ static bool fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, return true; } -static bool is_access_allowed(struct kvm_page_fault *fault, u64 spte) -{ - if (fault->exec) - return is_executable_pte(spte); - - if (fault->write) - return is_writable_pte(spte); - - /* Fault was on Read access */ - return spte & PT_PRESENT_MASK; -} - /* * Returns the last level spte pointer of the shadow page walk for the given * gpa, and sets *spte to the spte value. This spte may be non-present. If no diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h index f332b33bc817..af10bc0380a3 100644 --- a/arch/x86/kvm/mmu/spte.h +++ b/arch/x86/kvm/mmu/spte.h @@ -462,6 +462,23 @@ static inline bool is_mmu_writable_spte(u64 spte) } /* + * Returns true if the access indicated by @fault is allowed by the existing + * SPTE protections. Note, the caller is responsible for checking that the + * SPTE is a shadow-present, leaf SPTE (either before or after). + */ +static inline bool is_access_allowed(struct kvm_page_fault *fault, u64 spte) +{ + if (fault->exec) + return is_executable_pte(spte); + + if (fault->write) + return is_writable_pte(spte); + + /* Fault was on Read access */ + return spte & PT_PRESENT_MASK; +} + +/* * If the MMU-writable flag is cleared, i.e. the SPTE is write-protected for * write-tracking, remote TLBs must be flushed, even if the SPTE was read-only, * as KVM allows stale Writable TLB entries to exist. When dirty logging, KVM
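The check above is deliberately asymmetric: only the access type that actually faulted is tested against the SPTE, so for example a read fault on a present but write-protected SPTE is still considered allowed, and hence spurious. A toy model with made-up bit positions (the real masks live in spte.h and depend on the paging mode):

    #include <stdbool.h>
    #include <stdio.h>

    /* Made-up bit positions for this sketch only. */
    #define SPTE_PRESENT    (1ULL << 0)
    #define SPTE_WRITABLE   (1ULL << 1)
    #define SPTE_EXEC       (1ULL << 2)

    struct page_fault { bool exec, write; };

    /* Mirrors is_access_allowed(): test only the access that faulted. */
    static bool access_allowed(const struct page_fault *fault,
                               unsigned long long spte)
    {
            if (fault->exec)
                    return spte & SPTE_EXEC;
            if (fault->write)
                    return spte & SPTE_WRITABLE;
            return spte & SPTE_PRESENT;     /* fault was on a read */
    }

    int main(void)
    {
            struct page_fault write_fault = { .write = true };
            struct page_fault read_fault  = { 0 };
            unsigned long long ro_spte = SPTE_PRESENT | SPTE_EXEC;

            printf("write to read-only SPTE allowed: %d\n",
                   access_allowed(&write_fault, ro_spte));  /* 0: must fix SPTE */
            printf("read of present SPTE allowed:   %d\n",
                   access_allowed(&read_fault, ro_spte));   /* 1: spurious fault */
            return 0;
    }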
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 4508d868f1cd..2f15e0e33903 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -985,6 +985,11 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, if (fault->prefetch && is_shadow_present_pte(iter->old_spte)) return RET_PF_SPURIOUS; + if (is_shadow_present_pte(iter->old_spte) && + is_access_allowed(fault, iter->old_spte) && + is_last_spte(iter->old_spte, iter->level)) + return RET_PF_SPURIOUS; + if (unlikely(!fault->slot)) new_spte = make_mmio_spte(vcpu, iter->gfn, ACC_ALL); else diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index 4b74ea91f4e6..65fd245a9953 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -1199,6 +1199,12 @@ bool avic_hardware_setup(void) return false; } + if (cc_platform_has(CC_ATTR_HOST_SEV_SNP) && + !boot_cpu_has(X86_FEATURE_HV_INUSE_WR_ALLOWED)) { + pr_warn("AVIC disabled: missing HvInUseWrAllowed on SNP-enabled system\n"); + return false; + } + if (boot_cpu_has(X86_FEATURE_AVIC)) { pr_info("AVIC enabled\n"); } else if (force_avic) { diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index dd15cc635655..21dacd312779 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3201,15 +3201,6 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) if (data & ~supported_de_cfg) return 1; - /* - * Don't let the guest change the host-programmed value. The - * MSR is very model specific, i.e. contains multiple bits that - * are completely unknown to KVM, and the one bit known to KVM - * is simply a reflection of hardware capabilities. - */ - if (!msr->host_initiated && data != svm->msr_decfg) - return 1; - svm->msr_decfg = data; break; } diff --git a/arch/x86/kvm/vmx/posted_intr.h b/arch/x86/kvm/vmx/posted_intr.h index 1715d2ab07be..ad9116a99bcc 100644 --- a/arch/x86/kvm/vmx/posted_intr.h +++ b/arch/x86/kvm/vmx/posted_intr.h @@ -2,7 +2,7 @@ #ifndef __KVM_X86_VMX_POSTED_INTR_H #define __KVM_X86_VMX_POSTED_INTR_H -#include <linux/find.h> +#include <linux/bitmap.h> #include <asm/posted_intr.h> void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2e713480933a..c79a8cc57ba4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9976,7 +9976,7 @@ static int complete_hypercall_exit(struct kvm_vcpu *vcpu) { u64 ret = vcpu->run->hypercall.ret; - if (!is_64_bit_mode(vcpu)) + if (!is_64_bit_hypercall(vcpu)) ret = (u32)ret; kvm_rax_write(vcpu, ret); ++vcpu->stat.hypercalls; @@ -12724,6 +12724,13 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm_hv_init_vm(kvm); kvm_xen_init_vm(kvm); + if (ignore_msrs && !report_ignored_msrs) { + pr_warn_once("Running KVM with ignore_msrs=1 and report_ignored_msrs=0 is not a\n" "supported configuration. Lying to the guest about the existence of MSRs\n" "may cause the guest operating system to hang or produce errors.
If a guest\n" + "does not run without ignore_msrs=1, please report it to kvm@vger.kernel.org.\n"); + } + return 0; out_uninit_mmu: @@ -13997,6 +14004,8 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_rmp_fault); static int __init kvm_x86_init(void) { + kvm_init_xstate_sizes(); + kvm_mmu_x86_module_init(); mitigate_smt_rsb &= boot_cpu_has_bug(X86_BUG_SMT_RSB) && cpu_smt_possible(); return 0; diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S index 4357ec2a0bfc..89ecd57c9d42 100644 --- a/arch/x86/lib/getuser.S +++ b/arch/x86/lib/getuser.S @@ -44,9 +44,8 @@ .pushsection runtime_ptr_USER_PTR_MAX,"a" .long 1b - 8 - . .popsection - cmp %rax, %rdx - sbb %rdx, %rdx - or %rdx, %rax + cmp %rdx, %rax + cmova %rdx, %rax .else cmp $TASK_SIZE_MAX-\size+1, %eax jae .Lbad_get_user diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c index 437e96fb4977..5ab7bd2f1983 100644 --- a/arch/x86/mm/ident_map.c +++ b/arch/x86/mm/ident_map.c @@ -174,7 +174,7 @@ static int ident_p4d_init(struct x86_mapping_info *info, p4d_t *p4d_page, if (result) return result; - set_p4d(p4d, __p4d(__pa(pud) | info->kernpg_flag)); + set_p4d(p4d, __p4d(__pa(pud) | info->kernpg_flag | _PAGE_NOPTISHADOW)); } return 0; @@ -218,14 +218,14 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page, if (result) return result; if (pgtable_l5_enabled()) { - set_pgd(pgd, __pgd(__pa(p4d) | info->kernpg_flag)); + set_pgd(pgd, __pgd(__pa(p4d) | info->kernpg_flag | _PAGE_NOPTISHADOW)); } else { /* * With p4d folded, pgd is equal to p4d. * The pgd entry has to point to the pud page table in this case. */ pud_t *pud = pud_offset(p4d, 0); - set_pgd(pgd, __pgd(__pa(pud) | info->kernpg_flag)); + set_pgd(pgd, __pgd(__pa(pud) | info->kernpg_flag | _PAGE_NOPTISHADOW)); } } diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index c6d29f283001..62aa4d66a032 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -1080,7 +1080,8 @@ struct execmem_info __init *execmem_arch_setup(void) start = MODULES_VADDR + offset; - if (IS_ENABLED(CONFIG_ARCH_HAS_EXECMEM_ROX)) { + if (IS_ENABLED(CONFIG_ARCH_HAS_EXECMEM_ROX) && + cpu_feature_enabled(X86_FEATURE_PSE)) { pgprot = PAGE_KERNEL_ROX; flags = EXECMEM_KASAN_SHADOW | EXECMEM_ROX_CACHE; } else { diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 069e421c2247..95bc50a8541c 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -354,7 +354,7 @@ bool cpu_cache_has_invalidate_memregion(void) { return !cpu_feature_enabled(X86_FEATURE_HYPERVISOR); } -EXPORT_SYMBOL_NS_GPL(cpu_cache_has_invalidate_memregion, DEVMEM); +EXPORT_SYMBOL_NS_GPL(cpu_cache_has_invalidate_memregion, "DEVMEM"); int cpu_cache_invalidate_memregion(int res_desc) { @@ -363,7 +363,7 @@ int cpu_cache_invalidate_memregion(int res_desc) wbinvd_on_all_cpus(); return 0; } -EXPORT_SYMBOL_NS_GPL(cpu_cache_invalidate_memregion, DEVMEM); +EXPORT_SYMBOL_NS_GPL(cpu_cache_invalidate_memregion, "DEVMEM"); #endif static void __cpa_flush_all(void *arg) diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index 851ec8f1363a..5f0d579932c6 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -132,7 +132,7 @@ pgd_t __pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd) * Top-level entries added to init_mm's usermode pgd after boot * will not be automatically propagated to other mms. 
*/ - if (!pgdp_maps_userspace(pgdp)) + if (!pgdp_maps_userspace(pgdp) || (pgd.pgd & _PAGE_NOPTISHADOW)) return pgd; /* diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 84e5adbd0925..43dcd8c7badc 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -2,6 +2,7 @@ #include <linux/console.h> #include <linux/cpu.h> +#include <linux/instrumentation.h> #include <linux/kexec.h> #include <linux/memblock.h> #include <linux/slab.h> @@ -21,7 +22,8 @@ #include "xen-ops.h" -EXPORT_SYMBOL_GPL(hypercall_page); +DEFINE_STATIC_CALL(xen_hypercall, xen_hypercall_hvm); +EXPORT_STATIC_CALL_TRAMP(xen_hypercall); /* * Pointer to the xen_vcpu_info structure or @@ -68,6 +70,67 @@ EXPORT_SYMBOL(xen_start_flags); */ struct shared_info *HYPERVISOR_shared_info = &xen_dummy_shared_info; +static __ref void xen_get_vendor(void) +{ + init_cpu_devs(); + cpu_detect(&boot_cpu_data); + get_cpu_vendor(&boot_cpu_data); +} + +void xen_hypercall_setfunc(void) +{ + if (static_call_query(xen_hypercall) != xen_hypercall_hvm) + return; + + if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD || + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)) + static_call_update(xen_hypercall, xen_hypercall_amd); + else + static_call_update(xen_hypercall, xen_hypercall_intel); +} + +/* + * Evaluate processor vendor in order to select the correct hypercall + * function for HVM/PVH guests. + * Might be called very early in boot before vendor has been set by + * early_cpu_init(). + */ +noinstr void *__xen_hypercall_setfunc(void) +{ + void (*func)(void); + + /* + * Xen is supported only on CPUs with CPUID, so testing for + * X86_FEATURE_CPUID is a test for early_cpu_init() having been + * run. + * + * Note that __xen_hypercall_setfunc() is noinstr only due to a nasty + * dependency chain: it is being called via the xen_hypercall static + * call when running as a PVH or HVM guest. Hypercalls need to be + * noinstr due to PV guests using hypercalls in noinstr code. So we + * can safely tag the function body as "instrumentation ok", since + * the PV guest requirement is not of interest here (xen_get_vendor() + * calls noinstr functions, and static_call_update_early() might do + * so, too). + */ + instrumentation_begin(); + + if (!boot_cpu_has(X86_FEATURE_CPUID)) + xen_get_vendor(); + + if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD || + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)) + func = xen_hypercall_amd; + else + func = xen_hypercall_intel; + + static_call_update_early(xen_hypercall, func); + + instrumentation_end(); + + return func; +} + static int xen_cpu_up_online(unsigned int cpu) { xen_init_lock_cpu(cpu); diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index 24d2957a4726..fe57ff85d004 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -106,15 +106,8 @@ static void __init init_hvm_pv_info(void) /* PVH set up hypercall page in xen_prepare_pvh(). */ if (xen_pvh_domain()) pv_info.name = "Xen PVH"; - else { - u64 pfn; - uint32_t msr; - + else pv_info.name = "Xen HVM"; - msr = cpuid_ebx(base + 2); - pfn = __pa(hypercall_page); - wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32)); - } xen_setup_features(); @@ -300,6 +293,10 @@ static uint32_t __init xen_platform_hvm(void) if (xen_pv_domain()) return 0; + /* Set correct hypercall function. 
*/ + if (xen_domain()) + xen_hypercall_setfunc(); + if (xen_pvh_domain() && nopv) { /* Guest booting via the Xen-PVH boot entry goes here */ pr_info("\"nopv\" parameter is ignored in PVH guest\n"); diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index d6818c6cafda..a8eb7e0c473c 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1341,6 +1341,9 @@ asmlinkage __visible void __init xen_start_kernel(struct start_info *si) xen_domain_type = XEN_PV_DOMAIN; xen_start_flags = xen_start_info->flags; + /* Interrupts are guaranteed to be off initially. */ + early_boot_irqs_disabled = true; + static_call_update_early(xen_hypercall, xen_hypercall_pv); xen_setup_features(); @@ -1431,7 +1434,6 @@ asmlinkage __visible void __init xen_start_kernel(struct start_info *si) WARN_ON(xen_cpuhp_setup(xen_cpu_up_prepare_pv, xen_cpu_dead_pv)); local_irq_disable(); - early_boot_irqs_disabled = true; xen_raw_console_write("mapping kernel into physical memory\n"); xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base, diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c index bf68c329fc01..0e3d930bcb89 100644 --- a/arch/x86/xen/enlighten_pvh.c +++ b/arch/x86/xen/enlighten_pvh.c @@ -129,17 +129,10 @@ static void __init pvh_arch_setup(void) void __init xen_pvh_init(struct boot_params *boot_params) { - u32 msr; - u64 pfn; - xen_pvh = 1; xen_domain_type = XEN_HVM_DOMAIN; xen_start_flags = pvh_start_info.flags; - msr = cpuid_ebx(xen_cpuid_base() + 2); - pfn = __pa(hypercall_page); - wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32)); - x86_init.oem.arch_setup = pvh_arch_setup; x86_init.oem.banner = xen_banner; diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index 83189cf5cdce..b518f36d1ca2 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -20,10 +20,33 @@ #include <linux/init.h> #include <linux/linkage.h> +#include <linux/objtool.h> #include <../entry/calling.h> .pushsection .noinstr.text, "ax" /* + * PV hypercall interface to the hypervisor. + * + * Called via inline asm(), so better preserve %rcx and %r11. + * + * Input: + * %eax: hypercall number + * %rdi, %rsi, %rdx, %r10, %r8: args 1..5 for the hypercall + * Output: %rax + */ +SYM_FUNC_START(xen_hypercall_pv) + ANNOTATE_NOENDBR + push %rcx + push %r11 + UNWIND_HINT_SAVE + syscall + UNWIND_HINT_RESTORE + pop %r11 + pop %rcx + RET +SYM_FUNC_END(xen_hypercall_pv) + +/* * Disabling events is simply a matter of making the event mask * non-zero. */ @@ -176,7 +199,6 @@ SYM_CODE_START(xen_early_idt_handler_array) SYM_CODE_END(xen_early_idt_handler_array) __FINIT -hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32 /* * Xen64 iret frame: * * ss * rsp * rflags * cs * rip <-- standard iret frame * - * flags + * flags <-- xen_iret must push from here on * - * rcx } - * r11 }<-- pushed by hypercall page - * rsp->rax } + * rcx + * r11 + * rsp->rax */ +.macro xen_hypercall_iret + pushq $0 /* Flags */ + push %rcx + push %r11 + push %rax + mov $__HYPERVISOR_iret, %eax + syscall /* Do the IRET.
*/ +#ifdef CONFIG_MITIGATION_SLS + int3 +#endif +.endm + SYM_CODE_START(xen_iret) UNWIND_HINT_UNDEFINED ANNOTATE_NOENDBR - pushq $0 - jmp hypercall_iret + xen_hypercall_iret SYM_CODE_END(xen_iret) /* @@ -301,8 +334,7 @@ SYM_CODE_START(xen_entry_SYSENTER_compat) ENDBR lea 16(%rsp), %rsp /* strip %rcx, %r11 */ mov $-ENOSYS, %rax - pushq $0 - jmp hypercall_iret + xen_hypercall_iret SYM_CODE_END(xen_entry_SYSENTER_compat) SYM_CODE_END(xen_entry_SYSCALL_compat) diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 7f6c69dbb816..9252652afe59 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -6,9 +6,11 @@ #include <linux/elfnote.h> #include <linux/init.h> +#include <linux/instrumentation.h> #include <asm/boot.h> #include <asm/asm.h> +#include <asm/frame.h> #include <asm/msr.h> #include <asm/page_types.h> #include <asm/percpu.h> @@ -20,28 +22,6 @@ #include <xen/interface/xen-mca.h> #include <asm/xen/interface.h> -.pushsection .noinstr.text, "ax" - .balign PAGE_SIZE -SYM_CODE_START(hypercall_page) - .rept (PAGE_SIZE / 32) - UNWIND_HINT_FUNC - ANNOTATE_NOENDBR - ANNOTATE_UNRET_SAFE - ret - /* - * Xen will write the hypercall page, and sort out ENDBR. - */ - .skip 31, 0xcc - .endr - -#define HYPERCALL(n) \ - .equ xen_hypercall_##n, hypercall_page + __HYPERVISOR_##n * 32; \ - .type xen_hypercall_##n, @function; .size xen_hypercall_##n, 32 -#include <asm/xen-hypercalls.h> -#undef HYPERCALL -SYM_CODE_END(hypercall_page) -.popsection - #ifdef CONFIG_XEN_PV __INIT SYM_CODE_START(startup_xen) @@ -87,6 +67,87 @@ SYM_CODE_END(xen_cpu_bringup_again) #endif #endif + .pushsection .noinstr.text, "ax" +/* + * Xen hypercall interface to the hypervisor. + * + * Input: + * %eax: hypercall number + * 32-bit: + * %ebx, %ecx, %edx, %esi, %edi: args 1..5 for the hypercall + * 64-bit: + * %rdi, %rsi, %rdx, %r10, %r8: args 1..5 for the hypercall + * Output: %[er]ax + */ +SYM_FUNC_START(xen_hypercall_hvm) + ENDBR + FRAME_BEGIN + /* Save all relevant registers (caller save and arguments). */ +#ifdef CONFIG_X86_32 + push %eax + push %ebx + push %ecx + push %edx + push %esi + push %edi +#else + push %rax + push %rcx + push %rdx + push %rdi + push %rsi + push %r11 + push %r10 + push %r9 + push %r8 +#ifdef CONFIG_FRAME_POINTER + pushq $0 /* Dummy push for stack alignment. */ +#endif +#endif + /* Set the vendor specific function. */ + call __xen_hypercall_setfunc + /* Set ZF = 1 if AMD, Restore saved registers. */ +#ifdef CONFIG_X86_32 + lea xen_hypercall_amd, %ebx + cmp %eax, %ebx + pop %edi + pop %esi + pop %edx + pop %ecx + pop %ebx + pop %eax +#else + lea xen_hypercall_amd(%rip), %rbx + cmp %rax, %rbx +#ifdef CONFIG_FRAME_POINTER + pop %rax /* Dummy pop. */ +#endif + pop %r8 + pop %r9 + pop %r10 + pop %r11 + pop %rsi + pop %rdi + pop %rdx + pop %rcx + pop %rax +#endif + /* Use correct hypercall function. */ + jz xen_hypercall_amd + jmp xen_hypercall_intel +SYM_FUNC_END(xen_hypercall_hvm) + +SYM_FUNC_START(xen_hypercall_amd) + vmmcall + RET +SYM_FUNC_END(xen_hypercall_amd) + +SYM_FUNC_START(xen_hypercall_intel) + vmcall + RET +SYM_FUNC_END(xen_hypercall_intel) + .popsection + ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux") ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz "2.6") ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz "xen-3.0") @@ -116,8 +177,6 @@ SYM_CODE_END(xen_cpu_bringup_again) #else # define FEATURES_DOM0 0 #endif - ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .globl xen_elfnote_hypercall_page; - xen_elfnote_hypercall_page: _ASM_PTR xen_elfnote_hypercall_page_value - .) 
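The xen_hypercall_hvm stub above runs at most once: the first hypercall detects the CPU vendor, retargets the static call, and forwards to the VMMCALL (AMD/Hygon) or VMCALL (Intel) variant. The same one-shot dispatch can be modelled in plain C with a self-patching function pointer; the CPUID vendor-string decode below is architectural, everything else is a stand-in for the static-call machinery:

    #include <stdio.h>
    #include <string.h>
    #include <cpuid.h>

    static void hypercall_amd(void)   { puts("AMD/Hygon: would execute VMMCALL"); }
    static void hypercall_intel(void) { puts("Intel/other: would execute VMCALL"); }

    static void hypercall_boot(void);

    /* Starts out pointing at the setup stub, like the xen_hypercall static call. */
    static void (*xen_hypercall)(void) = hypercall_boot;

    static void hypercall_boot(void)
    {
            unsigned int a, b, c, d;
            char vendor[13] = { 0 };

            __get_cpuid(0, &a, &b, &c, &d);
            memcpy(vendor + 0, &b, 4);      /* vendor string order: EBX, EDX, ECX */
            memcpy(vendor + 4, &d, 4);
            memcpy(vendor + 8, &c, 4);

            /* One-shot retarget, standing in for static_call_update_early(). */
            if (!strcmp(vendor, "AuthenticAMD") || !strcmp(vendor, "HygonGenuine"))
                    xen_hypercall = hypercall_amd;
            else
                    xen_hypercall = hypercall_intel;

            xen_hypercall();        /* forward the first call to the real target */
    }

    int main(void)
    {
            xen_hypercall();        /* first call: selects and forwards */
            xen_hypercall();        /* later calls: go direct */
            return 0;
    }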
ELFNOTE(Xen, XEN_ELFNOTE_SUPPORTED_FEATURES, .long FEATURES_PV | FEATURES_PVH | FEATURES_DOM0) ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index e1b782e823e6..63c13a2ccf55 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -326,4 +326,13 @@ static inline void xen_smp_intr_free_pv(unsigned int cpu) {} static inline void xen_smp_count_cpus(void) { } #endif /* CONFIG_SMP */ +#ifdef CONFIG_XEN_PV +void xen_hypercall_pv(void); +#endif +void xen_hypercall_hvm(void); +void xen_hypercall_amd(void); +void xen_hypercall_intel(void); +void xen_hypercall_setfunc(void); +void *__xen_hypercall_setfunc(void); + #endif /* XEN_OPS_H */ |