From 0b4edf111870b83ea77b1d7e16b8ceac29f9f388 Mon Sep 17 00:00:00 2001 From: Faiz Abbas Date: Wed, 13 May 2020 02:08:04 +0530 Subject: ARM: dts: Move am33xx and am43xx mmc nodes to sdhci-omap driver Move mmc nodes to be compatible with the sdhci-omap driver. The following modifications are required for omap_hsmmc specific properties: ti,non-removable: convert to the generic mmc non-removable ti,needs-special-reset: co-opted into the sdhci-omap driver ti,dual-volt: removed. Legacy property not used in am335x or am43xx ti,needs-special-hs-handling: removed. Legacy property not used in am335x or am43xx Also since the sdhci-omap driver does not support runtime PM, explicitly disable the mmc3 instance in the dtsi. Signed-off-by: Faiz Abbas Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am335x-baltos.dtsi | 2 +- arch/arm/boot/dts/am335x-boneblack-common.dtsi | 1 + arch/arm/boot/dts/am335x-boneblack-wireless.dts | 1 - arch/arm/boot/dts/am335x-boneblue.dts | 1 - arch/arm/boot/dts/am335x-bonegreen-wireless.dts | 1 - arch/arm/boot/dts/am335x-evm.dts | 3 +-- arch/arm/boot/dts/am335x-evmsk.dts | 2 +- arch/arm/boot/dts/am335x-lxm.dts | 2 +- arch/arm/boot/dts/am335x-moxa-uc-2100-common.dtsi | 2 +- arch/arm/boot/dts/am335x-moxa-uc-8100-me-t.dts | 2 +- arch/arm/boot/dts/am335x-pepper.dts | 4 ++-- arch/arm/boot/dts/am335x-phycore-som.dtsi | 2 +- arch/arm/boot/dts/am33xx-l4.dtsi | 6 ++---- arch/arm/boot/dts/am33xx.dtsi | 3 ++- arch/arm/boot/dts/am4372.dtsi | 3 ++- arch/arm/boot/dts/am437x-cm-t43.dts | 2 +- arch/arm/boot/dts/am437x-gp-evm.dts | 4 ++-- arch/arm/boot/dts/am437x-l4.dtsi | 5 ++--- arch/arm/boot/dts/am437x-sk-evm.dts | 2 +- 19 files changed, 22 insertions(+), 26 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/am335x-baltos.dtsi b/arch/arm/boot/dts/am335x-baltos.dtsi index 05e7b5d4a95b..04f0b1227efe 100644 --- a/arch/arm/boot/dts/am335x-baltos.dtsi +++ b/arch/arm/boot/dts/am335x-baltos.dtsi @@ -369,7 +369,7 @@ &mmc2 { status = "okay"; vmmc-supply = <&wl12xx_vmmc>; - ti,non-removable; + non-removable; bus-width = <4>; cap-power-off-card; pinctrl-names = "default"; diff --git a/arch/arm/boot/dts/am335x-boneblack-common.dtsi b/arch/arm/boot/dts/am335x-boneblack-common.dtsi index 91f93bc89716..dd932220a8bf 100644 --- a/arch/arm/boot/dts/am335x-boneblack-common.dtsi +++ b/arch/arm/boot/dts/am335x-boneblack-common.dtsi @@ -22,6 +22,7 @@ pinctrl-0 = <&emmc_pins>; bus-width = <8>; status = "okay"; + non-removable; }; &am33xx_pinmux { diff --git a/arch/arm/boot/dts/am335x-boneblack-wireless.dts b/arch/arm/boot/dts/am335x-boneblack-wireless.dts index 3124d94c0b3c..e07dd7979586 100644 --- a/arch/arm/boot/dts/am335x-boneblack-wireless.dts +++ b/arch/arm/boot/dts/am335x-boneblack-wireless.dts @@ -75,7 +75,6 @@ bus-width = <4>; non-removable; cap-power-off-card; - ti,needs-special-hs-handling; keep-power-in-suspend; pinctrl-names = "default"; pinctrl-0 = <&mmc3_pins &wl18xx_pins>; diff --git a/arch/arm/boot/dts/am335x-boneblue.dts b/arch/arm/boot/dts/am335x-boneblue.dts index 5811fb8d4fdf..83f9452c9cd3 100644 --- a/arch/arm/boot/dts/am335x-boneblue.dts +++ b/arch/arm/boot/dts/am335x-boneblue.dts @@ -367,7 +367,6 @@ bus-width = <4>; non-removable; cap-power-off-card; - ti,needs-special-hs-handling; keep-power-in-suspend; pinctrl-names = "default"; pinctrl-0 = <&mmc3_pins &wl18xx_pins>; diff --git a/arch/arm/boot/dts/am335x-bonegreen-wireless.dts b/arch/arm/boot/dts/am335x-bonegreen-wireless.dts index 4092cd193b8a..609c8db687ec 100644 --- a/arch/arm/boot/dts/am335x-bonegreen-wireless.dts +++ b/arch/arm/boot/dts/am335x-bonegreen-wireless.dts @@ -75,7 +75,6 @@ bus-width = <4>; non-removable; cap-power-off-card; - ti,needs-special-hs-handling; keep-power-in-suspend; pinctrl-names = "default"; pinctrl-0 = <&mmc3_pins &wl18xx_pins>; diff --git a/arch/arm/boot/dts/am335x-evm.dts b/arch/arm/boot/dts/am335x-evm.dts index 68252dab32c3..a4fc6b168a85 100644 --- a/arch/arm/boot/dts/am335x-evm.dts +++ b/arch/arm/boot/dts/am335x-evm.dts @@ -743,8 +743,7 @@ bus-width = <4>; pinctrl-names = "default"; pinctrl-0 = <&mmc3_pins &wlan_pins>; - ti,non-removable; - ti,needs-special-hs-handling; + non-removable; cap-power-off-card; keep-power-in-suspend; diff --git a/arch/arm/boot/dts/am335x-evmsk.dts b/arch/arm/boot/dts/am335x-evmsk.dts index 32f515a295ee..78b6e1f594c9 100644 --- a/arch/arm/boot/dts/am335x-evmsk.dts +++ b/arch/arm/boot/dts/am335x-evmsk.dts @@ -655,7 +655,7 @@ &mmc2 { status = "okay"; vmmc-supply = <&wl12xx_vmmc>; - ti,non-removable; + non-removable; bus-width = <4>; cap-power-off-card; keep-power-in-suspend; diff --git a/arch/arm/boot/dts/am335x-lxm.dts b/arch/arm/boot/dts/am335x-lxm.dts index fef582852820..dbedf729205c 100644 --- a/arch/arm/boot/dts/am335x-lxm.dts +++ b/arch/arm/boot/dts/am335x-lxm.dts @@ -339,7 +339,7 @@ pinctrl-0 = <&emmc_pins>; vmmc-supply = <&vmmcsd_fixed>; bus-width = <8>; - ti,non-removable; + non-removable; status = "okay"; }; diff --git a/arch/arm/boot/dts/am335x-moxa-uc-2100-common.dtsi b/arch/arm/boot/dts/am335x-moxa-uc-2100-common.dtsi index 6495a125c01f..4e90f9c23d2e 100644 --- a/arch/arm/boot/dts/am335x-moxa-uc-2100-common.dtsi +++ b/arch/arm/boot/dts/am335x-moxa-uc-2100-common.dtsi @@ -159,7 +159,7 @@ vmmc-supply = <&vmmcsd_fixed>; bus-width = <8>; pinctrl-0 = <&mmc1_pins_default>; - ti,non-removable; + non-removable; status = "okay"; }; diff --git a/arch/arm/boot/dts/am335x-moxa-uc-8100-me-t.dts b/arch/arm/boot/dts/am335x-moxa-uc-8100-me-t.dts index 244df9c5a537..f03e72cada41 100644 --- a/arch/arm/boot/dts/am335x-moxa-uc-8100-me-t.dts +++ b/arch/arm/boot/dts/am335x-moxa-uc-8100-me-t.dts @@ -451,7 +451,7 @@ vmmc-supply = <&vmmcsd_fixed>; bus-width = <8>; pinctrl-0 = <&mmc2_pins_default>; - ti,non-removable; + non-removable; status = "okay"; }; diff --git a/arch/arm/boot/dts/am335x-pepper.dts b/arch/arm/boot/dts/am335x-pepper.dts index 6d7608d9377b..f9a027b47962 100644 --- a/arch/arm/boot/dts/am335x-pepper.dts +++ b/arch/arm/boot/dts/am335x-pepper.dts @@ -341,7 +341,7 @@ pinctrl-0 = <&emmc_pins>; vmmc-supply = <&ldo3_reg>; bus-width = <8>; - ti,non-removable; + non-removable; }; &mmc3 { @@ -351,7 +351,7 @@ pinctrl-0 = <&wireless_pins>; vmmmc-supply = <&v3v3c_reg>; bus-width = <4>; - ti,non-removable; + non-removable; dmas = <&edma_xbar 12 0 1 &edma_xbar 13 0 2>; dma-names = "tx", "rx"; diff --git a/arch/arm/boot/dts/am335x-phycore-som.dtsi b/arch/arm/boot/dts/am335x-phycore-som.dtsi index 3d0672b53d77..7e46b4c02709 100644 --- a/arch/arm/boot/dts/am335x-phycore-som.dtsi +++ b/arch/arm/boot/dts/am335x-phycore-som.dtsi @@ -69,7 +69,7 @@ pinctrl-0 = <&emmc_pins>; vmmc-supply = <&vmmc_reg>; bus-width = <8>; - ti,non-removable; + non-removable; status = "disabled"; }; diff --git a/arch/arm/boot/dts/am33xx-l4.dtsi b/arch/arm/boot/dts/am33xx-l4.dtsi index 5ed7f3c58c0f..573ff076178b 100644 --- a/arch/arm/boot/dts/am33xx-l4.dtsi +++ b/arch/arm/boot/dts/am33xx-l4.dtsi @@ -1337,10 +1337,8 @@ ranges = <0x0 0x60000 0x1000>; mmc1: mmc@0 { - compatible = "ti,omap4-hsmmc"; - ti,dual-volt; + compatible = "ti,am335-sdhci"; ti,needs-special-reset; - ti,needs-special-hs-handling; dmas = <&edma_xbar 24 0 0 &edma_xbar 25 0 0>; dma-names = "tx", "rx"; @@ -1818,7 +1816,7 @@ ranges = <0x0 0xd8000 0x1000>; mmc2: mmc@0 { - compatible = "ti,omap4-hsmmc"; + compatible = "ti,am335-sdhci"; ti,needs-special-reset; dmas = <&edma 2 0 &edma 3 0>; diff --git a/arch/arm/boot/dts/am33xx.dtsi b/arch/arm/boot/dts/am33xx.dtsi index a35f5052d76f..3b9d4d2d35bf 100644 --- a/arch/arm/boot/dts/am33xx.dtsi +++ b/arch/arm/boot/dts/am33xx.dtsi @@ -322,10 +322,11 @@ ranges = <0x0 0x47810000 0x1000>; mmc3: mmc@0 { - compatible = "ti,omap4-hsmmc"; + compatible = "ti,am335-sdhci"; ti,needs-special-reset; interrupts = <29>; reg = <0x0 0x1000>; + status = "disabled"; }; }; diff --git a/arch/arm/boot/dts/am4372.dtsi b/arch/arm/boot/dts/am4372.dtsi index dba87bfaf33e..092b3d4404f4 100644 --- a/arch/arm/boot/dts/am4372.dtsi +++ b/arch/arm/boot/dts/am4372.dtsi @@ -316,10 +316,11 @@ ranges = <0x0 0x47810000 0x1000>; mmc3: mmc@0 { - compatible = "ti,omap4-hsmmc"; + compatible = "ti,am437-sdhci"; ti,needs-special-reset; interrupts = ; reg = <0x0 0x1000>; + status = "disabled"; }; }; diff --git a/arch/arm/boot/dts/am437x-cm-t43.dts b/arch/arm/boot/dts/am437x-cm-t43.dts index 063113a5da2d..a6b4fca8626a 100644 --- a/arch/arm/boot/dts/am437x-cm-t43.dts +++ b/arch/arm/boot/dts/am437x-cm-t43.dts @@ -291,7 +291,7 @@ pinctrl-0 = <&emmc_pins>; vmmc-supply = <&vmmc_3v3>; bus-width = <8>; - ti,non-removable; + non-removable; }; &spi0 { diff --git a/arch/arm/boot/dts/am437x-gp-evm.dts b/arch/arm/boot/dts/am437x-gp-evm.dts index 811c8cae315b..cadf47ee337f 100644 --- a/arch/arm/boot/dts/am437x-gp-evm.dts +++ b/arch/arm/boot/dts/am437x-gp-evm.dts @@ -869,7 +869,7 @@ pinctrl-names = "default", "sleep"; pinctrl-0 = <&emmc_pins_default>; pinctrl-1 = <&emmc_pins_sleep>; - ti,non-removable; + non-removable; }; &mmc3 { @@ -886,7 +886,7 @@ pinctrl-1 = <&mmc3_pins_sleep>; cap-power-off-card; keep-power-in-suspend; - ti,non-removable; + non-removable; #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm/boot/dts/am437x-l4.dtsi b/arch/arm/boot/dts/am437x-l4.dtsi index 49c6a872052e..f4eb36d8b660 100644 --- a/arch/arm/boot/dts/am437x-l4.dtsi +++ b/arch/arm/boot/dts/am437x-l4.dtsi @@ -1086,9 +1086,8 @@ ranges = <0x0 0x60000 0x1000>; mmc1: mmc@0 { - compatible = "ti,omap4-hsmmc"; + compatible = "ti,am437-sdhci"; reg = <0x0 0x1000>; - ti,dual-volt; ti,needs-special-reset; dmas = <&edma 24 0>, <&edma 25 0>; @@ -1601,7 +1600,7 @@ ranges = <0x0 0xd8000 0x1000>; mmc2: mmc@0 { - compatible = "ti,omap4-hsmmc"; + compatible = "ti,am437-sdhci"; reg = <0x0 0x1000>; ti,needs-special-reset; dmas = <&edma 2 0>, diff --git a/arch/arm/boot/dts/am437x-sk-evm.dts b/arch/arm/boot/dts/am437x-sk-evm.dts index 25222497f828..2416597a4f5c 100644 --- a/arch/arm/boot/dts/am437x-sk-evm.dts +++ b/arch/arm/boot/dts/am437x-sk-evm.dts @@ -719,7 +719,7 @@ pinctrl-1 = <&mmc3_pins_sleep>; cap-power-off-card; keep-power-in-suspend; - ti,non-removable; + non-removable; #address-cells = <1>; #size-cells = <0>; -- cgit v1.2.3 From 16accae3d97f97d7f61c4ee5d0002bccdef59088 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 28 May 2020 13:16:14 -0700 Subject: perf/x86/rapl: Fix RAPL config variable bug This patch fixes a bug introduced by: fd3ae1e1587d6 ("perf/x86/rapl: Move RAPL support to common x86 code") The Kconfig variable name was wrong. It was missing the CONFIG_ prefix. Signed-off-by: Stephane Eranian Signed-off-by: Ingo Molnar Tested-by: Kim Phillips Acked-by: Peter Zijlstra Link: https://lore.kernel.org/r/20200528201614.250182-1-eranian@google.com --- arch/x86/events/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/events/Makefile b/arch/x86/events/Makefile index 12c42eba77ec..9933c0e8e97a 100644 --- a/arch/x86/events/Makefile +++ b/arch/x86/events/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only obj-y += core.o probe.o -obj-$(PERF_EVENTS_INTEL_RAPL) += rapl.o +obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL) += rapl.o obj-y += amd/ obj-$(CONFIG_X86_LOCAL_APIC) += msr.o obj-$(CONFIG_CPU_SUP_INTEL) += intel/ -- cgit v1.2.3 From 0df12a01f4857495816b05f048c4c31439446e35 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 1 Jun 2020 17:18:56 -0700 Subject: ARM: dts: omap4-droid4: Fix spi configuration and increase rate We can currently sometimes get "RXS timed out" errors and "EOT timed out" errors with spi transfers. These errors can be made easy to reproduce by reading the cpcap iio values in a loop while keeping the CPUs busy by also reading /dev/urandom. The "RXS timed out" errors we can fix by adding spi-cpol and spi-cpha in addition to the spi-cs-high property we already have. The "EOT timed out" errors we can fix by increasing the spi clock rate to 9.6 MHz. Looks similar MC13783 PMIC says it works at spi clock rates up to 20 MHz, so let's assume we can pick any rate up to 20 MHz also for cpcap. Cc: maemo-leste@lists.dyne.org Cc: Merlijn Wajer Cc: Pavel Machek Cc: Sebastian Reichel Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi b/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi index e39eee628afd..08a7d3ce383f 100644 --- a/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi +++ b/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi @@ -13,8 +13,10 @@ #interrupt-cells = <2>; #address-cells = <1>; #size-cells = <0>; - spi-max-frequency = <3000000>; + spi-max-frequency = <9600000>; spi-cs-high; + spi-cpol; + spi-cpha; cpcap_adc: adc { compatible = "motorola,mapphone-cpcap-adc"; -- cgit v1.2.3 From 77cad9dbc957f23a73169e8a8971186744296614 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 27 May 2020 16:32:06 -0700 Subject: ARM: OMAP2+: Fix legacy mode dss_reset We must check for "dss_core" instead of "dss" to avoid also matching also "dss_dispc". This only matters for the mixed case of data configured in device tree but with legacy booting ti,hwmods property still enabled. Fixes: 8b30919a4e3c ("ARM: OMAP2+: Handle reset quirks for dynamically allocated modules") Cc: Laurent Pinchart Cc: Tomi Valkeinen Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/omap_hwmod.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index 82706af307de..c630457bb228 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -3489,7 +3489,7 @@ static const struct omap_hwmod_reset dra7_reset_quirks[] = { }; static const struct omap_hwmod_reset omap_reset_quirks[] = { - { .match = "dss", .len = 3, .reset = omap_dss_reset, }, + { .match = "dss_core", .len = 8, .reset = omap_dss_reset, }, { .match = "hdq1w", .len = 5, .reset = omap_hdq1w_reset, }, { .match = "i2c", .len = 3, .reset = omap_i2c_reset, }, { .match = "wd_timer", .len = 8, .reset = omap2_wd_timer_reset, }, -- cgit v1.2.3 From 38ac46002d1df5707566a73486452851341028d2 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Wed, 3 Jun 2020 17:22:37 +0100 Subject: arm: dts: vexpress: Move mcc node back into motherboard node Commit d9258898ad49 ("arm64: dts: arm: vexpress: Move fixed devices out of bus node") moved the "mcc" DT node into the root node, because it does not have any children using "reg" properties, so does violate some dtc checks about "simple-bus" nodes. However this broke the vexpress config-bus code, which walks up the device tree to find the first node with an "arm,vexpress,site" property. This gave the wrong result (matching the root node instead of the motherboard node), so broke the clocks and some other devices for VExpress boards. Move the whole node back into its original position. This re-introduces the dtc warning, but is conceptually the right thing to do. The dtc warning seems to be overzealous here, there are discussions on fixing or relaxing this check instead. Link: https://lore.kernel.org/r/20200603162237.16319-1-andre.przywara@arm.com Fixes: d9258898ad49 ("arm64: dts: vexpress: Move fixed devices out of bus node") Reported-and-tested-by: Guenter Roeck Signed-off-by: Andre Przywara Signed-off-by: Sudeep Holla --- arch/arm/boot/dts/vexpress-v2m-rs1.dtsi | 146 ++++++++++++++++---------------- 1 file changed, 73 insertions(+), 73 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi b/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi index e6308fb76183..a88ee5294d35 100644 --- a/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi +++ b/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi @@ -100,79 +100,6 @@ }; }; - mcc { - compatible = "arm,vexpress,config-bus"; - arm,vexpress,config-bridge = <&v2m_sysreg>; - - oscclk0 { - /* MCC static memory clock */ - compatible = "arm,vexpress-osc"; - arm,vexpress-sysreg,func = <1 0>; - freq-range = <25000000 60000000>; - #clock-cells = <0>; - clock-output-names = "v2m:oscclk0"; - }; - - v2m_oscclk1: oscclk1 { - /* CLCD clock */ - compatible = "arm,vexpress-osc"; - arm,vexpress-sysreg,func = <1 1>; - freq-range = <23750000 65000000>; - #clock-cells = <0>; - clock-output-names = "v2m:oscclk1"; - }; - - v2m_oscclk2: oscclk2 { - /* IO FPGA peripheral clock */ - compatible = "arm,vexpress-osc"; - arm,vexpress-sysreg,func = <1 2>; - freq-range = <24000000 24000000>; - #clock-cells = <0>; - clock-output-names = "v2m:oscclk2"; - }; - - volt-vio { - /* Logic level voltage */ - compatible = "arm,vexpress-volt"; - arm,vexpress-sysreg,func = <2 0>; - regulator-name = "VIO"; - regulator-always-on; - label = "VIO"; - }; - - temp-mcc { - /* MCC internal operating temperature */ - compatible = "arm,vexpress-temp"; - arm,vexpress-sysreg,func = <4 0>; - label = "MCC"; - }; - - reset { - compatible = "arm,vexpress-reset"; - arm,vexpress-sysreg,func = <5 0>; - }; - - muxfpga { - compatible = "arm,vexpress-muxfpga"; - arm,vexpress-sysreg,func = <7 0>; - }; - - shutdown { - compatible = "arm,vexpress-shutdown"; - arm,vexpress-sysreg,func = <8 0>; - }; - - reboot { - compatible = "arm,vexpress-reboot"; - arm,vexpress-sysreg,func = <9 0>; - }; - - dvimode { - compatible = "arm,vexpress-dvimode"; - arm,vexpress-sysreg,func = <11 0>; - }; - }; - bus@8000000 { motherboard-bus { model = "V2M-P1"; @@ -435,6 +362,79 @@ }; }; }; + + mcc { + compatible = "arm,vexpress,config-bus"; + arm,vexpress,config-bridge = <&v2m_sysreg>; + + oscclk0 { + /* MCC static memory clock */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 0>; + freq-range = <25000000 60000000>; + #clock-cells = <0>; + clock-output-names = "v2m:oscclk0"; + }; + + v2m_oscclk1: oscclk1 { + /* CLCD clock */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 1>; + freq-range = <23750000 65000000>; + #clock-cells = <0>; + clock-output-names = "v2m:oscclk1"; + }; + + v2m_oscclk2: oscclk2 { + /* IO FPGA peripheral clock */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 2>; + freq-range = <24000000 24000000>; + #clock-cells = <0>; + clock-output-names = "v2m:oscclk2"; + }; + + volt-vio { + /* Logic level voltage */ + compatible = "arm,vexpress-volt"; + arm,vexpress-sysreg,func = <2 0>; + regulator-name = "VIO"; + regulator-always-on; + label = "VIO"; + }; + + temp-mcc { + /* MCC internal operating temperature */ + compatible = "arm,vexpress-temp"; + arm,vexpress-sysreg,func = <4 0>; + label = "MCC"; + }; + + reset { + compatible = "arm,vexpress-reset"; + arm,vexpress-sysreg,func = <5 0>; + }; + + muxfpga { + compatible = "arm,vexpress-muxfpga"; + arm,vexpress-sysreg,func = <7 0>; + }; + + shutdown { + compatible = "arm,vexpress-shutdown"; + arm,vexpress-sysreg,func = <8 0>; + }; + + reboot { + compatible = "arm,vexpress-reboot"; + arm,vexpress-sysreg,func = <9 0>; + }; + + dvimode { + compatible = "arm,vexpress-dvimode"; + arm,vexpress-sysreg,func = <11 0>; + }; + }; }; }; }; -- cgit v1.2.3 From 3f311e8993ed18fb7325373ec0f82a7f8e8be82e Mon Sep 17 00:00:00 2001 From: Oskar Holmlund Date: Fri, 5 Jun 2020 19:49:23 +0200 Subject: ARM: dts: Fix am33xx.dtsi USB ranges length AM335x TRM: Table 2-1 defines USBSS - USB Queue Manager in memory region 0x4740 0000 to 0x4740 7FFF. Looks like the older TRM revisions list the range from 0x5000 to 0x8000 as reserved. Fixes: 0782e8572ce4 ("ARM: dts: Probe am335x musb with ti-sysc") Signed-off-by: Oskar Holmlund [tony@atomide.com: updated comments] Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am33xx.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/am33xx.dtsi b/arch/arm/boot/dts/am33xx.dtsi index a35f5052d76f..be76ded7e4c0 100644 --- a/arch/arm/boot/dts/am33xx.dtsi +++ b/arch/arm/boot/dts/am33xx.dtsi @@ -347,7 +347,7 @@ clock-names = "fck"; #address-cells = <1>; #size-cells = <1>; - ranges = <0x0 0x47400000 0x5000>; + ranges = <0x0 0x47400000 0x8000>; usb0_phy: usb-phy@1300 { compatible = "ti,am335x-usb-phy"; -- cgit v1.2.3 From 9f872f924545324a06fa216ad38132804c20f2db Mon Sep 17 00:00:00 2001 From: Oskar Holmlund Date: Fri, 5 Jun 2020 19:51:09 +0200 Subject: ARM: dts: Fix am33xx.dtsi ti,sysc-mask wrong softreset flag AM335x TRM: Figure 16-23 define sysconfig register and soft_reset are in first position corresponding to SYSC_OMAP4_SOFTRESET defined in ti-sysc.h. Fixes: 0782e8572ce4 ("ARM: dts: Probe am335x musb with ti-sysc") Signed-off-by: Oskar Holmlund Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am33xx.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/am33xx.dtsi b/arch/arm/boot/dts/am33xx.dtsi index be76ded7e4c0..ed6634d34c3c 100644 --- a/arch/arm/boot/dts/am33xx.dtsi +++ b/arch/arm/boot/dts/am33xx.dtsi @@ -335,7 +335,7 @@ <0x47400010 0x4>; reg-names = "rev", "sysc"; ti,sysc-mask = <(SYSC_OMAP4_FREEEMU | - SYSC_OMAP2_SOFTRESET)>; + SYSC_OMAP4_SOFTRESET)>; ti,sysc-midle = , , ; -- cgit v1.2.3 From ebf89ed78b6ad6bc53f49a9dc0754fa97bb11e4a Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Fri, 5 Jun 2020 17:13:46 -0500 Subject: ARM: dts: dra7: Fix timer nodes properly for timer_sys_ck clocks The commit 5390130f3b28 ("ARM: dts: dra7: add timer_sys_ck entries for IPU/DSP timers") was added to allow the OMAP clocksource timer driver to use the clock aliases when reconfiguring the parent clock source for the timer functional clocks after the timer_sys_ck clock aliases got cleaned up in commit a8202cd5174d ("clk: ti: dra7: drop unnecessary clock aliases"). The above patch however has missed adding the entries for couple of timers (14, 15 and 16), and also added erroneously in the parent ti-sysc nodes for couple of clocks (timers 4, 5 and 6). Fix these properly, so that any of these timers can be used with OMAP remoteproc IPU and DSP devices. The always-on timers 1 and 12 are not expected to use this clock source, so they are not modified. Fixes: 5390130f3b28 ("ARM: dts: dra7: add timer_sys_ck entries for IPU/DSP timers") Signed-off-by: Suman Anna Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/dra7-l4.dtsi | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/dra7-l4.dtsi b/arch/arm/boot/dts/dra7-l4.dtsi index 1abd455cf15a..e059054d9110 100644 --- a/arch/arm/boot/dts/dra7-l4.dtsi +++ b/arch/arm/boot/dts/dra7-l4.dtsi @@ -1210,9 +1210,8 @@ , ; /* Domains (P, C): l4per_pwrdm, l4per_clkdm */ - clocks = <&l4per_clkctrl DRA7_L4PER_TIMER4_CLKCTRL 0>, - <&timer_sys_clk_div>; - clock-names = "fck", "timer_sys_ck"; + clocks = <&l4per_clkctrl DRA7_L4PER_TIMER4_CLKCTRL 0>; + clock-names = "fck"; #address-cells = <1>; #size-cells = <1>; ranges = <0x0 0x36000 0x1000>; @@ -3355,8 +3354,8 @@ , ; /* Domains (P, C): ipu_pwrdm, ipu_clkdm */ - clocks = <&ipu_clkctrl DRA7_IPU_TIMER5_CLKCTRL 0>, <&timer_sys_clk_div>; - clock-names = "fck", "timer_sys_ck"; + clocks = <&ipu_clkctrl DRA7_IPU_TIMER5_CLKCTRL 0>; + clock-names = "fck"; #address-cells = <1>; #size-cells = <1>; ranges = <0x0 0x20000 0x1000>; @@ -3364,8 +3363,8 @@ timer5: timer@0 { compatible = "ti,omap5430-timer"; reg = <0x0 0x80>; - clocks = <&ipu_clkctrl DRA7_IPU_TIMER5_CLKCTRL 24>; - clock-names = "fck"; + clocks = <&ipu_clkctrl DRA7_IPU_TIMER5_CLKCTRL 24>, <&timer_sys_clk_div>; + clock-names = "fck", "timer_sys_ck"; interrupts = ; }; }; @@ -3382,9 +3381,8 @@ , ; /* Domains (P, C): ipu_pwrdm, ipu_clkdm */ - clocks = <&ipu_clkctrl DRA7_IPU_TIMER6_CLKCTRL 0>, - <&timer_sys_clk_div>; - clock-names = "fck", "timer_sys_ck"; + clocks = <&ipu_clkctrl DRA7_IPU_TIMER6_CLKCTRL 0>; + clock-names = "fck"; #address-cells = <1>; #size-cells = <1>; ranges = <0x0 0x22000 0x1000>; @@ -3392,8 +3390,8 @@ timer6: timer@0 { compatible = "ti,omap5430-timer"; reg = <0x0 0x80>; - clocks = <&ipu_clkctrl DRA7_IPU_TIMER6_CLKCTRL 24>; - clock-names = "fck"; + clocks = <&ipu_clkctrl DRA7_IPU_TIMER6_CLKCTRL 24>, <&timer_sys_clk_div>; + clock-names = "fck", "timer_sys_ck"; interrupts = ; }; }; @@ -3501,8 +3499,8 @@ timer14: timer@0 { compatible = "ti,omap5430-timer"; reg = <0x0 0x80>; - clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER14_CLKCTRL 24>; - clock-names = "fck"; + clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER14_CLKCTRL 24>, <&timer_sys_clk_div>; + clock-names = "fck", "timer_sys_ck"; interrupts = ; ti,timer-pwm; }; @@ -3529,8 +3527,8 @@ timer15: timer@0 { compatible = "ti,omap5430-timer"; reg = <0x0 0x80>; - clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER15_CLKCTRL 24>; - clock-names = "fck"; + clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER15_CLKCTRL 24>, <&timer_sys_clk_div>; + clock-names = "fck", "timer_sys_ck"; interrupts = ; ti,timer-pwm; }; @@ -3557,8 +3555,8 @@ timer16: timer@0 { compatible = "ti,omap5430-timer"; reg = <0x0 0x80>; - clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER16_CLKCTRL 24>; - clock-names = "fck"; + clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER16_CLKCTRL 24>, <&timer_sys_clk_div>; + clock-names = "fck", "timer_sys_ck"; interrupts = ; ti,timer-pwm; }; -- cgit v1.2.3 From 8e326a8bdef3fb42b92bf2742e8405d9b9209367 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Fri, 5 Jun 2020 17:13:47 -0500 Subject: ARM: dts: dra7-evm-common: Fix duplicate mailbox nodes The mailbox nodes defined in various dts files have been moved to common dra7-ipu-dsp-common.dtsi and dra74-ipu-dsp-common.dtsi files in commit a11a2f73b32d ("ARM: dts: dra7-ipu-dsp-common: Move mailboxes into common files"), but the nodes were erroneously left out in the dra7-evm-common.dtsi file. Fix this by removing these duplicate nodes. Fixes: a11a2f73b32d ("ARM: dts: dra7-ipu-dsp-common: Move mailboxes into common files") Signed-off-by: Suman Anna Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/dra7-evm-common.dtsi | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/dra7-evm-common.dtsi b/arch/arm/boot/dts/dra7-evm-common.dtsi index 23244b5a9942..488201f0ac95 100644 --- a/arch/arm/boot/dts/dra7-evm-common.dtsi +++ b/arch/arm/boot/dts/dra7-evm-common.dtsi @@ -244,26 +244,6 @@ rx-num-evt = <32>; }; -&mailbox5 { - status = "okay"; - mbox_ipu1_ipc3x: mbox_ipu1_ipc3x { - status = "okay"; - }; - mbox_dsp1_ipc3x: mbox_dsp1_ipc3x { - status = "okay"; - }; -}; - -&mailbox6 { - status = "okay"; - mbox_ipu2_ipc3x: mbox_ipu2_ipc3x { - status = "okay"; - }; - mbox_dsp2_ipc3x: mbox_dsp2_ipc3x { - status = "okay"; - }; -}; - &pcie1_rc { status = "okay"; }; -- cgit v1.2.3 From f9639f9a779ac0381cabfb793915851a89424f9f Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Tue, 9 Jun 2020 13:29:58 +0300 Subject: ARM: dts: am437x-sk-evm: remove lcd timings LCD timings now come from panel-simple. Having timings in the DT will cause a WARN. Signed-off-by: Tomi Valkeinen Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am437x-sk-evm.dts | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/am437x-sk-evm.dts b/arch/arm/boot/dts/am437x-sk-evm.dts index 4d5a7ca2e25d..6c83812407bf 100644 --- a/arch/arm/boot/dts/am437x-sk-evm.dts +++ b/arch/arm/boot/dts/am437x-sk-evm.dts @@ -134,22 +134,6 @@ enable-gpios = <&gpio1 7 GPIO_ACTIVE_HIGH>; - panel-timing { - clock-frequency = <9000000>; - hactive = <480>; - vactive = <272>; - hfront-porch = <2>; - hback-porch = <2>; - hsync-len = <41>; - vfront-porch = <2>; - vback-porch = <2>; - vsync-len = <10>; - hsync-active = <0>; - vsync-active = <0>; - de-active = <1>; - pixelclk-active = <1>; - }; - port { lcd_in: endpoint { remote-endpoint = <&dpi_out>; -- cgit v1.2.3 From 3991510bf6402b534f158e164569d051bfd81f68 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Tue, 9 Jun 2020 13:29:59 +0300 Subject: ARM: dts: am437x-gp-evm: remove lcd timings LCD timings now come from panel-simple. Having timings in the DT will cause a WARN. Signed-off-by: Tomi Valkeinen Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am437x-gp-evm.dts | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/am437x-gp-evm.dts b/arch/arm/boot/dts/am437x-gp-evm.dts index d692e3b2812a..2ff9485c28a4 100644 --- a/arch/arm/boot/dts/am437x-gp-evm.dts +++ b/arch/arm/boot/dts/am437x-gp-evm.dts @@ -91,22 +91,6 @@ backlight = <&lcd_bl>; - panel-timing { - clock-frequency = <33000000>; - hactive = <800>; - vactive = <480>; - hfront-porch = <210>; - hback-porch = <16>; - hsync-len = <30>; - vback-porch = <10>; - vfront-porch = <22>; - vsync-len = <13>; - hsync-active = <0>; - vsync-active = <0>; - de-active = <1>; - pixelclk-active = <1>; - }; - port { lcd_in: endpoint { remote-endpoint = <&dpi_out>; -- cgit v1.2.3 From 944021e79e7ec8db9c3edbc6af32276ade4ac0db Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Tue, 9 Jun 2020 13:30:00 +0300 Subject: ARM: dts: am437x-epos-evm: remove lcd timings LCD timings now come from panel-simple. Having timings in the DT will cause a WARN. Signed-off-by: Tomi Valkeinen Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am43x-epos-evm.dts | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/am43x-epos-evm.dts b/arch/arm/boot/dts/am43x-epos-evm.dts index 27259fd6f741..7d4e0dffde7a 100644 --- a/arch/arm/boot/dts/am43x-epos-evm.dts +++ b/arch/arm/boot/dts/am43x-epos-evm.dts @@ -47,22 +47,6 @@ backlight = <&lcd_bl>; - panel-timing { - clock-frequency = <33000000>; - hactive = <800>; - vactive = <480>; - hfront-porch = <210>; - hback-porch = <16>; - hsync-len = <30>; - vback-porch = <10>; - vfront-porch = <22>; - vsync-len = <13>; - hsync-active = <0>; - vsync-active = <0>; - de-active = <1>; - pixelclk-active = <1>; - }; - port { lcd_in: endpoint { remote-endpoint = <&dpi_out>; -- cgit v1.2.3 From 6c58f25e6938c073198af8b1e1832f83f8f0df33 Mon Sep 17 00:00:00 2001 From: Nathan Huckleberry Date: Thu, 11 Jun 2020 18:32:35 +0000 Subject: riscv/atomic: Fix sign extension for RV64I The argument passed to cmpxchg is not guaranteed to be sign extended, but lr.w sign extends on RV64I. This makes cmpxchg fail on clang built kernels when __old is negative. To fix this, we just cast __old to long which sign extends on RV64I. With this fix, clang built RISC-V kernels now boot. Link: https://github.com/ClangBuiltLinux/linux/issues/867 Signed-off-by: Nathan Huckleberry Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/cmpxchg.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h index d969bab4a26b..262e5bbb2776 100644 --- a/arch/riscv/include/asm/cmpxchg.h +++ b/arch/riscv/include/asm/cmpxchg.h @@ -179,7 +179,7 @@ " bnez %1, 0b\n" \ "1:\n" \ : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ - : "rJ" (__old), "rJ" (__new) \ + : "rJ" ((long)__old), "rJ" (__new) \ : "memory"); \ break; \ case 8: \ @@ -224,7 +224,7 @@ RISCV_ACQUIRE_BARRIER \ "1:\n" \ : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ - : "rJ" (__old), "rJ" (__new) \ + : "rJ" ((long)__old), "rJ" (__new) \ : "memory"); \ break; \ case 8: \ @@ -270,7 +270,7 @@ " bnez %1, 0b\n" \ "1:\n" \ : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ - : "rJ" (__old), "rJ" (__new) \ + : "rJ" ((long)__old), "rJ" (__new) \ : "memory"); \ break; \ case 8: \ @@ -316,7 +316,7 @@ " fence rw, rw\n" \ "1:\n" \ : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ - : "rJ" (__old), "rJ" (__new) \ + : "rJ" ((long)__old), "rJ" (__new) \ : "memory"); \ break; \ case 8: \ -- cgit v1.2.3 From de1f6d9304c38e414552c3565d36286609ced0c1 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Mon, 8 Jun 2020 18:33:41 +0200 Subject: ARM: dts: BCM5301X: Add missing memory "device_type" for Luxul XWC-2000 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This property is needed since commit abe60a3a7afb ("ARM: dts: Kill off skeleton{64}.dtsi"). Without it booting silently hangs at: [ 0.000000] Memory policy: Data cache writealloc Fixes: 984829e2d39b ("ARM: dts: BCM5301X: Add DT for Luxul XWC-2000") Signed-off-by: Rafał Miłecki Signed-off-by: Florian Fainelli --- arch/arm/boot/dts/bcm47094-luxul-xwc-2000.dts | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/boot/dts/bcm47094-luxul-xwc-2000.dts b/arch/arm/boot/dts/bcm47094-luxul-xwc-2000.dts index 334325390aed..29bbecd36f65 100644 --- a/arch/arm/boot/dts/bcm47094-luxul-xwc-2000.dts +++ b/arch/arm/boot/dts/bcm47094-luxul-xwc-2000.dts @@ -17,6 +17,7 @@ }; memory { + device_type = "memory"; reg = <0x00000000 0x08000000 0x88000000 0x18000000>; }; -- cgit v1.2.3 From 0386e9ce5877ee73e07675529d5ae594d00f0900 Mon Sep 17 00:00:00 2001 From: Matthew Hagan Date: Tue, 9 Jun 2020 17:58:31 +0100 Subject: ARM: bcm: Select ARM_TIMER_SP804 for ARCH_BCM_NSP The NSP SoC includes an SP804 timer so should be enabled here. Fixes: a0efb0d28b77 ("ARM: dts: NSP: Add SP804 Support to DT") Signed-off-by: Matthew Hagan Signed-off-by: Florian Fainelli --- arch/arm/mach-bcm/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/mach-bcm/Kconfig b/arch/arm/mach-bcm/Kconfig index 6aa938b949db..1df0ee01ee02 100644 --- a/arch/arm/mach-bcm/Kconfig +++ b/arch/arm/mach-bcm/Kconfig @@ -53,6 +53,7 @@ config ARCH_BCM_NSP select ARM_ERRATA_754322 select ARM_ERRATA_775420 select ARM_ERRATA_764369 if SMP + select ARM_TIMER_SP804 select THERMAL select THERMAL_OF help -- cgit v1.2.3 From 2c18bd525c47f882f033b0a813ecd09c93e1ecdf Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Thu, 4 Jun 2020 14:45:16 -0500 Subject: x86/resctrl: Fix memory bandwidth counter width for AMD MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Memory bandwidth is calculated reading the monitoring counter at two intervals and calculating the delta. It is the software’s responsibility to read the count often enough to avoid having the count roll over _twice_ between reads. The current code hardcodes the bandwidth monitoring counter's width to 24 bits for AMD. This is due to default base counter width which is 24. Currently, AMD does not implement the CPUID 0xF.[ECX=1]:EAX to adjust the counter width. But, the AMD hardware supports much wider bandwidth counter with the default width of 44 bits. Kernel reads these monitoring counters every 1 second and adjusts the counter value for overflow. With 24 bits and scale value of 64 for AMD, it can only measure up to 1GB/s without overflowing. For the rates above 1GB/s this will fail to measure the bandwidth. Fix the issue setting the default width to 44 bits by adjusting the offset. AMD future products will implement CPUID 0xF.[ECX=1]:EAX. [ bp: Let the line stick out and drop {}-brackets around a single statement. ] Fixes: 4d05bf71f157 ("x86/resctrl: Introduce AMD QOS feature") Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/159129975546.62538.5656031125604254041.stgit@naples-babu.amd.com --- arch/x86/kernel/cpu/resctrl/core.c | 8 ++++---- arch/x86/kernel/cpu/resctrl/internal.h | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 12f967c6b603..6a9df71c1b9e 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -981,10 +981,10 @@ void resctrl_cpu_detect(struct cpuinfo_x86 *c) c->x86_cache_max_rmid = ecx; c->x86_cache_occ_scale = ebx; - if (c->x86_vendor == X86_VENDOR_INTEL) - c->x86_cache_mbm_width_offset = eax & 0xff; - else - c->x86_cache_mbm_width_offset = -1; + c->x86_cache_mbm_width_offset = eax & 0xff; + + if (c->x86_vendor == X86_VENDOR_AMD && !c->x86_cache_mbm_width_offset) + c->x86_cache_mbm_width_offset = MBM_CNTR_WIDTH_OFFSET_AMD; } } diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index f20a47d120b1..5ffa32256b3b 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -37,6 +37,7 @@ #define MBA_IS_LINEAR 0x4 #define MBA_MAX_MBPS U32_MAX #define MAX_MBA_BW_AMD 0x800 +#define MBM_CNTR_WIDTH_OFFSET_AMD 20 #define RMID_VAL_ERROR BIT_ULL(63) #define RMID_VAL_UNAVAIL BIT_ULL(62) -- cgit v1.2.3 From 0ae705f3d2b22d9d762f67fd49aa6c290987c6a3 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Mon, 15 Jun 2020 14:56:55 +0800 Subject: KVM: MIPS: Fix a build error for !CPU_LOONGSON64 During the KVM merging progress, a CONFIG_CPU_LOONGSON64 guard in commit 7f2a83f1c2a941ebfee53 ("KVM: MIPS: Add CPUCFG emulation for Loongson-3") is missing by accident. So add it to avoid building error. Fixes: 7f2a83f1c2a941ebfee53 ("KVM: MIPS: Add CPUCFG emulation for Loongson-3") Reported-by: kernel test robot Signed-off-by: Huacai Chen Message-Id: <1592204215-28704-1-git-send-email-chenhc@lemote.com> Signed-off-by: Paolo Bonzini --- arch/mips/kvm/mips.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 521bd5891e84..666d3350b4ac 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -67,7 +67,9 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { VCPU_STAT("vz_ghfc", vz_ghfc_exits), VCPU_STAT("vz_gpa", vz_gpa_exits), VCPU_STAT("vz_resvd", vz_resvd_exits), +#ifdef CONFIG_CPU_LOONGSON64 VCPU_STAT("vz_cpucfg", vz_cpucfg_exits), +#endif #endif VCPU_STAT("halt_successful_poll", halt_successful_poll), VCPU_STAT("halt_attempted_poll", halt_attempted_poll), -- cgit v1.2.3 From 03e62fd67d3ab33f39573fc8787d89dc9b4d7255 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sun, 7 Jun 2020 15:10:23 +0200 Subject: MIPS: lantiq: xway: sysctrl: fix the GPHY clock alias names The dt-bindings for the GSWIP describe that the node should be named "switch". Use the same name in sysctrl.c so the GSWIP driver can actually find the "gphy0" and "gphy1" clocks. Fixes: 14fceff4771e51 ("net: dsa: Add Lantiq / Intel DSA driver for vrx200") Cc: stable@vger.kernel.org Signed-off-by: Martin Blumenstingl Acked-by: Hauke Mehrtens Signed-off-by: Thomas Bogendoerfer --- arch/mips/lantiq/xway/sysctrl.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/mips/lantiq/xway/sysctrl.c b/arch/mips/lantiq/xway/sysctrl.c index aa37545ebe8f..b10342018d19 100644 --- a/arch/mips/lantiq/xway/sysctrl.c +++ b/arch/mips/lantiq/xway/sysctrl.c @@ -514,8 +514,8 @@ void __init ltq_soc_init(void) clkdev_add_pmu("1e10b308.eth", NULL, 0, 0, PMU_SWITCH | PMU_PPE_DP | PMU_PPE_TC); clkdev_add_pmu("1da00000.usif", "NULL", 1, 0, PMU_USIF); - clkdev_add_pmu("1e108000.gswip", "gphy0", 0, 0, PMU_GPHY); - clkdev_add_pmu("1e108000.gswip", "gphy1", 0, 0, PMU_GPHY); + clkdev_add_pmu("1e108000.switch", "gphy0", 0, 0, PMU_GPHY); + clkdev_add_pmu("1e108000.switch", "gphy1", 0, 0, PMU_GPHY); clkdev_add_pmu("1e103100.deu", NULL, 1, 0, PMU_DEU); clkdev_add_pmu("1e116000.mei", "afe", 1, 2, PMU_ANALOG_DSL_AFE); clkdev_add_pmu("1e116000.mei", "dfe", 1, 0, PMU_DFE); @@ -538,8 +538,8 @@ void __init ltq_soc_init(void) PMU_SWITCH | PMU_PPE_DPLUS | PMU_PPE_DPLUM | PMU_PPE_EMA | PMU_PPE_TC | PMU_PPE_SLL01 | PMU_PPE_QSB | PMU_PPE_TOP); - clkdev_add_pmu("1e108000.gswip", "gphy0", 0, 0, PMU_GPHY); - clkdev_add_pmu("1e108000.gswip", "gphy1", 0, 0, PMU_GPHY); + clkdev_add_pmu("1e108000.switch", "gphy0", 0, 0, PMU_GPHY); + clkdev_add_pmu("1e108000.switch", "gphy1", 0, 0, PMU_GPHY); clkdev_add_pmu("1e103000.sdio", NULL, 1, 0, PMU_SDIO); clkdev_add_pmu("1e103100.deu", NULL, 1, 0, PMU_DEU); clkdev_add_pmu("1e116000.mei", "dfe", 1, 0, PMU_DFE); -- cgit v1.2.3 From e82587336695f14283987c9aa0bfd775b520856d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 2 Jun 2020 14:24:47 +0200 Subject: x86, kcsan: Remove __no_kcsan_or_inline usage Now that KCSAN relies on -tsan-distinguish-volatile we no longer need the annotation for constant_test_bit(). Remove it. Signed-off-by: Peter Zijlstra (Intel) --- arch/x86/include/asm/bitops.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 35460fef39b8..0367efdc5b7a 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -201,12 +201,8 @@ arch_test_and_change_bit(long nr, volatile unsigned long *addr) return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btc), *addr, c, "Ir", nr); } -static __no_kcsan_or_inline bool constant_test_bit(long nr, const volatile unsigned long *addr) +static __always_inline bool constant_test_bit(long nr, const volatile unsigned long *addr) { - /* - * Because this is a plain access, we need to disable KCSAN here to - * avoid double instrumentation via instrumented bitops. - */ return ((1UL << (nr & (BITS_PER_LONG-1))) & (addr[nr >> _BITOPS_LONG_SHIFT])) != 0; } -- cgit v1.2.3 From 14d3b376b6c3f66d62559d457d32edf565472163 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 3 Jun 2020 13:32:48 +0200 Subject: x86/entry, cpumask: Provide non-instrumented variant of cpu_is_offline() vmlinux.o: warning: objtool: exc_nmi()+0x12: call to cpumask_test_cpu.constprop.0() leaves .noinstr.text section vmlinux.o: warning: objtool: mce_check_crashing_cpu()+0x12: call to cpumask_test_cpu.constprop.0()leaves .noinstr.text section cpumask_test_cpu() test_bit() instrument_atomic_read() arch_test_bit() Signed-off-by: Peter Zijlstra (Intel) --- arch/x86/include/asm/cpumask.h | 18 ++++++++++++++++++ arch/x86/kernel/cpu/mce/core.c | 2 +- arch/x86/kernel/nmi.c | 2 +- 3 files changed, 20 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h index 6722ffcef2e6..3afa990d756b 100644 --- a/arch/x86/include/asm/cpumask.h +++ b/arch/x86/include/asm/cpumask.h @@ -11,5 +11,23 @@ extern cpumask_var_t cpu_sibling_setup_mask; extern void setup_cpu_local_masks(void); +/* + * NMI and MCE exceptions need cpu_is_offline() _really_ early, + * provide an arch_ special for them to avoid instrumentation. + */ +#if NR_CPUS > 1 +static __always_inline bool arch_cpu_online(int cpu) +{ + return arch_test_bit(cpu, cpumask_bits(cpu_online_mask)); +} +#else +static __always_inline bool arch_cpu_online(int cpu) +{ + return cpu == 0; +} +#endif + +#define arch_cpu_is_offline(cpu) unlikely(!arch_cpu_online(cpu)) + #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_CPUMASK_H */ diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index ce9120c4f740..fbe89a92ff36 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1083,7 +1083,7 @@ static noinstr bool mce_check_crashing_cpu(void) { unsigned int cpu = smp_processor_id(); - if (cpu_is_offline(cpu) || + if (arch_cpu_is_offline(cpu) || (crashing_cpu != -1 && crashing_cpu != cpu)) { u64 mcgstatus; diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 2de365f15684..d7c5e44b26f7 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -478,7 +478,7 @@ static DEFINE_PER_CPU(unsigned long, nmi_dr7); DEFINE_IDTENTRY_RAW(exc_nmi) { - if (IS_ENABLED(CONFIG_SMP) && cpu_is_offline(smp_processor_id())) + if (IS_ENABLED(CONFIG_SMP) && arch_cpu_is_offline(smp_processor_id())) return; if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) { -- cgit v1.2.3 From 8e8bb06d199a5aa7a534aa3b3fc0abbbc11ca438 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 4 Jun 2020 11:17:40 +0200 Subject: x86/entry, bug: Comment the instrumentation_begin() usage for WARN() Explain the rationale for annotating WARN(), even though, strictly speaking printk() and friends are very much not safe in many of the places we put them. Signed-off-by: Peter Zijlstra (Intel) --- arch/x86/include/asm/bug.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index fb34ff641e0a..028189575560 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h @@ -75,6 +75,12 @@ do { \ unreachable(); \ } while (0) +/* + * This instrumentation_begin() is strictly speaking incorrect; but it + * suppresses the complaints from WARN()s in noinstr code. If such a WARN() + * were to trigger, we'd rather wreck the machine in an attempt to get the + * message out than not know about it. + */ #define __WARN_FLAGS(flags) \ do { \ instrumentation_begin(); \ -- cgit v1.2.3 From 751c263bb74fd36b5fc2589d36abc75042336444 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 15 Jun 2020 12:19:39 +0200 Subject: arm64: remove TEXT_OFFSET randomization TEXT_OFFSET was recently changed to 0x0, in preparation for its removal at a later stage, and a warning is emitted into the kernel log when the bootloader appears to have failed to take the TEXT_OFFSET image header value into account. Ironically, this warning itself fails to take TEXT_OFFSET into account, and compares the kernel image's alignment modulo 2M against a hardcoded value of 0x0, and so the warning will trigger spuriously when TEXT_OFFSET randomization is enabled. Given the intent to get rid of TEXT_OFFSET entirely, let's fix this oversight by just removing support for TEXT_OFFSET randomization. Signed-off-by: Ard Biesheuvel Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20200615101939.634391-1-ardb@kernel.org Signed-off-by: Will Deacon --- arch/arm64/Kconfig.debug | 15 --------------- arch/arm64/Makefile | 6 ------ 2 files changed, 21 deletions(-) (limited to 'arch') diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug index cdf7ec0b975e..265c4461031f 100644 --- a/arch/arm64/Kconfig.debug +++ b/arch/arm64/Kconfig.debug @@ -8,21 +8,6 @@ config PID_IN_CONTEXTIDR instructions during context switch. Say Y here only if you are planning to use hardware trace tools with this kernel. -config ARM64_RANDOMIZE_TEXT_OFFSET - bool "Randomize TEXT_OFFSET at build time" - help - Say Y here if you want the image load offset (AKA TEXT_OFFSET) - of the kernel to be randomized at build-time. When selected, - this option will cause TEXT_OFFSET to be randomized upon any - build of the kernel, and the offset will be reflected in the - text_offset field of the resulting Image. This can be used to - fuzz-test bootloaders which respect text_offset. - - This option is intended for bootloader and/or kernel testing - only. Bootloaders must make no assumptions regarding the value - of TEXT_OFFSET and platforms must not require a specific - value. - config DEBUG_EFI depends on EFI && DEBUG_INFO bool "UEFI debugging" diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 76359cfb328a..a0d94d063fa8 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -121,13 +121,7 @@ endif head-y := arch/arm64/kernel/head.o # The byte offset of the kernel image in RAM from the start of RAM. -ifeq ($(CONFIG_ARM64_RANDOMIZE_TEXT_OFFSET), y) -TEXT_OFFSET := $(shell awk "BEGIN {srand(); printf \"0x%06x\n\", \ - int(2 * 1024 * 1024 / (2 ^ $(CONFIG_ARM64_PAGE_SHIFT)) * \ - rand()) * (2 ^ $(CONFIG_ARM64_PAGE_SHIFT))}") -else TEXT_OFFSET := 0x0 -endif ifeq ($(CONFIG_KASAN_SW_TAGS), y) KASAN_SHADOW_SCALE_SHIFT := 4 -- cgit v1.2.3 From 5d5103595e9e53048bb7e70ee2673c897ab38300 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 8 Jun 2020 10:41:34 -0700 Subject: x86/cpu: Reinitialize IA32_FEAT_CTL MSR on BSP during wakeup Reinitialize IA32_FEAT_CTL on the BSP during wakeup to handle the case where firmware doesn't initialize or save/restore across S3. This fixes a bug where IA32_FEAT_CTL is left uninitialized and results in VMXON taking a #GP due to VMX not being fully enabled, i.e. breaks KVM. Use init_ia32_feat_ctl() to "restore" IA32_FEAT_CTL as it already deals with the case where the MSR is locked, and because APs already redo init_ia32_feat_ctl() during suspend by virtue of the SMP boot flow being used to reinitialize APs upon wakeup. Do the call in the early wakeup flow to avoid dependencies in the syscore_ops chain, e.g. simply adding a resume hook is not guaranteed to work, as KVM does VMXON in its own resume hook, kvm_resume(), when KVM has active guests. Fixes: 21bd3467a58e ("KVM: VMX: Drop initialization of IA32_FEAT_CTL MSR") Reported-by: Brad Campbell Signed-off-by: Sean Christopherson Signed-off-by: Borislav Petkov Reviewed-by: Liam Merwick Reviewed-by: Maxim Levitsky Tested-by: Brad Campbell Cc: stable@vger.kernel.org # v5.6 Link: https://lkml.kernel.org/r/20200608174134.11157-1-sean.j.christopherson@intel.com --- arch/x86/include/asm/cpu.h | 5 +++++ arch/x86/kernel/cpu/centaur.c | 1 + arch/x86/kernel/cpu/cpu.h | 4 ---- arch/x86/kernel/cpu/zhaoxin.c | 1 + arch/x86/power/cpu.c | 6 ++++++ 5 files changed, 13 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index dd17c2da1af5..da78ccbd493b 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -58,4 +58,9 @@ static inline bool handle_guest_split_lock(unsigned long ip) return false; } #endif +#ifdef CONFIG_IA32_FEAT_CTL +void init_ia32_feat_ctl(struct cpuinfo_x86 *c); +#else +static inline void init_ia32_feat_ctl(struct cpuinfo_x86 *c) {} +#endif #endif /* _ASM_X86_CPU_H */ diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index 426792565d86..c5cf336e5077 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -3,6 +3,7 @@ #include #include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index fb538fccd24c..9d033693519a 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -81,8 +81,4 @@ extern void update_srbds_msr(void); extern u64 x86_read_arch_cap_msr(void); -#ifdef CONFIG_IA32_FEAT_CTL -void init_ia32_feat_ctl(struct cpuinfo_x86 *c); -#endif - #endif /* ARCH_X86_CPU_H */ diff --git a/arch/x86/kernel/cpu/zhaoxin.c b/arch/x86/kernel/cpu/zhaoxin.c index df1358ba622b..05fa4ef63490 100644 --- a/arch/x86/kernel/cpu/zhaoxin.c +++ b/arch/x86/kernel/cpu/zhaoxin.c @@ -2,6 +2,7 @@ #include #include +#include #include #include "cpu.h" diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 7c65102debaf..db1378c6ff26 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -193,6 +193,8 @@ static void fix_processor_context(void) */ static void notrace __restore_processor_state(struct saved_context *ctxt) { + struct cpuinfo_x86 *c; + if (ctxt->misc_enable_saved) wrmsrl(MSR_IA32_MISC_ENABLE, ctxt->misc_enable); /* @@ -263,6 +265,10 @@ static void notrace __restore_processor_state(struct saved_context *ctxt) mtrr_bp_restore(); perf_restore_debug_store(); msr_restore_context(ctxt); + + c = &cpu_data(smp_processor_id()); + if (cpu_has(c, X86_FEATURE_MSR_IA32_FEAT_CTL)) + init_ia32_feat_ctl(c); } /* Needed by apm.c */ -- cgit v1.2.3 From a6e2c226c3d51fd93636320e47cabc8a8f0824c5 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Sun, 24 May 2020 15:08:19 +0530 Subject: powerpc: Fix kernel crash in show_instructions() w/DEBUG_VIRTUAL With CONFIG_DEBUG_VIRTUAL=y, we can hit a BUG() if we take a hard lockup watchdog interrupt when in OPAL mode. This happens in show_instructions() if the kernel takes the watchdog NMI IPI, or any other interrupt, with MSR_IR == 0. show_instructions() updates the variable pc in the loop and the second iteration will result in BUG(). We hit the BUG_ON due the below check in __va() #define __va(x) ({ VIRTUAL_BUG_ON((unsigned long)(x) >= PAGE_OFFSET); (void *)(unsigned long)((phys_addr_t)(x) | PAGE_OFFSET); }) Fix it by moving the check out of the loop. Also update nip so that the nip == pc check still matches. Fixes: 4dd7554a6456 ("powerpc/64: Add VIRTUAL_BUG_ON checks for __va and __pa addresses") Signed-off-by: Aneesh Kumar K.V [mpe: Use IS_ENABLED(), massage change log] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200524093822.423487-1-aneesh.kumar@linux.ibm.com --- arch/powerpc/kernel/process.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 7bb7faf84490..a2f1f0e70a4b 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1252,29 +1252,31 @@ struct task_struct *__switch_to(struct task_struct *prev, static void show_instructions(struct pt_regs *regs) { int i; + unsigned long nip = regs->nip; unsigned long pc = regs->nip - (NR_INSN_TO_PRINT * 3 / 4 * sizeof(int)); printk("Instruction dump:"); + /* + * If we were executing with the MMU off for instructions, adjust pc + * rather than printing XXXXXXXX. + */ + if (!IS_ENABLED(CONFIG_BOOKE) && !(regs->msr & MSR_IR)) { + pc = (unsigned long)phys_to_virt(pc); + nip = (unsigned long)phys_to_virt(regs->nip); + } + for (i = 0; i < NR_INSN_TO_PRINT; i++) { int instr; if (!(i % 8)) pr_cont("\n"); -#if !defined(CONFIG_BOOKE) - /* If executing with the IMMU off, adjust pc rather - * than print XXXXXXXX. - */ - if (!(regs->msr & MSR_IR)) - pc = (unsigned long)phys_to_virt(pc); -#endif - if (!__kernel_text_address(pc) || probe_kernel_address((const void *)pc, instr)) { pr_cont("XXXXXXXX "); } else { - if (regs->nip == pc) + if (nip == pc) pr_cont("<%08x> ", instr); else pr_cont("%08x ", instr); -- cgit v1.2.3 From b95273f1272398a9f7145de37703f1930244e465 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Wed, 15 Apr 2020 11:37:09 -0400 Subject: kvm/svm: disable KCSAN for svm_vcpu_run() For some reasons, running a simple qemu-kvm command with KCSAN will reset AMD hosts. It turns out svm_vcpu_run() could not be instrumented. Disable it for now. # /usr/libexec/qemu-kvm -name ubuntu-18.04-server-cloudimg -cpu host -smp 2 -m 2G -hda ubuntu-18.04-server-cloudimg.qcow2 === console output === Kernel 5.6.0-next-20200408+ on an x86_64 hp-dl385g10-05 login: <...host reset...> HPE ProLiant System BIOS A40 v1.20 (03/09/2018) (C) Copyright 1982-2018 Hewlett Packard Enterprise Development LP Early system initialization, please wait... Signed-off-by: Qian Cai Message-Id: <20200415153709.1559-1-cai@lca.pw> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 8ccfa4197d9c..c0da4dd78ac5 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3344,7 +3344,7 @@ static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu) void __svm_vcpu_run(unsigned long vmcb_pa, unsigned long *regs); -static fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) +static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) { fastpath_t exit_fastpath; struct vcpu_svm *svm = to_svm(vcpu); -- cgit v1.2.3 From 1e570f512cbdc5e9e401ba640d9827985c1bea1e Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Wed, 10 Jun 2020 18:03:10 +0100 Subject: arm64/sve: Eliminate data races on sve_default_vl sve_default_vl can be modified via the /proc/sys/abi/sve_default_vl sysctl concurrently with use, and modified concurrently by multiple threads. Adding a lock for this seems overkill, and I don't want to think any more than necessary, so just define wrappers using READ_ONCE()/ WRITE_ONCE(). This will avoid the possibility of torn accesses and repeated loads and stores. There's no evidence yet that this is going wrong in practice: this is just hygiene. For generic sysctl users, it would be better to build this kind of thing into the sysctl common code somehow. Reported-by: Will Deacon Signed-off-by: Dave Martin Link: https://lore.kernel.org/r/1591808590-20210-3-git-send-email-Dave.Martin@arm.com [will: move set_sve_default_vl() inside #ifdef to squash allnoconfig warning] Signed-off-by: Will Deacon --- arch/arm64/kernel/fpsimd.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 35cb5e66c504..d9eee9194511 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -119,10 +120,20 @@ struct fpsimd_last_state_struct { static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state); /* Default VL for tasks that don't set it explicitly: */ -static int sve_default_vl = -1; +static int __sve_default_vl = -1; + +static int get_sve_default_vl(void) +{ + return READ_ONCE(__sve_default_vl); +} #ifdef CONFIG_ARM64_SVE +static void set_sve_default_vl(int val) +{ + WRITE_ONCE(__sve_default_vl, val); +} + /* Maximum supported vector length across all CPUs (initially poisoned) */ int __ro_after_init sve_max_vl = SVE_VL_MIN; int __ro_after_init sve_max_virtualisable_vl = SVE_VL_MIN; @@ -344,7 +355,7 @@ static int sve_proc_do_default_vl(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret; - int vl = sve_default_vl; + int vl = get_sve_default_vl(); struct ctl_table tmp_table = { .data = &vl, .maxlen = sizeof(vl), @@ -361,7 +372,7 @@ static int sve_proc_do_default_vl(struct ctl_table *table, int write, if (!sve_vl_valid(vl)) return -EINVAL; - sve_default_vl = find_supported_vector_length(vl); + set_sve_default_vl(find_supported_vector_length(vl)); return 0; } @@ -868,7 +879,7 @@ void __init sve_setup(void) * For the default VL, pick the maximum supported value <= 64. * VL == 64 is guaranteed not to grow the signal frame. */ - sve_default_vl = find_supported_vector_length(64); + set_sve_default_vl(find_supported_vector_length(64)); bitmap_andnot(tmp_map, sve_vq_partial_map, sve_vq_map, SVE_VQ_MAX); @@ -889,7 +900,7 @@ void __init sve_setup(void) pr_info("SVE: maximum available vector length %u bytes per vector\n", sve_max_vl); pr_info("SVE: default vector length %u bytes per vector\n", - sve_default_vl); + get_sve_default_vl()); /* KVM decides whether to support mismatched systems. Just warn here: */ if (sve_max_virtualisable_vl < sve_max_vl) @@ -1029,13 +1040,13 @@ void fpsimd_flush_thread(void) * vector length configured: no kernel task can become a user * task without an exec and hence a call to this function. * By the time the first call to this function is made, all - * early hardware probing is complete, so sve_default_vl + * early hardware probing is complete, so __sve_default_vl * should be valid. * If a bug causes this to go wrong, we make some noise and * try to fudge thread.sve_vl to a safe value here. */ vl = current->thread.sve_vl_onexec ? - current->thread.sve_vl_onexec : sve_default_vl; + current->thread.sve_vl_onexec : get_sve_default_vl(); if (WARN_ON(!sve_vl_valid(vl))) vl = SVE_VL_MIN; -- cgit v1.2.3 From 413d3ea6b775d77b2057f13a9af75875eb066156 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 15 Jun 2020 12:23:16 +0100 Subject: arm64: traps: Dump registers prior to panic() in bad_mode() When panicing due to an unknown/unhandled exception at EL1, dump the registers of the faulting context so that it's easier to figure out what went wrong. In particular, this makes it a lot easier to debug in-kernel BTI failures since it pretty-prints PSTATE.BTYPE in the crash log. Cc: Mark Brown Cc: Catalin Marinas Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20200615113458.2884-1-will@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/traps.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 50cc30acf106..24f2af70ac2e 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -813,6 +813,7 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) handler[reason], smp_processor_id(), esr, esr_get_class_string(esr)); + __show_regs(regs); local_daif_mask(); panic("bad mode"); } -- cgit v1.2.3 From 8dd4daa04278d7437641962ed53b843c0b0ec4a9 Mon Sep 17 00:00:00 2001 From: Shyam Thombre Date: Wed, 10 Jun 2020 16:39:44 +0530 Subject: arm64: mm: reset address tag set by kasan sw tagging KASAN sw tagging sets a random tag of 8 bits in the top byte of the pointer returned by the memory allocating functions. So for the functions unaware of this change, the top 8 bits of the address must be reset which is done by the function arch_kasan_reset_tag(). Signed-off-by: Shyam Thombre Link: https://lore.kernel.org/r/1591787384-5823-1-git-send-email-sthombre@codeaurora.org Signed-off-by: Will Deacon --- arch/arm64/mm/mmu.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 990929c8837e..1df25f26571d 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -723,6 +723,7 @@ int kern_addr_valid(unsigned long addr) pmd_t *pmdp, pmd; pte_t *ptep, pte; + addr = arch_kasan_reset_tag(addr); if ((((long)addr) >> VA_BITS) != -1UL) return 0; -- cgit v1.2.3 From 88c200d929c969408779dbae4c4fad32bc510373 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 8 Jun 2020 18:45:18 -0700 Subject: KVM: VMX: Add helpers to identify interrupt type from intr_info Add is_intr_type() and is_intr_type_n() to consolidate the boilerplate code for querying a specific type of interrupt given an encoded value from VMCS.VM_{ENTER,EXIT}_INTR_INFO, with and without an associated vector respectively. Signed-off-by: Sean Christopherson Message-Id: <20200609014518.26756-1-sean.j.christopherson@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmcs.h | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmcs.h b/arch/x86/kvm/vmx/vmcs.h index 5c0ff80b85c0..7a3675fddec2 100644 --- a/arch/x86/kvm/vmx/vmcs.h +++ b/arch/x86/kvm/vmx/vmcs.h @@ -72,11 +72,24 @@ struct loaded_vmcs { struct vmcs_controls_shadow controls_shadow; }; +static inline bool is_intr_type(u32 intr_info, u32 type) +{ + const u32 mask = INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK; + + return (intr_info & mask) == (INTR_INFO_VALID_MASK | type); +} + +static inline bool is_intr_type_n(u32 intr_info, u32 type, u8 vector) +{ + const u32 mask = INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK | + INTR_INFO_VECTOR_MASK; + + return (intr_info & mask) == (INTR_INFO_VALID_MASK | type | vector); +} + static inline bool is_exception_n(u32 intr_info, u8 vector) { - return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | - INTR_INFO_VALID_MASK)) == - (INTR_TYPE_HARD_EXCEPTION | vector | INTR_INFO_VALID_MASK); + return is_intr_type_n(intr_info, INTR_TYPE_HARD_EXCEPTION, vector); } static inline bool is_debug(u32 intr_info) @@ -106,28 +119,23 @@ static inline bool is_gp_fault(u32 intr_info) static inline bool is_machine_check(u32 intr_info) { - return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | - INTR_INFO_VALID_MASK)) == - (INTR_TYPE_HARD_EXCEPTION | MC_VECTOR | INTR_INFO_VALID_MASK); + return is_exception_n(intr_info, MC_VECTOR); } /* Undocumented: icebp/int1 */ static inline bool is_icebp(u32 intr_info) { - return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) - == (INTR_TYPE_PRIV_SW_EXCEPTION | INTR_INFO_VALID_MASK); + return is_intr_type(intr_info, INTR_TYPE_PRIV_SW_EXCEPTION); } static inline bool is_nmi(u32 intr_info) { - return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) - == (INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK); + return is_intr_type(intr_info, INTR_TYPE_NMI_INTR); } static inline bool is_external_intr(u32 intr_info) { - return (intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK)) - == (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR); + return is_intr_type(intr_info, INTR_TYPE_EXT_INTR); } enum vmcs_field_width { -- cgit v1.2.3 From b791abf3201d724ac372c2ba1fa6e90d192e1dbf Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Mon, 15 Jun 2020 18:14:04 +0530 Subject: powerpc/papr_scm: Fetch nvdimm health information from PHYP Implement support for fetching nvdimm health information via H_SCM_HEALTH hcall as documented in Ref[1]. The hcall returns a pair of 64-bit bitmap, bitwise-and of which is then stored in 'struct papr_scm_priv' and subsequently partially exposed to user-space via newly introduced dimm specific attribute 'papr/flags'. Since the hcall is costly, the health information is cached and only re-queried, 60s after the previous successful hcall. The patch also adds a documentation text describing flags reported by the the new sysfs attribute 'papr/flags' is also introduced at Documentation/ABI/testing/sysfs-bus-papr-pmem. [1] commit 58b278f568f0 ("powerpc: Provide initial documentation for PAPR hcalls") Signed-off-by: Vaibhav Jain Cc: "Aneesh Kumar K . V" Cc: Dan Williams Cc: Michael Ellerman Cc: Ira Weiny Link: https://lore.kernel.org/r/20200615124407.32596-4-vaibhav@linux.ibm.com Signed-off-by: Dan Williams --- Documentation/ABI/testing/sysfs-bus-papr-pmem | 27 +++++ arch/powerpc/platforms/pseries/papr_scm.c | 168 +++++++++++++++++++++++++- 2 files changed, 193 insertions(+), 2 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-bus-papr-pmem (limited to 'arch') diff --git a/Documentation/ABI/testing/sysfs-bus-papr-pmem b/Documentation/ABI/testing/sysfs-bus-papr-pmem new file mode 100644 index 000000000000..5b10d036a8d4 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-bus-papr-pmem @@ -0,0 +1,27 @@ +What: /sys/bus/nd/devices/nmemX/papr/flags +Date: Apr, 2020 +KernelVersion: v5.8 +Contact: linuxppc-dev , linux-nvdimm@lists.01.org, +Description: + (RO) Report flags indicating various states of a + papr-pmem NVDIMM device. Each flag maps to a one or + more bits set in the dimm-health-bitmap retrieved in + response to H_SCM_HEALTH hcall. The details of the bit + flags returned in response to this hcall is available + at 'Documentation/powerpc/papr_hcalls.rst' . Below are + the flags reported in this sysfs file: + + * "not_armed" : Indicates that NVDIMM contents will not + survive a power cycle. + * "flush_fail" : Indicates that NVDIMM contents + couldn't be flushed during last + shut-down event. + * "restore_fail": Indicates that NVDIMM contents + couldn't be restored during NVDIMM + initialization. + * "encrypted" : NVDIMM contents are encrypted. + * "smart_notify": There is health event for the NVDIMM. + * "scrubbed" : Indicating that contents of the + NVDIMM have been scrubbed. + * "locked" : Indicating that NVDIMM contents cant + be modified until next power cycle. diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index f35592423380..0c091622b15e 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -12,6 +12,7 @@ #include #include #include +#include #include @@ -22,6 +23,44 @@ (1ul << ND_CMD_GET_CONFIG_DATA) | \ (1ul << ND_CMD_SET_CONFIG_DATA)) +/* DIMM health bitmap bitmap indicators */ +/* SCM device is unable to persist memory contents */ +#define PAPR_PMEM_UNARMED (1ULL << (63 - 0)) +/* SCM device failed to persist memory contents */ +#define PAPR_PMEM_SHUTDOWN_DIRTY (1ULL << (63 - 1)) +/* SCM device contents are persisted from previous IPL */ +#define PAPR_PMEM_SHUTDOWN_CLEAN (1ULL << (63 - 2)) +/* SCM device contents are not persisted from previous IPL */ +#define PAPR_PMEM_EMPTY (1ULL << (63 - 3)) +/* SCM device memory life remaining is critically low */ +#define PAPR_PMEM_HEALTH_CRITICAL (1ULL << (63 - 4)) +/* SCM device will be garded off next IPL due to failure */ +#define PAPR_PMEM_HEALTH_FATAL (1ULL << (63 - 5)) +/* SCM contents cannot persist due to current platform health status */ +#define PAPR_PMEM_HEALTH_UNHEALTHY (1ULL << (63 - 6)) +/* SCM device is unable to persist memory contents in certain conditions */ +#define PAPR_PMEM_HEALTH_NON_CRITICAL (1ULL << (63 - 7)) +/* SCM device is encrypted */ +#define PAPR_PMEM_ENCRYPTED (1ULL << (63 - 8)) +/* SCM device has been scrubbed and locked */ +#define PAPR_PMEM_SCRUBBED_AND_LOCKED (1ULL << (63 - 9)) + +/* Bits status indicators for health bitmap indicating unarmed dimm */ +#define PAPR_PMEM_UNARMED_MASK (PAPR_PMEM_UNARMED | \ + PAPR_PMEM_HEALTH_UNHEALTHY) + +/* Bits status indicators for health bitmap indicating unflushed dimm */ +#define PAPR_PMEM_BAD_SHUTDOWN_MASK (PAPR_PMEM_SHUTDOWN_DIRTY) + +/* Bits status indicators for health bitmap indicating unrestored dimm */ +#define PAPR_PMEM_BAD_RESTORE_MASK (PAPR_PMEM_EMPTY) + +/* Bit status indicators for smart event notification */ +#define PAPR_PMEM_SMART_EVENT_MASK (PAPR_PMEM_HEALTH_CRITICAL | \ + PAPR_PMEM_HEALTH_FATAL | \ + PAPR_PMEM_HEALTH_UNHEALTHY) + +/* private struct associated with each region */ struct papr_scm_priv { struct platform_device *pdev; struct device_node *dn; @@ -39,6 +78,15 @@ struct papr_scm_priv { struct resource res; struct nd_region *region; struct nd_interleave_set nd_set; + + /* Protect dimm health data from concurrent read/writes */ + struct mutex health_mutex; + + /* Last time the health information of the dimm was updated */ + unsigned long lasthealth_jiffies; + + /* Health information for the dimm */ + u64 health_bitmap; }; static int drc_pmem_bind(struct papr_scm_priv *p) @@ -144,6 +192,61 @@ err_out: return drc_pmem_bind(p); } +/* + * Issue hcall to retrieve dimm health info and populate papr_scm_priv with the + * health information. + */ +static int __drc_pmem_query_health(struct papr_scm_priv *p) +{ + unsigned long ret[PLPAR_HCALL_BUFSIZE]; + long rc; + + /* issue the hcall */ + rc = plpar_hcall(H_SCM_HEALTH, ret, p->drc_index); + if (rc != H_SUCCESS) { + dev_err(&p->pdev->dev, + "Failed to query health information, Err:%ld\n", rc); + return -ENXIO; + } + + p->lasthealth_jiffies = jiffies; + p->health_bitmap = ret[0] & ret[1]; + + dev_dbg(&p->pdev->dev, + "Queried dimm health info. Bitmap:0x%016lx Mask:0x%016lx\n", + ret[0], ret[1]); + + return 0; +} + +/* Min interval in seconds for assuming stable dimm health */ +#define MIN_HEALTH_QUERY_INTERVAL 60 + +/* Query cached health info and if needed call drc_pmem_query_health */ +static int drc_pmem_query_health(struct papr_scm_priv *p) +{ + unsigned long cache_timeout; + int rc; + + /* Protect concurrent modifications to papr_scm_priv */ + rc = mutex_lock_interruptible(&p->health_mutex); + if (rc) + return rc; + + /* Jiffies offset for which the health data is assumed to be same */ + cache_timeout = p->lasthealth_jiffies + + msecs_to_jiffies(MIN_HEALTH_QUERY_INTERVAL * 1000); + + /* Fetch new health info is its older than MIN_HEALTH_QUERY_INTERVAL */ + if (time_after(jiffies, cache_timeout)) + rc = __drc_pmem_query_health(p); + else + /* Assume cached health data is valid */ + rc = 0; + + mutex_unlock(&p->health_mutex); + return rc; +} static int papr_scm_meta_get(struct papr_scm_priv *p, struct nd_cmd_get_config_data_hdr *hdr) @@ -286,6 +389,64 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, return 0; } +static ssize_t flags_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm *dimm = to_nvdimm(dev); + struct papr_scm_priv *p = nvdimm_provider_data(dimm); + struct seq_buf s; + u64 health; + int rc; + + rc = drc_pmem_query_health(p); + if (rc) + return rc; + + /* Copy health_bitmap locally, check masks & update out buffer */ + health = READ_ONCE(p->health_bitmap); + + seq_buf_init(&s, buf, PAGE_SIZE); + if (health & PAPR_PMEM_UNARMED_MASK) + seq_buf_printf(&s, "not_armed "); + + if (health & PAPR_PMEM_BAD_SHUTDOWN_MASK) + seq_buf_printf(&s, "flush_fail "); + + if (health & PAPR_PMEM_BAD_RESTORE_MASK) + seq_buf_printf(&s, "restore_fail "); + + if (health & PAPR_PMEM_ENCRYPTED) + seq_buf_printf(&s, "encrypted "); + + if (health & PAPR_PMEM_SMART_EVENT_MASK) + seq_buf_printf(&s, "smart_notify "); + + if (health & PAPR_PMEM_SCRUBBED_AND_LOCKED) + seq_buf_printf(&s, "scrubbed locked "); + + if (seq_buf_used(&s)) + seq_buf_printf(&s, "\n"); + + return seq_buf_used(&s); +} +DEVICE_ATTR_RO(flags); + +/* papr_scm specific dimm attributes */ +static struct attribute *papr_nd_attributes[] = { + &dev_attr_flags.attr, + NULL, +}; + +static struct attribute_group papr_nd_attribute_group = { + .name = "papr", + .attrs = papr_nd_attributes, +}; + +static const struct attribute_group *papr_nd_attr_groups[] = { + &papr_nd_attribute_group, + NULL, +}; + static int papr_scm_nvdimm_init(struct papr_scm_priv *p) { struct device *dev = &p->pdev->dev; @@ -312,8 +473,8 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) dimm_flags = 0; set_bit(NDD_LABELING, &dimm_flags); - p->nvdimm = nvdimm_create(p->bus, p, NULL, dimm_flags, - PAPR_SCM_DIMM_CMD_MASK, 0, NULL); + p->nvdimm = nvdimm_create(p->bus, p, papr_nd_attr_groups, + dimm_flags, PAPR_SCM_DIMM_CMD_MASK, 0, NULL); if (!p->nvdimm) { dev_err(dev, "Error creating DIMM object for %pOF\n", p->dn); goto err; @@ -399,6 +560,9 @@ static int papr_scm_probe(struct platform_device *pdev) if (!p) return -ENOMEM; + /* Initialize the dimm mutex */ + mutex_init(&p->health_mutex); + /* optional DT properties */ of_property_read_u32(dn, "ibm,metadata-size", &metadata_size); -- cgit v1.2.3 From b5f38f09e1558c20265a2976b0337bab143a66c7 Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Mon, 15 Jun 2020 18:14:05 +0530 Subject: powerpc/papr_scm: Improve error logging and handling papr_scm_ndctl() Since papr_scm_ndctl() can be called from outside papr_scm, its exposed to the possibility of receiving NULL as value of 'cmd_rc' argument. This patch updates papr_scm_ndctl() to protect against such possibility by assigning it pointer to a local variable in case cmd_rc == NULL. Finally the patch also updates the 'default' add a debug log unknown 'cmd' values. Signed-off-by: Vaibhav Jain Reviewed-by: Ira Weiny Cc: "Aneesh Kumar K . V" Cc: Dan Williams Cc: Michael Ellerman Cc: Ira Weiny Link: https://lore.kernel.org/r/20200615124407.32596-5-vaibhav@linux.ibm.com Signed-off-by: Dan Williams --- arch/powerpc/platforms/pseries/papr_scm.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index 0c091622b15e..692ad3d79826 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -355,11 +355,16 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, { struct nd_cmd_get_config_size *get_size_hdr; struct papr_scm_priv *p; + int rc; /* Only dimm-specific calls are supported atm */ if (!nvdimm) return -EINVAL; + /* Use a local variable in case cmd_rc pointer is NULL */ + if (!cmd_rc) + cmd_rc = &rc; + p = nvdimm_provider_data(nvdimm); switch (cmd) { @@ -381,6 +386,7 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, break; default: + dev_dbg(&p->pdev->dev, "Unknown command = %d\n", cmd); return -EINVAL; } -- cgit v1.2.3 From f517f7925b7b453cb83be06c268ba057b78e4792 Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Mon, 15 Jun 2020 18:14:06 +0530 Subject: ndctl/papr_scm,uapi: Add support for PAPR nvdimm specific methods Introduce support for PAPR NVDIMM Specific Methods (PDSM) in papr_scm module and add the command family NVDIMM_FAMILY_PAPR to the white list of NVDIMM command sets. Also advertise support for ND_CMD_CALL for the nvdimm command mask and implement necessary scaffolding in the module to handle ND_CMD_CALL ioctl and PDSM requests that we receive. The layout of the PDSM request as we expect from libnvdimm/libndctl is described in newly introduced uapi header 'papr_pdsm.h' which defines a 'struct nd_pkg_pdsm' and a maximal union named 'nd_pdsm_payload'. These new structs together with 'struct nd_cmd_pkg' for a pdsm envelop thats sent by libndctl to libnvdimm and serviced by papr_scm in 'papr_scm_service_pdsm()'. The PDSM request is communicated by member 'struct nd_cmd_pkg.nd_command' together with other information on the pdsm payload (size-in, size-out). The patch also introduces 'struct pdsm_cmd_desc' instances of which are stored in an array __pdsm_cmd_descriptors[] indexed with PDSM cmd and corresponding access function pdsm_cmd_desc() is introduced. 'struct pdsm_cdm_desc' holds the service function for a given PDSM and corresponding payload in/out sizes. A new function papr_scm_service_pdsm() is introduced and is called from papr_scm_ndctl() in case of a PDSM request is received via ND_CMD_CALL command from libnvdimm. The function performs validation on the PDSM payload based on info present in corresponding PDSM descriptor and if valid calls the 'struct pdcm_cmd_desc.service' function to service the PDSM. Signed-off-by: Vaibhav Jain Cc: "Aneesh Kumar K . V" Cc: Dan Williams Cc: Michael Ellerman Cc: Ira Weiny Link: https://lore.kernel.org/r/20200615124407.32596-6-vaibhav@linux.ibm.com Signed-off-by: Dan Williams --- arch/powerpc/include/uapi/asm/papr_pdsm.h | 95 +++++++++++++++ arch/powerpc/platforms/pseries/papr_scm.c | 193 +++++++++++++++++++++++++++++- include/uapi/linux/ndctl.h | 1 + 3 files changed, 285 insertions(+), 4 deletions(-) create mode 100644 arch/powerpc/include/uapi/asm/papr_pdsm.h (limited to 'arch') diff --git a/arch/powerpc/include/uapi/asm/papr_pdsm.h b/arch/powerpc/include/uapi/asm/papr_pdsm.h new file mode 100644 index 000000000000..28115152aa4e --- /dev/null +++ b/arch/powerpc/include/uapi/asm/papr_pdsm.h @@ -0,0 +1,95 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * PAPR nvDimm Specific Methods (PDSM) and structs for libndctl + * + * (C) Copyright IBM 2020 + * + * Author: Vaibhav Jain + */ + +#ifndef _UAPI_ASM_POWERPC_PAPR_PDSM_H_ +#define _UAPI_ASM_POWERPC_PAPR_PDSM_H_ + +#include +#include + +/* + * PDSM Envelope: + * + * The ioctl ND_CMD_CALL exchange data between user-space and kernel via + * envelope which consists of 2 headers sections and payload sections as + * illustrated below: + * +-----------------+---------------+---------------------------+ + * | 64-Bytes | 8-Bytes | Max 184-Bytes | + * +-----------------+---------------+---------------------------+ + * | ND-HEADER | PDSM-HEADER | PDSM-PAYLOAD | + * +-----------------+---------------+---------------------------+ + * | nd_family | | | + * | nd_size_out | cmd_status | | + * | nd_size_in | reserved | nd_pdsm_payload | + * | nd_command | payload --> | | + * | nd_fw_size | | | + * | nd_payload ---> | | | + * +---------------+-----------------+---------------------------+ + * + * ND Header: + * This is the generic libnvdimm header described as 'struct nd_cmd_pkg' + * which is interpreted by libnvdimm before passed on to papr_scm. Important + * member fields used are: + * 'nd_family' : (In) NVDIMM_FAMILY_PAPR_SCM + * 'nd_size_in' : (In) PDSM-HEADER + PDSM-IN-PAYLOAD (usually 0) + * 'nd_size_out' : (In) PDSM-HEADER + PDSM-RETURN-PAYLOAD + * 'nd_command' : (In) One of PAPR_PDSM_XXX + * 'nd_fw_size' : (Out) PDSM-HEADER + size of actual payload returned + * + * PDSM Header: + * This is papr-scm specific header that precedes the payload. This is defined + * as nd_cmd_pdsm_pkg. Following fields aare available in this header: + * + * 'cmd_status' : (Out) Errors if any encountered while servicing PDSM. + * 'reserved' : Not used, reserved for future and should be set to 0. + * 'payload' : A union of all the possible payload structs + * + * PDSM Payload: + * + * The layout of the PDSM Payload is defined by various structs shared between + * papr_scm and libndctl so that contents of payload can be interpreted. As such + * its defined as a union of all possible payload structs as + * 'union nd_pdsm_payload'. Based on the value of 'nd_cmd_pkg.nd_command' + * appropriate member of the union is accessed. + */ + +/* Max payload size that we can handle */ +#define ND_PDSM_PAYLOAD_MAX_SIZE 184 + +/* Max payload size that we can handle */ +#define ND_PDSM_HDR_SIZE \ + (sizeof(struct nd_pkg_pdsm) - ND_PDSM_PAYLOAD_MAX_SIZE) + +/* + * Methods to be embedded in ND_CMD_CALL request. These are sent to the kernel + * via 'nd_cmd_pkg.nd_command' member of the ioctl struct + */ +enum papr_pdsm { + PAPR_PDSM_MIN = 0x0, + PAPR_PDSM_MAX, +}; + +/* Maximal union that can hold all possible payload types */ +union nd_pdsm_payload { + __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE]; +} __packed; + +/* + * PDSM-header + payload expected with ND_CMD_CALL ioctl from libnvdimm + * Valid member of union 'payload' is identified via 'nd_cmd_pkg.nd_command' + * that should always precede this struct when sent to papr_scm via CMD_CALL + * interface. + */ +struct nd_pkg_pdsm { + __s32 cmd_status; /* Out: Sub-cmd status returned back */ + __u16 reserved[2]; /* Ignored and to be set as '0' */ + union nd_pdsm_payload payload; +} __packed; + +#endif /* _UAPI_ASM_POWERPC_PAPR_PDSM_H_ */ diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index 692ad3d79826..d3bbf9940ba4 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -15,13 +15,15 @@ #include #include +#include #define BIND_ANY_ADDR (~0ul) #define PAPR_SCM_DIMM_CMD_MASK \ ((1ul << ND_CMD_GET_CONFIG_SIZE) | \ (1ul << ND_CMD_GET_CONFIG_DATA) | \ - (1ul << ND_CMD_SET_CONFIG_DATA)) + (1ul << ND_CMD_SET_CONFIG_DATA) | \ + (1ul << ND_CMD_CALL)) /* DIMM health bitmap bitmap indicators */ /* SCM device is unable to persist memory contents */ @@ -349,17 +351,195 @@ static int papr_scm_meta_set(struct papr_scm_priv *p, return 0; } +/* + * Do a sanity checks on the inputs args to dimm-control function and return + * '0' if valid. Validation of PDSM payloads happens later in + * papr_scm_service_pdsm. + */ +static int is_cmd_valid(struct nvdimm *nvdimm, unsigned int cmd, void *buf, + unsigned int buf_len) +{ + unsigned long cmd_mask = PAPR_SCM_DIMM_CMD_MASK; + struct nd_cmd_pkg *nd_cmd; + struct papr_scm_priv *p; + enum papr_pdsm pdsm; + + /* Only dimm-specific calls are supported atm */ + if (!nvdimm) + return -EINVAL; + + /* get the provider data from struct nvdimm */ + p = nvdimm_provider_data(nvdimm); + + if (!test_bit(cmd, &cmd_mask)) { + dev_dbg(&p->pdev->dev, "Unsupported cmd=%u\n", cmd); + return -EINVAL; + } + + /* For CMD_CALL verify pdsm request */ + if (cmd == ND_CMD_CALL) { + /* Verify the envelope and envelop size */ + if (!buf || + buf_len < (sizeof(struct nd_cmd_pkg) + ND_PDSM_HDR_SIZE)) { + dev_dbg(&p->pdev->dev, "Invalid pkg size=%u\n", + buf_len); + return -EINVAL; + } + + /* Verify that the nd_cmd_pkg.nd_family is correct */ + nd_cmd = (struct nd_cmd_pkg *)buf; + + if (nd_cmd->nd_family != NVDIMM_FAMILY_PAPR) { + dev_dbg(&p->pdev->dev, "Invalid pkg family=0x%llx\n", + nd_cmd->nd_family); + return -EINVAL; + } + + pdsm = (enum papr_pdsm)nd_cmd->nd_command; + + /* Verify if the pdsm command is valid */ + if (pdsm <= PAPR_PDSM_MIN || pdsm >= PAPR_PDSM_MAX) { + dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Invalid PDSM\n", + pdsm); + return -EINVAL; + } + + /* Have enough space to hold returned 'nd_pkg_pdsm' header */ + if (nd_cmd->nd_size_out < ND_PDSM_HDR_SIZE) { + dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Invalid payload\n", + pdsm); + return -EINVAL; + } + } + + /* Let the command be further processed */ + return 0; +} + +/* + * 'struct pdsm_cmd_desc' + * Identifies supported PDSMs' expected length of in/out payloads + * and pdsm service function. + * + * size_in : Size of input payload if any in the PDSM request. + * size_out : Size of output payload if any in the PDSM request. + * service : Service function for the PDSM request. Return semantics: + * rc < 0 : Error servicing PDSM and rc indicates the error. + * rc >=0 : Serviced successfully and 'rc' indicate number of + * bytes written to payload. + */ +struct pdsm_cmd_desc { + u32 size_in; + u32 size_out; + int (*service)(struct papr_scm_priv *dimm, + union nd_pdsm_payload *payload); +}; + +/* Holds all supported PDSMs' command descriptors */ +static const struct pdsm_cmd_desc __pdsm_cmd_descriptors[] = { + [PAPR_PDSM_MIN] = { + .size_in = 0, + .size_out = 0, + .service = NULL, + }, + /* New PDSM command descriptors to be added below */ + + /* Empty */ + [PAPR_PDSM_MAX] = { + .size_in = 0, + .size_out = 0, + .service = NULL, + }, +}; + +/* Given a valid pdsm cmd return its command descriptor else return NULL */ +static inline const struct pdsm_cmd_desc *pdsm_cmd_desc(enum papr_pdsm cmd) +{ + if (cmd >= 0 || cmd < ARRAY_SIZE(__pdsm_cmd_descriptors)) + return &__pdsm_cmd_descriptors[cmd]; + + return NULL; +} + +/* + * For a given pdsm request call an appropriate service function. + * Returns errors if any while handling the pdsm command package. + */ +static int papr_scm_service_pdsm(struct papr_scm_priv *p, + struct nd_cmd_pkg *pkg) +{ + /* Get the PDSM header and PDSM command */ + struct nd_pkg_pdsm *pdsm_pkg = (struct nd_pkg_pdsm *)pkg->nd_payload; + enum papr_pdsm pdsm = (enum papr_pdsm)pkg->nd_command; + const struct pdsm_cmd_desc *pdsc; + int rc; + + /* Fetch corresponding pdsm descriptor for validation and servicing */ + pdsc = pdsm_cmd_desc(pdsm); + + /* Validate pdsm descriptor */ + /* Ensure that reserved fields are 0 */ + if (pdsm_pkg->reserved[0] || pdsm_pkg->reserved[1]) { + dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Invalid reserved field\n", + pdsm); + return -EINVAL; + } + + /* If pdsm expects some input, then ensure that the size_in matches */ + if (pdsc->size_in && + pkg->nd_size_in != (pdsc->size_in + ND_PDSM_HDR_SIZE)) { + dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Mismatched size_in=%d\n", + pdsm, pkg->nd_size_in); + return -EINVAL; + } + + /* If pdsm wants to return data, then ensure that size_out matches */ + if (pdsc->size_out && + pkg->nd_size_out != (pdsc->size_out + ND_PDSM_HDR_SIZE)) { + dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Mismatched size_out=%d\n", + pdsm, pkg->nd_size_out); + return -EINVAL; + } + + /* Service the pdsm */ + if (pdsc->service) { + dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Servicing..\n", pdsm); + + rc = pdsc->service(p, &pdsm_pkg->payload); + + if (rc < 0) { + /* error encountered while servicing pdsm */ + pdsm_pkg->cmd_status = rc; + pkg->nd_fw_size = ND_PDSM_HDR_SIZE; + } else { + /* pdsm serviced and 'rc' bytes written to payload */ + pdsm_pkg->cmd_status = 0; + pkg->nd_fw_size = ND_PDSM_HDR_SIZE + rc; + } + } else { + dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Unsupported PDSM request\n", + pdsm); + pdsm_pkg->cmd_status = -ENOENT; + pkg->nd_fw_size = ND_PDSM_HDR_SIZE; + } + + return pdsm_pkg->cmd_status; +} + static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc) { struct nd_cmd_get_config_size *get_size_hdr; + struct nd_cmd_pkg *call_pkg = NULL; struct papr_scm_priv *p; int rc; - /* Only dimm-specific calls are supported atm */ - if (!nvdimm) - return -EINVAL; + rc = is_cmd_valid(nvdimm, cmd, buf, buf_len); + if (rc) { + pr_debug("Invalid cmd=0x%x. Err=%d\n", cmd, rc); + return rc; + } /* Use a local variable in case cmd_rc pointer is NULL */ if (!cmd_rc) @@ -385,6 +565,11 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, *cmd_rc = papr_scm_meta_set(p, buf); break; + case ND_CMD_CALL: + call_pkg = (struct nd_cmd_pkg *)buf; + *cmd_rc = papr_scm_service_pdsm(p, call_pkg); + break; + default: dev_dbg(&p->pdev->dev, "Unknown command = %d\n", cmd); return -EINVAL; diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index de5d90212409..0e09dc5cec19 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -244,6 +244,7 @@ struct nd_cmd_pkg { #define NVDIMM_FAMILY_HPE2 2 #define NVDIMM_FAMILY_MSFT 3 #define NVDIMM_FAMILY_HYPERV 4 +#define NVDIMM_FAMILY_PAPR 5 #define ND_IOCTL_CALL _IOWR(ND_IOCTL, ND_CMD_CALL,\ struct nd_cmd_pkg) -- cgit v1.2.3 From d35f18b554be015b6fa89fad6447c6fce8e6ad66 Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Mon, 15 Jun 2020 18:14:07 +0530 Subject: powerpc/papr_scm: Implement support for PAPR_PDSM_HEALTH This patch implements support for PDSM request 'PAPR_PDSM_HEALTH' that returns a newly introduced 'struct nd_papr_pdsm_health' instance containing dimm health information back to user space in response to ND_CMD_CALL. This functionality is implemented in newly introduced papr_pdsm_health() that queries the nvdimm health information and then copies this information to the package payload whose layout is defined by 'struct nd_papr_pdsm_health'. Signed-off-by: Vaibhav Jain Cc: "Aneesh Kumar K . V" Cc: Dan Williams Cc: Michael Ellerman Cc: Ira Weiny Link: https://lore.kernel.org/r/20200615124407.32596-7-vaibhav@linux.ibm.com Signed-off-by: Dan Williams --- arch/powerpc/include/uapi/asm/papr_pdsm.h | 37 ++++++++++++++++++++++ arch/powerpc/platforms/pseries/papr_scm.c | 51 +++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/include/uapi/asm/papr_pdsm.h b/arch/powerpc/include/uapi/asm/papr_pdsm.h index 28115152aa4e..9ccecc1d6840 100644 --- a/arch/powerpc/include/uapi/asm/papr_pdsm.h +++ b/arch/powerpc/include/uapi/asm/papr_pdsm.h @@ -66,17 +66,54 @@ #define ND_PDSM_HDR_SIZE \ (sizeof(struct nd_pkg_pdsm) - ND_PDSM_PAYLOAD_MAX_SIZE) +/* Various nvdimm health indicators */ +#define PAPR_PDSM_DIMM_HEALTHY 0 +#define PAPR_PDSM_DIMM_UNHEALTHY 1 +#define PAPR_PDSM_DIMM_CRITICAL 2 +#define PAPR_PDSM_DIMM_FATAL 3 + +/* + * Struct exchanged between kernel & ndctl in for PAPR_PDSM_HEALTH + * Various flags indicate the health status of the dimm. + * + * extension_flags : Any extension fields present in the struct. + * dimm_unarmed : Dimm not armed. So contents wont persist. + * dimm_bad_shutdown : Previous shutdown did not persist contents. + * dimm_bad_restore : Contents from previous shutdown werent restored. + * dimm_scrubbed : Contents of the dimm have been scrubbed. + * dimm_locked : Contents of the dimm cant be modified until CEC reboot + * dimm_encrypted : Contents of dimm are encrypted. + * dimm_health : Dimm health indicator. One of PAPR_PDSM_DIMM_XXXX + */ +struct nd_papr_pdsm_health { + union { + struct { + __u32 extension_flags; + __u8 dimm_unarmed; + __u8 dimm_bad_shutdown; + __u8 dimm_bad_restore; + __u8 dimm_scrubbed; + __u8 dimm_locked; + __u8 dimm_encrypted; + __u16 dimm_health; + }; + __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE]; + }; +}; + /* * Methods to be embedded in ND_CMD_CALL request. These are sent to the kernel * via 'nd_cmd_pkg.nd_command' member of the ioctl struct */ enum papr_pdsm { PAPR_PDSM_MIN = 0x0, + PAPR_PDSM_HEALTH, PAPR_PDSM_MAX, }; /* Maximal union that can hold all possible payload types */ union nd_pdsm_payload { + struct nd_papr_pdsm_health health; __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE]; } __packed; diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index d3bbf9940ba4..9c569078a09f 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -416,6 +416,52 @@ static int is_cmd_valid(struct nvdimm *nvdimm, unsigned int cmd, void *buf, return 0; } +/* Fetch the DIMM health info and populate it in provided package. */ +static int papr_pdsm_health(struct papr_scm_priv *p, + union nd_pdsm_payload *payload) +{ + int rc; + + /* Ensure dimm health mutex is taken preventing concurrent access */ + rc = mutex_lock_interruptible(&p->health_mutex); + if (rc) + goto out; + + /* Always fetch upto date dimm health data ignoring cached values */ + rc = __drc_pmem_query_health(p); + if (rc) { + mutex_unlock(&p->health_mutex); + goto out; + } + + /* update health struct with various flags derived from health bitmap */ + payload->health = (struct nd_papr_pdsm_health) { + .extension_flags = 0, + .dimm_unarmed = !!(p->health_bitmap & PAPR_PMEM_UNARMED_MASK), + .dimm_bad_shutdown = !!(p->health_bitmap & PAPR_PMEM_BAD_SHUTDOWN_MASK), + .dimm_bad_restore = !!(p->health_bitmap & PAPR_PMEM_BAD_RESTORE_MASK), + .dimm_scrubbed = !!(p->health_bitmap & PAPR_PMEM_SCRUBBED_AND_LOCKED), + .dimm_locked = !!(p->health_bitmap & PAPR_PMEM_SCRUBBED_AND_LOCKED), + .dimm_encrypted = !!(p->health_bitmap & PAPR_PMEM_ENCRYPTED), + .dimm_health = PAPR_PDSM_DIMM_HEALTHY, + }; + + /* Update field dimm_health based on health_bitmap flags */ + if (p->health_bitmap & PAPR_PMEM_HEALTH_FATAL) + payload->health.dimm_health = PAPR_PDSM_DIMM_FATAL; + else if (p->health_bitmap & PAPR_PMEM_HEALTH_CRITICAL) + payload->health.dimm_health = PAPR_PDSM_DIMM_CRITICAL; + else if (p->health_bitmap & PAPR_PMEM_HEALTH_UNHEALTHY) + payload->health.dimm_health = PAPR_PDSM_DIMM_UNHEALTHY; + + /* struct populated hence can release the mutex now */ + mutex_unlock(&p->health_mutex); + rc = sizeof(struct nd_papr_pdsm_health); + +out: + return rc; +} + /* * 'struct pdsm_cmd_desc' * Identifies supported PDSMs' expected length of in/out payloads @@ -444,6 +490,11 @@ static const struct pdsm_cmd_desc __pdsm_cmd_descriptors[] = { }, /* New PDSM command descriptors to be added below */ + [PAPR_PDSM_HEALTH] = { + .size_in = 0, + .size_out = sizeof(struct nd_papr_pdsm_health), + .service = papr_pdsm_health, + }, /* Empty */ [PAPR_PDSM_MAX] = { .size_in = 0, -- cgit v1.2.3 From 0bdcfa182506526fbe4e088ff9ca86a31b81828d Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 15 Jun 2020 16:12:47 +1000 Subject: powerpc/64s: Fix KVM interrupt using wrong save area The CTR register reload in the KVM interrupt path used the wrong save area for SLB (and NMI) interrupts. Fixes: 9600f261acaa ("powerpc/64s/exception: Move KVM test to common code") Cc: stable@vger.kernel.org # v5.7+ Reported-by: Christian Zigotzky Signed-off-by: Nicholas Piggin Tested-by: Christian Zigotzky Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200615061247.1310763-1-npiggin@gmail.com --- arch/powerpc/kernel/exceptions-64s.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index e70ebb5c318c..fa080694e581 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -270,7 +270,7 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_CFAR) .endif - ld r10,PACA_EXGEN+EX_CTR(r13) + ld r10,IAREA+EX_CTR(r13) mtctr r10 BEGIN_FTR_SECTION ld r10,IAREA+EX_PPR(r13) @@ -298,7 +298,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) .if IKVM_SKIP 89: mtocrf 0x80,r9 - ld r10,PACA_EXGEN+EX_CTR(r13) + ld r10,IAREA+EX_CTR(r13) mtctr r10 ld r9,IAREA+EX_R9(r13) ld r10,IAREA+EX_R10(r13) -- cgit v1.2.3 From 1907774c37f052ebc7606b6c24ae6d455ed07d85 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 28 May 2020 09:35:11 -0500 Subject: ia64: kernel: unwind_i.h: Replace zero-length array with flexible-array MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use “flexible array members”[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://github.com/KSPP/linux/issues/21 Signed-off-by: Gustavo A. R. Silva --- arch/ia64/kernel/unwind_i.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/ia64/kernel/unwind_i.h b/arch/ia64/kernel/unwind_i.h index 67994a7e5816..1dd57ba44327 100644 --- a/arch/ia64/kernel/unwind_i.h +++ b/arch/ia64/kernel/unwind_i.h @@ -42,7 +42,7 @@ enum unw_register_index { struct unw_info_block { u64 header; - u64 desc[0]; /* unwind descriptors */ + u64 desc[]; /* unwind descriptors */ /* personality routine and language-specific data follow behind descriptors */ }; -- cgit v1.2.3 From b9dbe0101e344e8339406a11b7a91d4a0c50ad13 Mon Sep 17 00:00:00 2001 From: Matthew Hagan Date: Tue, 9 Jun 2020 17:58:29 +0100 Subject: ARM: dts: NSP: Disable PL330 by default, add dma-coherent property Currently the PL330 is enabled by default. However if left in IDM reset, as is the case with the Meraki and Synology NSP devices, the system will hang when probing for the PL330's AMBA peripheral ID. We therefore should be able to disable it in these cases. The PL330 is also included among of the list of peripherals put into coherent mode, so "dma-coherent" has been added here as well. Fixes: 5fa1026a3e4d ("ARM: dts: NSP: Add PL330 support") Signed-off-by: Matthew Hagan Signed-off-by: Florian Fainelli --- arch/arm/boot/dts/bcm-nsp.dtsi | 4 +++- arch/arm/boot/dts/bcm958522er.dts | 4 ++++ arch/arm/boot/dts/bcm958525er.dts | 4 ++++ arch/arm/boot/dts/bcm958525xmc.dts | 4 ++++ arch/arm/boot/dts/bcm958622hr.dts | 4 ++++ arch/arm/boot/dts/bcm958623hr.dts | 4 ++++ arch/arm/boot/dts/bcm958625hr.dts | 4 ++++ arch/arm/boot/dts/bcm958625k.dts | 4 ++++ 8 files changed, 31 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/bcm-nsp.dtsi b/arch/arm/boot/dts/bcm-nsp.dtsi index da6d70f09ef1..920c0f561e5c 100644 --- a/arch/arm/boot/dts/bcm-nsp.dtsi +++ b/arch/arm/boot/dts/bcm-nsp.dtsi @@ -200,7 +200,7 @@ status = "disabled"; }; - dma@20000 { + dma: dma@20000 { compatible = "arm,pl330", "arm,primecell"; reg = <0x20000 0x1000>; interrupts = , @@ -215,6 +215,8 @@ clocks = <&iprocslow>; clock-names = "apb_pclk"; #dma-cells = <1>; + dma-coherent; + status = "disabled"; }; sdio: sdhci@21000 { diff --git a/arch/arm/boot/dts/bcm958522er.dts b/arch/arm/boot/dts/bcm958522er.dts index 8c388eb8a08f..7be4c4e628e0 100644 --- a/arch/arm/boot/dts/bcm958522er.dts +++ b/arch/arm/boot/dts/bcm958522er.dts @@ -58,6 +58,10 @@ /* USB 3 support needed to be complete */ +&dma { + status = "okay"; +}; + &amac0 { status = "okay"; }; diff --git a/arch/arm/boot/dts/bcm958525er.dts b/arch/arm/boot/dts/bcm958525er.dts index c339771bb22e..e58ed7e95346 100644 --- a/arch/arm/boot/dts/bcm958525er.dts +++ b/arch/arm/boot/dts/bcm958525er.dts @@ -58,6 +58,10 @@ /* USB 3 support needed to be complete */ +&dma { + status = "okay"; +}; + &amac0 { status = "okay"; }; diff --git a/arch/arm/boot/dts/bcm958525xmc.dts b/arch/arm/boot/dts/bcm958525xmc.dts index 1c72ec8288de..716da62f5788 100644 --- a/arch/arm/boot/dts/bcm958525xmc.dts +++ b/arch/arm/boot/dts/bcm958525xmc.dts @@ -58,6 +58,10 @@ /* XHCI support needed to be complete */ +&dma { + status = "okay"; +}; + &amac0 { status = "okay"; }; diff --git a/arch/arm/boot/dts/bcm958622hr.dts b/arch/arm/boot/dts/bcm958622hr.dts index 96a021cebd97..a49c2fd21f4a 100644 --- a/arch/arm/boot/dts/bcm958622hr.dts +++ b/arch/arm/boot/dts/bcm958622hr.dts @@ -58,6 +58,10 @@ /* USB 3 and SLIC support needed to be complete */ +&dma { + status = "okay"; +}; + &amac0 { status = "okay"; }; diff --git a/arch/arm/boot/dts/bcm958623hr.dts b/arch/arm/boot/dts/bcm958623hr.dts index b2c7f21d471e..dd6dff6452b8 100644 --- a/arch/arm/boot/dts/bcm958623hr.dts +++ b/arch/arm/boot/dts/bcm958623hr.dts @@ -58,6 +58,10 @@ /* USB 3 and SLIC support needed to be complete */ +&dma { + status = "okay"; +}; + &amac0 { status = "okay"; }; diff --git a/arch/arm/boot/dts/bcm958625hr.dts b/arch/arm/boot/dts/bcm958625hr.dts index 536fb24f38bb..a71371b4065e 100644 --- a/arch/arm/boot/dts/bcm958625hr.dts +++ b/arch/arm/boot/dts/bcm958625hr.dts @@ -69,6 +69,10 @@ status = "okay"; }; +&dma { + status = "okay"; +}; + &amac0 { status = "okay"; }; diff --git a/arch/arm/boot/dts/bcm958625k.dts b/arch/arm/boot/dts/bcm958625k.dts index 3fcca12d83c2..7b84b54436ed 100644 --- a/arch/arm/boot/dts/bcm958625k.dts +++ b/arch/arm/boot/dts/bcm958625k.dts @@ -48,6 +48,10 @@ }; }; +&dma { + status = "okay"; +}; + &amac0 { status = "okay"; }; -- cgit v1.2.3 From 664f5f8de825648d1d31f6f5652e3cd117c77b50 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Wed, 4 Mar 2020 16:07:34 +0100 Subject: s390/seccomp: pass syscall arguments via seccomp_data Use __secure_computing() and pass the register data via seccomp_data so secure computing doesn't have to fetch it again. Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/kernel/ptrace.c | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index ce60a459a143..e319482da5f0 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -838,6 +838,9 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) { unsigned long mask = -1UL; + if (is_compat_task()) + mask = 0xffffffff; + /* * The sysc_tracesys code in entry.S stored the system * call number to gprs[2]. @@ -854,17 +857,35 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) return -1; } +#ifdef CONFIG_SECCOMP /* Do the secure computing check after ptrace. */ - if (secure_computing()) { - /* seccomp failures shouldn't expose any additional code. */ - return -1; + if (unlikely(test_thread_flag(TIF_SECCOMP))) { + struct seccomp_data sd; + + if (is_compat_task()) { + sd.instruction_pointer = regs->psw.addr & 0x7fffffff; + sd.arch = AUDIT_ARCH_S390; + } else { + sd.instruction_pointer = regs->psw.addr; + sd.arch = AUDIT_ARCH_S390X; + } + + sd.nr = regs->gprs[2] & 0xffff; + sd.args[0] = regs->orig_gpr2 & mask; + sd.args[1] = regs->gprs[3] & mask; + sd.args[2] = regs->gprs[4] & mask; + sd.args[3] = regs->gprs[5] & mask; + sd.args[4] = regs->gprs[6] & mask; + sd.args[5] = regs->gprs[7] & mask; + + if (__secure_computing(&sd) == -1) + return -1; } +#endif /* CONFIG_SECCOMP */ if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) trace_sys_enter(regs, regs->gprs[2]); - if (is_compat_task()) - mask = 0xffffffff; audit_syscall_entry(regs->gprs[2], regs->orig_gpr2 & mask, regs->gprs[3] &mask, regs->gprs[4] &mask, -- cgit v1.2.3 From cd29fa798001075a554b978df3a64e6656c25794 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Fri, 6 Mar 2020 13:18:31 +0100 Subject: s390/ptrace: return -ENOSYS when invalid syscall is supplied The current code returns the syscall number which an invalid syscall number is supplied and tracing is enabled. This makes the strace testsuite fail. Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/kernel/ptrace.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index e319482da5f0..ceb8105a8086 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -837,6 +837,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) { unsigned long mask = -1UL; + long ret = -1; if (is_compat_task()) mask = 0xffffffff; @@ -853,8 +854,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) * debugger stored an invalid system call number. Skip * the system call and the system call restart handling. */ - clear_pt_regs_flag(regs, PIF_SYSCALL); - return -1; + goto skip; } #ifdef CONFIG_SECCOMP @@ -870,7 +870,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) sd.arch = AUDIT_ARCH_S390X; } - sd.nr = regs->gprs[2] & 0xffff; + sd.nr = regs->int_code & 0xffff; sd.args[0] = regs->orig_gpr2 & mask; sd.args[1] = regs->gprs[3] & mask; sd.args[2] = regs->gprs[4] & mask; @@ -879,19 +879,26 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) sd.args[5] = regs->gprs[7] & mask; if (__secure_computing(&sd) == -1) - return -1; + goto skip; } #endif /* CONFIG_SECCOMP */ if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) - trace_sys_enter(regs, regs->gprs[2]); + trace_sys_enter(regs, regs->int_code & 0xffff); - audit_syscall_entry(regs->gprs[2], regs->orig_gpr2 & mask, + audit_syscall_entry(regs->int_code & 0xffff, regs->orig_gpr2 & mask, regs->gprs[3] &mask, regs->gprs[4] &mask, regs->gprs[5] &mask); + if ((signed long)regs->gprs[2] >= NR_syscalls) { + regs->gprs[2] = -ENOSYS; + ret = -ENOSYS; + } return regs->gprs[2]; +skip: + clear_pt_regs_flag(regs, PIF_SYSCALL); + return ret; } asmlinkage void do_syscall_trace_exit(struct pt_regs *regs) -- cgit v1.2.3 From 00332c16b1604242a56289ff2b26e283dbad0812 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Fri, 6 Mar 2020 13:19:34 +0100 Subject: s390/ptrace: pass invalid syscall numbers to tracing tracing expects to see invalid syscalls, so pass it through. The syscall path in entry.S checks the syscall number before looking up the handler, so it is still safe. Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/kernel/entry.S | 2 +- arch/s390/kernel/ptrace.c | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 50ff6dd0f995..496f74d98473 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -401,9 +401,9 @@ ENTRY(system_call) jnz .Lsysc_nr_ok # svc 0: system call number in %r1 llgfr %r1,%r1 # clear high word in r1 + sth %r1,__PT_INT_CODE+2(%r11) cghi %r1,NR_syscalls jnl .Lsysc_nr_ok - sth %r1,__PT_INT_CODE+2(%r11) slag %r8,%r1,3 .Lsysc_nr_ok: xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index ceb8105a8086..1fdbb2d19477 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -847,11 +847,9 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) * call number to gprs[2]. */ if (test_thread_flag(TIF_SYSCALL_TRACE) && - (tracehook_report_syscall_entry(regs) || - regs->gprs[2] >= NR_syscalls)) { + tracehook_report_syscall_entry(regs)) { /* - * Tracing decided this syscall should not happen or the - * debugger stored an invalid system call number. Skip + * Tracing decided this syscall should not happen. Skip * the system call and the system call restart handling. */ goto skip; -- cgit v1.2.3 From 873e5a763d604c32988c4a78913a8dab3862d2f9 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Mon, 9 Mar 2020 16:44:50 +0100 Subject: s390/ptrace: fix setting syscall number When strace wants to update the syscall number, it sets GPR2 to the desired number and updates the GPR via PTRACE_SETREGSET. It doesn't update regs->int_code which would cause the old syscall executed on syscall restart. As we cannot change the ptrace ABI and don't have a field for the interruption code, check whether the tracee is in a syscall and the last instruction was svc. In that case assume that the tracer wants to update the syscall number and copy the GPR2 value to regs->int_code. Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/kernel/ptrace.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 1fdbb2d19477..3cc15c066298 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -323,6 +323,25 @@ static inline void __poke_user_per(struct task_struct *child, child->thread.per_user.end = data; } +static void fixup_int_code(struct task_struct *child, addr_t data) +{ + struct pt_regs *regs = task_pt_regs(child); + int ilc = regs->int_code >> 16; + u16 insn; + + if (ilc > 6) + return; + + if (ptrace_access_vm(child, regs->psw.addr - (regs->int_code >> 16), + &insn, sizeof(insn), FOLL_FORCE) != sizeof(insn)) + return; + + /* double check that tracee stopped on svc instruction */ + if ((insn >> 8) != 0xa) + return; + + regs->int_code = 0x20000 | (data & 0xffff); +} /* * Write a word to the user area of a process at location addr. This * operation does have an additional problem compared to peek_user. @@ -334,7 +353,9 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) struct user *dummy = NULL; addr_t offset; + if (addr < (addr_t) &dummy->regs.acrs) { + struct pt_regs *regs = task_pt_regs(child); /* * psw and gprs are stored on the stack */ @@ -352,7 +373,11 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) /* Invalid addressing mode bits */ return -EINVAL; } - *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr) = data; + + if (test_pt_regs_flag(regs, PIF_SYSCALL) && + addr == offsetof(struct user, regs.gprs[2])) + fixup_int_code(child, data); + *(addr_t *)((addr_t) ®s->psw + addr) = data; } else if (addr < (addr_t) (&dummy->regs.orig_gpr2)) { /* @@ -718,6 +743,10 @@ static int __poke_user_compat(struct task_struct *child, regs->psw.mask = (regs->psw.mask & ~PSW_MASK_BA) | (__u64)(tmp & PSW32_ADDR_AMODE); } else { + + if (test_pt_regs_flag(regs, PIF_SYSCALL) && + addr == offsetof(struct compat_user, regs.gprs[2])) + fixup_int_code(child, data); /* gpr 0-15 */ *(__u32*)((addr_t) ®s->psw + addr*2 + 4) = tmp; } -- cgit v1.2.3 From df8cea2a4bef3088c8570af543835992ce1d327e Mon Sep 17 00:00:00 2001 From: Chen Zhou Date: Sat, 9 May 2020 16:56:06 +0800 Subject: s390/crypto: use scnprintf() instead of snprintf() snprintf() returns the number of bytes that would be written, which may be greater than the the actual length to be written. show() methods should return the number of bytes printed into the buffer. This is the return value of scnprintf(). Link: https://lkml.kernel.org/r/20200509085608.41061-2-chenzhou10@huawei.com Signed-off-by: Chen Zhou Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/crypto/prng.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c index d977643fa627..e1ae23911ccd 100644 --- a/arch/s390/crypto/prng.c +++ b/arch/s390/crypto/prng.c @@ -693,7 +693,7 @@ static ssize_t prng_chunksize_show(struct device *dev, struct device_attribute *attr, char *buf) { - return snprintf(buf, PAGE_SIZE, "%u\n", prng_chunk_size); + return scnprintf(buf, PAGE_SIZE, "%u\n", prng_chunk_size); } static DEVICE_ATTR(chunksize, 0444, prng_chunksize_show, NULL); @@ -712,7 +712,7 @@ static ssize_t prng_counter_show(struct device *dev, counter = prng_data->prngws.byte_counter; mutex_unlock(&prng_data->mutex); - return snprintf(buf, PAGE_SIZE, "%llu\n", counter); + return scnprintf(buf, PAGE_SIZE, "%llu\n", counter); } static DEVICE_ATTR(byte_counter, 0444, prng_counter_show, NULL); @@ -721,7 +721,7 @@ static ssize_t prng_errorflag_show(struct device *dev, struct device_attribute *attr, char *buf) { - return snprintf(buf, PAGE_SIZE, "%d\n", prng_errorflag); + return scnprintf(buf, PAGE_SIZE, "%d\n", prng_errorflag); } static DEVICE_ATTR(errorflag, 0444, prng_errorflag_show, NULL); @@ -731,9 +731,9 @@ static ssize_t prng_mode_show(struct device *dev, char *buf) { if (prng_mode == PRNG_MODE_TDES) - return snprintf(buf, PAGE_SIZE, "TDES\n"); + return scnprintf(buf, PAGE_SIZE, "TDES\n"); else - return snprintf(buf, PAGE_SIZE, "SHA512\n"); + return scnprintf(buf, PAGE_SIZE, "SHA512\n"); } static DEVICE_ATTR(mode, 0444, prng_mode_show, NULL); @@ -756,7 +756,7 @@ static ssize_t prng_reseed_limit_show(struct device *dev, struct device_attribute *attr, char *buf) { - return snprintf(buf, PAGE_SIZE, "%u\n", prng_reseed_limit); + return scnprintf(buf, PAGE_SIZE, "%u\n", prng_reseed_limit); } static ssize_t prng_reseed_limit_store(struct device *dev, struct device_attribute *attr, @@ -787,7 +787,7 @@ static ssize_t prng_strength_show(struct device *dev, struct device_attribute *attr, char *buf) { - return snprintf(buf, PAGE_SIZE, "256\n"); + return scnprintf(buf, PAGE_SIZE, "256\n"); } static DEVICE_ATTR(strength, 0444, prng_strength_show, NULL); -- cgit v1.2.3 From 92fd356514b7505f40ca72b38ef84070e6502a70 Mon Sep 17 00:00:00 2001 From: Chen Zhou Date: Sat, 9 May 2020 16:56:07 +0800 Subject: s390: use scnprintf() in sys_##_prefix##_##_name##_show snprintf() returns the number of bytes that would be written, which may be greater than the the actual length to be written. show() methods should return the number of bytes printed into the buffer. This is the return value of scnprintf(). Link: https://lkml.kernel.org/r/20200509085608.41061-3-chenzhou10@huawei.com Signed-off-by: Chen Zhou Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/ipl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index ccea9a245867..90a2a17239b0 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -181,7 +181,7 @@ static ssize_t sys_##_prefix##_##_name##_show(struct kobject *kobj, \ struct kobj_attribute *attr, \ char *page) \ { \ - return snprintf(page, PAGE_SIZE, _format, ##args); \ + return scnprintf(page, PAGE_SIZE, _format, ##args); \ } #define IPL_ATTR_CCW_STORE_FN(_prefix, _name, _ipl_blk) \ -- cgit v1.2.3 From 99448016ac792ac096def056828ab72c21f8582b Mon Sep 17 00:00:00 2001 From: Chen Zhou Date: Sat, 9 May 2020 16:56:08 +0800 Subject: s390/protvirt: use scnprintf() instead of snprintf() snprintf() returns the number of bytes that would be written, which may be greater than the the actual length to be written. uv_query_facilities() should return the number of bytes printed into the buffer. This is the return value of scnprintf(). The other functions are the same. Link: https://lkml.kernel.org/r/20200509085608.41061-4-chenzhou10@huawei.com Signed-off-by: Chen Zhou Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/uv.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index 66e89b2866d7..c296e5c8dbf9 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -331,7 +331,7 @@ EXPORT_SYMBOL_GPL(arch_make_page_accessible); static ssize_t uv_query_facilities(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - return snprintf(page, PAGE_SIZE, "%lx\n%lx\n%lx\n%lx\n", + return scnprintf(page, PAGE_SIZE, "%lx\n%lx\n%lx\n%lx\n", uv_info.inst_calls_list[0], uv_info.inst_calls_list[1], uv_info.inst_calls_list[2], @@ -344,7 +344,7 @@ static struct kobj_attribute uv_query_facilities_attr = static ssize_t uv_query_max_guest_cpus(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - return snprintf(page, PAGE_SIZE, "%d\n", + return scnprintf(page, PAGE_SIZE, "%d\n", uv_info.max_guest_cpus); } @@ -354,7 +354,7 @@ static struct kobj_attribute uv_query_max_guest_cpus_attr = static ssize_t uv_query_max_guest_vms(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - return snprintf(page, PAGE_SIZE, "%d\n", + return scnprintf(page, PAGE_SIZE, "%d\n", uv_info.max_num_sec_conf); } @@ -364,7 +364,7 @@ static struct kobj_attribute uv_query_max_guest_vms_attr = static ssize_t uv_query_max_guest_addr(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - return snprintf(page, PAGE_SIZE, "%lx\n", + return scnprintf(page, PAGE_SIZE, "%lx\n", uv_info.max_sec_stor_addr); } -- cgit v1.2.3 From 2b2a25845d534ac6d55086e35c033961fdd83a26 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 2 Jun 2020 12:25:24 -0700 Subject: s390/vdso: Use $(LD) instead of $(CC) to link vDSO Currently, the VDSO is being linked through $(CC). This does not match how the rest of the kernel links objects, which is through the $(LD) variable. When clang is built in a default configuration, it first attempts to use the target triple's default linker, which is just ld. However, the user can override this through the CLANG_DEFAULT_LINKER cmake define so that clang uses another linker by default, such as LLVM's own linker, ld.lld. This can be useful to get more optimized links across various different projects. However, this is problematic for the s390 vDSO because ld.lld does not have any s390 emulatiom support: https://github.com/llvm/llvm-project/blob/llvmorg-10.0.1-rc1/lld/ELF/Driver.cpp#L132-L150 Thus, if a user is using a toolchain with ld.lld as the default, they will see an error, even if they have specified ld.bfd through the LD make variable: $ make -j"$(nproc)" -s ARCH=s390 CROSS_COMPILE=s390x-linux-gnu- LLVM=1 \ LD=s390x-linux-gnu-ld \ defconfig arch/s390/kernel/vdso64/ ld.lld: error: unknown emulation: elf64_s390 clang-11: error: linker command failed with exit code 1 (use -v to see invocation) Normally, '-fuse-ld=bfd' could be used to get around this; however, this can be fragile, depending on paths and variable naming. The cleaner solution for the kernel is to take advantage of the fact that $(LD) can be invoked directly, which bypasses the heuristics of $(CC) and respects the user's choice. Similar changes have been done for ARM, ARM64, and MIPS. Link: https://lkml.kernel.org/r/20200602192523.32758-1-natechancellor@gmail.com Link: https://github.com/ClangBuiltLinux/linux/issues/1041 Signed-off-by: Nathan Chancellor Reviewed-by: Nick Desaulniers [heiko.carstens@de.ibm.com: add --build-id flag] Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/vdso64/Makefile | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index bec19e7e6e1c..4a66a1cb919b 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -18,8 +18,8 @@ KBUILD_AFLAGS_64 += -m64 -s KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS)) KBUILD_CFLAGS_64 += -m64 -fPIC -shared -fno-common -fno-builtin -KBUILD_CFLAGS_64 += -nostdlib -Wl,-soname=linux-vdso64.so.1 \ - -Wl,--hash-style=both +ldflags-y := -fPIC -shared -nostdlib -soname=linux-vdso64.so.1 \ + --hash-style=both --build-id -T $(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64) $(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_64) @@ -37,8 +37,8 @@ KASAN_SANITIZE := n $(obj)/vdso64_wrapper.o : $(obj)/vdso64.so # link rule for the .so file, .lds has to be first -$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) FORCE - $(call if_changed,vdso64ld) +$(obj)/vdso64.so.dbg: $(obj)/vdso64.lds $(obj-vdso64) FORCE + $(call if_changed,ld) # strip rule for the .so file $(obj)/%.so: OBJCOPYFLAGS := -S @@ -50,8 +50,6 @@ $(obj-vdso64): %.o: %.S FORCE $(call if_changed_dep,vdso64as) # actual build commands -quiet_cmd_vdso64ld = VDSO64L $@ - cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $(filter %.lds %.o,$^) -o $@ quiet_cmd_vdso64as = VDSO64A $@ cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $< -- cgit v1.2.3 From 478237a595120a18e9b52fd2c57a6e8b7a01e411 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Tue, 24 Mar 2020 12:10:27 +0000 Subject: s390/vdso: fix vDSO clock_getres() clock_getres in the vDSO library has to preserve the same behaviour of posix_get_hrtimer_res(). In particular, posix_get_hrtimer_res() does: sec = 0; ns = hrtimer_resolution; and hrtimer_resolution depends on the enablement of the high resolution timers that can happen either at compile or at run time. Fix the s390 vdso implementation of clock_getres keeping a copy of hrtimer_resolution in vdso data and using that directly. Link: https://lkml.kernel.org/r/20200324121027.21665-1-vincenzo.frascino@arm.com Signed-off-by: Vincenzo Frascino Acked-by: Martin Schwidefsky [heiko.carstens@de.ibm.com: use llgf for proper zero extension] Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/vdso.h | 1 + arch/s390/kernel/asm-offsets.c | 2 +- arch/s390/kernel/time.c | 1 + arch/s390/kernel/vdso64/clock_getres.S | 10 +++++----- 4 files changed, 8 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h index 3bcfdeb01395..0cd085cdeb4f 100644 --- a/arch/s390/include/asm/vdso.h +++ b/arch/s390/include/asm/vdso.h @@ -36,6 +36,7 @@ struct vdso_data { __u32 tk_shift; /* Shift used for xtime_nsec 0x60 */ __u32 ts_dir; /* TOD steering direction 0x64 */ __u64 ts_end; /* TOD steering end 0x68 */ + __u32 hrtimer_res; /* hrtimer resolution 0x70 */ }; struct vdso_per_cpu_data { diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 165031bd3370..5d8cc1864566 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -76,6 +76,7 @@ int main(void) OFFSET(__VDSO_TK_SHIFT, vdso_data, tk_shift); OFFSET(__VDSO_TS_DIR, vdso_data, ts_dir); OFFSET(__VDSO_TS_END, vdso_data, ts_end); + OFFSET(__VDSO_CLOCK_REALTIME_RES, vdso_data, hrtimer_res); OFFSET(__VDSO_ECTG_BASE, vdso_per_cpu_data, ectg_timer_base); OFFSET(__VDSO_ECTG_USER, vdso_per_cpu_data, ectg_user_time); OFFSET(__VDSO_GETCPU_VAL, vdso_per_cpu_data, getcpu_val); @@ -86,7 +87,6 @@ int main(void) DEFINE(__CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE); DEFINE(__CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE); DEFINE(__CLOCK_THREAD_CPUTIME_ID, CLOCK_THREAD_CPUTIME_ID); - DEFINE(__CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC); DEFINE(__CLOCK_COARSE_RES, LOW_RES_NSEC); BLANK(); /* idle data offsets */ diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index f9d070d016e3..b1113b519432 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -301,6 +301,7 @@ void update_vsyscall(struct timekeeper *tk) vdso_data->tk_mult = tk->tkr_mono.mult; vdso_data->tk_shift = tk->tkr_mono.shift; + vdso_data->hrtimer_res = hrtimer_resolution; smp_wmb(); ++vdso_data->tb_update_count; } diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S index 081435398e0a..0c79caa32b59 100644 --- a/arch/s390/kernel/vdso64/clock_getres.S +++ b/arch/s390/kernel/vdso64/clock_getres.S @@ -17,12 +17,14 @@ .type __kernel_clock_getres,@function __kernel_clock_getres: CFI_STARTPROC - larl %r1,4f + larl %r1,3f + lg %r0,0(%r1) cghi %r2,__CLOCK_REALTIME_COARSE je 0f cghi %r2,__CLOCK_MONOTONIC_COARSE je 0f - larl %r1,3f + larl %r1,_vdso_data + llgf %r0,__VDSO_CLOCK_REALTIME_RES(%r1) cghi %r2,__CLOCK_REALTIME je 0f cghi %r2,__CLOCK_MONOTONIC @@ -36,7 +38,6 @@ __kernel_clock_getres: jz 2f 0: ltgr %r3,%r3 jz 1f /* res == NULL */ - lg %r0,0(%r1) xc 0(8,%r3),0(%r3) /* set tp->tv_sec to zero */ stg %r0,8(%r3) /* store tp->tv_usec */ 1: lghi %r2,0 @@ -45,6 +46,5 @@ __kernel_clock_getres: svc 0 br %r14 CFI_ENDPROC -3: .quad __CLOCK_REALTIME_RES -4: .quad __CLOCK_COARSE_RES +3: .quad __CLOCK_COARSE_RES .size __kernel_clock_getres,.-__kernel_clock_getres -- cgit v1.2.3 From 64438e1bc0cdbe6d30bcdcb976f935eb3c297adc Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 10 Jun 2020 10:36:05 +0200 Subject: s390/numa: let NODES_SHIFT depend on NEED_MULTIPLE_NODES Qian Cai reported: """ When NUMA=n and nr_node_ids=2, in apply_wqattrs_prepare(), it has, for_each_node(node) { if (wq_calc_node_cpumask(... where it will trigger a booting warning, WARNING: workqueue cpumask: online intersect > possible intersect because it found 2 nodes and wq_numa_possible_cpumask[1] is an empty cpumask. """ Let NODES_SHIFT depend on NEED_MULTIPLE_NODES like it is done on other architectures in order to fix this. Fixes: 701dc81e7412 ("s390/mm: remove fake numa support") Reported-by: Qian Cai Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 194824932a60..c7d7ede6300c 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -462,6 +462,7 @@ config NUMA config NODES_SHIFT int + depends on NEED_MULTIPLE_NODES default "1" config SCHED_SMT -- cgit v1.2.3 From d7af722344e6dc52d87649100516515263e15c75 Mon Sep 17 00:00:00 2001 From: Drew Fustini Date: Tue, 9 Jun 2020 23:45:21 +0200 Subject: ARM: dts: am335x-pocketbeagle: Fix mmc0 Write Protect AM3358 pin mcasp0_aclkr (ZCZ ball B13) [0] is routed to P1.31 header [1] Mode 4 of this pin is mmc0_sdwp (SD Write Protect). A signal connected to P1.31 may accidentally trigger mmc0 write protection. To avoid this situation, do not put mcasp0_aclkr in mode 4 (mmc0_sdwp) by default. [0] http://www.ti.com/lit/ds/symlink/am3358.pdf [1] https://github.com/beagleboard/pocketbeagle/wiki/System-Reference-Manual#531_Expansion_Headers Fixes: 047905376a16 (ARM: dts: Add am335x-pocketbeagle) Signed-off-by: Robert Nelson Signed-off-by: Drew Fustini Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am335x-pocketbeagle.dts | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/am335x-pocketbeagle.dts b/arch/arm/boot/dts/am335x-pocketbeagle.dts index 4da719098028..f0b222201b86 100644 --- a/arch/arm/boot/dts/am335x-pocketbeagle.dts +++ b/arch/arm/boot/dts/am335x-pocketbeagle.dts @@ -88,7 +88,6 @@ AM33XX_PADCONF(AM335X_PIN_MMC0_DAT3, PIN_INPUT_PULLUP, MUX_MODE0) AM33XX_PADCONF(AM335X_PIN_MMC0_CMD, PIN_INPUT_PULLUP, MUX_MODE0) AM33XX_PADCONF(AM335X_PIN_MMC0_CLK, PIN_INPUT_PULLUP, MUX_MODE0) - AM33XX_PADCONF(AM335X_PIN_MCASP0_ACLKR, PIN_INPUT, MUX_MODE4) /* (B12) mcasp0_aclkr.mmc0_sdwp */ >; }; -- cgit v1.2.3 From 9cf28e41f9f768791f54ee18333239fda6927ed8 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 12 Jun 2020 10:19:50 -0700 Subject: ARM: dts: Fix duovero smsc interrupt for suspend While testing the recent suspend and resume regressions I noticed that duovero can still end up losing edge gpio interrupts on runtime suspend. This causes NFSroot easily stopping working after resume on duovero. Let's fix the issue by using gpio level interrupts for smsc as then the gpio interrupt state is seen by the gpio controller on resume. Fixes: 731b409878a3 ("ARM: dts: Configure duovero for to allow core retention during idle") Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap4-duovero-parlor.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/omap4-duovero-parlor.dts b/arch/arm/boot/dts/omap4-duovero-parlor.dts index 8047e8cdb3af..4548d87534e3 100644 --- a/arch/arm/boot/dts/omap4-duovero-parlor.dts +++ b/arch/arm/boot/dts/omap4-duovero-parlor.dts @@ -139,7 +139,7 @@ ethernet@gpmc { reg = <5 0 0xff>; interrupt-parent = <&gpio2>; - interrupts = <12 IRQ_TYPE_EDGE_FALLING>; /* gpio_44 */ + interrupts = <12 IRQ_TYPE_LEVEL_LOW>; /* gpio_44 */ phy-mode = "mii"; -- cgit v1.2.3 From c030688d4427658bc4e947111341f75d2cb3d526 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 12 Jun 2020 10:23:40 -0700 Subject: ARM: dts: Fix omap4 system timer source clocks I accidentally flipped the system timer to use system clock instead of the 32k source clock. Fixes: 14b1925a7219 ("ARM: dts: Configure system timers for omap4") Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap4.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/omap4.dtsi b/arch/arm/boot/dts/omap4.dtsi index 6c2b07f0704d..4400f5f8e099 100644 --- a/arch/arm/boot/dts/omap4.dtsi +++ b/arch/arm/boot/dts/omap4.dtsi @@ -662,6 +662,6 @@ ti,no-idle; timer@0 { assigned-clocks = <&l4_wkup_clkctrl OMAP4_TIMER1_CLKCTRL 24>; - assigned-clock-parents = <&sys_clkin_ck>; + assigned-clock-parents = <&sys_32k_ck>; }; }; -- cgit v1.2.3 From 80bf72598663496d08b3c0231377db6a99d7fd68 Mon Sep 17 00:00:00 2001 From: Drew Fustini Date: Mon, 15 Jun 2020 17:57:01 +0200 Subject: ARM: dts: am5729: beaglebone-ai: fix rgmii phy-mode Since commit cd28d1d6e52e ("net: phy: at803x: Disable phy delay for RGMII mode") the networking is broken on the BeagleBone AI which has the AR8035 PHY for Gigabit Ethernet [0]. The fix is to switch from phy-mode = "rgmii" to phy-mode = "rgmii-rxid". Note: Grygorii made a similar DT fix for other AM57xx boards with a different phy in commit 820f8a870f65 ("ARM: dts: am57xx: fix networking on boards with ksz9031 phy"). [0] https://git.io/Jf7PX Fixes: 520557d4854b ("ARM: dts: am5729: beaglebone-ai: adding device tree") Cc: Vinod Koul Reviewed-by: Grygorii Strashko Signed-off-by: Robert Nelson Signed-off-by: Drew Fustini Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am5729-beagleboneai.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/am5729-beagleboneai.dts b/arch/arm/boot/dts/am5729-beagleboneai.dts index 9877d7709d41..4c51c6b05e64 100644 --- a/arch/arm/boot/dts/am5729-beagleboneai.dts +++ b/arch/arm/boot/dts/am5729-beagleboneai.dts @@ -505,7 +505,7 @@ &cpsw_emac0 { phy-handle = <&phy0>; - phy-mode = "rgmii"; + phy-mode = "rgmii-rxid"; }; &ocp { -- cgit v1.2.3 From 034aa9cd698e315c767af1bac3fd1ff8898d2cd7 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 15 Jun 2020 16:27:43 +0100 Subject: arm64: pgtable: Clear the GP bit for non-executable kernel pages Commit cca98e9f8b5e ("mm: enforce that vmap can't map pages executable") introduced 'pgprot_nx(prot)' for arm64 but collided silently with the BTI support during the merge window, which endeavours to clear the GP bit for non-executable kernel mappings in set_memory_nx(). For consistency between the two APIs, clear the GP bit in pgprot_nx(). Acked-by: Mark Rutland Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20200615154642.3579-1-will@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 6dbd267ab931..758e2d1577d0 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -416,7 +416,7 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd) __pgprot((pgprot_val(prot) & ~(mask)) | (bits)) #define pgprot_nx(prot) \ - __pgprot_modify(prot, 0, PTE_PXN) + __pgprot_modify(prot, PTE_MAYBE_GP, PTE_PXN) /* * Mark the prot value as uncacheable and unbufferable. -- cgit v1.2.3 From e575fb9e76c8e33440fb859572a8b7d430f053d6 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 16 Jun 2020 18:29:11 +0100 Subject: arm64: sve: Fix build failure when ARM64_SVE=y and SYSCTL=n When I squashed the 'allnoconfig' compiler warning about the set_sve_default_vl() function being defined but not used in commit 1e570f512cbd ("arm64/sve: Eliminate data races on sve_default_vl"), I accidentally broke the build for configs where ARM64_SVE is enabled, but SYSCTL is not. Fix this by only compiling the SVE sysctl support if both CONFIG_SVE=y and CONFIG_SYSCTL=y. Cc: Dave Martin Reported-by: Qian Cai Link: https://lore.kernel.org/r/20200616131808.GA1040@lca.pw Signed-off-by: Will Deacon --- arch/arm64/kernel/fpsimd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index d9eee9194511..55c8f3ec6705 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -349,7 +349,7 @@ static unsigned int find_supported_vector_length(unsigned int vl) return sve_vl_from_vq(__bit_to_vq(bit)); } -#ifdef CONFIG_SYSCTL +#if defined(CONFIG_ARM64_SVE) && defined(CONFIG_SYSCTL) static int sve_proc_do_default_vl(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) @@ -394,9 +394,9 @@ static int __init sve_sysctl_init(void) return 0; } -#else /* ! CONFIG_SYSCTL */ +#else /* ! (CONFIG_ARM64_SVE && CONFIG_SYSCTL) */ static int __init sve_sysctl_init(void) { return 0; } -#endif /* ! CONFIG_SYSCTL */ +#endif /* ! (CONFIG_ARM64_SVE && CONFIG_SYSCTL) */ #define ZREG(sve_state, vq, n) ((char *)(sve_state) + \ (SVE_SIG_ZREG_OFFSET(vq, n) - SVE_SIG_REGS_OFFSET)) -- cgit v1.2.3 From 00fdec98d9881bf5173af09aebd353ab3b9ac729 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 19 May 2020 22:28:32 -0700 Subject: ARC: entry: fix potential EFA clobber when TIF_SYSCALL_TRACE Trap handler for syscall tracing reads EFA (Exception Fault Address), in case strace wants PC of trap instruction (EFA is not part of pt_regs as of current code). However this EFA read is racy as it happens after dropping to pure kernel mode (re-enabling interrupts). A taken interrupt could context-switch, trigger a different task's trap, clobbering EFA for this execution context. Fix this by reading EFA early, before re-enabling interrupts. A slight side benefit is de-duplication of FAKE_RET_FROM_EXCPN in trap handler. The trap handler is common to both ARCompact and ARCv2 builds too. This just came out of code rework/review and no real problem was reported but is clearly a potential problem specially for strace. Cc: Signed-off-by: Vineet Gupta --- arch/arc/kernel/entry.S | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S index 60406ec62eb8..ea00c8a17f07 100644 --- a/arch/arc/kernel/entry.S +++ b/arch/arc/kernel/entry.S @@ -165,7 +165,6 @@ END(EV_Extension) tracesys: ; save EFA in case tracer wants the PC of traced task ; using ERET won't work since next-PC has already committed - lr r12, [efa] GET_CURR_TASK_FIELD_PTR TASK_THREAD, r11 st r12, [r11, THREAD_FAULT_ADDR] ; thread.fault_address @@ -208,15 +207,9 @@ tracesys_exit: ; Breakpoint TRAP ; --------------------------------------------- trap_with_param: - - ; stop_pc info by gdb needs this info - lr r0, [efa] + mov r0, r12 ; EFA in case ptracer/gdb wants stop_pc mov r1, sp - ; Now that we have read EFA, it is safe to do "fake" rtie - ; and get out of CPU exception mode - FAKE_RET_FROM_EXCPN - ; Save callee regs in case gdb wants to have a look ; SP will grow up by size of CALLEE Reg-File ; NOTE: clobbers r12 @@ -243,6 +236,10 @@ ENTRY(EV_Trap) EXCEPTION_PROLOGUE + lr r12, [efa] + + FAKE_RET_FROM_EXCPN + ;============ TRAP 1 :breakpoints ; Check ECR for trap with arg (PROLOGUE ensures r10 has ECR) bmsk.f 0, r10, 7 @@ -250,9 +247,6 @@ ENTRY(EV_Trap) ;============ TRAP (no param): syscall top level - ; First return from Exception to pure K mode (Exception/IRQs renabled) - FAKE_RET_FROM_EXCPN - ; If syscall tracing ongoing, invoke pre-post-hooks GET_CURR_THR_INFO_FLAGS r10 btst r10, TIF_SYSCALL_TRACE -- cgit v1.2.3 From 33b59f1671f105a6da9c0aa75d7cf6bea126d2c5 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 13 May 2020 01:08:23 -0700 Subject: ARC: [arcompact] fix bitrot with 2 levels of interrupt Signed-off-by: Vineet Gupta --- arch/arc/include/asm/irqflags-compact.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arc/include/asm/irqflags-compact.h b/arch/arc/include/asm/irqflags-compact.h index 7fc73fef5e29..863d63ad18d6 100644 --- a/arch/arc/include/asm/irqflags-compact.h +++ b/arch/arc/include/asm/irqflags-compact.h @@ -90,6 +90,9 @@ static inline void arch_local_irq_restore(unsigned long flags) /* * Unconditionally Enable IRQs */ +#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS +extern void arch_local_irq_enable(void); +#else static inline void arch_local_irq_enable(void) { unsigned long temp; @@ -102,7 +105,7 @@ static inline void arch_local_irq_enable(void) : "n"((STATUS_E1_MASK | STATUS_E2_MASK)) : "cc", "memory"); } - +#endif /* * Unconditionally Disable IRQs -- cgit v1.2.3 From b7faf971081a4e56147f082234bfff55135305cb Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 27 May 2020 14:18:45 -0700 Subject: ARC: elf: use right ELF_ARCH Cc: Signed-off-by: Vineet Gupta --- arch/arc/include/asm/elf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arc/include/asm/elf.h b/arch/arc/include/asm/elf.h index c77a0e3671ac..0284ace0e1ab 100644 --- a/arch/arc/include/asm/elf.h +++ b/arch/arc/include/asm/elf.h @@ -19,7 +19,7 @@ #define R_ARC_32_PCREL 0x31 /*to set parameters in the core dumps */ -#define ELF_ARCH EM_ARCOMPACT +#define ELF_ARCH EM_ARC_INUSE #define ELF_CLASS ELFCLASS32 #ifdef CONFIG_CPU_BIG_ENDIAN -- cgit v1.2.3 From 97d0b5d0b5a99871a983ca9b5c02bfde8bf73cbf Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 10 Jun 2020 12:26:15 -0700 Subject: ARCv2: boot log: detect newer/upconing HS3x/HS4x releases Signed-off-by: Vineet Gupta --- arch/arc/kernel/setup.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index dad8a656a2f1..41f07b3e594e 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -58,10 +58,12 @@ static const struct id_to_str arc_legacy_rel[] = { { 0x00, NULL } }; -static const struct id_to_str arc_cpu_rel[] = { +static const struct id_to_str arc_hs_ver54_rel[] = { /* UARCH.MAJOR, Release */ { 0, "R3.10a"}, { 1, "R3.50a"}, + { 2, "R3.60a"}, + { 3, "R4.00a"}, { 0xFF, NULL } }; @@ -117,12 +119,6 @@ static void decode_arc_core(struct cpuinfo_arc *cpu) struct bcr_uarch_build_arcv2 uarch; const struct id_to_str *tbl; - /* - * Up until (including) the first core4 release (0x54) things were - * simple: AUX IDENTITY.ARCVER was sufficient to identify arc family - * and release: 0x50 to 0x53 was HS38, 0x54 was HS48 (dual issue) - */ - if (cpu->core.family < 0x54) { /* includes arc700 */ for (tbl = &arc_legacy_rel[0]; tbl->id != 0; tbl++) { @@ -143,11 +139,10 @@ static void decode_arc_core(struct cpuinfo_arc *cpu) } /* - * However the subsequent HS release (same 0x54) allow HS38 or HS48 - * configurations and encode this info in a different BCR. - * The BCR was introduced in 0x54 so can't be read unconditionally. + * Initial HS cores bumped AUX IDENTITY.ARCVER for each release until + * ARCVER 0x54 which introduced AUX MICRO_ARCH_BUILD and subsequent + * releases only update it. */ - READ_BCR(ARC_REG_MICRO_ARCH_BCR, uarch); if (uarch.prod == 4) { @@ -158,7 +153,7 @@ static void decode_arc_core(struct cpuinfo_arc *cpu) cpu->name = "HS38"; } - for (tbl = &arc_cpu_rel[0]; tbl->id != 0xFF; tbl++) { + for (tbl = &arc_hs_ver54_rel[0]; tbl->id != 0xFF; tbl++) { if (uarch.maj == tbl->id) { cpu->release = tbl->str; break; -- cgit v1.2.3 From 0bdd6e7428a2e8971d7c9b8e212056dd0e0001c9 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Thu, 4 Jun 2020 20:39:24 +0300 Subject: ARC: build: allow users to specify -mcpu kernel build system used to add -mcpu for each ARC ISA as default. These days there are versions and varaints of ARC HS cores some of which have specific -mcpu options to fine tune / optimize generated code. So allow users/external build systems to specify their own -mcpu This will be used in future patches for HSDK-4xD board support which uses specific -mcpu to utilize dual issue scheduling of the core. Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta [abrodkin/vgupta: rewrote changelog] --- arch/arc/Kconfig | 9 +++++++++ arch/arc/Makefile | 21 +++++++++++++++++++-- 2 files changed, 28 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index fddc70029727..323014149e48 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -170,6 +170,15 @@ config ARC_CPU_HS endchoice +config ARC_TUNE_MCPU + string "Override default -mcpu compiler flag" + default "" + help + Override default -mcpu=xxx compiler flag (which is set depending on + the ISA version) with the specified value. + NOTE: If specified flag isn't supported by current compiler the + ISA default value will be used as a fallback. + config CPU_BIG_ENDIAN bool "Enable Big Endian Mode" help diff --git a/arch/arc/Makefile b/arch/arc/Makefile index 20e9ab6cc521..2b66e8264174 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -10,8 +10,25 @@ CROSS_COMPILE := $(call cc-cross-prefix, arc-linux- arceb-linux-) endif cflags-y += -fno-common -pipe -fno-builtin -mmedium-calls -D__linux__ -cflags-$(CONFIG_ISA_ARCOMPACT) += -mA7 -cflags-$(CONFIG_ISA_ARCV2) += -mcpu=hs38 + +tune-mcpu-def-$(CONFIG_ISA_ARCOMPACT) := -mA7 +tune-mcpu-def-$(CONFIG_ISA_ARCV2) := -mcpu=hs38 + +ifeq ($(CONFIG_ARC_TUNE_MCPU),"") +cflags-y += $(tune-mcpu-def-y) +else +tune-mcpu := $(shell echo $(CONFIG_ARC_TUNE_MCPU)) +tune-mcpu-ok := $(call cc-option-yn, $(tune-mcpu)) +ifeq ($(tune-mcpu-ok),y) +cflags-y += $(tune-mcpu) +else +# The flag provided by 'CONFIG_ARC_TUNE_MCPU' option isn't known by this compiler +# (probably the compiler is too old). Use ISA default mcpu flag instead as a safe option. +$(warning ** WARNING ** CONFIG_ARC_TUNE_MCPU flag '$(tune-mcpu)' is unknown, fallback to '$(tune-mcpu-def-y)') +cflags-y += $(tune-mcpu-def-y) +endif +endif + ifdef CONFIG_ARC_CURR_IN_REG # For a global register defintion, make sure it gets passed to every file -- cgit v1.2.3 From 040ece2a3c1503c0a7e327034510367747c27a5f Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 16 Jun 2020 15:14:50 -0700 Subject: ARC: build: remove deprecated toggle for arc700 builds Signed-off-by: Vineet Gupta --- arch/arc/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arc/Makefile b/arch/arc/Makefile index 2b66e8264174..d00f8b8afd08 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -11,7 +11,7 @@ endif cflags-y += -fno-common -pipe -fno-builtin -mmedium-calls -D__linux__ -tune-mcpu-def-$(CONFIG_ISA_ARCOMPACT) := -mA7 +tune-mcpu-def-$(CONFIG_ISA_ARCOMPACT) := -mcpu=arc700 tune-mcpu-def-$(CONFIG_ISA_ARCV2) := -mcpu=hs38 ifeq ($(CONFIG_ARC_TUNE_MCPU),"") -- cgit v1.2.3 From ff58155ca4fa7e931f34d948fa09fe14c6a66116 Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Tue, 16 Jun 2020 18:25:47 -0400 Subject: x86/purgatory: Add -fno-stack-protector The purgatory Makefile removes -fstack-protector options if they were configured in, but does not currently add -fno-stack-protector. If gcc was configured with the --enable-default-ssp configure option, this results in the stack protector still being enabled for the purgatory (absent distro-specific specs files that might disable it again for freestanding compilations), if the main kernel is being compiled with stack protection enabled (if it's disabled for the main kernel, the top-level Makefile will add -fno-stack-protector). This will break the build since commit e4160b2e4b02 ("x86/purgatory: Fail the build if purgatory.ro has missing symbols") and prior to that would have caused runtime failure when trying to use kexec. Explicitly add -fno-stack-protector to avoid this, as done in other Makefiles that need to disable the stack protector. Reported-by: Gabriel C Signed-off-by: Arvind Sankar Signed-off-by: Linus Torvalds --- arch/x86/purgatory/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile index b04e6e72a592..088bd764e0b7 100644 --- a/arch/x86/purgatory/Makefile +++ b/arch/x86/purgatory/Makefile @@ -34,6 +34,7 @@ KCOV_INSTRUMENT := n PURGATORY_CFLAGS_REMOVE := -mcmodel=kernel PURGATORY_CFLAGS := -mcmodel=large -ffreestanding -fno-zero-initialized-in-bss PURGATORY_CFLAGS += $(DISABLE_STACKLEAK_PLUGIN) -DDISABLE_BRANCH_PROFILING +PURGATORY_CFLAGS += $(call cc-option,-fno-stack-protector) # Default KBUILD_CFLAGS can have -pg option set when FTRACE is enabled. That # in turn leaves some undefined symbols like __fentry__ in purgatory and not -- cgit v1.2.3 From 9b38cc704e844e41d9cf74e647bff1d249512cb3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 12 May 2020 17:03:18 +0900 Subject: kretprobe: Prevent triggering kretprobe from within kprobe_flush_task Ziqian reported lockup when adding retprobe on _raw_spin_lock_irqsave. My test was also able to trigger lockdep output: ============================================ WARNING: possible recursive locking detected 5.6.0-rc6+ #6 Not tainted -------------------------------------------- sched-messaging/2767 is trying to acquire lock: ffffffff9a492798 (&(kretprobe_table_locks[i].lock)){-.-.}, at: kretprobe_hash_lock+0x52/0xa0 but task is already holding lock: ffffffff9a491a18 (&(kretprobe_table_locks[i].lock)){-.-.}, at: kretprobe_trampoline+0x0/0x50 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&(kretprobe_table_locks[i].lock)); lock(&(kretprobe_table_locks[i].lock)); *** DEADLOCK *** May be due to missing lock nesting notation 1 lock held by sched-messaging/2767: #0: ffffffff9a491a18 (&(kretprobe_table_locks[i].lock)){-.-.}, at: kretprobe_trampoline+0x0/0x50 stack backtrace: CPU: 3 PID: 2767 Comm: sched-messaging Not tainted 5.6.0-rc6+ #6 Call Trace: dump_stack+0x96/0xe0 __lock_acquire.cold.57+0x173/0x2b7 ? native_queued_spin_lock_slowpath+0x42b/0x9e0 ? lockdep_hardirqs_on+0x590/0x590 ? __lock_acquire+0xf63/0x4030 lock_acquire+0x15a/0x3d0 ? kretprobe_hash_lock+0x52/0xa0 _raw_spin_lock_irqsave+0x36/0x70 ? kretprobe_hash_lock+0x52/0xa0 kretprobe_hash_lock+0x52/0xa0 trampoline_handler+0xf8/0x940 ? kprobe_fault_handler+0x380/0x380 ? find_held_lock+0x3a/0x1c0 kretprobe_trampoline+0x25/0x50 ? lock_acquired+0x392/0xbc0 ? _raw_spin_lock_irqsave+0x50/0x70 ? __get_valid_kprobe+0x1f0/0x1f0 ? _raw_spin_unlock_irqrestore+0x3b/0x40 ? finish_task_switch+0x4b9/0x6d0 ? __switch_to_asm+0x34/0x70 ? __switch_to_asm+0x40/0x70 The code within the kretprobe handler checks for probe reentrancy, so we won't trigger any _raw_spin_lock_irqsave probe in there. The problem is in outside kprobe_flush_task, where we call: kprobe_flush_task kretprobe_table_lock raw_spin_lock_irqsave _raw_spin_lock_irqsave where _raw_spin_lock_irqsave triggers the kretprobe and installs kretprobe_trampoline handler on _raw_spin_lock_irqsave return. The kretprobe_trampoline handler is then executed with already locked kretprobe_table_locks, and first thing it does is to lock kretprobe_table_locks ;-) the whole lockup path like: kprobe_flush_task kretprobe_table_lock raw_spin_lock_irqsave _raw_spin_lock_irqsave ---> probe triggered, kretprobe_trampoline installed ---> kretprobe_table_locks locked kretprobe_trampoline trampoline_handler kretprobe_hash_lock(current, &head, &flags); <--- deadlock Adding kprobe_busy_begin/end helpers that mark code with fake probe installed to prevent triggering of another kprobe within this code. Using these helpers in kprobe_flush_task, so the probe recursion protection check is hit and the probe is never set to prevent above lockup. Link: http://lkml.kernel.org/r/158927059835.27680.7011202830041561604.stgit@devnote2 Fixes: ef53d9c5e4da ("kprobes: improve kretprobe scalability with hashed locking") Cc: Ingo Molnar Cc: "Gustavo A . R . Silva" Cc: Anders Roxell Cc: "Naveen N . Rao" Cc: Anil S Keshavamurthy Cc: David Miller Cc: Ingo Molnar Cc: Peter Zijlstra Cc: stable@vger.kernel.org Reported-by: "Ziqian SUN (Zamir)" Acked-by: Masami Hiramatsu Signed-off-by: Jiri Olsa Signed-off-by: Steven Rostedt (VMware) --- arch/x86/kernel/kprobes/core.c | 16 +++------------- include/linux/kprobes.h | 4 ++++ kernel/kprobes.c | 24 ++++++++++++++++++++++++ 3 files changed, 31 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 3bafe1bd4dc7..8a5ec10e95dc 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -753,16 +753,11 @@ asm( NOKPROBE_SYMBOL(kretprobe_trampoline); STACK_FRAME_NON_STANDARD(kretprobe_trampoline); -static struct kprobe kretprobe_kprobe = { - .addr = (void *)kretprobe_trampoline, -}; - /* * Called from kretprobe_trampoline */ __used __visible void *trampoline_handler(struct pt_regs *regs) { - struct kprobe_ctlblk *kcb; struct kretprobe_instance *ri = NULL; struct hlist_head *head, empty_rp; struct hlist_node *tmp; @@ -772,16 +767,12 @@ __used __visible void *trampoline_handler(struct pt_regs *regs) void *frame_pointer; bool skipped = false; - preempt_disable(); - /* * Set a dummy kprobe for avoiding kretprobe recursion. * Since kretprobe never run in kprobe handler, kprobe must not * be running at this point. */ - kcb = get_kprobe_ctlblk(); - __this_cpu_write(current_kprobe, &kretprobe_kprobe); - kcb->kprobe_status = KPROBE_HIT_ACTIVE; + kprobe_busy_begin(); INIT_HLIST_HEAD(&empty_rp); kretprobe_hash_lock(current, &head, &flags); @@ -857,7 +848,7 @@ __used __visible void *trampoline_handler(struct pt_regs *regs) __this_cpu_write(current_kprobe, &ri->rp->kp); ri->ret_addr = correct_ret_addr; ri->rp->handler(ri, regs); - __this_cpu_write(current_kprobe, &kretprobe_kprobe); + __this_cpu_write(current_kprobe, &kprobe_busy); } recycle_rp_inst(ri, &empty_rp); @@ -873,8 +864,7 @@ __used __visible void *trampoline_handler(struct pt_regs *regs) kretprobe_hash_unlock(current, &flags); - __this_cpu_write(current_kprobe, NULL); - preempt_enable(); + kprobe_busy_end(); hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 594265bfd390..05ed663e6c7b 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -350,6 +350,10 @@ static inline struct kprobe_ctlblk *get_kprobe_ctlblk(void) return this_cpu_ptr(&kprobe_ctlblk); } +extern struct kprobe kprobe_busy; +void kprobe_busy_begin(void); +void kprobe_busy_end(void); + kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset); int register_kprobe(struct kprobe *p); void unregister_kprobe(struct kprobe *p); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 5cb7791c16b3..4a904cc56d68 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1241,6 +1241,26 @@ __releases(hlist_lock) } NOKPROBE_SYMBOL(kretprobe_table_unlock); +struct kprobe kprobe_busy = { + .addr = (void *) get_kprobe, +}; + +void kprobe_busy_begin(void) +{ + struct kprobe_ctlblk *kcb; + + preempt_disable(); + __this_cpu_write(current_kprobe, &kprobe_busy); + kcb = get_kprobe_ctlblk(); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; +} + +void kprobe_busy_end(void) +{ + __this_cpu_write(current_kprobe, NULL); + preempt_enable(); +} + /* * This function is called from finish_task_switch when task tk becomes dead, * so that we can recycle any function-return probe instances associated @@ -1258,6 +1278,8 @@ void kprobe_flush_task(struct task_struct *tk) /* Early boot. kretprobe_table_locks not yet initialized. */ return; + kprobe_busy_begin(); + INIT_HLIST_HEAD(&empty_rp); hash = hash_ptr(tk, KPROBE_HASH_BITS); head = &kretprobe_inst_table[hash]; @@ -1271,6 +1293,8 @@ void kprobe_flush_task(struct task_struct *tk) hlist_del(&ri->hlist); kfree(ri); } + + kprobe_busy_end(); } NOKPROBE_SYMBOL(kprobe_flush_task); -- cgit v1.2.3 From 4d0831e8a029c03f49f434f28b8faef9f0bd403f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 14 Jun 2020 23:43:41 +0900 Subject: kconfig: unify cc-option and as-option cc-option and as-option are almost the same; both pass the flag to $(CC). The main difference is the cc-option stops before the assemble stage (-S option) whereas as-option stops after (-c option). I chose -S because it is slightly faster, but $(cc-option,-gz=zlib) returns a wrong result (https://lkml.org/lkml/2020/6/9/1529). It has been fixed by commit 7b16994437c7 ("Makefile: Improve compressed debug info support detection"), but the assembler should always be invoked for more reliable compiler option tests. However, you cannot simply replace -S with -c because the following code in lib/Kconfig.debug would break: depends on $(cc-option,-gsplit-dwarf) The combination of -c and -gsplit-dwarf does not accept /dev/null as output. $ cat /dev/null | gcc -gsplit-dwarf -S -x c - -o /dev/null $ echo $? 0 $ cat /dev/null | gcc -gsplit-dwarf -c -x c - -o /dev/null objcopy: Warning: '/dev/null' is not an ordinary file $ echo $? 1 $ cat /dev/null | gcc -gsplit-dwarf -c -x c - -o tmp.o $ echo $? 0 There is another flag that creates an separate file based on the object file path: $ cat /dev/null | gcc -ftest-coverage -c -x c - -o /dev/null :1: error: cannot open /dev/null.gcno So, we cannot use /dev/null to sink the output. Align the cc-option implementation with scripts/Kbuild.include. With -c option used in cc-option, as-option is unneeded. Signed-off-by: Masahiro Yamada Acked-by: Will Deacon --- arch/arm64/Kconfig | 2 +- lib/Kconfig.debug | 1 - scripts/Kconfig.include | 8 +------- 3 files changed, 2 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 31380da53689..6eb18f45258e 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1564,7 +1564,7 @@ config CC_HAS_SIGN_RETURN_ADDRESS def_bool $(cc-option,-msign-return-address=all) config AS_HAS_PAC - def_bool $(as-option,-Wa$(comma)-march=armv8.3-a) + def_bool $(cc-option,-Wa$(comma)-march=armv8.3-a) config AS_HAS_CFI_NEGATE_RA_STATE def_bool $(as-instr,.cfi_startproc\n.cfi_negate_ra_state\n.cfi_endproc\n) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 96999d4d2dda..9ad9210d70a1 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -229,7 +229,6 @@ config DEBUG_INFO_COMPRESSED bool "Compressed debugging information" depends on DEBUG_INFO depends on $(cc-option,-gz=zlib) - depends on $(as-option,-gz=zlib) depends on $(ld-option,--compress-debug-sections=zlib) help Compress the debug information using zlib. Requires GCC 5.0+ or Clang diff --git a/scripts/Kconfig.include b/scripts/Kconfig.include index c264da2b9b30..a5fe72c504ff 100644 --- a/scripts/Kconfig.include +++ b/scripts/Kconfig.include @@ -25,18 +25,12 @@ failure = $(if-success,$(1),n,y) # $(cc-option,) # Return y if the compiler supports , n otherwise -cc-option = $(success,$(CC) -Werror $(CLANG_FLAGS) $(1) -S -x c /dev/null -o /dev/null) +cc-option = $(success,mkdir .tmp_$$$$; trap "rm -rf .tmp_$$$$" EXIT; $(CC) -Werror $(CLANG_FLAGS) $(1) -c -x c /dev/null -o .tmp_$$$$/tmp.o) # $(ld-option,) # Return y if the linker supports , n otherwise ld-option = $(success,$(LD) -v $(1)) -# $(as-option,) -# /dev/zero is used as output instead of /dev/null as some assembler cribs when -# both input and output are same. Also both of them have same write behaviour so -# can be easily substituted. -as-option = $(success, $(CC) $(CLANG_FLAGS) $(1) -c -x assembler /dev/null -o /dev/zero) - # $(as-instr,) # Return y if the assembler supports , n otherwise as-instr = $(success,printf "%b\n" "$(1)" | $(CC) $(CLANG_FLAGS) -c -x assembler -o /dev/null -) -- cgit v1.2.3 From cc5277fe66cf3ad68f41f1c539b2ef0d5e432974 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 2 Jun 2020 22:36:11 +0300 Subject: x86/resctrl: Fix a NULL vs IS_ERR() static checker warning in rdt_cdp_peer_get() The callers don't expect *d_cdp to be set to an error pointer, they only check for NULL. This leads to a static checker warning: arch/x86/kernel/cpu/resctrl/rdtgroup.c:2648 __init_one_rdt_domain() warn: 'd_cdp' could be an error pointer This would not trigger a bug in this specific case because __init_one_rdt_domain() calls it with a valid domain that would not have a negative id and thus not trigger the return of the ERR_PTR(). If this was a negative domain id then the call to rdt_find_domain() in domain_add_cpu() would have returned the ERR_PTR() much earlier and the creation of the domain with an invalid id would have been prevented. Even though a bug is not triggered currently the right and safe thing to do is to set the pointer to NULL because that is what can be checked for when the caller is handling the CDP and non-CDP cases. Fixes: 52eb74339a62 ("x86/resctrl: Fix rdt_find_domain() return value and checks") Signed-off-by: Dan Carpenter Signed-off-by: Borislav Petkov Acked-by: Reinette Chatre Acked-by: Fenghua Yu Link: https://lkml.kernel.org/r/20200602193611.GA190851@mwanda --- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 23b4b61319d3..3f844f14fc0a 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -1117,6 +1117,7 @@ static int rdt_cdp_peer_get(struct rdt_resource *r, struct rdt_domain *d, _d_cdp = rdt_find_domain(_r_cdp, d->id, NULL); if (WARN_ON(IS_ERR_OR_NULL(_d_cdp))) { _r_cdp = NULL; + _d_cdp = NULL; ret = -EINVAL; } -- cgit v1.2.3 From b9249cba25a5dce5de87e5404503a5e11832c2dd Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 16 Jun 2020 19:03:49 +0100 Subject: arm64: bti: Require clang >= 10.0.1 for in-kernel BTI support Unfortunately, most versions of clang that support BTI are capable of miscompiling the kernel when converting a switch statement into a jump table. As an example, attempting to spawn a KVM guest results in a panic: [ 56.253312] Kernel panic - not syncing: bad mode [ 56.253834] CPU: 0 PID: 279 Comm: lkvm Not tainted 5.8.0-rc1 #2 [ 56.254225] Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015 [ 56.254712] Call trace: [ 56.254952] dump_backtrace+0x0/0x1d4 [ 56.255305] show_stack+0x1c/0x28 [ 56.255647] dump_stack+0xc4/0x128 [ 56.255905] panic+0x16c/0x35c [ 56.256146] bad_el0_sync+0x0/0x58 [ 56.256403] el1_sync_handler+0xb4/0xe0 [ 56.256674] el1_sync+0x7c/0x100 [ 56.256928] kvm_vm_ioctl_check_extension_generic+0x74/0x98 [ 56.257286] __arm64_sys_ioctl+0x94/0xcc [ 56.257569] el0_svc_common+0x9c/0x150 [ 56.257836] do_el0_svc+0x84/0x90 [ 56.258083] el0_sync_handler+0xf8/0x298 [ 56.258361] el0_sync+0x158/0x180 This is because the switch in kvm_vm_ioctl_check_extension_generic() is executed as an indirect branch to tail-call through a jump table: ffff800010032dc8: 3869694c ldrb w12, [x10, x9] ffff800010032dcc: 8b0c096b add x11, x11, x12, lsl #2 ffff800010032dd0: d61f0160 br x11 However, where the target case uses the stack, the landing pad is elided due to the presence of a paciasp instruction: ffff800010032e14: d503233f paciasp ffff800010032e18: a9bf7bfd stp x29, x30, [sp, #-16]! ffff800010032e1c: 910003fd mov x29, sp ffff800010032e20: aa0803e0 mov x0, x8 ffff800010032e24: 940017c0 bl ffff800010038d24 ffff800010032e28: 93407c00 sxtw x0, w0 ffff800010032e2c: a8c17bfd ldp x29, x30, [sp], #16 ffff800010032e30: d50323bf autiasp ffff800010032e34: d65f03c0 ret Unfortunately, this results in a fatal exception because paciasp is compatible only with branch-and-link (call) instructions and not simple indirect branches. A fix is being merged into Clang 10.0.1 so that a 'bti j' instruction is emitted as an explicit landing pad in this situation. Make in-kernel BTI depend on that compiler version when building with clang. Cc: Tom Stellard Cc: Daniel Kiss Reviewed-by: Mark Brown Acked-by: Dave Martin Reviewed-by: Nathan Chancellor Acked-by: Nick Desaulniers Link: https://lore.kernel.org/r/20200615105524.GA2694@willie-the-truck Link: https://lore.kernel.org/r/20200616183630.2445-1-will@kernel.org Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 31380da53689..4ae2419c14a8 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1630,6 +1630,8 @@ config ARM64_BTI_KERNEL depends on CC_HAS_BRANCH_PROT_PAC_RET_BTI # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94697 depends on !CC_IS_GCC || GCC_VERSION >= 100100 + # https://reviews.llvm.org/rGb8ae3fdfa579dbf366b1bb1cbfdbf8c51db7fa55 + depends on !CC_IS_CLANG || CLANG_VERSION >= 100001 depends on !(CC_IS_CLANG && GCOV_KERNEL) depends on (!FUNCTION_GRAPH_TRACER || DYNAMIC_FTRACE_WITH_REGS) help -- cgit v1.2.3 From 687993ccf3b05070598b89fad97410b26d7bc9d2 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Mon, 15 Jun 2020 12:22:29 +0300 Subject: powerpc/8xx: use pmd_off() to access a PMD entry in pte_update() The pte_update() implementation for PPC_8xx unfolds page table from the PGD level to access a PMD entry. Since 8xx has only 2-level page table this can be simplified with pmd_off() shortcut. Replace explicit unfolding with pmd_off() and drop defines of pgd_index() and pgd_offset() that are no longer needed. Signed-off-by: Mike Rapoport Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200615092229.23142-1-rppt@kernel.org --- arch/powerpc/include/asm/nohash/32/pgtable.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index b56f14160ae5..5a590ceaec14 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -205,10 +205,6 @@ static inline void pmd_clear(pmd_t *pmdp) *pmdp = __pmd(0); } -/* to find an entry in a page-table-directory */ -#define pgd_index(address) ((address) >> PGDIR_SHIFT) -#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) - /* * PTE updates. This function is called whenever an existing * valid PTE is updated. This does -not- include set_pte_at() @@ -230,6 +226,8 @@ static inline void pmd_clear(pmd_t *pmdp) * For other page sizes, we have a single entry in the table. */ #ifdef CONFIG_PPC_8xx +static pmd_t *pmd_off(struct mm_struct *mm, unsigned long addr); + static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, pte_t *p, unsigned long clr, unsigned long set, int huge) { @@ -237,7 +235,7 @@ static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, p pte_basic_t old = pte_val(*p); pte_basic_t new = (old & ~(pte_basic_t)clr) | set; int num, i; - pmd_t *pmd = pmd_offset(pud_offset(p4d_offset(pgd_offset(mm, addr), addr), addr), addr); + pmd_t *pmd = pmd_off(mm, addr); if (!huge) num = PAGE_SIZE / SZ_4K; -- cgit v1.2.3 From 1497eea68624f6076bf3eaf66baec3771ea04045 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 16 Jun 2020 23:56:16 +1000 Subject: powerpc/syscalls: Use the number when building SPU syscall table Currently the macro that inserts entries into the SPU syscall table doesn't actually use the "nr" (syscall number) parameter. This does work, but it relies on the exact right number of syscall entries being emitted in order for the syscal numbers to line up with the array entries. If for example we had two entries with the same syscall number we wouldn't get an error, it would just cause all subsequent syscalls to be off by one in the spu_syscall_table. So instead change the macro to assign to the specific entry of the array, meaning any numbering overlap will be caught by the compiler. Signed-off-by: Michael Ellerman Acked-by: Arnd Bergmann Link: https://lore.kernel.org/r/20200616135617.2937252-1-mpe@ellerman.id.au --- arch/powerpc/platforms/cell/spu_callbacks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/cell/spu_callbacks.c b/arch/powerpc/platforms/cell/spu_callbacks.c index cbee3666da07..abdef9bcf432 100644 --- a/arch/powerpc/platforms/cell/spu_callbacks.c +++ b/arch/powerpc/platforms/cell/spu_callbacks.c @@ -35,7 +35,7 @@ */ static void *spu_syscall_table[] = { -#define __SYSCALL(nr, entry) entry, +#define __SYSCALL(nr, entry) [nr] = entry, #include #undef __SYSCALL }; -- cgit v1.2.3 From 41d90b0c1108d1e46c48cf79964636c553844f4c Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Wed, 17 Jun 2020 09:19:57 -0400 Subject: efi/x86: Setup stack correctly for efi_pe_entry Commit 17054f492dfd ("efi/x86: Implement mixed mode boot without the handover protocol") introduced a new entry point for the EFI stub to be booted in mixed mode on 32-bit firmware. When entered via efi32_pe_entry, control is first transferred to startup_32 to setup for the switch to long mode, and then the EFI stub proper is entered via efi_pe_entry. efi_pe_entry is an MS ABI function, and the ABI requires 32 bytes of shadow stack space to be allocated by the caller, as well as the stack being aligned to 8 mod 16 on entry. Allocate 40 bytes on the stack before switching to 64-bit mode when calling efi_pe_entry to account for this. For robustness, explicitly align boot_stack_end to 16 bytes. It is currently implicitly aligned since .bss is cacheline-size aligned, head_64.o is the first object file with a .bss section, and the heap and boot sizes are aligned. Fixes: 17054f492dfd ("efi/x86: Implement mixed mode boot without the handover protocol") Signed-off-by: Arvind Sankar Link: https://lore.kernel.org/r/20200617131957.2507632-1-nivedita@alum.mit.edu Signed-off-by: Ard Biesheuvel --- arch/x86/boot/compressed/head_64.S | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index e821a7d7d5c4..97d37f0a34f5 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -213,7 +213,6 @@ SYM_FUNC_START(startup_32) * We place all of the values on our mini stack so lret can * used to perform that far jump. */ - pushl $__KERNEL_CS leal startup_64(%ebp), %eax #ifdef CONFIG_EFI_MIXED movl efi32_boot_args(%ebp), %edi @@ -224,11 +223,20 @@ SYM_FUNC_START(startup_32) movl efi32_boot_args+8(%ebp), %edx // saved bootparams pointer cmpl $0, %edx jnz 1f + /* + * efi_pe_entry uses MS calling convention, which requires 32 bytes of + * shadow space on the stack even if all arguments are passed in + * registers. We also need an additional 8 bytes for the space that + * would be occupied by the return address, and this also results in + * the correct stack alignment for entry. + */ + subl $40, %esp leal efi_pe_entry(%ebp), %eax movl %edi, %ecx // MS calling convention movl %esi, %edx 1: #endif + pushl $__KERNEL_CS pushl %eax /* Enter paged protected Mode, activating Long Mode */ @@ -784,6 +792,7 @@ SYM_DATA_LOCAL(boot_heap, .fill BOOT_HEAP_SIZE, 1, 0) SYM_DATA_START_LOCAL(boot_stack) .fill BOOT_STACK_SIZE, 1, 0 + .balign 16 SYM_DATA_END_LABEL(boot_stack, SYM_L_LOCAL, boot_stack_end) /* -- cgit v1.2.3 From 2a55280a3675203496d302463b941834228b9875 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 7 Jun 2020 15:41:35 +0200 Subject: efi/libstub: arm: Print CPU boot mode and MMU state at boot On 32-bit ARM, we may boot at HYP mode, or with the MMU and caches off (or both), even though the EFI spec does not actually support this. While booting at HYP mode is something we might tolerate, fiddling with the caches is a more serious issue, as disabling the caches is tricky to do safely from C code, and running without the Dcache makes it impossible to support unaligned memory accesses, which is another explicit requirement imposed by the EFI spec. So take note of the CPU mode and MMU state in the EFI stub diagnostic output so that we can easily diagnose any issues that may arise from this. E.g., EFI stub: Entering in SVC mode with MMU enabled Also, capture the CPSR and SCTLR system register values at EFI stub entry, and after ExitBootServices() returns, and check whether the MMU and Dcache were disabled at any point. If this is the case, a diagnostic message like the following will be emitted: efi: [Firmware Bug]: EFI stub was entered with MMU and Dcache disabled, please fix your firmware! efi: CPSR at EFI stub entry : 0x600001d3 efi: SCTLR at EFI stub entry : 0x00c51838 efi: CPSR after ExitBootServices() : 0x600001d3 efi: SCTLR after ExitBootServices(): 0x00c50838 Signed-off-by: Ard Biesheuvel Reviewed-by: Leif Lindholm --- arch/arm/include/asm/efi.h | 7 ++++ drivers/firmware/efi/arm-init.c | 34 +++++++++++++++++-- drivers/firmware/efi/libstub/arm32-stub.c | 54 ++++++++++++++++++++++++++++++- drivers/firmware/efi/libstub/efi-stub.c | 3 ++ drivers/firmware/efi/libstub/efistub.h | 2 ++ include/linux/efi.h | 1 + 6 files changed, 98 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/efi.h b/arch/arm/include/asm/efi.h index 84dc0ba822f5..5dcf3c6011b7 100644 --- a/arch/arm/include/asm/efi.h +++ b/arch/arm/include/asm/efi.h @@ -87,4 +87,11 @@ static inline unsigned long efi_get_max_initrd_addr(unsigned long dram_base, return dram_base + SZ_512M; } +struct efi_arm_entry_state { + u32 cpsr_before_ebs; + u32 sctlr_before_ebs; + u32 cpsr_after_ebs; + u32 sctlr_after_ebs; +}; + #endif /* _ASM_ARM_EFI_H */ diff --git a/drivers/firmware/efi/arm-init.c b/drivers/firmware/efi/arm-init.c index 6f4baf70db16..71c445d20258 100644 --- a/drivers/firmware/efi/arm-init.c +++ b/drivers/firmware/efi/arm-init.c @@ -52,9 +52,11 @@ static phys_addr_t __init efi_to_phys(unsigned long addr) } static __initdata unsigned long screen_info_table = EFI_INVALID_TABLE_ADDR; +static __initdata unsigned long cpu_state_table = EFI_INVALID_TABLE_ADDR; static const efi_config_table_type_t arch_tables[] __initconst = { {LINUX_EFI_ARM_SCREEN_INFO_TABLE_GUID, &screen_info_table}, + {LINUX_EFI_ARM_CPU_STATE_TABLE_GUID, &cpu_state_table}, {} }; @@ -240,9 +242,37 @@ void __init efi_init(void) init_screen_info(); +#ifdef CONFIG_ARM /* ARM does not permit early mappings to persist across paging_init() */ - if (IS_ENABLED(CONFIG_ARM)) - efi_memmap_unmap(); + efi_memmap_unmap(); + + if (cpu_state_table != EFI_INVALID_TABLE_ADDR) { + struct efi_arm_entry_state *state; + bool dump_state = true; + + state = early_memremap_ro(cpu_state_table, + sizeof(struct efi_arm_entry_state)); + if (state == NULL) { + pr_warn("Unable to map CPU entry state table.\n"); + return; + } + + if ((state->sctlr_before_ebs & 1) == 0) + pr_warn(FW_BUG "EFI stub was entered with MMU and Dcache disabled, please fix your firmware!\n"); + else if ((state->sctlr_after_ebs & 1) == 0) + pr_warn(FW_BUG "ExitBootServices() returned with MMU and Dcache disabled, please fix your firmware!\n"); + else + dump_state = false; + + if (dump_state || efi_enabled(EFI_DBG)) { + pr_info("CPSR at EFI stub entry : 0x%08x\n", state->cpsr_before_ebs); + pr_info("SCTLR at EFI stub entry : 0x%08x\n", state->sctlr_before_ebs); + pr_info("CPSR after ExitBootServices() : 0x%08x\n", state->cpsr_after_ebs); + pr_info("SCTLR after ExitBootServices(): 0x%08x\n", state->sctlr_after_ebs); + } + early_memunmap(state, sizeof(struct efi_arm_entry_state)); + } +#endif } static bool efifb_overlaps_pci_range(const struct of_pci_range *range) diff --git a/drivers/firmware/efi/libstub/arm32-stub.c b/drivers/firmware/efi/libstub/arm32-stub.c index 40243f524556..d08e5d55838c 100644 --- a/drivers/firmware/efi/libstub/arm32-stub.c +++ b/drivers/firmware/efi/libstub/arm32-stub.c @@ -7,10 +7,49 @@ #include "efistub.h" +static efi_guid_t cpu_state_guid = LINUX_EFI_ARM_CPU_STATE_TABLE_GUID; + +struct efi_arm_entry_state *efi_entry_state; + +static void get_cpu_state(u32 *cpsr, u32 *sctlr) +{ + asm("mrs %0, cpsr" : "=r"(*cpsr)); + if ((*cpsr & MODE_MASK) == HYP_MODE) + asm("mrc p15, 4, %0, c1, c0, 0" : "=r"(*sctlr)); + else + asm("mrc p15, 0, %0, c1, c0, 0" : "=r"(*sctlr)); +} + efi_status_t check_platform_features(void) { + efi_status_t status; + u32 cpsr, sctlr; int block; + get_cpu_state(&cpsr, &sctlr); + + efi_info("Entering in %s mode with MMU %sabled\n", + ((cpsr & MODE_MASK) == HYP_MODE) ? "HYP" : "SVC", + (sctlr & 1) ? "en" : "dis"); + + status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, + sizeof(*efi_entry_state), + (void **)&efi_entry_state); + if (status != EFI_SUCCESS) { + efi_err("allocate_pool() failed\n"); + return status; + } + + efi_entry_state->cpsr_before_ebs = cpsr; + efi_entry_state->sctlr_before_ebs = sctlr; + + status = efi_bs_call(install_configuration_table, &cpu_state_guid, + efi_entry_state); + if (status != EFI_SUCCESS) { + efi_err("install_configuration_table() failed\n"); + goto free_state; + } + /* non-LPAE kernels can run anywhere */ if (!IS_ENABLED(CONFIG_ARM_LPAE)) return EFI_SUCCESS; @@ -19,9 +58,22 @@ efi_status_t check_platform_features(void) block = cpuid_feature_extract(CPUID_EXT_MMFR0, 0); if (block < 5) { efi_err("This LPAE kernel is not supported by your CPU\n"); - return EFI_UNSUPPORTED; + status = EFI_UNSUPPORTED; + goto drop_table; } return EFI_SUCCESS; + +drop_table: + efi_bs_call(install_configuration_table, &cpu_state_guid, NULL); +free_state: + efi_bs_call(free_pool, efi_entry_state); + return status; +} + +void efi_handle_post_ebs_state(void) +{ + get_cpu_state(&efi_entry_state->cpsr_after_ebs, + &efi_entry_state->sctlr_after_ebs); } static efi_guid_t screen_info_guid = LINUX_EFI_ARM_SCREEN_INFO_TABLE_GUID; diff --git a/drivers/firmware/efi/libstub/efi-stub.c b/drivers/firmware/efi/libstub/efi-stub.c index e97370bdfdb0..3318ec3f8e5b 100644 --- a/drivers/firmware/efi/libstub/efi-stub.c +++ b/drivers/firmware/efi/libstub/efi-stub.c @@ -329,6 +329,9 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle, if (status != EFI_SUCCESS) goto fail_free_initrd; + if (IS_ENABLED(CONFIG_ARM)) + efi_handle_post_ebs_state(); + efi_enter_kernel(image_addr, fdt_addr, fdt_totalsize((void *)fdt_addr)); /* not reached */ diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index ac756f1fdb1a..2c9d42264c29 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -777,4 +777,6 @@ efi_status_t efi_load_initrd(efi_loaded_image_t *image, unsigned long soft_limit, unsigned long hard_limit); +void efi_handle_post_ebs_state(void); + #endif diff --git a/include/linux/efi.h b/include/linux/efi.h index c3449c9699d0..bb35f3305e55 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -350,6 +350,7 @@ void efi_native_runtime_setup(void); * associated with ConOut */ #define LINUX_EFI_ARM_SCREEN_INFO_TABLE_GUID EFI_GUID(0xe03fc20a, 0x85dc, 0x406e, 0xb9, 0x0e, 0x4a, 0xb5, 0x02, 0x37, 0x1d, 0x95) +#define LINUX_EFI_ARM_CPU_STATE_TABLE_GUID EFI_GUID(0xef79e4aa, 0x3c3d, 0x4989, 0xb9, 0x02, 0x07, 0xa9, 0x43, 0xe5, 0x50, 0xd2) #define LINUX_EFI_LOADER_ENTRY_GUID EFI_GUID(0x4a67b082, 0x0a4c, 0x41cf, 0xb6, 0xc7, 0x44, 0x0b, 0x29, 0xbb, 0x8c, 0x4f) #define LINUX_EFI_RANDOM_SEED_TABLE_GUID EFI_GUID(0x1ce1e5bc, 0x7ceb, 0x42f2, 0x81, 0xe5, 0x8a, 0xad, 0xf1, 0x80, 0xf5, 0x7b) #define LINUX_EFI_TPM_EVENT_LOG_GUID EFI_GUID(0xb7799cb0, 0xeca2, 0x4943, 0x96, 0x67, 0x1f, 0xae, 0x07, 0xb7, 0x47, 0xfa) -- cgit v1.2.3 From fe557319aa06c23cffc9346000f119547e0f289a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 17 Jun 2020 09:37:53 +0200 Subject: maccess: rename probe_kernel_{read,write} to copy_{from,to}_kernel_nofault Better describe what these functions do. Suggested-by: Linus Torvalds Signed-off-by: Christoph Hellwig Signed-off-by: Linus Torvalds --- arch/arm/kernel/ftrace.c | 3 ++- arch/arm/kernel/kgdb.c | 2 +- arch/arm64/kernel/insn.c | 4 ++-- arch/csky/kernel/ftrace.c | 5 ++-- arch/ia64/kernel/ftrace.c | 6 ++--- arch/mips/kernel/kprobes.c | 6 ++--- arch/nds32/kernel/ftrace.c | 5 ++-- arch/parisc/kernel/ftrace.c | 2 +- arch/parisc/kernel/kgdb.c | 4 ++-- arch/parisc/lib/memcpy.c | 2 +- arch/powerpc/kernel/module_64.c | 6 +++-- arch/powerpc/kernel/trace/ftrace.c | 4 ++-- arch/powerpc/lib/inst.c | 6 ++--- arch/powerpc/perf/core-book3s.c | 3 ++- arch/riscv/kernel/ftrace.c | 3 ++- arch/riscv/kernel/kgdb.c | 4 ++-- arch/riscv/kernel/patch.c | 4 ++-- arch/s390/kernel/ftrace.c | 4 ++-- arch/sh/kernel/ftrace.c | 6 ++--- arch/um/kernel/maccess.c | 2 +- arch/x86/include/asm/ptrace.h | 4 ++-- arch/x86/kernel/dumpstack.c | 2 +- arch/x86/kernel/ftrace.c | 10 ++++---- arch/x86/kernel/kgdb.c | 6 ++--- arch/x86/kernel/kprobes/core.c | 5 ++-- arch/x86/kernel/kprobes/opt.c | 2 +- arch/x86/kernel/traps.c | 3 ++- arch/x86/mm/fault.c | 2 +- arch/x86/mm/init_32.c | 2 +- arch/x86/mm/maccess.c | 4 ++-- arch/x86/xen/enlighten_pv.c | 2 +- drivers/char/mem.c | 2 +- drivers/dio/dio.c | 6 +++-- drivers/input/serio/hp_sdc.c | 2 +- drivers/misc/kgdbts.c | 6 ++--- drivers/video/fbdev/hpfb.c | 2 +- fs/proc/kcore.c | 3 ++- include/linux/uaccess.h | 13 +++++----- kernel/debug/debug_core.c | 6 ++--- kernel/debug/gdbstub.c | 6 ++--- kernel/debug/kdb/kdb_main.c | 3 ++- kernel/debug/kdb/kdb_support.c | 7 +++--- kernel/kthread.c | 2 +- kernel/trace/bpf_trace.c | 4 ++-- kernel/trace/trace_kprobe.c | 4 ++-- kernel/workqueue.c | 10 ++++---- mm/debug.c | 8 +++---- mm/maccess.c | 49 +++++++++++++++++++------------------- mm/rodata_test.c | 2 +- mm/slub.c | 2 +- 50 files changed, 138 insertions(+), 122 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c index 10499d44964a..9a79ef6b1876 100644 --- a/arch/arm/kernel/ftrace.c +++ b/arch/arm/kernel/ftrace.c @@ -84,7 +84,8 @@ static int ftrace_modify_code(unsigned long pc, unsigned long old, old = __opcode_to_mem_arm(old); if (validate) { - if (probe_kernel_read(&replaced, (void *)pc, MCOUNT_INSN_SIZE)) + if (copy_from_kernel_nofault(&replaced, (void *)pc, + MCOUNT_INSN_SIZE)) return -EFAULT; if (replaced != old) diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c index 6a95b9296640..7bd30c0a4280 100644 --- a/arch/arm/kernel/kgdb.c +++ b/arch/arm/kernel/kgdb.c @@ -236,7 +236,7 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) /* patch_text() only supports int-sized breakpoints */ BUILD_BUG_ON(sizeof(int) != BREAK_INSTR_SIZE); - err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr, + err = copy_from_kernel_nofault(bpt->saved_instr, (char *)bpt->bpt_addr, BREAK_INSTR_SIZE); if (err) return err; diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 684d871ae38d..a107375005bc 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -135,7 +135,7 @@ int __kprobes aarch64_insn_read(void *addr, u32 *insnp) int ret; __le32 val; - ret = probe_kernel_read(&val, addr, AARCH64_INSN_SIZE); + ret = copy_from_kernel_nofault(&val, addr, AARCH64_INSN_SIZE); if (!ret) *insnp = le32_to_cpu(val); @@ -151,7 +151,7 @@ static int __kprobes __aarch64_insn_write(void *addr, __le32 insn) raw_spin_lock_irqsave(&patch_lock, flags); waddr = patch_map(addr, FIX_TEXT_POKE0); - ret = probe_kernel_write(waddr, &insn, AARCH64_INSN_SIZE); + ret = copy_to_kernel_nofault(waddr, &insn, AARCH64_INSN_SIZE); patch_unmap(FIX_TEXT_POKE0); raw_spin_unlock_irqrestore(&patch_lock, flags); diff --git a/arch/csky/kernel/ftrace.c b/arch/csky/kernel/ftrace.c index 3c425b84e3be..b4a7ec1517ff 100644 --- a/arch/csky/kernel/ftrace.c +++ b/arch/csky/kernel/ftrace.c @@ -72,7 +72,8 @@ static int ftrace_check_current_nop(unsigned long hook) uint16_t olds[7]; unsigned long hook_pos = hook - 2; - if (probe_kernel_read((void *)olds, (void *)hook_pos, sizeof(nops))) + if (copy_from_kernel_nofault((void *)olds, (void *)hook_pos, + sizeof(nops))) return -EFAULT; if (memcmp((void *)nops, (void *)olds, sizeof(nops))) { @@ -97,7 +98,7 @@ static int ftrace_modify_code(unsigned long hook, unsigned long target, make_jbsr(target, hook, call, nolr); - ret = probe_kernel_write((void *)hook_pos, enable ? call : nops, + ret = copy_to_kernel_nofault((void *)hook_pos, enable ? call : nops, sizeof(nops)); if (ret) return -EPERM; diff --git a/arch/ia64/kernel/ftrace.c b/arch/ia64/kernel/ftrace.c index cee411e647ca..b2ab2d58fb30 100644 --- a/arch/ia64/kernel/ftrace.c +++ b/arch/ia64/kernel/ftrace.c @@ -108,7 +108,7 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, goto skip_check; /* read the text we want to modify */ - if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE)) + if (copy_from_kernel_nofault(replaced, (void *)ip, MCOUNT_INSN_SIZE)) return -EFAULT; /* Make sure it is what we expect it to be */ @@ -117,7 +117,7 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, skip_check: /* replace the text with the new text */ - if (probe_kernel_write(((void *)ip), new_code, MCOUNT_INSN_SIZE)) + if (copy_to_kernel_nofault(((void *)ip), new_code, MCOUNT_INSN_SIZE)) return -EPERM; flush_icache_range(ip, ip + MCOUNT_INSN_SIZE); @@ -129,7 +129,7 @@ static int ftrace_make_nop_check(struct dyn_ftrace *rec, unsigned long addr) unsigned char __attribute__((aligned(8))) replaced[MCOUNT_INSN_SIZE]; unsigned long ip = rec->ip; - if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE)) + if (copy_from_kernel_nofault(replaced, (void *)ip, MCOUNT_INSN_SIZE)) return -EFAULT; if (rec->flags & FTRACE_FL_CONVERTED) { struct ftrace_call_insn *call_insn, *tmp_call; diff --git a/arch/mips/kernel/kprobes.c b/arch/mips/kernel/kprobes.c index 6cfae2411c04..d043c2f897fc 100644 --- a/arch/mips/kernel/kprobes.c +++ b/arch/mips/kernel/kprobes.c @@ -86,9 +86,9 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) goto out; } - if ((probe_kernel_read(&prev_insn, p->addr - 1, - sizeof(mips_instruction)) == 0) && - insn_has_delayslot(prev_insn)) { + if (copy_from_kernel_nofault(&prev_insn, p->addr - 1, + sizeof(mips_instruction)) == 0 && + insn_has_delayslot(prev_insn)) { pr_notice("Kprobes for branch delayslot are not supported\n"); ret = -EINVAL; goto out; diff --git a/arch/nds32/kernel/ftrace.c b/arch/nds32/kernel/ftrace.c index 22ab77ea27ad..3763b3f8c3db 100644 --- a/arch/nds32/kernel/ftrace.c +++ b/arch/nds32/kernel/ftrace.c @@ -131,13 +131,14 @@ static int __ftrace_modify_code(unsigned long pc, unsigned long *old_insn, unsigned long orig_insn[3]; if (validate) { - if (probe_kernel_read(orig_insn, (void *)pc, MCOUNT_INSN_SIZE)) + if (copy_from_kernel_nofault(orig_insn, (void *)pc, + MCOUNT_INSN_SIZE)) return -EFAULT; if (memcmp(orig_insn, old_insn, MCOUNT_INSN_SIZE)) return -EINVAL; } - if (probe_kernel_write((void *)pc, new_insn, MCOUNT_INSN_SIZE)) + if (copy_to_kernel_nofault((void *)pc, new_insn, MCOUNT_INSN_SIZE)) return -EPERM; return 0; diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c index b836fc61a24f..1df0f67ed667 100644 --- a/arch/parisc/kernel/ftrace.c +++ b/arch/parisc/kernel/ftrace.c @@ -172,7 +172,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) ip = (void *)(rec->ip + 4 - size); - ret = probe_kernel_read(insn, ip, size); + ret = copy_from_kernel_nofault(insn, ip, size); if (ret) return ret; diff --git a/arch/parisc/kernel/kgdb.c b/arch/parisc/kernel/kgdb.c index 664278db9b97..c4554ac13eac 100644 --- a/arch/parisc/kernel/kgdb.c +++ b/arch/parisc/kernel/kgdb.c @@ -154,8 +154,8 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip) int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) { - int ret = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr, - BREAK_INSTR_SIZE); + int ret = copy_from_kernel_nofault(bpt->saved_instr, + (char *)bpt->bpt_addr, BREAK_INSTR_SIZE); if (ret) return ret; diff --git a/arch/parisc/lib/memcpy.c b/arch/parisc/lib/memcpy.c index 94a9fe2702c2..4b75388190b4 100644 --- a/arch/parisc/lib/memcpy.c +++ b/arch/parisc/lib/memcpy.c @@ -57,7 +57,7 @@ void * memcpy(void * dst,const void *src, size_t count) EXPORT_SYMBOL(raw_copy_in_user); EXPORT_SYMBOL(memcpy); -bool probe_kernel_read_allowed(const void *unsafe_src, size_t size) +bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) { if ((unsigned long)unsafe_src < PAGE_SIZE) return false; diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index f4c2fa190192..ae2b188365b1 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -756,7 +756,8 @@ int module_trampoline_target(struct module *mod, unsigned long addr, stub = (struct ppc64_stub_entry *)addr; - if (probe_kernel_read(&magic, &stub->magic, sizeof(magic))) { + if (copy_from_kernel_nofault(&magic, &stub->magic, + sizeof(magic))) { pr_err("%s: fault reading magic for stub %lx for %s\n", __func__, addr, mod->name); return -EFAULT; } @@ -766,7 +767,8 @@ int module_trampoline_target(struct module *mod, unsigned long addr, return -EFAULT; } - if (probe_kernel_read(&funcdata, &stub->funcdata, sizeof(funcdata))) { + if (copy_from_kernel_nofault(&funcdata, &stub->funcdata, + sizeof(funcdata))) { pr_err("%s: fault reading funcdata for stub %lx for %s\n", __func__, addr, mod->name); return -EFAULT; } diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 5e399628f51a..c1fede6ec934 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -226,7 +226,7 @@ __ftrace_make_nop(struct module *mod, unsigned long ip = rec->ip; unsigned long tramp; - if (probe_kernel_read(&op, (void *)ip, MCOUNT_INSN_SIZE)) + if (copy_from_kernel_nofault(&op, (void *)ip, MCOUNT_INSN_SIZE)) return -EFAULT; /* Make sure that that this is still a 24bit jump */ @@ -249,7 +249,7 @@ __ftrace_make_nop(struct module *mod, pr_devel("ip:%lx jumps to %lx", ip, tramp); /* Find where the trampoline jumps to */ - if (probe_kernel_read(jmp, (void *)tramp, sizeof(jmp))) { + if (copy_from_kernel_nofault(jmp, (void *)tramp, sizeof(jmp))) { pr_err("Failed to read %lx\n", tramp); return -EFAULT; } diff --git a/arch/powerpc/lib/inst.c b/arch/powerpc/lib/inst.c index aedfd6e31e53..6c7a20af9fd6 100644 --- a/arch/powerpc/lib/inst.c +++ b/arch/powerpc/lib/inst.c @@ -33,11 +33,11 @@ int probe_kernel_read_inst(struct ppc_inst *inst, unsigned int val, suffix; int err; - err = probe_kernel_read(&val, src, sizeof(val)); + err = copy_from_kernel_nofault(&val, src, sizeof(val)); if (err) return err; if (get_op(val) == OP_PREFIX) { - err = probe_kernel_read(&suffix, (void *)src + 4, 4); + err = copy_from_kernel_nofault(&suffix, (void *)src + 4, 4); *inst = ppc_inst_prefix(val, suffix); } else { *inst = ppc_inst(val); @@ -64,7 +64,7 @@ int probe_kernel_read_inst(struct ppc_inst *inst, unsigned int val; int err; - err = probe_kernel_read(&val, src, sizeof(val)); + err = copy_from_kernel_nofault(&val, src, sizeof(val)); if (!err) *inst = ppc_inst(val); diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 13b9dd5e4a76..efe97ff82557 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -418,7 +418,8 @@ static __u64 power_pmu_bhrb_to(u64 addr) __u64 target; if (is_kernel_addr(addr)) { - if (probe_kernel_read(&instr, (void *)addr, sizeof(instr))) + if (copy_from_kernel_nofault(&instr, (void *)addr, + sizeof(instr))) return 0; return branch_target((struct ppc_inst *)&instr); diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index 08396614d6f4..2ff63d0cbb50 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -38,7 +38,8 @@ static int ftrace_check_current_call(unsigned long hook_pos, * Read the text we want to modify; * return must be -EFAULT on read error */ - if (probe_kernel_read(replaced, (void *)hook_pos, MCOUNT_INSN_SIZE)) + if (copy_from_kernel_nofault(replaced, (void *)hook_pos, + MCOUNT_INSN_SIZE)) return -EFAULT; /* diff --git a/arch/riscv/kernel/kgdb.c b/arch/riscv/kernel/kgdb.c index f16ade84a11f..a21fb21883e7 100644 --- a/arch/riscv/kernel/kgdb.c +++ b/arch/riscv/kernel/kgdb.c @@ -153,7 +153,7 @@ int do_single_step(struct pt_regs *regs) stepped_address = addr; /* Replace the op code with the break instruction */ - error = probe_kernel_write((void *)stepped_address, + error = copy_to_kernel_nofault((void *)stepped_address, arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE); /* Flush and return */ @@ -173,7 +173,7 @@ int do_single_step(struct pt_regs *regs) static void undo_single_step(struct pt_regs *regs) { if (stepped_opcode != 0) { - probe_kernel_write((void *)stepped_address, + copy_to_kernel_nofault((void *)stepped_address, (void *)&stepped_opcode, BREAK_INSTR_SIZE); flush_icache_range(stepped_address, stepped_address + BREAK_INSTR_SIZE); diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c index d4a64dfed342..3fe7a5296aa5 100644 --- a/arch/riscv/kernel/patch.c +++ b/arch/riscv/kernel/patch.c @@ -63,7 +63,7 @@ static int patch_insn_write(void *addr, const void *insn, size_t len) waddr = patch_map(addr, FIX_TEXT_POKE0); - ret = probe_kernel_write(waddr, insn, len); + ret = copy_to_kernel_nofault(waddr, insn, len); patch_unmap(FIX_TEXT_POKE0); @@ -76,7 +76,7 @@ NOKPROBE_SYMBOL(patch_insn_write); #else static int patch_insn_write(void *addr, const void *insn, size_t len) { - return probe_kernel_write(addr, insn, len); + return copy_to_kernel_nofault(addr, insn, len); } NOKPROBE_SYMBOL(patch_insn_write); #endif /* CONFIG_MMU */ diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 44e01dd1e624..b388e87a08bf 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -83,7 +83,7 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, { struct ftrace_insn orig, new, old; - if (probe_kernel_read(&old, (void *) rec->ip, sizeof(old))) + if (copy_from_kernel_nofault(&old, (void *) rec->ip, sizeof(old))) return -EFAULT; if (addr == MCOUNT_ADDR) { /* Initial code replacement */ @@ -105,7 +105,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { struct ftrace_insn orig, new, old; - if (probe_kernel_read(&old, (void *) rec->ip, sizeof(old))) + if (copy_from_kernel_nofault(&old, (void *) rec->ip, sizeof(old))) return -EFAULT; /* Replace nop with an ftrace call. */ ftrace_generate_nop_insn(&orig); diff --git a/arch/sh/kernel/ftrace.c b/arch/sh/kernel/ftrace.c index 1b04270e5460..0646c5961846 100644 --- a/arch/sh/kernel/ftrace.c +++ b/arch/sh/kernel/ftrace.c @@ -119,7 +119,7 @@ static void ftrace_mod_code(void) * But if one were to fail, then they all should, and if one were * to succeed, then they all should. */ - mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode, + mod_code_status = copy_to_kernel_nofault(mod_code_ip, mod_code_newcode, MCOUNT_INSN_SIZE); /* if we fail, then kill any new writers */ @@ -203,7 +203,7 @@ static int ftrace_modify_code(unsigned long ip, unsigned char *old_code, */ /* read the text we want to modify */ - if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE)) + if (copy_from_kernel_nofault(replaced, (void *)ip, MCOUNT_INSN_SIZE)) return -EFAULT; /* Make sure it is what we expect it to be */ @@ -268,7 +268,7 @@ static int ftrace_mod(unsigned long ip, unsigned long old_addr, { unsigned char code[MCOUNT_INSN_SIZE]; - if (probe_kernel_read(code, (void *)ip, MCOUNT_INSN_SIZE)) + if (copy_from_kernel_nofault(code, (void *)ip, MCOUNT_INSN_SIZE)) return -EFAULT; if (old_addr != __raw_readl((unsigned long *)code)) diff --git a/arch/um/kernel/maccess.c b/arch/um/kernel/maccess.c index e929c0966696..8ccd56813f68 100644 --- a/arch/um/kernel/maccess.c +++ b/arch/um/kernel/maccess.c @@ -7,7 +7,7 @@ #include #include -bool probe_kernel_read_allowed(const void *src, size_t size) +bool copy_from_kernel_nofault_allowed(const void *src, size_t size) { void *psrc = (void *)rounddown((unsigned long)src, PAGE_SIZE); diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index ebedeab48704..255b2dde2c1b 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -278,7 +278,7 @@ static inline unsigned long *regs_get_kernel_stack_nth_addr(struct pt_regs *regs } /* To avoid include hell, we can't include uaccess.h */ -extern long probe_kernel_read(void *dst, const void *src, size_t size); +extern long copy_from_kernel_nofault(void *dst, const void *src, size_t size); /** * regs_get_kernel_stack_nth() - get Nth entry of the stack @@ -298,7 +298,7 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, addr = regs_get_kernel_stack_nth_addr(regs, n); if (addr) { - ret = probe_kernel_read(&val, addr, sizeof(val)); + ret = copy_from_kernel_nofault(&val, addr, sizeof(val)); if (!ret) return val; } diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 456511b2284e..b037cfa7c0c5 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -106,7 +106,7 @@ void show_opcodes(struct pt_regs *regs, const char *loglvl) bad_ip = user_mode(regs) && __chk_range_not_ok(prologue, OPCODE_BUFSIZE, TASK_SIZE_MAX); - if (bad_ip || probe_kernel_read(opcodes, (u8 *)prologue, + if (bad_ip || copy_from_kernel_nofault(opcodes, (u8 *)prologue, OPCODE_BUFSIZE)) { printk("%sCode: Bad RIP value.\n", loglvl); } else { diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index c84d28e90a58..51504566b3a6 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -86,7 +86,7 @@ static int ftrace_verify_code(unsigned long ip, const char *old_code) * sure what we read is what we expected it to be before modifying it. */ /* read the text we want to modify */ - if (probe_kernel_read(cur_code, (void *)ip, MCOUNT_INSN_SIZE)) { + if (copy_from_kernel_nofault(cur_code, (void *)ip, MCOUNT_INSN_SIZE)) { WARN_ON(1); return -EFAULT; } @@ -355,7 +355,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) npages = DIV_ROUND_UP(*tramp_size, PAGE_SIZE); /* Copy ftrace_caller onto the trampoline memory */ - ret = probe_kernel_read(trampoline, (void *)start_offset, size); + ret = copy_from_kernel_nofault(trampoline, (void *)start_offset, size); if (WARN_ON(ret < 0)) goto fail; @@ -363,13 +363,13 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) /* The trampoline ends with ret(q) */ retq = (unsigned long)ftrace_stub; - ret = probe_kernel_read(ip, (void *)retq, RET_SIZE); + ret = copy_from_kernel_nofault(ip, (void *)retq, RET_SIZE); if (WARN_ON(ret < 0)) goto fail; if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) { ip = trampoline + (ftrace_regs_caller_ret - ftrace_regs_caller); - ret = probe_kernel_read(ip, (void *)retq, RET_SIZE); + ret = copy_from_kernel_nofault(ip, (void *)retq, RET_SIZE); if (WARN_ON(ret < 0)) goto fail; } @@ -506,7 +506,7 @@ static void *addr_from_call(void *ptr) union text_poke_insn call; int ret; - ret = probe_kernel_read(&call, ptr, CALL_INSN_SIZE); + ret = copy_from_kernel_nofault(&call, ptr, CALL_INSN_SIZE); if (WARN_ON_ONCE(ret < 0)) return NULL; diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index c44fe7d8d9a4..68acd30c6b87 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -732,11 +732,11 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) int err; bpt->type = BP_BREAKPOINT; - err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr, + err = copy_from_kernel_nofault(bpt->saved_instr, (char *)bpt->bpt_addr, BREAK_INSTR_SIZE); if (err) return err; - err = probe_kernel_write((char *)bpt->bpt_addr, + err = copy_to_kernel_nofault((char *)bpt->bpt_addr, arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE); if (!err) return err; @@ -768,7 +768,7 @@ int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) return 0; knl_write: - return probe_kernel_write((char *)bpt->bpt_addr, + return copy_to_kernel_nofault((char *)bpt->bpt_addr, (char *)bpt->saved_instr, BREAK_INSTR_SIZE); } diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 3bafe1bd4dc7..f09985c87d73 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -243,7 +243,7 @@ __recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr) * Fortunately, we know that the original code is the ideal 5-byte * long NOP. */ - if (probe_kernel_read(buf, (void *)addr, + if (copy_from_kernel_nofault(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t))) return 0UL; @@ -346,7 +346,8 @@ int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn) return 0; /* This can access kernel text if given address is not recovered */ - if (probe_kernel_read(dest, (void *)recovered_insn, MAX_INSN_SIZE)) + if (copy_from_kernel_nofault(dest, (void *)recovered_insn, + MAX_INSN_SIZE)) return 0; kernel_insn_init(insn, dest, MAX_INSN_SIZE); diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 321c19950285..7af4c61dde52 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -56,7 +56,7 @@ found: * overwritten by jump destination address. In this case, original * bytes must be recovered from op->optinsn.copied_insn buffer. */ - if (probe_kernel_read(buf, (void *)addr, + if (copy_from_kernel_nofault(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t))) return 0UL; diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index af75109485c2..7003f2e7b163 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -488,7 +488,8 @@ static enum kernel_gp_hint get_kernel_gp_address(struct pt_regs *regs, u8 insn_buf[MAX_INSN_SIZE]; struct insn insn; - if (probe_kernel_read(insn_buf, (void *)regs->ip, MAX_INSN_SIZE)) + if (copy_from_kernel_nofault(insn_buf, (void *)regs->ip, + MAX_INSN_SIZE)) return GP_NO_HINT; kernel_insn_init(&insn, insn_buf, MAX_INSN_SIZE); diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 66be9bd60307..e996aa3833b8 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -442,7 +442,7 @@ static void show_ldttss(const struct desc_ptr *gdt, const char *name, u16 index) return; } - if (probe_kernel_read(&desc, (void *)(gdt->address + offset), + if (copy_from_kernel_nofault(&desc, (void *)(gdt->address + offset), sizeof(struct ldttss_desc))) { pr_alert("%s: 0x%hx -- GDT entry is not readable\n", name, index); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index bda909e3e37e..8b4afad84f4a 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -737,7 +737,7 @@ static void __init test_wp_bit(void) __set_fixmap(FIX_WP_TEST, __pa_symbol(empty_zero_page), PAGE_KERNEL_RO); - if (probe_kernel_write((char *)fix_to_virt(FIX_WP_TEST), &z, 1)) { + if (copy_to_kernel_nofault((char *)fix_to_virt(FIX_WP_TEST), &z, 1)) { clear_fixmap(FIX_WP_TEST); printk(KERN_CONT "Ok.\n"); return; diff --git a/arch/x86/mm/maccess.c b/arch/x86/mm/maccess.c index e1d7d7477c22..92ec176a7293 100644 --- a/arch/x86/mm/maccess.c +++ b/arch/x86/mm/maccess.c @@ -9,7 +9,7 @@ static __always_inline u64 canonical_address(u64 vaddr, u8 vaddr_bits) return ((s64)vaddr << (64 - vaddr_bits)) >> (64 - vaddr_bits); } -bool probe_kernel_read_allowed(const void *unsafe_src, size_t size) +bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) { unsigned long vaddr = (unsigned long)unsafe_src; @@ -22,7 +22,7 @@ bool probe_kernel_read_allowed(const void *unsafe_src, size_t size) canonical_address(vaddr, boot_cpu_data.x86_virt_bits) == vaddr; } #else -bool probe_kernel_read_allowed(const void *unsafe_src, size_t size) +bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) { return (unsigned long)unsafe_src >= TASK_SIZE_MAX; } diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 33b309d65955..acc49fa6a097 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -386,7 +386,7 @@ static void set_aliased_prot(void *v, pgprot_t prot) preempt_disable(); - probe_kernel_read(&dummy, v, 1); + copy_from_kernel_nofault(&dummy, v, 1); if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0)) BUG(); diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 31cae88a730b..934c92dcb9ab 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -171,7 +171,7 @@ static ssize_t read_mem(struct file *file, char __user *buf, if (!ptr) goto failed; - probe = probe_kernel_read(bounce, ptr, sz); + probe = copy_from_kernel_nofault(bounce, ptr, sz); unxlate_dev_mem_ptr(p, ptr); if (probe) goto failed; diff --git a/drivers/dio/dio.c b/drivers/dio/dio.c index c9aa15fb86a9..193b40e7aec0 100644 --- a/drivers/dio/dio.c +++ b/drivers/dio/dio.c @@ -135,7 +135,8 @@ int __init dio_find(int deviceid) else va = ioremap(pa, PAGE_SIZE); - if (probe_kernel_read(&i, (unsigned char *)va + DIO_IDOFF, 1)) { + if (copy_from_kernel_nofault(&i, + (unsigned char *)va + DIO_IDOFF, 1)) { if (scode >= DIOII_SCBASE) iounmap(va); continue; /* no board present at that select code */ @@ -208,7 +209,8 @@ static int __init dio_init(void) else va = ioremap(pa, PAGE_SIZE); - if (probe_kernel_read(&i, (unsigned char *)va + DIO_IDOFF, 1)) { + if (copy_from_kernel_nofault(&i, + (unsigned char *)va + DIO_IDOFF, 1)) { if (scode >= DIOII_SCBASE) iounmap(va); continue; /* no board present at that select code */ diff --git a/drivers/input/serio/hp_sdc.c b/drivers/input/serio/hp_sdc.c index 654252361653..13eacf6ab431 100644 --- a/drivers/input/serio/hp_sdc.c +++ b/drivers/input/serio/hp_sdc.c @@ -1021,7 +1021,7 @@ static int __init hp_sdc_register(void) hp_sdc.base_io = (unsigned long) 0xf0428000; hp_sdc.data_io = (unsigned long) hp_sdc.base_io + 1; hp_sdc.status_io = (unsigned long) hp_sdc.base_io + 3; - if (!probe_kernel_read(&i, (unsigned char *)hp_sdc.data_io, 1)) + if (!copy_from_kernel_nofault(&i, (unsigned char *)hp_sdc.data_io, 1)) hp_sdc.dev = (void *)1; hp_sdc.dev_err = hp_sdc_init(); #endif diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c index bccd341e9ae1..d5d2af4d10e6 100644 --- a/drivers/misc/kgdbts.c +++ b/drivers/misc/kgdbts.c @@ -828,7 +828,7 @@ static void run_plant_and_detach_test(int is_early) char before[BREAK_INSTR_SIZE]; char after[BREAK_INSTR_SIZE]; - probe_kernel_read(before, (char *)kgdbts_break_test, + copy_from_kernel_nofault(before, (char *)kgdbts_break_test, BREAK_INSTR_SIZE); init_simple_test(); ts.tst = plant_and_detach_test; @@ -836,8 +836,8 @@ static void run_plant_and_detach_test(int is_early) /* Activate test with initial breakpoint */ if (!is_early) kgdb_breakpoint(); - probe_kernel_read(after, (char *)kgdbts_break_test, - BREAK_INSTR_SIZE); + copy_from_kernel_nofault(after, (char *)kgdbts_break_test, + BREAK_INSTR_SIZE); if (memcmp(before, after, BREAK_INSTR_SIZE)) { printk(KERN_CRIT "kgdbts: ERROR kgdb corrupted memory\n"); panic("kgdb memory corruption"); diff --git a/drivers/video/fbdev/hpfb.c b/drivers/video/fbdev/hpfb.c index f02be0db335e..8d418abdd767 100644 --- a/drivers/video/fbdev/hpfb.c +++ b/drivers/video/fbdev/hpfb.c @@ -402,7 +402,7 @@ int __init hpfb_init(void) if (err) return err; - err = probe_kernel_read(&i, (unsigned char *)INTFBVADDR + DIO_IDOFF, 1); + err = copy_from_kernel_nofault(&i, (unsigned char *)INTFBVADDR + DIO_IDOFF, 1); if (!err && (i == DIO_ID_FBUFFER) && topcat_sid_ok(sid = DIO_SECID(INTFBVADDR))) { if (!request_mem_region(INTFBPADDR, DIO_DEVSIZE, "Internal Topcat")) diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 8ba492d44e68..e502414b3556 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -512,7 +512,8 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) * Using bounce buffer to bypass the * hardened user copy kernel text checks. */ - if (probe_kernel_read(buf, (void *) start, tsz)) { + if (copy_from_kernel_nofault(buf, (void *)start, + tsz)) { if (clear_user(buffer, tsz)) { ret = -EFAULT; goto out; diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 7bcadca22100..70a3d9cd9113 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -301,13 +301,14 @@ copy_struct_from_user(void *dst, size_t ksize, const void __user *src, return 0; } -bool probe_kernel_read_allowed(const void *unsafe_src, size_t size); +bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size); -extern long probe_kernel_read(void *dst, const void *src, size_t size); -extern long probe_user_read(void *dst, const void __user *src, size_t size); +long copy_from_kernel_nofault(void *dst, const void *src, size_t size); +long notrace copy_to_kernel_nofault(void *dst, const void *src, size_t size); -extern long notrace probe_kernel_write(void *dst, const void *src, size_t size); -extern long notrace probe_user_write(void __user *dst, const void *src, size_t size); +extern long probe_user_read(void *dst, const void __user *src, size_t size); +extern long notrace probe_user_write(void __user *dst, const void *src, + size_t size); long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr, long count); @@ -324,7 +325,7 @@ long strnlen_user_nofault(const void __user *unsafe_addr, long count); * Returns 0 on success, or -EFAULT. */ #define probe_kernel_address(addr, retval) \ - probe_kernel_read(&retval, addr, sizeof(retval)) + copy_from_kernel_nofault(&retval, addr, sizeof(retval)) #ifndef user_access_begin #define user_access_begin(ptr,len) access_ok(ptr, len) diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index ccc0f98abdd4..bc8d25f2ac8a 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -169,18 +169,18 @@ int __weak kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) { int err; - err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr, + err = copy_from_kernel_nofault(bpt->saved_instr, (char *)bpt->bpt_addr, BREAK_INSTR_SIZE); if (err) return err; - err = probe_kernel_write((char *)bpt->bpt_addr, + err = copy_to_kernel_nofault((char *)bpt->bpt_addr, arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE); return err; } int __weak kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) { - return probe_kernel_write((char *)bpt->bpt_addr, + return copy_to_kernel_nofault((char *)bpt->bpt_addr, (char *)bpt->saved_instr, BREAK_INSTR_SIZE); } diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c index 4b280fc7dd67..61774aec46b4 100644 --- a/kernel/debug/gdbstub.c +++ b/kernel/debug/gdbstub.c @@ -247,7 +247,7 @@ char *kgdb_mem2hex(char *mem, char *buf, int count) */ tmp = buf + count; - err = probe_kernel_read(tmp, mem, count); + err = copy_from_kernel_nofault(tmp, mem, count); if (err) return NULL; while (count > 0) { @@ -283,7 +283,7 @@ int kgdb_hex2mem(char *buf, char *mem, int count) *tmp_raw |= hex_to_bin(*tmp_hex--) << 4; } - return probe_kernel_write(mem, tmp_raw, count); + return copy_to_kernel_nofault(mem, tmp_raw, count); } /* @@ -335,7 +335,7 @@ static int kgdb_ebin2mem(char *buf, char *mem, int count) size++; } - return probe_kernel_write(mem, c, size); + return copy_to_kernel_nofault(mem, c, size); } #if DBG_MAX_REG_NUM > 0 diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index ec190569f690..5c7949061671 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -2326,7 +2326,8 @@ void kdb_ps1(const struct task_struct *p) int cpu; unsigned long tmp; - if (!p || probe_kernel_read(&tmp, (char *)p, sizeof(unsigned long))) + if (!p || + copy_from_kernel_nofault(&tmp, (char *)p, sizeof(unsigned long))) return; cpu = kdb_process_cpu(p); diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c index b8e6306e7e13..004c5b6c87f8 100644 --- a/kernel/debug/kdb/kdb_support.c +++ b/kernel/debug/kdb/kdb_support.c @@ -325,7 +325,7 @@ char *kdb_strdup(const char *str, gfp_t type) */ int kdb_getarea_size(void *res, unsigned long addr, size_t size) { - int ret = probe_kernel_read((char *)res, (char *)addr, size); + int ret = copy_from_kernel_nofault((char *)res, (char *)addr, size); if (ret) { if (!KDB_STATE(SUPPRESS)) { kdb_printf("kdb_getarea: Bad address 0x%lx\n", addr); @@ -350,7 +350,7 @@ int kdb_getarea_size(void *res, unsigned long addr, size_t size) */ int kdb_putarea_size(unsigned long addr, void *res, size_t size) { - int ret = probe_kernel_read((char *)addr, (char *)res, size); + int ret = copy_from_kernel_nofault((char *)addr, (char *)res, size); if (ret) { if (!KDB_STATE(SUPPRESS)) { kdb_printf("kdb_putarea: Bad address 0x%lx\n", addr); @@ -624,7 +624,8 @@ char kdb_task_state_char (const struct task_struct *p) char state; unsigned long tmp; - if (!p || probe_kernel_read(&tmp, (char *)p, sizeof(unsigned long))) + if (!p || + copy_from_kernel_nofault(&tmp, (char *)p, sizeof(unsigned long))) return 'E'; cpu = kdb_process_cpu(p); diff --git a/kernel/kthread.c b/kernel/kthread.c index 8e3d2d7fdf5e..132f84a5fde3 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -201,7 +201,7 @@ void *kthread_probe_data(struct task_struct *task) struct kthread *kthread = to_kthread(task); void *data = NULL; - probe_kernel_read(&data, &kthread->data, sizeof(data)); + copy_from_kernel_nofault(&data, &kthread->data, sizeof(data)); return data; } diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index e729c9e587a0..204afc12425f 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -196,7 +196,7 @@ bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr) if (unlikely(ret < 0)) goto fail; - ret = probe_kernel_read(dst, unsafe_ptr, size); + ret = copy_from_kernel_nofault(dst, unsafe_ptr, size); if (unlikely(ret < 0)) goto fail; return ret; @@ -661,7 +661,7 @@ BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size, copy_size = (fmt[i + 2] == '4') ? 4 : 16; - err = probe_kernel_read(bufs->buf[memcpy_cnt], + err = copy_from_kernel_nofault(bufs->buf[memcpy_cnt], (void *) (long) args[fmt_cnt], copy_size); if (err < 0) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 6048f1be26d2..841c74863ff8 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1222,7 +1222,7 @@ fetch_store_strlen(unsigned long addr) #endif do { - ret = probe_kernel_read(&c, (u8 *)addr + len, 1); + ret = copy_from_kernel_nofault(&c, (u8 *)addr + len, 1); len++; } while (c && ret == 0 && len < MAX_STRING_SIZE); @@ -1300,7 +1300,7 @@ probe_mem_read(void *dest, void *src, size_t size) if ((unsigned long)src < TASK_SIZE) return probe_mem_read_user(dest, src, size); #endif - return probe_kernel_read(dest, src, size); + return copy_from_kernel_nofault(dest, src, size); } /* Note that we don't verify it, since the code does not come from user space */ diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 9fbe1e237563..c41c3c17b86a 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -4638,11 +4638,11 @@ void print_worker_info(const char *log_lvl, struct task_struct *task) * Carefully copy the associated workqueue's workfn, name and desc. * Keep the original last '\0' in case the original is garbage. */ - probe_kernel_read(&fn, &worker->current_func, sizeof(fn)); - probe_kernel_read(&pwq, &worker->current_pwq, sizeof(pwq)); - probe_kernel_read(&wq, &pwq->wq, sizeof(wq)); - probe_kernel_read(name, wq->name, sizeof(name) - 1); - probe_kernel_read(desc, worker->desc, sizeof(desc) - 1); + copy_from_kernel_nofault(&fn, &worker->current_func, sizeof(fn)); + copy_from_kernel_nofault(&pwq, &worker->current_pwq, sizeof(pwq)); + copy_from_kernel_nofault(&wq, &pwq->wq, sizeof(wq)); + copy_from_kernel_nofault(name, wq->name, sizeof(name) - 1); + copy_from_kernel_nofault(desc, worker->desc, sizeof(desc) - 1); if (fn || name[0] || desc[0]) { printk("%sWorkqueue: %s %ps", log_lvl, name, fn); diff --git a/mm/debug.c b/mm/debug.c index b5b1de8c71ac..4f376514744d 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -120,9 +120,9 @@ void __dump_page(struct page *page, const char *reason) * mapping can be invalid pointer and we don't want to crash * accessing it, so probe everything depending on it carefully */ - if (probe_kernel_read(&host, &mapping->host, + if (copy_from_kernel_nofault(&host, &mapping->host, sizeof(struct inode *)) || - probe_kernel_read(&a_ops, &mapping->a_ops, + copy_from_kernel_nofault(&a_ops, &mapping->a_ops, sizeof(struct address_space_operations *))) { pr_warn("failed to read mapping->host or a_ops, mapping not a valid kernel address?\n"); goto out_mapping; @@ -133,7 +133,7 @@ void __dump_page(struct page *page, const char *reason) goto out_mapping; } - if (probe_kernel_read(&dentry_first, + if (copy_from_kernel_nofault(&dentry_first, &host->i_dentry.first, sizeof(struct hlist_node *))) { pr_warn("mapping->a_ops:%ps with invalid mapping->host inode address %px\n", a_ops, host); @@ -146,7 +146,7 @@ void __dump_page(struct page *page, const char *reason) } dentry_ptr = container_of(dentry_first, struct dentry, d_u.d_alias); - if (probe_kernel_read(&dentry, dentry_ptr, + if (copy_from_kernel_nofault(&dentry, dentry_ptr, sizeof(struct dentry))) { pr_warn("mapping->aops:%ps with invalid mapping->host->i_dentry.first %px\n", a_ops, dentry_ptr); diff --git a/mm/maccess.c b/mm/maccess.c index 88845eda5047..cc5d8c6233c0 100644 --- a/mm/maccess.c +++ b/mm/maccess.c @@ -6,14 +6,15 @@ #include #include -bool __weak probe_kernel_read_allowed(const void *unsafe_src, size_t size) +bool __weak copy_from_kernel_nofault_allowed(const void *unsafe_src, + size_t size) { return true; } #ifdef HAVE_GET_KERNEL_NOFAULT -#define probe_kernel_read_loop(dst, src, len, type, err_label) \ +#define copy_from_kernel_nofault_loop(dst, src, len, type, err_label) \ while (len >= sizeof(type)) { \ __get_kernel_nofault(dst, src, type, err_label); \ dst += sizeof(type); \ @@ -21,25 +22,25 @@ bool __weak probe_kernel_read_allowed(const void *unsafe_src, size_t size) len -= sizeof(type); \ } -long probe_kernel_read(void *dst, const void *src, size_t size) +long copy_from_kernel_nofault(void *dst, const void *src, size_t size) { - if (!probe_kernel_read_allowed(src, size)) + if (!copy_from_kernel_nofault_allowed(src, size)) return -ERANGE; pagefault_disable(); - probe_kernel_read_loop(dst, src, size, u64, Efault); - probe_kernel_read_loop(dst, src, size, u32, Efault); - probe_kernel_read_loop(dst, src, size, u16, Efault); - probe_kernel_read_loop(dst, src, size, u8, Efault); + copy_from_kernel_nofault_loop(dst, src, size, u64, Efault); + copy_from_kernel_nofault_loop(dst, src, size, u32, Efault); + copy_from_kernel_nofault_loop(dst, src, size, u16, Efault); + copy_from_kernel_nofault_loop(dst, src, size, u8, Efault); pagefault_enable(); return 0; Efault: pagefault_enable(); return -EFAULT; } -EXPORT_SYMBOL_GPL(probe_kernel_read); +EXPORT_SYMBOL_GPL(copy_from_kernel_nofault); -#define probe_kernel_write_loop(dst, src, len, type, err_label) \ +#define copy_to_kernel_nofault_loop(dst, src, len, type, err_label) \ while (len >= sizeof(type)) { \ __put_kernel_nofault(dst, src, type, err_label); \ dst += sizeof(type); \ @@ -47,13 +48,13 @@ EXPORT_SYMBOL_GPL(probe_kernel_read); len -= sizeof(type); \ } -long probe_kernel_write(void *dst, const void *src, size_t size) +long copy_to_kernel_nofault(void *dst, const void *src, size_t size) { pagefault_disable(); - probe_kernel_write_loop(dst, src, size, u64, Efault); - probe_kernel_write_loop(dst, src, size, u32, Efault); - probe_kernel_write_loop(dst, src, size, u16, Efault); - probe_kernel_write_loop(dst, src, size, u8, Efault); + copy_to_kernel_nofault_loop(dst, src, size, u64, Efault); + copy_to_kernel_nofault_loop(dst, src, size, u32, Efault); + copy_to_kernel_nofault_loop(dst, src, size, u16, Efault); + copy_to_kernel_nofault_loop(dst, src, size, u8, Efault); pagefault_enable(); return 0; Efault: @@ -67,7 +68,7 @@ long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr, long count) if (unlikely(count <= 0)) return 0; - if (!probe_kernel_read_allowed(unsafe_addr, count)) + if (!copy_from_kernel_nofault_allowed(unsafe_addr, count)) return -ERANGE; pagefault_disable(); @@ -87,7 +88,7 @@ Efault: } #else /* HAVE_GET_KERNEL_NOFAULT */ /** - * probe_kernel_read(): safely attempt to read from kernel-space + * copy_from_kernel_nofault(): safely attempt to read from kernel-space * @dst: pointer to the buffer that shall take the data * @src: address to read from * @size: size of the data chunk @@ -98,15 +99,15 @@ Efault: * * We ensure that the copy_from_user is executed in atomic context so that * do_page_fault() doesn't attempt to take mmap_lock. This makes - * probe_kernel_read() suitable for use within regions where the caller + * copy_from_kernel_nofault() suitable for use within regions where the caller * already holds mmap_lock, or other locks which nest inside mmap_lock. */ -long probe_kernel_read(void *dst, const void *src, size_t size) +long copy_from_kernel_nofault(void *dst, const void *src, size_t size) { long ret; mm_segment_t old_fs = get_fs(); - if (!probe_kernel_read_allowed(src, size)) + if (!copy_from_kernel_nofault_allowed(src, size)) return -ERANGE; set_fs(KERNEL_DS); @@ -120,10 +121,10 @@ long probe_kernel_read(void *dst, const void *src, size_t size) return -EFAULT; return 0; } -EXPORT_SYMBOL_GPL(probe_kernel_read); +EXPORT_SYMBOL_GPL(copy_from_kernel_nofault); /** - * probe_kernel_write(): safely attempt to write to a location + * copy_to_kernel_nofault(): safely attempt to write to a location * @dst: address to write to * @src: pointer to the data that shall be written * @size: size of the data chunk @@ -131,7 +132,7 @@ EXPORT_SYMBOL_GPL(probe_kernel_read); * Safely write to address @dst from the buffer at @src. If a kernel fault * happens, handle that and return -EFAULT. */ -long probe_kernel_write(void *dst, const void *src, size_t size) +long copy_to_kernel_nofault(void *dst, const void *src, size_t size) { long ret; mm_segment_t old_fs = get_fs(); @@ -174,7 +175,7 @@ long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr, long count) if (unlikely(count <= 0)) return 0; - if (!probe_kernel_read_allowed(unsafe_addr, count)) + if (!copy_from_kernel_nofault_allowed(unsafe_addr, count)) return -ERANGE; set_fs(KERNEL_DS); diff --git a/mm/rodata_test.c b/mm/rodata_test.c index 5e313fa93276..2a99df7beeb3 100644 --- a/mm/rodata_test.c +++ b/mm/rodata_test.c @@ -25,7 +25,7 @@ void rodata_test(void) } /* test 2: write to the variable; this should fault */ - if (!probe_kernel_write((void *)&rodata_test_data, + if (!copy_to_kernel_nofault((void *)&rodata_test_data, (void *)&zero, sizeof(zero))) { pr_err("test data was not read only\n"); return; diff --git a/mm/slub.c b/mm/slub.c index b8f798b50d44..fe81773fd97e 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -292,7 +292,7 @@ static inline void *get_freepointer_safe(struct kmem_cache *s, void *object) return get_freepointer(s, object); freepointer_addr = (unsigned long)object + s->offset; - probe_kernel_read(&p, (void **)freepointer_addr, sizeof(p)); + copy_from_kernel_nofault(&p, (void **)freepointer_addr, sizeof(p)); return freelist_ptr(s, p, freepointer_addr); } -- cgit v1.2.3 From c0ee37e85e0e47402b8bbe35b6cec8e06937ca58 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 17 Jun 2020 09:37:54 +0200 Subject: maccess: rename probe_user_{read,write} to copy_{from,to}_user_nofault Better describe what these functions do. Suggested-by: Linus Torvalds Signed-off-by: Christoph Hellwig Signed-off-by: Linus Torvalds --- arch/powerpc/kernel/process.c | 3 ++- arch/powerpc/kvm/book3s_64_mmu_radix.c | 4 ++-- arch/powerpc/lib/inst.c | 6 +++--- arch/powerpc/oprofile/backtrace.c | 6 ++++-- arch/powerpc/perf/callchain_32.c | 2 +- arch/powerpc/perf/callchain_64.c | 2 +- arch/powerpc/perf/core-book3s.c | 3 ++- arch/powerpc/sysdev/fsl_pci.c | 4 ++-- include/linux/uaccess.h | 4 ++-- kernel/trace/bpf_trace.c | 4 ++-- kernel/trace/trace_kprobe.c | 2 +- mm/maccess.c | 12 ++++++------ 12 files changed, 28 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 7bb7faf84490..d4d0d1048500 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1305,7 +1305,8 @@ void show_user_instructions(struct pt_regs *regs) for (i = 0; i < 8 && n; i++, n--, pc += sizeof(int)) { int instr; - if (probe_user_read(&instr, (void __user *)pc, sizeof(instr))) { + if (copy_from_user_nofault(&instr, (void __user *)pc, + sizeof(instr))) { seq_buf_printf(&s, "XXXXXXXX "); continue; } diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 3cb0c9843d01..e738ea652192 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -64,9 +64,9 @@ unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid, isync(); if (is_load) - ret = probe_user_read(to, (const void __user *)from, n); + ret = copy_from_user_nofault(to, (const void __user *)from, n); else - ret = probe_user_write((void __user *)to, from, n); + ret = copy_to_user_nofault((void __user *)to, from, n); /* switch the pid first to avoid running host with unallocated pid */ if (quadrant == 1 && pid != old_pid) diff --git a/arch/powerpc/lib/inst.c b/arch/powerpc/lib/inst.c index 6c7a20af9fd6..9cc17eb62462 100644 --- a/arch/powerpc/lib/inst.c +++ b/arch/powerpc/lib/inst.c @@ -15,11 +15,11 @@ int probe_user_read_inst(struct ppc_inst *inst, unsigned int val, suffix; int err; - err = probe_user_read(&val, nip, sizeof(val)); + err = copy_from_user_nofault(&val, nip, sizeof(val)); if (err) return err; if (get_op(val) == OP_PREFIX) { - err = probe_user_read(&suffix, (void __user *)nip + 4, 4); + err = copy_from_user_nofault(&suffix, (void __user *)nip + 4, 4); *inst = ppc_inst_prefix(val, suffix); } else { *inst = ppc_inst(val); @@ -51,7 +51,7 @@ int probe_user_read_inst(struct ppc_inst *inst, unsigned int val; int err; - err = probe_user_read(&val, nip, sizeof(val)); + err = copy_from_user_nofault(&val, nip, sizeof(val)); if (!err) *inst = ppc_inst(val); diff --git a/arch/powerpc/oprofile/backtrace.c b/arch/powerpc/oprofile/backtrace.c index 6f347fa29f41..9db7ada79d10 100644 --- a/arch/powerpc/oprofile/backtrace.c +++ b/arch/powerpc/oprofile/backtrace.c @@ -33,7 +33,8 @@ static unsigned int user_getsp32(unsigned int sp, int is_first) * which means that we've done all that we can do from * interrupt context. */ - if (probe_user_read(stack_frame, (void __user *)p, sizeof(stack_frame))) + if (copy_from_user_nofault(stack_frame, (void __user *)p, + sizeof(stack_frame))) return 0; if (!is_first) @@ -51,7 +52,8 @@ static unsigned long user_getsp64(unsigned long sp, int is_first) { unsigned long stack_frame[3]; - if (probe_user_read(stack_frame, (void __user *)sp, sizeof(stack_frame))) + if (copy_from_user_nofault(stack_frame, (void __user *)sp, + sizeof(stack_frame))) return 0; if (!is_first) diff --git a/arch/powerpc/perf/callchain_32.c b/arch/powerpc/perf/callchain_32.c index f7d888d39cd3..542e68b8eae0 100644 --- a/arch/powerpc/perf/callchain_32.c +++ b/arch/powerpc/perf/callchain_32.c @@ -44,7 +44,7 @@ static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret) ((unsigned long)ptr & 3)) return -EFAULT; - rc = probe_user_read(ret, ptr, sizeof(*ret)); + rc = copy_from_user_nofault(ret, ptr, sizeof(*ret)); if (IS_ENABLED(CONFIG_PPC64) && rc) return read_user_stack_slow(ptr, ret, 4); diff --git a/arch/powerpc/perf/callchain_64.c b/arch/powerpc/perf/callchain_64.c index 814d1c2c2b9c..fa2a1b83b9b0 100644 --- a/arch/powerpc/perf/callchain_64.c +++ b/arch/powerpc/perf/callchain_64.c @@ -50,7 +50,7 @@ static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret) ((unsigned long)ptr & 7)) return -EFAULT; - if (!probe_user_read(ret, ptr, sizeof(*ret))) + if (!copy_from_user_nofault(ret, ptr, sizeof(*ret))) return 0; return read_user_stack_slow(ptr, ret, 8); diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index efe97ff82557..cd6a742ac6ef 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -426,7 +426,8 @@ static __u64 power_pmu_bhrb_to(u64 addr) } /* Userspace: need copy instruction here then translate it */ - if (probe_user_read(&instr, (unsigned int __user *)addr, sizeof(instr))) + if (copy_from_user_nofault(&instr, (unsigned int __user *)addr, + sizeof(instr))) return 0; target = branch_target((struct ppc_inst *)&instr); diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 4a8874bc1057..73fa37ca40ef 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -1066,8 +1066,8 @@ int fsl_pci_mcheck_exception(struct pt_regs *regs) if (is_in_pci_mem_space(addr)) { if (user_mode(regs)) - ret = probe_user_read(&inst, (void __user *)regs->nip, - sizeof(inst)); + ret = copy_from_user_nofault(&inst, + (void __user *)regs->nip, sizeof(inst)); else ret = probe_kernel_address((void *)regs->nip, inst); diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 70a3d9cd9113..bef48da242cc 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -306,8 +306,8 @@ bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size); long copy_from_kernel_nofault(void *dst, const void *src, size_t size); long notrace copy_to_kernel_nofault(void *dst, const void *src, size_t size); -extern long probe_user_read(void *dst, const void __user *src, size_t size); -extern long notrace probe_user_write(void __user *dst, const void *src, +long copy_from_user_nofault(void *dst, const void __user *src, size_t size); +long notrace copy_to_user_nofault(void __user *dst, const void *src, size_t size); long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr, diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 204afc12425f..dc05626979b8 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -141,7 +141,7 @@ bpf_probe_read_user_common(void *dst, u32 size, const void __user *unsafe_ptr) { int ret; - ret = probe_user_read(dst, unsafe_ptr, size); + ret = copy_from_user_nofault(dst, unsafe_ptr, size); if (unlikely(ret < 0)) memset(dst, 0, size); return ret; @@ -326,7 +326,7 @@ BPF_CALL_3(bpf_probe_write_user, void __user *, unsafe_ptr, const void *, src, if (unlikely(!nmi_uaccess_okay())) return -EPERM; - return probe_user_write(unsafe_ptr, src, size); + return copy_to_user_nofault(unsafe_ptr, src, size); } static const struct bpf_func_proto bpf_probe_write_user_proto = { diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 841c74863ff8..aefb6065b508 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1290,7 +1290,7 @@ probe_mem_read_user(void *dest, void *src, size_t size) { const void __user *uaddr = (__force const void __user *)src; - return probe_user_read(dest, uaddr, size); + return copy_from_user_nofault(dest, uaddr, size); } static nokprobe_inline int diff --git a/mm/maccess.c b/mm/maccess.c index cc5d8c6233c0..f98ff91e32c6 100644 --- a/mm/maccess.c +++ b/mm/maccess.c @@ -194,7 +194,7 @@ long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr, long count) #endif /* HAVE_GET_KERNEL_NOFAULT */ /** - * probe_user_read(): safely attempt to read from a user-space location + * copy_from_user_nofault(): safely attempt to read from a user-space location * @dst: pointer to the buffer that shall take the data * @src: address to read from. This must be a user address. * @size: size of the data chunk @@ -202,7 +202,7 @@ long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr, long count) * Safely read from user address @src to the buffer at @dst. If a kernel fault * happens, handle that and return -EFAULT. */ -long probe_user_read(void *dst, const void __user *src, size_t size) +long copy_from_user_nofault(void *dst, const void __user *src, size_t size) { long ret = -EFAULT; mm_segment_t old_fs = get_fs(); @@ -219,10 +219,10 @@ long probe_user_read(void *dst, const void __user *src, size_t size) return -EFAULT; return 0; } -EXPORT_SYMBOL_GPL(probe_user_read); +EXPORT_SYMBOL_GPL(copy_from_user_nofault); /** - * probe_user_write(): safely attempt to write to a user-space location + * copy_to_user_nofault(): safely attempt to write to a user-space location * @dst: address to write to * @src: pointer to the data that shall be written * @size: size of the data chunk @@ -230,7 +230,7 @@ EXPORT_SYMBOL_GPL(probe_user_read); * Safely write to address @dst from the buffer at @src. If a kernel fault * happens, handle that and return -EFAULT. */ -long probe_user_write(void __user *dst, const void *src, size_t size) +long copy_to_user_nofault(void __user *dst, const void *src, size_t size) { long ret = -EFAULT; mm_segment_t old_fs = get_fs(); @@ -247,7 +247,7 @@ long probe_user_write(void __user *dst, const void *src, size_t size) return -EFAULT; return 0; } -EXPORT_SYMBOL_GPL(probe_user_write); +EXPORT_SYMBOL_GPL(copy_to_user_nofault); /** * strncpy_from_user_nofault: - Copy a NUL terminated string from unsafe user -- cgit v1.2.3 From ac4e106d8934a5894811fc263f4b03fc8ed0fb7a Mon Sep 17 00:00:00 2001 From: Matthew Hagan Date: Sun, 14 Jun 2020 15:19:00 -0700 Subject: ARM: dts: NSP: Correct FA2 mailbox node The FA2 mailbox is specified at 0x18025000 but should actually be 0x18025c00, length 0x400 according to socregs_nsp.h and board_bu.c. Also the interrupt was off by one and should be GIC SPI 151 instead of 150. Fixes: 17d517172300 ("ARM: dts: NSP: Add mailbox (PDC) to NSP") Signed-off-by: Matthew Hagan Signed-off-by: Florian Fainelli --- arch/arm/boot/dts/bcm-nsp.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/bcm-nsp.dtsi b/arch/arm/boot/dts/bcm-nsp.dtsi index 920c0f561e5c..3175266ede64 100644 --- a/arch/arm/boot/dts/bcm-nsp.dtsi +++ b/arch/arm/boot/dts/bcm-nsp.dtsi @@ -259,10 +259,10 @@ status = "disabled"; }; - mailbox: mailbox@25000 { + mailbox: mailbox@25c00 { compatible = "brcm,iproc-fa2-mbox"; - reg = <0x25000 0x445>; - interrupts = ; + reg = <0x25c00 0x400>; + interrupts = ; #mbox-cells = <1>; brcm,rx-status-len = <32>; brcm,use-bcm-hdr; -- cgit v1.2.3 From b3583fca5fb654af2cfc1c08259abb9728272538 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Tue, 2 Jun 2020 21:00:51 +0300 Subject: s390: fix syscall_get_error for compat processes If both the tracer and the tracee are compat processes, and gprs[2] is assigned a value by __poke_user_compat, then the higher 32 bits of gprs[2] are cleared, IS_ERR_VALUE() always returns false, and syscall_get_error() always returns 0. Fix the implementation by sign-extending the value for compat processes the same way as x86 implementation does. The bug was exposed to user space by commit 201766a20e30f ("ptrace: add PTRACE_GET_SYSCALL_INFO request") and detected by strace test suite. This change fixes strace syscall tampering on s390. Link: https://lkml.kernel.org/r/20200602180051.GA2427@altlinux.org Fixes: 753c4dd6a2fa2 ("[S390] ptrace changes") Cc: Elvira Khabirova Cc: stable@vger.kernel.org # v2.6.28+ Signed-off-by: Dmitry V. Levin Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/syscall.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index f073292e9fdb..d9d5de0f67ff 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -33,7 +33,17 @@ static inline void syscall_rollback(struct task_struct *task, static inline long syscall_get_error(struct task_struct *task, struct pt_regs *regs) { - return IS_ERR_VALUE(regs->gprs[2]) ? regs->gprs[2] : 0; + unsigned long error = regs->gprs[2]; +#ifdef CONFIG_COMPAT + if (test_tsk_thread_flag(task, TIF_31BIT)) { + /* + * Sign-extend the value so (int)-EFOO becomes (long)-EFOO + * and will match correctly in comparisons. + */ + error = (long)(int)error; + } +#endif + return IS_ERR_VALUE(error) ? error : 0; } static inline long syscall_get_return_value(struct task_struct *task, -- cgit v1.2.3 From 4fd6b5735c03c0955d93960d31f17d7144f5578f Mon Sep 17 00:00:00 2001 From: Robin Gong Date: Fri, 22 May 2020 18:44:50 +0800 Subject: arm64: dts: imx8mm-evk: correct ldo1/ldo2 voltage range Correct ldo1 voltage range from wrong high group(3.0V~3.3V) to low group (1.6V~1.9V) because the ldo1 should be 1.8V. Actually, two voltage groups have been supported at bd718x7-regulator driver, hence, just corrrect the voltage range to 1.6V~3.3V. For ldo2@0.8V, correct voltage range too. Otherwise, ldo1 would be kept @3.0V and ldo2@0.9V which violate i.mx8mm datasheet as the below warning log in kernel: [ 0.995524] LDO1: Bringing 1800000uV into 3000000-3000000uV [ 0.999196] LDO2: Bringing 800000uV into 900000-900000uV Fixes: 78cc25fa265d ("arm64: dts: imx8mm-evk: Add BD71847 PMIC") Cc: stable@vger.kernel.org Signed-off-by: Robin Gong Reviewed-by: Dong Aisheng Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mm-evk.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/freescale/imx8mm-evk.dts b/arch/arm64/boot/dts/freescale/imx8mm-evk.dts index e5ec8322796d..0f1d7f8aeac4 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-evk.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-evk.dts @@ -208,7 +208,7 @@ ldo1_reg: LDO1 { regulator-name = "LDO1"; - regulator-min-microvolt = <3000000>; + regulator-min-microvolt = <1600000>; regulator-max-microvolt = <3300000>; regulator-boot-on; regulator-always-on; @@ -216,7 +216,7 @@ ldo2_reg: LDO2 { regulator-name = "LDO2"; - regulator-min-microvolt = <900000>; + regulator-min-microvolt = <800000>; regulator-max-microvolt = <900000>; regulator-boot-on; regulator-always-on; -- cgit v1.2.3 From cfb12c8952f617df58d73d24161e539a035d82b0 Mon Sep 17 00:00:00 2001 From: Robin Gong Date: Fri, 22 May 2020 18:44:51 +0800 Subject: arm64: dts: imx8mn-ddr4-evk: correct ldo1/ldo2 voltage range Correct ldo1 voltage range from wrong high group(3.0V~3.3V) to low group (1.6V~1.9V) because the ldo1 should be 1.8V. Actually, two voltage groups have been supported at bd718x7-regulator driver, hence, just corrrect the voltage range to 1.6V~3.3V. For ldo2@0.8V, correct voltage range too. Otherwise, ldo1 would be kept @3.0V and ldo2@0.9V which violate i.mx8mn datasheet as the below warning log in kernel: [ 0.995524] LDO1: Bringing 1800000uV into 3000000-3000000uV [ 0.999196] LDO2: Bringing 800000uV into 900000-900000uV Fixes: 3e44dd09736d ("arm64: dts: imx8mn-ddr4-evk: Add rohm,bd71847 PMIC support") Cc: stable@vger.kernel.org Signed-off-by: Robin Gong Reviewed-by: Dong Aisheng Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts b/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts index d07e0e6a00cc..a1e5483dbbbe 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts +++ b/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts @@ -113,7 +113,7 @@ ldo1_reg: LDO1 { regulator-name = "LDO1"; - regulator-min-microvolt = <3000000>; + regulator-min-microvolt = <1600000>; regulator-max-microvolt = <3300000>; regulator-boot-on; regulator-always-on; @@ -121,7 +121,7 @@ ldo2_reg: LDO2 { regulator-name = "LDO2"; - regulator-min-microvolt = <900000>; + regulator-min-microvolt = <800000>; regulator-max-microvolt = <900000>; regulator-boot-on; regulator-always-on; -- cgit v1.2.3 From 774911290c589e98e3638e73b24b0a4d4530e97c Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 17 Jun 2020 10:36:20 +0200 Subject: KVM: s390: reduce number of IO pins to 1 The current number of KVM_IRQCHIP_NUM_PINS results in an order 3 allocation (32kb) for each guest start/restart. This can result in OOM killer activity even with free swap when the memory is fragmented enough: kernel: qemu-system-s39 invoked oom-killer: gfp_mask=0x440dc0(GFP_KERNEL_ACCOUNT|__GFP_COMP|__GFP_ZERO), order=3, oom_score_adj=0 kernel: CPU: 1 PID: 357274 Comm: qemu-system-s39 Kdump: loaded Not tainted 5.4.0-29-generic #33-Ubuntu kernel: Hardware name: IBM 8562 T02 Z06 (LPAR) kernel: Call Trace: kernel: ([<00000001f848fe2a>] show_stack+0x7a/0xc0) kernel: [<00000001f8d3437a>] dump_stack+0x8a/0xc0 kernel: [<00000001f8687032>] dump_header+0x62/0x258 kernel: [<00000001f8686122>] oom_kill_process+0x172/0x180 kernel: [<00000001f8686abe>] out_of_memory+0xee/0x580 kernel: [<00000001f86e66b8>] __alloc_pages_slowpath+0xd18/0xe90 kernel: [<00000001f86e6ad4>] __alloc_pages_nodemask+0x2a4/0x320 kernel: [<00000001f86b1ab4>] kmalloc_order+0x34/0xb0 kernel: [<00000001f86b1b62>] kmalloc_order_trace+0x32/0xe0 kernel: [<00000001f84bb806>] kvm_set_irq_routing+0xa6/0x2e0 kernel: [<00000001f84c99a4>] kvm_arch_vm_ioctl+0x544/0x9e0 kernel: [<00000001f84b8936>] kvm_vm_ioctl+0x396/0x760 kernel: [<00000001f875df66>] do_vfs_ioctl+0x376/0x690 kernel: [<00000001f875e304>] ksys_ioctl+0x84/0xb0 kernel: [<00000001f875e39a>] __s390x_sys_ioctl+0x2a/0x40 kernel: [<00000001f8d55424>] system_call+0xd8/0x2c8 As far as I can tell s390x does not use the iopins as we bail our for anything other than KVM_IRQ_ROUTING_S390_ADAPTER and the chip/pin is only used for KVM_IRQ_ROUTING_IRQCHIP. So let us use a small number to reduce the memory footprint. Signed-off-by: Christian Borntraeger Reviewed-by: Cornelia Huck Reviewed-by: David Hildenbrand Link: https://lore.kernel.org/r/20200617083620.5409-1-borntraeger@de.ibm.com --- arch/s390/include/asm/kvm_host.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index cee3cb6455a2..6ea0820e7c7f 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -31,12 +31,12 @@ #define KVM_USER_MEM_SLOTS 32 /* - * These seem to be used for allocating ->chip in the routing table, - * which we don't use. 4096 is an out-of-thin-air value. If we need - * to look at ->chip later on, we'll need to revisit this. + * These seem to be used for allocating ->chip in the routing table, which we + * don't use. 1 is as small as we can get to reduce the needed memory. If we + * need to look at ->chip later on, we'll need to revisit this. */ #define KVM_NR_IRQCHIPS 1 -#define KVM_IRQCHIP_NUM_PINS 4096 +#define KVM_IRQCHIP_NUM_PINS 1 #define KVM_HALT_POLL_NS_DEFAULT 50000 /* s390-specific vcpu->requests bit members */ -- cgit v1.2.3 From 618e07865b7453d02410c1f3407c2d78a670eabb Mon Sep 17 00:00:00 2001 From: Barry Song Date: Thu, 18 Jun 2020 09:58:28 +1200 Subject: arm64: mm: reserve hugetlb CMA after numa_init hugetlb_cma_reserve() is called at the wrong place. numa_init has not been done yet. so all reserved memory will be located at node0. Fixes: cf11e85fc08c ("mm: hugetlb: optionally allocate gigantic hugepages using cma") Signed-off-by: Barry Song Reviewed-by: Anshuman Khandual Acked-by: Roman Gushchin Cc: Matthias Brugger Cc: Will Deacon Link: https://lore.kernel.org/r/20200617215828.25296-1-song.bao.hua@hisilicon.com Signed-off-by: Will Deacon --- arch/arm64/mm/init.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index e631e6425165..1e93cfc7c47a 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -404,11 +404,6 @@ void __init arm64_memblock_init(void) high_memory = __va(memblock_end_of_DRAM() - 1) + 1; dma_contiguous_reserve(arm64_dma32_phys_limit); - -#ifdef CONFIG_ARM64_4K_PAGES - hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT); -#endif - } void __init bootmem_init(void) @@ -424,6 +419,16 @@ void __init bootmem_init(void) min_low_pfn = min; arm64_numa_init(); + + /* + * must be done after arm64_numa_init() which calls numa_init() to + * initialize node_online_map that gets used in hugetlb_cma_reserve() + * while allocating required CMA size across online nodes. + */ +#ifdef CONFIG_ARM64_4K_PAGES + hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT); +#endif + /* * Sparsemem tries to allocate bootmem in memory_present(), so must be * done after the fixed reservations. -- cgit v1.2.3 From a13b9d0b97211579ea63b96c606de79b963c0f47 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 8 Jun 2020 20:15:09 -0700 Subject: x86/cpu: Use pinning mask for CR4 bits needing to be 0 The X86_CR4_FSGSBASE bit of CR4 should not change after boot[1]. Older kernels should enforce this bit to zero, and newer kernels need to enforce it depending on boot-time configuration (e.g. "nofsgsbase"). To support a pinned bit being either 1 or 0, use an explicit mask in combination with the expected pinned bit values. [1] https://lore.kernel.org/lkml/20200527103147.GI325280@hirez.programming.kicks-ass.net Signed-off-by: Kees Cook Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/202006082013.71E29A42@keescook --- arch/x86/kernel/cpu/common.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 043d93cdcaad..95c090a45b4b 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -347,6 +347,9 @@ out: cr4_clear_bits(X86_CR4_UMIP); } +/* These bits should not change their value after CPU init is finished. */ +static const unsigned long cr4_pinned_mask = + X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP | X86_CR4_FSGSBASE; static DEFINE_STATIC_KEY_FALSE_RO(cr_pinning); static unsigned long cr4_pinned_bits __ro_after_init; @@ -371,20 +374,20 @@ EXPORT_SYMBOL(native_write_cr0); void native_write_cr4(unsigned long val) { - unsigned long bits_missing = 0; + unsigned long bits_changed = 0; set_register: asm volatile("mov %0,%%cr4": "+r" (val), "+m" (cr4_pinned_bits)); if (static_branch_likely(&cr_pinning)) { - if (unlikely((val & cr4_pinned_bits) != cr4_pinned_bits)) { - bits_missing = ~val & cr4_pinned_bits; - val |= bits_missing; + if (unlikely((val & cr4_pinned_mask) != cr4_pinned_bits)) { + bits_changed = (val & cr4_pinned_mask) ^ cr4_pinned_bits; + val = (val & ~cr4_pinned_mask) | cr4_pinned_bits; goto set_register; } - /* Warn after we've set the missing bits. */ - WARN_ONCE(bits_missing, "CR4 bits went missing: %lx!?\n", - bits_missing); + /* Warn after we've corrected the changed bits. */ + WARN_ONCE(bits_changed, "pinned CR4 bits changed: 0x%lx!?\n", + bits_changed); } } #if IS_MODULE(CONFIG_LKDTM) @@ -419,7 +422,7 @@ void cr4_init(void) if (boot_cpu_has(X86_FEATURE_PCID)) cr4 |= X86_CR4_PCIDE; if (static_branch_likely(&cr_pinning)) - cr4 |= cr4_pinned_bits; + cr4 = (cr4 & ~cr4_pinned_mask) | cr4_pinned_bits; __write_cr4(cr4); @@ -434,10 +437,7 @@ void cr4_init(void) */ static void __init setup_cr_pinning(void) { - unsigned long mask; - - mask = (X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP); - cr4_pinned_bits = this_cpu_read(cpu_tlbstate.cr4) & mask; + cr4_pinned_bits = this_cpu_read(cpu_tlbstate.cr4) & cr4_pinned_mask; static_key_enable(&cr_pinning.key); } -- cgit v1.2.3 From bf508ec95ca3b902f14bb311a7709e5cb57fbc49 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 17 Jun 2020 16:34:07 -0500 Subject: arm64: kexec_file: Use struct_size() in kmalloc() Make use of the struct_size() helper instead of an open-coded version in order to avoid any potential type mistakes. This code was detected with the help of Coccinelle and, audited and fixed manually. Signed-off-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/20200617213407.GA1385@embeddedor Signed-off-by: Will Deacon --- arch/arm64/kernel/machine_kexec_file.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index 522e6f517ec0..361a1143e09e 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -219,8 +219,7 @@ static int prepare_elf_headers(void **addr, unsigned long *sz) MEMBLOCK_NONE, &start, &end, NULL) nr_ranges++; - cmem = kmalloc(sizeof(struct crash_mem) + - sizeof(struct crash_mem_range) * nr_ranges, GFP_KERNEL); + cmem = kmalloc(struct_size(cmem, ranges, nr_ranges), GFP_KERNEL); if (!cmem) return -ENOMEM; -- cgit v1.2.3 From 24ebec25fb270100e252b19c288e21bd7d8cc7f7 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 29 May 2020 14:12:18 +0100 Subject: arm64: hw_breakpoint: Don't invoke overflow handler on uaccess watchpoints Unprivileged memory accesses generated by the so-called "translated" instructions (e.g. STTR) at EL1 can cause EL0 watchpoints to fire unexpectedly if kernel debugging is enabled. In such cases, the hw_breakpoint logic will invoke the user overflow handler which will typically raise a SIGTRAP back to the current task. This is futile when returning back to the kernel because (a) the signal won't have been delivered and (b) userspace can't handle the thing anyway. Avoid invoking the user overflow handler for watchpoints triggered by kernel uaccess routines, and instead single-step over the faulting instruction as we would if no overflow handler had been installed. (Fixes tag identifies the introduction of unprivileged memory accesses, which exposed this latent bug in the hw_breakpoint code) Cc: Catalin Marinas Cc: James Morse Fixes: 57f4959bad0a ("arm64: kernel: Add support for User Access Override") Reported-by: Luis Machado Signed-off-by: Will Deacon --- arch/arm64/kernel/hw_breakpoint.c | 44 +++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 0b727edf4104..af234a1e08b7 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -730,6 +730,27 @@ static u64 get_distance_from_watchpoint(unsigned long addr, u64 val, return 0; } +static int watchpoint_report(struct perf_event *wp, unsigned long addr, + struct pt_regs *regs) +{ + int step = is_default_overflow_handler(wp); + struct arch_hw_breakpoint *info = counter_arch_bp(wp); + + info->trigger = addr; + + /* + * If we triggered a user watchpoint from a uaccess routine, then + * handle the stepping ourselves since userspace really can't help + * us with this. + */ + if (!user_mode(regs) && info->ctrl.privilege == AARCH64_BREAKPOINT_EL0) + step = 1; + else + perf_bp_event(wp, regs); + + return step; +} + static int watchpoint_handler(unsigned long addr, unsigned int esr, struct pt_regs *regs) { @@ -739,7 +760,6 @@ static int watchpoint_handler(unsigned long addr, unsigned int esr, u64 val; struct perf_event *wp, **slots; struct debug_info *debug_info; - struct arch_hw_breakpoint *info; struct arch_hw_breakpoint_ctrl ctrl; slots = this_cpu_ptr(wp_on_reg); @@ -777,25 +797,13 @@ static int watchpoint_handler(unsigned long addr, unsigned int esr, if (dist != 0) continue; - info = counter_arch_bp(wp); - info->trigger = addr; - perf_bp_event(wp, regs); - - /* Do we need to handle the stepping? */ - if (is_default_overflow_handler(wp)) - step = 1; + step = watchpoint_report(wp, addr, regs); } - if (min_dist > 0 && min_dist != -1) { - /* No exact match found. */ - wp = slots[closest_match]; - info = counter_arch_bp(wp); - info->trigger = addr; - perf_bp_event(wp, regs); - /* Do we need to handle the stepping? */ - if (is_default_overflow_handler(wp)) - step = 1; - } + /* No exact match found? */ + if (min_dist > 0 && min_dist != -1) + step = watchpoint_report(slots[closest_match], addr, regs); + rcu_read_unlock(); if (!step) -- cgit v1.2.3 From e353b325948d04f1305a7b4ad32eca87ba0823a2 Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Tue, 26 May 2020 12:09:39 -0500 Subject: arm64: dts: imx8mm-beacon: Fix voltages on LDO1 and LDO2 LDO1 and LDO2 settings are wrong and case the voltage to go above the maximum level of 2.15V permitted by the SoC to 3.0V. This patch is based on work done on the i.MX8M Mini-EVK which utilizes the same fix. Fixes: 593816fa2f35 ("arm64: dts: imx: Add Beacon i.MX8m-Mini development kit") Signed-off-by: Adam Ford Reviewed-by: Daniel Baluta Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi index fb0137a8611c..94911b1707ef 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi @@ -136,7 +136,7 @@ ldo1_reg: LDO1 { regulator-name = "LDO1"; - regulator-min-microvolt = <3000000>; + regulator-min-microvolt = <1600000>; regulator-max-microvolt = <3300000>; regulator-boot-on; regulator-always-on; @@ -144,7 +144,7 @@ ldo2_reg: LDO2 { regulator-name = "LDO2"; - regulator-min-microvolt = <900000>; + regulator-min-microvolt = <800000>; regulator-max-microvolt = <900000>; regulator-boot-on; regulator-always-on; -- cgit v1.2.3 From 0f1441b44e823a74f3f3780902a113e07c73fbf6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 12 Jun 2020 16:05:26 +0200 Subject: objtool: Fix noinstr vs KCOV Since many compilers cannot disable KCOV with a function attribute, help it to NOP out any __sanitizer_cov_*() calls injected in noinstr code. This turns: 12: e8 00 00 00 00 callq 17 13: R_X86_64_PLT32 __sanitizer_cov_trace_pc-0x4 into: 12: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1) 13: R_X86_64_NONE __sanitizer_cov_trace_pc-0x4 Just like recordmcount does. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Dmitry Vyukov --- arch/x86/Kconfig | 2 +- tools/objtool/arch.h | 2 ++ tools/objtool/arch/x86/decode.c | 18 ++++++++++++++++++ tools/objtool/arch/x86/include/arch_elf.h | 6 ++++++ tools/objtool/check.c | 19 +++++++++++++++++++ 5 files changed, 46 insertions(+), 1 deletion(-) create mode 100644 tools/objtool/arch/x86/include/arch_elf.h (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 6a0cc524882d..883da0abf779 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -67,7 +67,7 @@ config X86 select ARCH_HAS_FILTER_PGPROT select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL - select ARCH_HAS_KCOV if X86_64 + select ARCH_HAS_KCOV if X86_64 && STACK_VALIDATION select ARCH_HAS_MEM_ENCRYPT select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE diff --git a/tools/objtool/arch.h b/tools/objtool/arch.h index eda15a5a285e..3c5967748abb 100644 --- a/tools/objtool/arch.h +++ b/tools/objtool/arch.h @@ -84,4 +84,6 @@ unsigned long arch_jump_destination(struct instruction *insn); unsigned long arch_dest_rela_offset(int addend); +const char *arch_nop_insn(int len); + #endif /* _ARCH_H */ diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 4b504fc90bbb..9872195f998b 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -565,3 +565,21 @@ void arch_initial_func_cfi_state(struct cfi_init_state *state) state->regs[16].base = CFI_CFA; state->regs[16].offset = -8; } + +const char *arch_nop_insn(int len) +{ + static const char nops[5][5] = { + /* 1 */ { 0x90 }, + /* 2 */ { 0x66, 0x90 }, + /* 3 */ { 0x0f, 0x1f, 0x00 }, + /* 4 */ { 0x0f, 0x1f, 0x40, 0x00 }, + /* 5 */ { 0x0f, 0x1f, 0x44, 0x00, 0x00 }, + }; + + if (len < 1 || len > 5) { + WARN("invalid NOP size: %d\n", len); + return NULL; + } + + return nops[len-1]; +} diff --git a/tools/objtool/arch/x86/include/arch_elf.h b/tools/objtool/arch/x86/include/arch_elf.h new file mode 100644 index 000000000000..69cc4264b28a --- /dev/null +++ b/tools/objtool/arch/x86/include/arch_elf.h @@ -0,0 +1,6 @@ +#ifndef _OBJTOOL_ARCH_ELF +#define _OBJTOOL_ARCH_ELF + +#define R_NONE R_X86_64_NONE + +#endif /* _OBJTOOL_ARCH_ELF */ diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 91a67db26165..478267a072d0 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -12,6 +12,7 @@ #include "check.h" #include "special.h" #include "warn.h" +#include "arch_elf.h" #include #include @@ -765,6 +766,24 @@ static int add_call_destinations(struct objtool_file *file) } else insn->call_dest = rela->sym; + /* + * Many compilers cannot disable KCOV with a function attribute + * so they need a little help, NOP out any KCOV calls from noinstr + * text. + */ + if (insn->sec->noinstr && + !strncmp(insn->call_dest->name, "__sanitizer_cov_", 16)) { + if (rela) { + rela->type = R_NONE; + elf_write_rela(file->elf, rela); + } + + elf_write_insn(file->elf, insn->sec, + insn->offset, insn->len, + arch_nop_insn(insn->len)); + insn->type = INSN_NOP; + } + /* * Whatever stack impact regular CALLs have, should be undone * by the RETURN of the called function. -- cgit v1.2.3 From 25f12ae45fc1931a1dce3cc59f9989a9d87834b0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 17 Jun 2020 09:37:55 +0200 Subject: maccess: rename probe_kernel_address to get_kernel_nofault Better describe what this helper does, and match the naming of copy_from_kernel_nofault. Also switch the argument order around, so that it acts and looks like get_user(). Signed-off-by: Christoph Hellwig Signed-off-by: Linus Torvalds --- arch/arm/kernel/traps.c | 2 +- arch/arm/mm/alignment.c | 4 ++-- arch/arm64/kernel/traps.c | 2 +- arch/ia64/include/asm/sections.h | 2 +- arch/parisc/kernel/process.c | 2 +- arch/powerpc/include/asm/sections.h | 2 +- arch/powerpc/kernel/kgdb.c | 2 +- arch/powerpc/kernel/kprobes.c | 2 +- arch/powerpc/kernel/process.c | 2 +- arch/powerpc/sysdev/fsl_pci.c | 2 +- arch/riscv/kernel/kgdb.c | 4 ++-- arch/riscv/kernel/traps.c | 4 ++-- arch/s390/mm/fault.c | 2 +- arch/sh/kernel/traps.c | 2 +- arch/x86/kernel/probe_roms.c | 20 ++++++++++---------- arch/x86/kernel/traps.c | 2 +- arch/x86/mm/fault.c | 6 +++--- arch/x86/pci/pcbios.c | 2 +- include/linux/uaccess.h | 10 +++++----- lib/test_lockup.c | 6 +++--- 20 files changed, 40 insertions(+), 40 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 65a3b1e75480..49ce15c3612d 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -396,7 +396,7 @@ int is_valid_bugaddr(unsigned long pc) u32 insn = __opcode_to_mem_arm(BUG_INSTR_VALUE); #endif - if (probe_kernel_address((unsigned *)pc, bkpt)) + if (get_kernel_nofault(bkpt, (unsigned *)pc)) return 0; return bkpt == insn; diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index 84718eddae60..81a627e6e1c5 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -774,7 +774,7 @@ static int alignment_get_arm(struct pt_regs *regs, u32 *ip, u32 *inst) if (user_mode(regs)) fault = get_user(instr, ip); else - fault = probe_kernel_address(ip, instr); + fault = get_kernel_nofault(instr, ip); *inst = __mem_to_opcode_arm(instr); @@ -789,7 +789,7 @@ static int alignment_get_thumb(struct pt_regs *regs, u16 *ip, u16 *inst) if (user_mode(regs)) fault = get_user(instr, ip); else - fault = probe_kernel_address(ip, instr); + fault = get_kernel_nofault(instr, ip); *inst = __mem_to_opcode_thumb16(instr); diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 50cc30acf106..227b2d9bae3d 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -376,7 +376,7 @@ static int call_undef_hook(struct pt_regs *regs) if (!user_mode(regs)) { __le32 instr_le; - if (probe_kernel_address((__force __le32 *)pc, instr_le)) + if (get_kernel_nofault(instr_le, (__force __le32 *)pc)) goto exit; instr = le32_to_cpu(instr_le); } else if (compat_thumb_mode(regs)) { diff --git a/arch/ia64/include/asm/sections.h b/arch/ia64/include/asm/sections.h index cea15f2dd38d..ad4fc06e5f4b 100644 --- a/arch/ia64/include/asm/sections.h +++ b/arch/ia64/include/asm/sections.h @@ -35,7 +35,7 @@ static inline void *dereference_function_descriptor(void *ptr) struct fdesc *desc = ptr; void *p; - if (!probe_kernel_address(&desc->ip, p)) + if (!get_kernel_nofault(p, &desc->ip)) ptr = p; return ptr; } diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 230a6422b99f..6c435dbccca0 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -293,7 +293,7 @@ void *dereference_function_descriptor(void *ptr) Elf64_Fdesc *desc = ptr; void *p; - if (!probe_kernel_address(&desc->addr, p)) + if (!get_kernel_nofault(p, &desc->addr)) ptr = p; return ptr; } diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h index d19871763ed4..bd311616fca8 100644 --- a/arch/powerpc/include/asm/sections.h +++ b/arch/powerpc/include/asm/sections.h @@ -85,7 +85,7 @@ static inline void *dereference_function_descriptor(void *ptr) struct ppc64_opd_entry *desc = ptr; void *p; - if (!probe_kernel_address(&desc->funcaddr, p)) + if (!get_kernel_nofault(p, &desc->funcaddr)) ptr = p; return ptr; } diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 652b2852bea3..e14a1862a3ca 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -421,7 +421,7 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) unsigned int instr; struct ppc_inst *addr = (struct ppc_inst *)bpt->bpt_addr; - err = probe_kernel_address(addr, instr); + err = get_kernel_nofault(instr, addr); if (err) return err; diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 6f96f65ebfe8..9cc792a3a6a9 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -289,7 +289,7 @@ int kprobe_handler(struct pt_regs *regs) if (!p) { unsigned int instr; - if (probe_kernel_address(addr, instr)) + if (get_kernel_nofault(instr, addr)) goto no_kprobe; if (instr != BREAKPOINT_INSTRUCTION) { diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index d4d0d1048500..30955a0c32d0 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1271,7 +1271,7 @@ static void show_instructions(struct pt_regs *regs) #endif if (!__kernel_text_address(pc) || - probe_kernel_address((const void *)pc, instr)) { + get_kernel_nofault(instr, (const void *)pc)) { pr_cont("XXXXXXXX "); } else { if (regs->nip == pc) diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 73fa37ca40ef..040b9d01c079 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -1069,7 +1069,7 @@ int fsl_pci_mcheck_exception(struct pt_regs *regs) ret = copy_from_user_nofault(&inst, (void __user *)regs->nip, sizeof(inst)); else - ret = probe_kernel_address((void *)regs->nip, inst); + ret = get_kernel_nofault(inst, (void *)regs->nip); if (!ret && mcheck_handle_load(regs, inst)) { regs->nip += 4; diff --git a/arch/riscv/kernel/kgdb.c b/arch/riscv/kernel/kgdb.c index a21fb21883e7..c3275f42d1ac 100644 --- a/arch/riscv/kernel/kgdb.c +++ b/arch/riscv/kernel/kgdb.c @@ -62,7 +62,7 @@ int get_step_address(struct pt_regs *regs, unsigned long *next_addr) unsigned int rs1_num, rs2_num; int op_code; - if (probe_kernel_address((void *)pc, op_code)) + if (get_kernel_nofault(op_code, (void *)pc)) return -EINVAL; if ((op_code & __INSN_LENGTH_MASK) != __INSN_LENGTH_GE_32) { if (is_c_jalr_insn(op_code) || is_c_jr_insn(op_code)) { @@ -146,7 +146,7 @@ int do_single_step(struct pt_regs *regs) return error; /* Store the op code in the stepped address */ - error = probe_kernel_address((void *)addr, stepped_opcode); + error = get_kernel_nofault(stepped_opcode, (void *)addr); if (error) return error; diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index ecec1778e3a4..7d95cce5e47c 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -137,7 +137,7 @@ static inline unsigned long get_break_insn_length(unsigned long pc) { bug_insn_t insn; - if (probe_kernel_address((bug_insn_t *)pc, insn)) + if (get_kernel_nofault(insn, (bug_insn_t *)pc)) return 0; return GET_INSN_LENGTH(insn); @@ -165,7 +165,7 @@ int is_valid_bugaddr(unsigned long pc) if (pc < VMALLOC_START) return 0; - if (probe_kernel_address((bug_insn_t *)pc, insn)) + if (get_kernel_nofault(insn, (bug_insn_t *)pc)) return 0; if ((insn & __INSN_LENGTH_MASK) == __INSN_LENGTH_32) return (insn == __BUG_INSN_32); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 6a24751557f0..d53c2e2ea1fd 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -105,7 +105,7 @@ static int bad_address(void *p) { unsigned long dummy; - return probe_kernel_address((unsigned long *)p, dummy); + return get_kernel_nofault(dummy, (unsigned long *)p); } static void dump_pagetable(unsigned long asce, unsigned long address) diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c index a33025451fcd..9c3d32b80038 100644 --- a/arch/sh/kernel/traps.c +++ b/arch/sh/kernel/traps.c @@ -118,7 +118,7 @@ int is_valid_bugaddr(unsigned long addr) if (addr < PAGE_OFFSET) return 0; - if (probe_kernel_address((insn_size_t *)addr, opcode)) + if (get_kernel_nofault(opcode, (insn_size_t *)addr)) return 0; if (opcode == TRAPA_BUG_OPCODE) return 1; diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c index ee0286390a4c..65b0dd2bf25c 100644 --- a/arch/x86/kernel/probe_roms.c +++ b/arch/x86/kernel/probe_roms.c @@ -99,7 +99,7 @@ static bool probe_list(struct pci_dev *pdev, unsigned short vendor, unsigned short device; do { - if (probe_kernel_address(rom_list, device) != 0) + if (get_kernel_nofault(device, rom_list) != 0) device = 0; if (device && match_id(pdev, vendor, device)) @@ -125,13 +125,13 @@ static struct resource *find_oprom(struct pci_dev *pdev) break; rom = isa_bus_to_virt(res->start); - if (probe_kernel_address(rom + 0x18, offset) != 0) + if (get_kernel_nofault(offset, rom + 0x18) != 0) continue; - if (probe_kernel_address(rom + offset + 0x4, vendor) != 0) + if (get_kernel_nofault(vendor, rom + offset + 0x4) != 0) continue; - if (probe_kernel_address(rom + offset + 0x6, device) != 0) + if (get_kernel_nofault(device, rom + offset + 0x6) != 0) continue; if (match_id(pdev, vendor, device)) { @@ -139,8 +139,8 @@ static struct resource *find_oprom(struct pci_dev *pdev) break; } - if (probe_kernel_address(rom + offset + 0x8, list) == 0 && - probe_kernel_address(rom + offset + 0xc, rev) == 0 && + if (get_kernel_nofault(list, rom + offset + 0x8) == 0 && + get_kernel_nofault(rev, rom + offset + 0xc) == 0 && rev >= 3 && list && probe_list(pdev, vendor, rom + offset + list)) { oprom = res; @@ -183,14 +183,14 @@ static int __init romsignature(const unsigned char *rom) const unsigned short * const ptr = (const unsigned short *)rom; unsigned short sig; - return probe_kernel_address(ptr, sig) == 0 && sig == ROMSIGNATURE; + return get_kernel_nofault(sig, ptr) == 0 && sig == ROMSIGNATURE; } static int __init romchecksum(const unsigned char *rom, unsigned long length) { unsigned char sum, c; - for (sum = 0; length && probe_kernel_address(rom++, c) == 0; length--) + for (sum = 0; length && get_kernel_nofault(c, rom++) == 0; length--) sum += c; return !length && !sum; } @@ -211,7 +211,7 @@ void __init probe_roms(void) video_rom_resource.start = start; - if (probe_kernel_address(rom + 2, c) != 0) + if (get_kernel_nofault(c, rom + 2) != 0) continue; /* 0 < length <= 0x7f * 512, historically */ @@ -249,7 +249,7 @@ void __init probe_roms(void) if (!romsignature(rom)) continue; - if (probe_kernel_address(rom + 2, c) != 0) + if (get_kernel_nofault(c, rom + 2) != 0) continue; /* 0 < length <= 0x7f * 512, historically */ diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 7003f2e7b163..f9727b96961f 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -91,7 +91,7 @@ int is_valid_bugaddr(unsigned long addr) if (addr < TASK_SIZE_MAX) return 0; - if (probe_kernel_address((unsigned short *)addr, ud)) + if (get_kernel_nofault(ud, (unsigned short *)addr)) return 0; return ud == INSN_UD0 || ud == INSN_UD2; diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index e996aa3833b8..1ead568c0101 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -99,7 +99,7 @@ check_prefetch_opcode(struct pt_regs *regs, unsigned char *instr, return !instr_lo || (instr_lo>>1) == 1; case 0x00: /* Prefetch instruction is 0x0F0D or 0x0F18 */ - if (probe_kernel_address(instr, opcode)) + if (get_kernel_nofault(opcode, instr)) return 0; *prefetch = (instr_lo == 0xF) && @@ -133,7 +133,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) while (instr < max_instr) { unsigned char opcode; - if (probe_kernel_address(instr, opcode)) + if (get_kernel_nofault(opcode, instr)) break; instr++; @@ -301,7 +301,7 @@ static int bad_address(void *p) { unsigned long dummy; - return probe_kernel_address((unsigned long *)p, dummy); + return get_kernel_nofault(dummy, (unsigned long *)p); } static void dump_pagetable(unsigned long address) diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c index 9c97d814125e..4f15280732ed 100644 --- a/arch/x86/pci/pcbios.c +++ b/arch/x86/pci/pcbios.c @@ -302,7 +302,7 @@ static const struct pci_raw_ops *__init pci_find_bios(void) check <= (union bios32 *) __va(0xffff0); ++check) { long sig; - if (probe_kernel_address(&check->fields.signature, sig)) + if (get_kernel_nofault(sig, &check->fields.signature)) continue; if (check->fields.signature != BIOS32_SIGNATURE) diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index bef48da242cc..a508a3c08879 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -318,14 +318,14 @@ long strncpy_from_user_nofault(char *dst, const void __user *unsafe_addr, long strnlen_user_nofault(const void __user *unsafe_addr, long count); /** - * probe_kernel_address(): safely attempt to read from a location - * @addr: address to read from - * @retval: read into this variable + * get_kernel_nofault(): safely attempt to read from a location + * @val: read into this variable + * @ptr: address to read from * * Returns 0 on success, or -EFAULT. */ -#define probe_kernel_address(addr, retval) \ - copy_from_kernel_nofault(&retval, addr, sizeof(retval)) +#define get_kernel_nofault(val, ptr) \ + copy_from_kernel_nofault(&(val), (ptr), sizeof(val)) #ifndef user_access_begin #define user_access_begin(ptr,len) access_ok(ptr, len) diff --git a/lib/test_lockup.c b/lib/test_lockup.c index f258743a0d83..bd7c7ff39f6b 100644 --- a/lib/test_lockup.c +++ b/lib/test_lockup.c @@ -419,8 +419,8 @@ static bool test_kernel_ptr(unsigned long addr, int size) /* should be at least readable kernel address */ if (access_ok(ptr, 1) || access_ok(ptr + size - 1, 1) || - probe_kernel_address(ptr, buf) || - probe_kernel_address(ptr + size - 1, buf)) { + get_kernel_nofault(buf, ptr) || + get_kernel_nofault(buf, ptr + size - 1)) { pr_err("invalid kernel ptr: %#lx\n", addr); return true; } @@ -437,7 +437,7 @@ static bool __maybe_unused test_magic(unsigned long addr, int offset, if (!addr) return false; - if (probe_kernel_address(ptr, magic) || magic != expected) { + if (get_kernel_nofault(magic, ptr) || magic != expected) { pr_err("invalid magic at %#lx + %#x = %#x, expected %#x\n", addr, offset, magic, expected); return true; -- cgit v1.2.3 From 0c389d89abc28edf70ae847ee2fa55acb267b826 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 18 Jun 2020 12:10:37 -0700 Subject: maccess: make get_kernel_nofault() check for minimal type compatibility Now that we've renamed probe_kernel_address() to get_kernel_nofault() and made it look and behave more in line with get_user(), some of the subtle type behavior differences end up being more obvious and possibly dangerous. When you do get_user(val, user_ptr); the type of the access comes from the "user_ptr" part, and the above basically acts as val = *user_ptr; by design (except, of course, for the fact that the actual dereference is done with a user access). Note how in the above case, the type of the end result comes from the pointer argument, and then the value is cast to the type of 'val' as part of the assignment. So the type of the pointer is ultimately the more important type both for the access itself. But 'get_kernel_nofault()' may now _look_ similar, but it behaves very differently. When you do get_kernel_nofault(val, kernel_ptr); it behaves like val = *(typeof(val) *)kernel_ptr; except, of course, for the fact that the actual dereference is done with exception handling so that a faulting access is suppressed and returned as the error code. But note how different the casting behavior of the two superficially similar accesses are: one does the actual access in the size of the type the pointer points to, while the other does the access in the size of the target, and ignores the pointer type entirely. Actually changing get_kernel_nofault() to act like get_user() is almost certainly the right thing to do eventually, but in the meantime this patch adds logit to at least verify that the pointer type is compatible with the type of the result. In many cases, this involves just casting the pointer to 'void *' to make it obvious that the type of the pointer is not the important part. It's not how 'get_user()' acts, but at least the behavioral difference is now obvious and explicit. Cc: Christoph Hellwig Signed-off-by: Linus Torvalds --- arch/arm/kernel/traps.c | 2 +- arch/ia64/include/asm/sections.h | 2 +- arch/parisc/kernel/process.c | 2 +- arch/powerpc/include/asm/sections.h | 2 +- arch/powerpc/kernel/kgdb.c | 2 +- arch/x86/kernel/probe_roms.c | 4 ++-- include/linux/uaccess.h | 6 ++++-- 7 files changed, 11 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 49ce15c3612d..17d5a785df28 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -396,7 +396,7 @@ int is_valid_bugaddr(unsigned long pc) u32 insn = __opcode_to_mem_arm(BUG_INSTR_VALUE); #endif - if (get_kernel_nofault(bkpt, (unsigned *)pc)) + if (get_kernel_nofault(bkpt, (void *)pc)) return 0; return bkpt == insn; diff --git a/arch/ia64/include/asm/sections.h b/arch/ia64/include/asm/sections.h index ad4fc06e5f4b..3a033d2008b3 100644 --- a/arch/ia64/include/asm/sections.h +++ b/arch/ia64/include/asm/sections.h @@ -35,7 +35,7 @@ static inline void *dereference_function_descriptor(void *ptr) struct fdesc *desc = ptr; void *p; - if (!get_kernel_nofault(p, &desc->ip)) + if (!get_kernel_nofault(p, (void *)&desc->ip)) ptr = p; return ptr; } diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 6c435dbccca0..b7abb12edd3a 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -293,7 +293,7 @@ void *dereference_function_descriptor(void *ptr) Elf64_Fdesc *desc = ptr; void *p; - if (!get_kernel_nofault(p, &desc->addr)) + if (!get_kernel_nofault(p, (void *)&desc->addr)) ptr = p; return ptr; } diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h index bd311616fca8..324d7b298ec3 100644 --- a/arch/powerpc/include/asm/sections.h +++ b/arch/powerpc/include/asm/sections.h @@ -85,7 +85,7 @@ static inline void *dereference_function_descriptor(void *ptr) struct ppc64_opd_entry *desc = ptr; void *p; - if (!get_kernel_nofault(p, &desc->funcaddr)) + if (!get_kernel_nofault(p, (void *)&desc->funcaddr)) ptr = p; return ptr; } diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index e14a1862a3ca..409080208a6c 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -421,7 +421,7 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) unsigned int instr; struct ppc_inst *addr = (struct ppc_inst *)bpt->bpt_addr; - err = get_kernel_nofault(instr, addr); + err = get_kernel_nofault(instr, (unsigned *) addr); if (err) return err; diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c index 65b0dd2bf25c..9e1def3744f2 100644 --- a/arch/x86/kernel/probe_roms.c +++ b/arch/x86/kernel/probe_roms.c @@ -94,7 +94,7 @@ static bool match_id(struct pci_dev *pdev, unsigned short vendor, unsigned short } static bool probe_list(struct pci_dev *pdev, unsigned short vendor, - const unsigned char *rom_list) + const void *rom_list) { unsigned short device; @@ -119,7 +119,7 @@ static struct resource *find_oprom(struct pci_dev *pdev) for (i = 0; i < ARRAY_SIZE(adapter_rom_resources); i++) { struct resource *res = &adapter_rom_resources[i]; unsigned short offset, vendor, device, list, rev; - const unsigned char *rom; + const void *rom; if (res->end == 0) break; diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index a508a3c08879..0a76ddc07d59 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -324,8 +324,10 @@ long strnlen_user_nofault(const void __user *unsafe_addr, long count); * * Returns 0 on success, or -EFAULT. */ -#define get_kernel_nofault(val, ptr) \ - copy_from_kernel_nofault(&(val), (ptr), sizeof(val)) +#define get_kernel_nofault(val, ptr) ({ \ + const typeof(val) *__gk_ptr = (ptr); \ + copy_from_kernel_nofault(&(val), __gk_ptr, sizeof(val));\ +}) #ifndef user_access_begin #define user_access_begin(ptr,len) access_ok(ptr, len) -- cgit v1.2.3 From 10011f7d95dea311c0f2a3ea6725b5a2e97015a8 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Thu, 4 Jun 2020 20:39:25 +0300 Subject: ARCv2: support loop buffer (LPB) disabling On HS cores, loop buffer (LPB) is programmable in runtime and can be optionally disabled. Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 6 ++++++ arch/arc/kernel/head.S | 8 ++++++++ 2 files changed, 14 insertions(+) (limited to 'arch') diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 323014149e48..197896cfbd23 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -474,6 +474,12 @@ config ARC_IRQ_NO_AUTOSAVE This is programmable and can be optionally disabled in which case software INTERRUPT_PROLOGUE/EPILGUE do the needed work +config ARC_LPB_DISABLE + bool "Disable loop buffer (LPB)" + help + On HS cores, loop buffer (LPB) is programmable in runtime and can + be optionally disabled. + endif # ISA_ARCV2 endmenu # "ARC CPU Configuration" diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S index 6eb23f1545ee..17fd1ed700cc 100644 --- a/arch/arc/kernel/head.S +++ b/arch/arc/kernel/head.S @@ -59,6 +59,14 @@ bclr r5, r5, STATUS_AD_BIT #endif kflag r5 + +#ifdef CONFIG_ARC_LPB_DISABLE + lr r5, [ARC_REG_LPB_BUILD] + breq r5, 0, 1f ; LPB doesn't exist + mov r5, 1 + sr r5, [ARC_REG_LPB_CTRL] +1: +#endif /* CONFIG_ARC_LPB_DISABLE */ #endif ; Config DSP_CTRL properly, so kernel may use integer multiply, ; multiply-accumulate, and divide operations -- cgit v1.2.3 From e0d17c842c0f824fd4df9f4688709fc6907201e1 Mon Sep 17 00:00:00 2001 From: Yash Shah Date: Tue, 16 Jun 2020 19:33:06 +0530 Subject: RISC-V: Don't allow write+exec only page mapping request in mmap As per the table 4.4 of version "20190608-Priv-MSU-Ratified" of the RISC-V instruction set manual[0], the PTE permission bit combination of "write+exec only" is reserved for future use. Hence, don't allow such mapping request in mmap call. An issue is been reported by David Abdurachmanov, that while running stress-ng with "sysbadaddr" argument, RCU stalls are observed on RISC-V specific kernel. This issue arises when the stress-sysbadaddr request for pages with "write+exec only" permission bits and then passes the address obtain from this mmap call to various system call. For the riscv kernel, the mmap call should fail for this particular combination of permission bits since it's not valid. [0]: http://dabbelt.com/~palmer/keep/riscv-isa-manual/riscv-privileged-20190608-1.pdf Signed-off-by: Yash Shah Reported-by: David Abdurachmanov [Palmer: Refer to the latest ISA specification at the only link I could find, and update the terminology.] Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/sys_riscv.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c index f3619f59d85c..12f8a7fce78b 100644 --- a/arch/riscv/kernel/sys_riscv.c +++ b/arch/riscv/kernel/sys_riscv.c @@ -8,6 +8,7 @@ #include #include #include +#include static long riscv_sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, @@ -16,6 +17,11 @@ static long riscv_sys_mmap(unsigned long addr, unsigned long len, { if (unlikely(offset & (~PAGE_MASK >> page_shift_offset))) return -EINVAL; + + if ((prot & PROT_WRITE) && (prot & PROT_EXEC)) + if (unlikely(!(prot & PROT_READ))) + return -EINVAL; + return ksys_mmap_pgoff(addr, len, prot, flags, fd, offset >> (PAGE_SHIFT - page_shift_offset)); } -- cgit v1.2.3 From 0e2c09011d4de4161f615ff860a605a9186cf62a Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Wed, 17 Jun 2020 13:37:32 -0700 Subject: RISC-V: Acquire mmap lock before invoking walk_page_range As per walk_page_range documentation, mmap lock should be acquired by the caller before invoking walk_page_range. mmap_assert_locked gets triggered without that. The details can be found here. http://lists.infradead.org/pipermail/linux-riscv/2020-June/010335.html Fixes: 395a21ff859c(riscv: add ARCH_HAS_SET_DIRECT_MAP support) Signed-off-by: Atish Patra Reviewed-by: Michel Lespinasse Reviewed-by: Zong Li Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/pageattr.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c index ec2c70f84994..289a9a5ea5b5 100644 --- a/arch/riscv/mm/pageattr.c +++ b/arch/riscv/mm/pageattr.c @@ -151,6 +151,7 @@ int set_memory_nx(unsigned long addr, int numpages) int set_direct_map_invalid_noflush(struct page *page) { + int ret; unsigned long start = (unsigned long)page_address(page); unsigned long end = start + PAGE_SIZE; struct pageattr_masks masks = { @@ -158,11 +159,16 @@ int set_direct_map_invalid_noflush(struct page *page) .clear_mask = __pgprot(_PAGE_PRESENT) }; - return walk_page_range(&init_mm, start, end, &pageattr_ops, &masks); + mmap_read_lock(&init_mm); + ret = walk_page_range(&init_mm, start, end, &pageattr_ops, &masks); + mmap_read_unlock(&init_mm); + + return ret; } int set_direct_map_default_noflush(struct page *page) { + int ret; unsigned long start = (unsigned long)page_address(page); unsigned long end = start + PAGE_SIZE; struct pageattr_masks masks = { @@ -170,7 +176,11 @@ int set_direct_map_default_noflush(struct page *page) .clear_mask = __pgprot(0) }; - return walk_page_range(&init_mm, start, end, &pageattr_ops, &masks); + mmap_read_lock(&init_mm); + ret = walk_page_range(&init_mm, start, end, &pageattr_ops, &masks); + mmap_read_unlock(&init_mm); + + return ret; } void __kernel_map_pages(struct page *page, int numpages, int enable) -- cgit v1.2.3 From f04a5ba1752512a46ffb61b88a8fa7d4ab7e02f3 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 15 Jun 2020 09:58:12 +0200 Subject: x86/platform/intel-mid: convert to use i2c_new_client_device() Move away from the deprecated API and return the shiny new ERRPTR where useful. Signed-off-by: Wolfram Sang Reviewed-by: Andy Shevchenko Signed-off-by: Wolfram Sang --- arch/x86/platform/intel-mid/sfi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/platform/intel-mid/sfi.c b/arch/x86/platform/intel-mid/sfi.c index b8f7f193f383..30bd5714a3d4 100644 --- a/arch/x86/platform/intel-mid/sfi.c +++ b/arch/x86/platform/intel-mid/sfi.c @@ -287,8 +287,8 @@ void intel_scu_devices_create(void) adapter = i2c_get_adapter(i2c_bus[i]); if (adapter) { - client = i2c_new_device(adapter, i2c_devs[i]); - if (!client) + client = i2c_new_client_device(adapter, i2c_devs[i]); + if (IS_ERR(client)) pr_err("can't create i2c device %s\n", i2c_devs[i]->type); } else -- cgit v1.2.3 From 49097762fa405cdc16f8f597f6d27c078d4a31e9 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 19 Jun 2020 11:40:46 +0200 Subject: Revert "KVM: VMX: Micro-optimize vmexit time when not exposing PMU" Guest crashes are observed on a Cascade Lake system when 'perf top' is launched on the host, e.g. BUG: unable to handle kernel paging request at fffffe0000073038 PGD 7ffa7067 P4D 7ffa7067 PUD 7ffa6067 PMD 7ffa5067 PTE ffffffffff120 Oops: 0000 [#1] SMP PTI CPU: 1 PID: 1 Comm: systemd Not tainted 4.18.0+ #380 ... Call Trace: serial8250_console_write+0xfe/0x1f0 call_console_drivers.constprop.0+0x9d/0x120 console_unlock+0x1ea/0x460 Call traces are different but the crash is imminent. The problem was blindly bisected to the commit 041bc42ce2d0 ("KVM: VMX: Micro-optimize vmexit time when not exposing PMU"). It was also confirmed that the issue goes away if PMU is exposed to the guest. With some instrumentation of the guest we can see what is being switched (when we do atomic_switch_perf_msrs()): vmx_vcpu_run: switching 2 msrs vmx_vcpu_run: switching MSR38f guest: 70000000d host: 70000000f vmx_vcpu_run: switching MSR3f1 guest: 0 host: 2 The current guess is that PEBS (MSR_IA32_PEBS_ENABLE, 0x3f1) is to blame. Regardless of whether PMU is exposed to the guest or not, PEBS needs to be disabled upon switch. This reverts commit 041bc42ce2d0efac3b85bbb81dea8c74b81f4ef9. Reported-by: Maxime Coquelin Signed-off-by: Vitaly Kuznetsov Message-Id: <20200619094046.654019-1-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 36c771728c8c..b1a23ad986ff 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6728,8 +6728,7 @@ reenter_guest: pt_guest_enter(vmx); - if (vcpu_to_pmu(vcpu)->version) - atomic_switch_perf_msrs(vmx); + atomic_switch_perf_msrs(vmx); atomic_switch_umwait_control_msr(vmx); if (enable_preemption_timer) -- cgit v1.2.3 From bb5570ad3b54e7930997aec76ab68256d5236d94 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Thu, 18 Jun 2020 11:20:02 +0100 Subject: x86/asm/64: Align start of __clear_user() loop to 16-bytes x86 CPUs can suffer severe performance drops if a tight loop, such as the ones in __clear_user(), straddles a 16-byte instruction fetch window, or worse, a 64-byte cacheline. This issues was discovered in the SUSE kernel with the following commit, 1153933703d9 ("x86/asm/64: Micro-optimize __clear_user() - Use immediate constants") which increased the code object size from 10 bytes to 15 bytes and caused the 8-byte copy loop in __clear_user() to be split across a 64-byte cacheline. Aligning the start of the loop to 16-bytes makes this fit neatly inside a single instruction fetch window again and restores the performance of __clear_user() which is used heavily when reading from /dev/zero. Here are some numbers from running libmicro's read_z* and pread_z* microbenchmarks which read from /dev/zero: Zen 1 (Naples) libmicro-file 5.7.0-rc6 5.7.0-rc6 5.7.0-rc6 revert-1153933703d9+ align16+ Time mean95-pread_z100k 9.9195 ( 0.00%) 5.9856 ( 39.66%) 5.9938 ( 39.58%) Time mean95-pread_z10k 1.1378 ( 0.00%) 0.7450 ( 34.52%) 0.7467 ( 34.38%) Time mean95-pread_z1k 0.2623 ( 0.00%) 0.2251 ( 14.18%) 0.2252 ( 14.15%) Time mean95-pread_zw100k 9.9974 ( 0.00%) 6.0648 ( 39.34%) 6.0756 ( 39.23%) Time mean95-read_z100k 9.8940 ( 0.00%) 5.9885 ( 39.47%) 5.9994 ( 39.36%) Time mean95-read_z10k 1.1394 ( 0.00%) 0.7483 ( 34.33%) 0.7482 ( 34.33%) Note that this doesn't affect Haswell or Broadwell microarchitectures which seem to avoid the alignment issue by executing the loop straight out of the Loop Stream Detector (verified using perf events). Fixes: 1153933703d9 ("x86/asm/64: Micro-optimize __clear_user() - Use immediate constants") Signed-off-by: Matt Fleming Signed-off-by: Borislav Petkov Cc: # v4.19+ Link: https://lkml.kernel.org/r/20200618102002.30034-1-matt@codeblueprint.co.uk --- arch/x86/lib/usercopy_64.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index fff28c6f73a2..b0dfac3d3df7 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -24,6 +24,7 @@ unsigned long __clear_user(void __user *addr, unsigned long size) asm volatile( " testq %[size8],%[size8]\n" " jz 4f\n" + " .align 16\n" "0: movq $0,(%[dst])\n" " addq $8,%[dst]\n" " decl %%ecx ; jnz 0b\n" -- cgit v1.2.3 From c0e1c8c22bebecef40097c80c1c74492ff96d081 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 15 Jun 2020 12:57:59 +0000 Subject: powerpc/8xx: Provide ptep_get() with 16k pages READ_ONCE() now enforces atomic read, which leads to: CC mm/gup.o In file included from ./include/linux/kernel.h:11:0, from mm/gup.c:2: In function 'gup_hugepte.constprop', inlined from 'gup_huge_pd.isra.79' at mm/gup.c:2465:8: ./include/linux/compiler.h:392:38: error: call to '__compiletime_assert_222' declared with attribute error: Unsupported access size for {READ,WRITE}_ONCE(). _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) ^ ./include/linux/compiler.h:373:4: note: in definition of macro '__compiletime_assert' prefix ## suffix(); \ ^ ./include/linux/compiler.h:392:2: note: in expansion of macro '_compiletime_assert' _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) ^ ./include/linux/compiler.h:405:2: note: in expansion of macro 'compiletime_assert' compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \ ^ ./include/linux/compiler.h:291:2: note: in expansion of macro 'compiletime_assert_rwonce_type' compiletime_assert_rwonce_type(x); \ ^ mm/gup.c:2428:8: note: in expansion of macro 'READ_ONCE' pte = READ_ONCE(*ptep); ^ In function 'gup_get_pte', inlined from 'gup_pte_range' at mm/gup.c:2228:9, inlined from 'gup_pmd_range' at mm/gup.c:2613:15, inlined from 'gup_pud_range' at mm/gup.c:2641:15, inlined from 'gup_p4d_range' at mm/gup.c:2666:15, inlined from 'gup_pgd_range' at mm/gup.c:2694:15, inlined from 'internal_get_user_pages_fast' at mm/gup.c:2795:3: ./include/linux/compiler.h:392:38: error: call to '__compiletime_assert_219' declared with attribute error: Unsupported access size for {READ,WRITE}_ONCE(). _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) ^ ./include/linux/compiler.h:373:4: note: in definition of macro '__compiletime_assert' prefix ## suffix(); \ ^ ./include/linux/compiler.h:392:2: note: in expansion of macro '_compiletime_assert' _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) ^ ./include/linux/compiler.h:405:2: note: in expansion of macro 'compiletime_assert' compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \ ^ ./include/linux/compiler.h:291:2: note: in expansion of macro 'compiletime_assert_rwonce_type' compiletime_assert_rwonce_type(x); \ ^ mm/gup.c:2199:9: note: in expansion of macro 'READ_ONCE' return READ_ONCE(*ptep); ^ make[2]: *** [mm/gup.o] Error 1 Define ptep_get() on 8xx when using 16k pages. Fixes: 9e343b467c70 ("READ_ONCE: Enforce atomicity for {READ,WRITE}_ONCE() memory accesses") Signed-off-by: Christophe Leroy Acked-by: Will Deacon Acked-by: Peter Zijlstra (Intel) Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/341688399c1b102756046d19ea6ce39db1ae4742.1592225558.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/32/pgtable.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index 5a590ceaec14..b0afbdd07740 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -284,6 +284,16 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, return __pte(pte_update(mm, addr, ptep, ~0, 0, 0)); } +#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PPC_16K_PAGES) +#define __HAVE_ARCH_PTEP_GET +static inline pte_t ptep_get(pte_t *ptep) +{ + pte_t pte = {READ_ONCE(ptep->pte), 0, 0, 0}; + + return pte; +} +#endif + #define __HAVE_ARCH_PTEP_SET_WRPROTECT static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) -- cgit v1.2.3 From 7e4773f73dcfb92e7e33532162f722ec291e75a4 Mon Sep 17 00:00:00 2001 From: Arseny Solokha Date: Sat, 13 Jun 2020 23:28:01 +0700 Subject: powerpc/fsl_booke/32: Fix build with CONFIG_RANDOMIZE_BASE Building the current 5.8 kernel for an e500 machine with CONFIG_RANDOMIZE_BASE=y and CONFIG_BLOCK=n yields the following failure: arch/powerpc/mm/nohash/kaslr_booke.c: In function 'kaslr_early_init': arch/powerpc/mm/nohash/kaslr_booke.c:387:2: error: implicit declaration of function 'flush_icache_range'; did you mean 'flush_tlb_range'? Indeed, including asm/cacheflush.h into kaslr_booke.c fixes the build. Fixes: 2b0e86cc5de6 ("powerpc/fsl_booke/32: implement KASLR infrastructure") Cc: stable@vger.kernel.org # v5.5+ Signed-off-by: Arseny Solokha Reviewed-by: Jason Yan Acked-by: Scott Wood [mpe: Tweak change log to mention CONFIG_BLOCK=n] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200613162801.1946619-1-asolokha@kb.kras.ru --- arch/powerpc/mm/nohash/kaslr_booke.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/powerpc/mm/nohash/kaslr_booke.c b/arch/powerpc/mm/nohash/kaslr_booke.c index 4a75f2d9bf0e..bce0e5349978 100644 --- a/arch/powerpc/mm/nohash/kaslr_booke.c +++ b/arch/powerpc/mm/nohash/kaslr_booke.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From c1ed1754f271f6b7acb1bfdc8cfb62220fbed423 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 11 Jun 2020 17:31:59 +0530 Subject: powerpc/kvm/book3s64: Fix kernel crash with nested kvm & DEBUG_VIRTUAL With CONFIG_DEBUG_VIRTUAL=y, __pa() checks for addr value and if it's less than PAGE_OFFSET it leads to a BUG(). #define __pa(x) ({ VIRTUAL_BUG_ON((unsigned long)(x) < PAGE_OFFSET); (unsigned long)(x) & 0x0fffffffffffffffUL; }) kernel BUG at arch/powerpc/kvm/book3s_64_mmu_radix.c:43! cpu 0x70: Vector: 700 (Program Check) at [c0000018a2187360] pc: c000000000161b30: __kvmhv_copy_tofrom_guest_radix+0x130/0x1f0 lr: c000000000161d5c: kvmhv_copy_from_guest_radix+0x3c/0x80 ... kvmhv_copy_from_guest_radix+0x3c/0x80 kvmhv_load_from_eaddr+0x48/0xc0 kvmppc_ld+0x98/0x1e0 kvmppc_load_last_inst+0x50/0x90 kvmppc_hv_emulate_mmio+0x288/0x2b0 kvmppc_book3s_radix_page_fault+0xd8/0x2b0 kvmppc_book3s_hv_page_fault+0x37c/0x1050 kvmppc_vcpu_run_hv+0xbb8/0x1080 kvmppc_vcpu_run+0x34/0x50 kvm_arch_vcpu_ioctl_run+0x2fc/0x410 kvm_vcpu_ioctl+0x2b4/0x8f0 ksys_ioctl+0xf4/0x150 sys_ioctl+0x28/0x80 system_call_exception+0x104/0x1d0 system_call_common+0xe8/0x214 kvmhv_copy_tofrom_guest_radix() uses a NULL value for to/from to indicate direction of copy. Avoid calling __pa() if the value is NULL to avoid the BUG(). Signed-off-by: Aneesh Kumar K.V [mpe: Massage change log a bit to mention CONFIG_DEBUG_VIRTUAL] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200611120159.680284-1-aneesh.kumar@linux.ibm.com --- arch/powerpc/kvm/book3s_64_mmu_radix.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index e738ea652192..6a73714759ba 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -40,7 +40,8 @@ unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid, /* Can't access quadrants 1 or 2 in non-HV mode, call the HV to do it */ if (kvmhv_on_pseries()) return plpar_hcall_norets(H_COPY_TOFROM_GUEST, lpid, pid, eaddr, - __pa(to), __pa(from), n); + (to != NULL) ? __pa(to): 0, + (from != NULL) ? __pa(from): 0, n); quadrant = 1; if (!pid) -- cgit v1.2.3 From 7733306bd593c737c63110175da6c35b4b8bb32c Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Thu, 18 Jun 2020 18:12:54 +0100 Subject: KVM: arm64: Annotate hyp NMI-related functions as __always_inline The "inline" keyword is a hint for the compiler to inline a function. The functions system_uses_irq_prio_masking() and gic_write_pmr() are used by the code running at EL2 on a non-VHE system, so mark them as __always_inline to make sure they'll always be part of the .hyp.text section. This fixes the following splat when trying to run a VM: [ 47.625273] Kernel panic - not syncing: HYP panic: [ 47.625273] PS:a00003c9 PC:0000ca0b42049fc4 ESR:86000006 [ 47.625273] FAR:0000ca0b42049fc4 HPFAR:0000000010001000 PAR:0000000000000000 [ 47.625273] VCPU:0000000000000000 [ 47.647261] CPU: 1 PID: 217 Comm: kvm-vcpu-0 Not tainted 5.8.0-rc1-ARCH+ #61 [ 47.654508] Hardware name: Globalscale Marvell ESPRESSOBin Board (DT) [ 47.661139] Call trace: [ 47.663659] dump_backtrace+0x0/0x1cc [ 47.667413] show_stack+0x18/0x24 [ 47.670822] dump_stack+0xb8/0x108 [ 47.674312] panic+0x124/0x2f4 [ 47.677446] panic+0x0/0x2f4 [ 47.680407] SMP: stopping secondary CPUs [ 47.684439] Kernel Offset: disabled [ 47.688018] CPU features: 0x240402,20002008 [ 47.692318] Memory Limit: none [ 47.695465] ---[ end Kernel panic - not syncing: HYP panic: [ 47.695465] PS:a00003c9 PC:0000ca0b42049fc4 ESR:86000006 [ 47.695465] FAR:0000ca0b42049fc4 HPFAR:0000000010001000 PAR:0000000000000000 [ 47.695465] VCPU:0000000000000000 ]--- The instruction abort was caused by the code running at EL2 trying to fetch an instruction which wasn't mapped in the EL2 translation tables. Using objdump showed the two functions as separate symbols in the .text section. Fixes: 85738e05dc38 ("arm64: kvm: Unmask PMR before entering guest") Cc: stable@vger.kernel.org Signed-off-by: Alexandru Elisei Signed-off-by: Marc Zyngier Acked-by: James Morse Link: https://lore.kernel.org/r/20200618171254.1596055-1-alexandru.elisei@arm.com --- arch/arm64/include/asm/arch_gicv3.h | 2 +- arch/arm64/include/asm/cpufeature.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index a358e97572c1..6647ae4f0231 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -109,7 +109,7 @@ static inline u32 gic_read_pmr(void) return read_sysreg_s(SYS_ICC_PMR_EL1); } -static inline void gic_write_pmr(u32 val) +static __always_inline void gic_write_pmr(u32 val) { write_sysreg_s(val, SYS_ICC_PMR_EL1); } diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 5d1f4ae42799..f7c3d1ff091d 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -675,7 +675,7 @@ static inline bool system_supports_generic_auth(void) cpus_have_const_cap(ARM64_HAS_GENERIC_AUTH); } -static inline bool system_uses_irq_prio_masking(void) +static __always_inline bool system_uses_irq_prio_masking(void) { return IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && cpus_have_const_cap(ARM64_HAS_IRQ_PRIO_MASKING); -- cgit v1.2.3 From 66b7e05dc0239c5817859f261098ba9cc2efbd2b Mon Sep 17 00:00:00 2001 From: Steven Price Date: Wed, 17 Jun 2020 11:54:56 +0100 Subject: KVM: arm64: Fix kvm_reset_vcpu() return code being incorrect with SVE If SVE is enabled then 'ret' can be assigned the return value of kvm_vcpu_enable_sve() which may be 0 causing future "goto out" sites to erroneously return 0 on failure rather than -EINVAL as expected. Remove the initialisation of 'ret' and make setting the return value explicit to avoid this situation in the future. Fixes: 9a3cdf26e336 ("KVM: arm64/sve: Allow userspace to enable SVE for vcpus") Cc: stable@vger.kernel.org Reported-by: James Morse Signed-off-by: Steven Price Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200617105456.28245-1-steven.price@arm.com --- arch/arm64/kvm/reset.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index d3b209023727..6ed36be51b4b 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -245,7 +245,7 @@ static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu) */ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) { - int ret = -EINVAL; + int ret; bool loaded; u32 pstate; @@ -269,15 +269,19 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) if (test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, vcpu->arch.features) || test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, vcpu->arch.features)) { - if (kvm_vcpu_enable_ptrauth(vcpu)) + if (kvm_vcpu_enable_ptrauth(vcpu)) { + ret = -EINVAL; goto out; + } } switch (vcpu->arch.target) { default: if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) { - if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1)) + if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1)) { + ret = -EINVAL; goto out; + } pstate = VCPU_RESET_PSTATE_SVC; } else { pstate = VCPU_RESET_PSTATE_EL1; -- cgit v1.2.3 From a25e91028ac2f544e0140aff2c9360a0e995dd86 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Mon, 22 Jun 2020 16:27:10 +0200 Subject: KVM: arm64: pvtime: Ensure task delay accounting is enabled Ensure we're actually accounting run_delay before we claim that we'll expose it to the guest. If we're not, then we just pretend like steal time isn't supported in order to avoid any confusion. Signed-off-by: Andrew Jones Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200622142710.18677-1-drjones@redhat.com --- arch/arm64/kvm/pvtime.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/pvtime.c b/arch/arm64/kvm/pvtime.c index 1e0f4c284888..f7b52ce1557e 100644 --- a/arch/arm64/kvm/pvtime.c +++ b/arch/arm64/kvm/pvtime.c @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -73,6 +74,11 @@ gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu) return base; } +static bool kvm_arm_pvtime_supported(void) +{ + return !!sched_info_on(); +} + int kvm_arm_pvtime_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { @@ -82,7 +88,8 @@ int kvm_arm_pvtime_set_attr(struct kvm_vcpu *vcpu, int ret = 0; int idx; - if (attr->attr != KVM_ARM_VCPU_PVTIME_IPA) + if (!kvm_arm_pvtime_supported() || + attr->attr != KVM_ARM_VCPU_PVTIME_IPA) return -ENXIO; if (get_user(ipa, user)) @@ -110,7 +117,8 @@ int kvm_arm_pvtime_get_attr(struct kvm_vcpu *vcpu, u64 __user *user = (u64 __user *)attr->addr; u64 ipa; - if (attr->attr != KVM_ARM_VCPU_PVTIME_IPA) + if (!kvm_arm_pvtime_supported() || + attr->attr != KVM_ARM_VCPU_PVTIME_IPA) return -ENXIO; ipa = vcpu->arch.steal.base; @@ -125,7 +133,8 @@ int kvm_arm_pvtime_has_attr(struct kvm_vcpu *vcpu, { switch (attr->attr) { case KVM_ARM_VCPU_PVTIME_IPA: - return 0; + if (kvm_arm_pvtime_supported()) + return 0; } return -ENXIO; } -- cgit v1.2.3 From af28dfacbe00d53df5dec2bf50640df33138b1fe Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Mon, 22 Jun 2020 12:08:30 -0400 Subject: kvm: lapic: fix broken vcpu hotplug Guest fails to online hotplugged CPU with error smpboot: do_boot_cpu failed(-1) to wakeup CPU#4 It's caused by the fact that kvm_apic_set_state(), which used to call recalculate_apic_map() unconditionally and pulled hotplugged CPU into apic map, is updating map conditionally on state changes. In this case the APIC map is not considered dirty and the is not updated. Fix the issue by forcing unconditional update from kvm_apic_set_state(), like it used to be. Fixes: 4abaffce4d25a ("KVM: LAPIC: Recalculate apic map in batch") Cc: stable@vger.kernel.org Signed-off-by: Igor Mammedov Message-Id: <20200622160830.426022-1-imammedo@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 34a7e0533dad..6dc177da19da 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2567,6 +2567,7 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) } memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s)); + apic->vcpu->kvm->arch.apic_map_dirty = true; kvm_recalculate_apic_map(vcpu->kvm); kvm_apic_set_version(vcpu); -- cgit v1.2.3 From 44d527170731c75587e95052f3eea72b8c651daf Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 22 Jun 2020 16:37:42 +0200 Subject: KVM: LAPIC: ensure APIC map is up to date on concurrent update requests The following race can cause lost map update events: cpu1 cpu2 apic_map_dirty = true ------------------------------------------------------------ kvm_recalculate_apic_map: pass check mutex_lock(&kvm->arch.apic_map_lock); if (!kvm->arch.apic_map_dirty) and in process of updating map ------------------------------------------------------------- other calls to apic_map_dirty = true might be too late for affected cpu ------------------------------------------------------------- apic_map_dirty = false ------------------------------------------------------------- kvm_recalculate_apic_map: bail out on if (!kvm->arch.apic_map_dirty) To fix it, record the beginning of an update of the APIC map in apic_map_dirty. If another APIC map change switches apic_map_dirty back to DIRTY during the update, kvm_recalculate_apic_map should not make it CLEAN, and the other caller will go through the slow path. Reported-by: Igor Mammedov Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/lapic.c | 51 +++++++++++++++++++++++++---------------- 2 files changed, 32 insertions(+), 21 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f8998e97457f..f852ee350beb 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -943,7 +943,7 @@ struct kvm_arch { atomic_t vapics_in_nmi_mode; struct mutex apic_map_lock; struct kvm_apic_map *apic_map; - bool apic_map_dirty; + atomic_t apic_map_dirty; bool apic_access_page_done; unsigned long apicv_inhibit_reasons; diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 6dc177da19da..5bf72fc86a8e 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -169,6 +169,18 @@ static void kvm_apic_map_free(struct rcu_head *rcu) kvfree(map); } +/* + * CLEAN -> DIRTY and UPDATE_IN_PROGRESS -> DIRTY changes happen without a lock. + * + * DIRTY -> UPDATE_IN_PROGRESS and UPDATE_IN_PROGRESS -> CLEAN happen with + * apic_map_lock_held. + */ +enum { + CLEAN, + UPDATE_IN_PROGRESS, + DIRTY +}; + void kvm_recalculate_apic_map(struct kvm *kvm) { struct kvm_apic_map *new, *old = NULL; @@ -176,17 +188,17 @@ void kvm_recalculate_apic_map(struct kvm *kvm) int i; u32 max_id = 255; /* enough space for any xAPIC ID */ - if (!kvm->arch.apic_map_dirty) { - /* - * Read kvm->arch.apic_map_dirty before - * kvm->arch.apic_map - */ - smp_rmb(); + /* Read kvm->arch.apic_map_dirty before kvm->arch.apic_map. */ + if (atomic_read_acquire(&kvm->arch.apic_map_dirty) == CLEAN) return; - } mutex_lock(&kvm->arch.apic_map_lock); - if (!kvm->arch.apic_map_dirty) { + /* + * Read kvm->arch.apic_map_dirty before kvm->arch.apic_map + * (if clean) or the APIC registers (if dirty). + */ + if (atomic_cmpxchg_acquire(&kvm->arch.apic_map_dirty, + DIRTY, UPDATE_IN_PROGRESS) == CLEAN) { /* Someone else has updated the map. */ mutex_unlock(&kvm->arch.apic_map_lock); return; @@ -256,11 +268,11 @@ out: lockdep_is_held(&kvm->arch.apic_map_lock)); rcu_assign_pointer(kvm->arch.apic_map, new); /* - * Write kvm->arch.apic_map before - * clearing apic->apic_map_dirty + * Write kvm->arch.apic_map before clearing apic->apic_map_dirty. + * If another update has come in, leave it DIRTY. */ - smp_wmb(); - kvm->arch.apic_map_dirty = false; + atomic_cmpxchg_release(&kvm->arch.apic_map_dirty, + UPDATE_IN_PROGRESS, CLEAN); mutex_unlock(&kvm->arch.apic_map_lock); if (old) @@ -282,20 +294,20 @@ static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val) else static_key_slow_inc(&apic_sw_disabled.key); - apic->vcpu->kvm->arch.apic_map_dirty = true; + atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); } } static inline void kvm_apic_set_xapic_id(struct kvm_lapic *apic, u8 id) { kvm_lapic_set_reg(apic, APIC_ID, id << 24); - apic->vcpu->kvm->arch.apic_map_dirty = true; + atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); } static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id) { kvm_lapic_set_reg(apic, APIC_LDR, id); - apic->vcpu->kvm->arch.apic_map_dirty = true; + atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); } static inline u32 kvm_apic_calc_x2apic_ldr(u32 id) @@ -311,7 +323,7 @@ static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id) kvm_lapic_set_reg(apic, APIC_ID, id); kvm_lapic_set_reg(apic, APIC_LDR, ldr); - apic->vcpu->kvm->arch.apic_map_dirty = true; + atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); } static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type) @@ -1976,7 +1988,7 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) case APIC_DFR: if (!apic_x2apic_mode(apic)) { kvm_lapic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF); - apic->vcpu->kvm->arch.apic_map_dirty = true; + atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); } else ret = 1; break; @@ -2232,7 +2244,7 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) static_key_slow_dec_deferred(&apic_hw_disabled); } else { static_key_slow_inc(&apic_hw_disabled.key); - vcpu->kvm->arch.apic_map_dirty = true; + atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); } } @@ -2273,7 +2285,6 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) if (!apic) return; - vcpu->kvm->arch.apic_map_dirty = false; /* Stop the timer in case it's a reset to an active apic */ hrtimer_cancel(&apic->lapic_timer.timer); @@ -2567,7 +2578,7 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) } memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s)); - apic->vcpu->kvm->arch.apic_map_dirty = true; + atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); kvm_recalculate_apic_map(vcpu->kvm); kvm_apic_set_version(vcpu); -- cgit v1.2.3 From 312d16c7c06174f44f96ef4a61c2936e6e360414 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 22 Jun 2020 17:14:35 +0200 Subject: KVM: x86/mmu: Avoid mixing gpa_t with gfn_t in walk_addr_generic() translate_gpa() returns a GPA, assigning it to 'real_gfn' seems obviously wrong. There is no real issue because both 'gpa_t' and 'gfn_t' are u64 and we don't use the value in 'real_gfn' as a GFN, we do real_gfn = gpa_to_gfn(real_gfn); instead. 'If you see a "buffalo" sign on an elephant's cage, do not trust your eyes', but let's fix it for good. No functional change intended. Signed-off-by: Vitaly Kuznetsov Message-Id: <20200622151435.752560-1-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/paging_tmpl.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index a6d484ea110b..58234bfaca07 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -360,7 +360,6 @@ retry_walk: ++walker->level; do { - gfn_t real_gfn; unsigned long host_addr; pt_access = pte_access; @@ -375,7 +374,7 @@ retry_walk: walker->table_gfn[walker->level - 1] = table_gfn; walker->pte_gpa[walker->level - 1] = pte_gpa; - real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn), + real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn), nested_access, &walker->fault); @@ -389,12 +388,10 @@ retry_walk: * information to fix the exit_qualification or exit_info_1 * fields. */ - if (unlikely(real_gfn == UNMAPPED_GVA)) + if (unlikely(real_gpa == UNMAPPED_GVA)) return 0; - real_gfn = gpa_to_gfn(real_gfn); - - host_addr = kvm_vcpu_gfn_to_hva_prot(vcpu, real_gfn, + host_addr = kvm_vcpu_gfn_to_hva_prot(vcpu, gpa_to_gfn(real_gpa), &walker->pte_writable[walker->level - 1]); if (unlikely(kvm_is_error_hva(host_addr))) goto error; -- cgit v1.2.3 From 2dbebf7ae1ed9a420d954305e2c9d5ed39ec57c3 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 22 Jun 2020 14:58:29 -0700 Subject: KVM: nVMX: Plumb L2 GPA through to PML emulation Explicitly pass the L2 GPA to kvm_arch_write_log_dirty(), which for all intents and purposes is vmx_write_pml_buffer(), instead of having the latter pull the GPA from vmcs.GUEST_PHYSICAL_ADDRESS. If the dirty bit update is the result of KVM emulation (rare for L2), then the GPA in the VMCS may be stale and/or hold a completely unrelated GPA. Fixes: c5f983f6e8455 ("nVMX: Implement emulated Page Modification Logging") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20200622215832.22090-2-sean.j.christopherson@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/mmu.h | 2 +- arch/x86/kvm/mmu/mmu.c | 4 ++-- arch/x86/kvm/mmu/paging_tmpl.h | 7 ++++--- arch/x86/kvm/vmx/vmx.c | 6 +++--- 5 files changed, 11 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f852ee350beb..be5363b21540 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1220,7 +1220,7 @@ struct kvm_x86_ops { void (*enable_log_dirty_pt_masked)(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t offset, unsigned long mask); - int (*write_log_dirty)(struct kvm_vcpu *vcpu); + int (*write_log_dirty)(struct kvm_vcpu *vcpu, gpa_t l2_gpa); /* pmu operations of sub-arch */ const struct kvm_pmu_ops *pmu_ops; diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 0ad06bfe2c2c..444bb9c54548 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -222,7 +222,7 @@ void kvm_mmu_gfn_disallow_lpage(struct kvm_memory_slot *slot, gfn_t gfn); void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn); bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm, struct kvm_memory_slot *slot, u64 gfn); -int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu); +int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu, gpa_t l2_gpa); int kvm_mmu_post_init_vm(struct kvm *kvm); void kvm_mmu_pre_destroy_vm(struct kvm *kvm); diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index fdd05c233308..76817d13c86e 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -1745,10 +1745,10 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, * Emulate arch specific page modification logging for the * nested hypervisor */ -int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu) +int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu, gpa_t l2_gpa) { if (kvm_x86_ops.write_log_dirty) - return kvm_x86_ops.write_log_dirty(vcpu); + return kvm_x86_ops.write_log_dirty(vcpu, l2_gpa); return 0; } diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index 58234bfaca07..bd70ece1ef8b 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -235,7 +235,7 @@ static inline unsigned FNAME(gpte_access)(u64 gpte) static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, struct guest_walker *walker, - int write_fault) + gpa_t addr, int write_fault) { unsigned level, index; pt_element_t pte, orig_pte; @@ -260,7 +260,7 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu, !(pte & PT_GUEST_DIRTY_MASK)) { trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); #if PTTYPE == PTTYPE_EPT - if (kvm_arch_write_log_dirty(vcpu)) + if (kvm_arch_write_log_dirty(vcpu, addr)) return -EINVAL; #endif pte |= PT_GUEST_DIRTY_MASK; @@ -454,7 +454,8 @@ retry_walk: (PT_GUEST_DIRTY_SHIFT - PT_GUEST_ACCESSED_SHIFT); if (unlikely(!accessed_dirty)) { - ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, write_fault); + ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, + addr, write_fault); if (unlikely(ret < 0)) goto error; else if (ret) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index b1a23ad986ff..ad0ac8bc85d9 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7500,11 +7500,11 @@ static void vmx_flush_log_dirty(struct kvm *kvm) kvm_flush_pml_buffers(kvm); } -static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu) +static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu, gpa_t gpa) { struct vmcs12 *vmcs12; struct vcpu_vmx *vmx = to_vmx(vcpu); - gpa_t gpa, dst; + gpa_t dst; if (is_guest_mode(vcpu)) { WARN_ON_ONCE(vmx->nested.pml_full); @@ -7523,7 +7523,7 @@ static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu) return 1; } - gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS) & ~0xFFFull; + gpa &= ~0xFFFull; dst = vmcs12->pml_address + sizeof(u64) * vmcs12->guest_pml_index; if (kvm_write_guest_page(vcpu->kvm, gpa_to_gfn(dst), &gpa, -- cgit v1.2.3 From bf09fb6cba4f7099620cc9ed32d94c27c4af992e Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 22 Jun 2020 17:51:35 -0700 Subject: KVM: VMX: Stop context switching MSR_IA32_UMWAIT_CONTROL Remove support for context switching between the guest's and host's desired UMWAIT_CONTROL. Propagating the guest's value to hardware isn't required for correct functionality, e.g. KVM intercepts reads and writes to the MSR, and the latency effects of the settings controlled by the MSR are not architecturally visible. As a general rule, KVM should not allow the guest to control power management settings unless explicitly enabled by userspace, e.g. see KVM_CAP_X86_DISABLE_EXITS. E.g. Intel's SDM explicitly states that C0.2 can improve the performance of SMT siblings. A devious guest could disable C0.2 so as to improve the performance of their workloads at the detriment to workloads running in the host or on other VMs. Wholesale removal of UMWAIT_CONTROL context switching also fixes a race condition where updates from the host may cause KVM to enter the guest with the incorrect value. Because updates are are propagated to all CPUs via IPI (SMP function callback), the value in hardware may be stale with respect to the cached value and KVM could enter the guest with the wrong value in hardware. As above, the guest can't observe the bad value, but it's a weird and confusing wart in the implementation. Removal also fixes the unnecessary usage of VMX's atomic load/store MSR lists. Using the lists is only necessary for MSRs that are required for correct functionality immediately upon VM-Enter/VM-Exit, e.g. EFER on old hardware, or for MSRs that need to-the-uop precision, e.g. perf related MSRs. For UMWAIT_CONTROL, the effects are only visible in the kernel via TPAUSE/delay(), and KVM doesn't do any form of delay in vcpu_vmx_run(). Using the atomic lists is undesirable as they are more expensive than direct RDMSR/WRMSR. Furthermore, even if giving the guest control of the MSR is legitimate, e.g. in pass-through scenarios, it's not clear that the benefits would outweigh the overhead. E.g. saving and restoring an MSR across a VMX roundtrip costs ~250 cycles, and if the guest diverged from the host that cost would be paid on every run of the guest. In other words, if there is a legitimate use case then it should be enabled by a new per-VM capability. Note, KVM still needs to emulate MSR_IA32_UMWAIT_CONTROL so that it can correctly expose other WAITPKG features to the guest, e.g. TPAUSE, UMWAIT and UMONITOR. Fixes: 6e3ba4abcea56 ("KVM: vmx: Emulate MSR IA32_UMWAIT_CONTROL") Cc: stable@vger.kernel.org Cc: Jingqi Liu Cc: Tao Xu Signed-off-by: Sean Christopherson Message-Id: <20200623005135.10414-1-sean.j.christopherson@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/mwait.h | 2 -- arch/x86/kernel/cpu/umwait.c | 6 ------ arch/x86/kvm/vmx/vmx.c | 18 ------------------ 3 files changed, 26 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index 73d997aa2966..e039a933aca3 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -25,8 +25,6 @@ #define TPAUSE_C01_STATE 1 #define TPAUSE_C02_STATE 0 -u32 get_umwait_control_msr(void); - static inline void __monitor(const void *eax, unsigned long ecx, unsigned long edx) { diff --git a/arch/x86/kernel/cpu/umwait.c b/arch/x86/kernel/cpu/umwait.c index 300e3fd5ade3..ec8064c0ae03 100644 --- a/arch/x86/kernel/cpu/umwait.c +++ b/arch/x86/kernel/cpu/umwait.c @@ -18,12 +18,6 @@ */ static u32 umwait_control_cached = UMWAIT_CTRL_VAL(100000, UMWAIT_C02_ENABLE); -u32 get_umwait_control_msr(void) -{ - return umwait_control_cached; -} -EXPORT_SYMBOL_GPL(get_umwait_control_msr); - /* * Cache the original IA32_UMWAIT_CONTROL MSR value which is configured by * hardware or BIOS before kernel boot. diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index ad0ac8bc85d9..cb22f33bf1d8 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6606,23 +6606,6 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx) msrs[i].host, false); } -static void atomic_switch_umwait_control_msr(struct vcpu_vmx *vmx) -{ - u32 host_umwait_control; - - if (!vmx_has_waitpkg(vmx)) - return; - - host_umwait_control = get_umwait_control_msr(); - - if (vmx->msr_ia32_umwait_control != host_umwait_control) - add_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL, - vmx->msr_ia32_umwait_control, - host_umwait_control, false); - else - clear_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL); -} - static void vmx_update_hv_timer(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -6729,7 +6712,6 @@ reenter_guest: pt_guest_enter(vmx); atomic_switch_perf_msrs(vmx); - atomic_switch_umwait_control_msr(vmx); if (enable_preemption_timer) vmx_update_hv_timer(vcpu); -- cgit v1.2.3 From 04a2c05179b732a4c097f0a9c701ef4c9a37e1e3 Mon Sep 17 00:00:00 2001 From: Frieder Schrempf Date: Thu, 28 May 2020 14:43:42 +0000 Subject: ARM: dts: imx6ul-kontron: Move watchdog from Kontron i.MX6UL/ULL board to SoM The watchdog's WDOG_ANY signal is used to trigger a POR of the SoC, if a soft reset is issued. As the SoM hardware connects the WDOG_ANY and the POR signals, the watchdog node itself and the pin configuration should be part of the common SoM devicetree. Let's move it from the baseboard's devicetree to its proper place. Fixes: 1ea4b76cdfde ("ARM: dts: imx6ul-kontron-n6310: Add Kontron i.MX6UL N6310 SoM and boards") Cc: stable@vger.kernel.org Signed-off-by: Frieder Schrempf Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6ul-kontron-n6x1x-s.dtsi | 13 ------------- arch/arm/boot/dts/imx6ul-kontron-n6x1x-som-common.dtsi | 13 +++++++++++++ 2 files changed, 13 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/imx6ul-kontron-n6x1x-s.dtsi b/arch/arm/boot/dts/imx6ul-kontron-n6x1x-s.dtsi index f05e91841202..53a25fba34f6 100644 --- a/arch/arm/boot/dts/imx6ul-kontron-n6x1x-s.dtsi +++ b/arch/arm/boot/dts/imx6ul-kontron-n6x1x-s.dtsi @@ -232,13 +232,6 @@ status = "okay"; }; -&wdog1 { - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_wdog>; - fsl,ext-reset-output; - status = "okay"; -}; - &iomuxc { pinctrl-0 = <&pinctrl_reset_out &pinctrl_gpio>; @@ -409,10 +402,4 @@ MX6UL_PAD_NAND_DATA03__USDHC2_DATA3 0x170f9 >; }; - - pinctrl_wdog: wdoggrp { - fsl,pins = < - MX6UL_PAD_GPIO1_IO09__WDOG1_WDOG_ANY 0x30b0 - >; - }; }; diff --git a/arch/arm/boot/dts/imx6ul-kontron-n6x1x-som-common.dtsi b/arch/arm/boot/dts/imx6ul-kontron-n6x1x-som-common.dtsi index a17af4d9bfdf..fc316408721d 100644 --- a/arch/arm/boot/dts/imx6ul-kontron-n6x1x-som-common.dtsi +++ b/arch/arm/boot/dts/imx6ul-kontron-n6x1x-som-common.dtsi @@ -57,6 +57,13 @@ status = "okay"; }; +&wdog1 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_wdog>; + fsl,ext-reset-output; + status = "okay"; +}; + &iomuxc { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_reset_out>; @@ -106,4 +113,10 @@ MX6UL_PAD_SNVS_TAMPER9__GPIO5_IO09 0x1b0b0 >; }; + + pinctrl_wdog: wdoggrp { + fsl,pins = < + MX6UL_PAD_GPIO1_IO09__WDOG1_WDOG_ANY 0x30b0 + >; + }; }; -- cgit v1.2.3 From d22a16cc92e04d053fd807ef3587e4f135e4206f Mon Sep 17 00:00:00 2001 From: Frieder Schrempf Date: Thu, 28 May 2020 14:43:43 +0000 Subject: ARM: dts: imx6ul-kontron: Change WDOG_ANY signal from push-pull to open-drain The WDOG_ANY signal is connected to the RESET_IN signal of the SoM and baseboard. It is currently configured as push-pull, which means that if some external device like a programmer wants to assert the RESET_IN signal by pulling it to ground, it drives against the high level WDOG_ANY output of the SoC. To fix this we set the WDOG_ANY signal to open-drain configuration. That way we make sure that the RESET_IN can be asserted by the watchdog as well as by external devices. Fixes: 1ea4b76cdfde ("ARM: dts: imx6ul-kontron-n6310: Add Kontron i.MX6UL N6310 SoM and boards") Cc: stable@vger.kernel.org Signed-off-by: Frieder Schrempf Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6ul-kontron-n6x1x-som-common.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/imx6ul-kontron-n6x1x-som-common.dtsi b/arch/arm/boot/dts/imx6ul-kontron-n6x1x-som-common.dtsi index fc316408721d..61ba21a605a8 100644 --- a/arch/arm/boot/dts/imx6ul-kontron-n6x1x-som-common.dtsi +++ b/arch/arm/boot/dts/imx6ul-kontron-n6x1x-som-common.dtsi @@ -116,7 +116,7 @@ pinctrl_wdog: wdoggrp { fsl,pins = < - MX6UL_PAD_GPIO1_IO09__WDOG1_WDOG_ANY 0x30b0 + MX6UL_PAD_GPIO1_IO09__WDOG1_WDOG_ANY 0x18b0 >; }; }; -- cgit v1.2.3 From bf10bd0be53282183f374af23577b18b5fbf7801 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Tue, 16 Jun 2020 15:33:07 +0800 Subject: KVM: X86: Fix MSR range of APIC registers in X2APIC mode Only MSR address range 0x800 through 0x8ff is architecturally reserved and dedicated for accessing APIC registers in x2APIC mode. Fixes: 0105d1a52640 ("KVM: x2apic interface to lapic") Signed-off-by: Xiaoyao Li Message-Id: <20200616073307.16440-1-xiaoyao.li@intel.com> Cc: stable@vger.kernel.org Reviewed-by: Sean Christopherson Reviewed-by: Jim Mattson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 00c88c2f34e4..29d9b078ce69 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2856,7 +2856,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return kvm_mtrr_set_msr(vcpu, msr, data); case MSR_IA32_APICBASE: return kvm_set_apic_base(vcpu, msr_info); - case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff: + case APIC_BASE_MSR ... APIC_BASE_MSR + 0xff: return kvm_x2apic_msr_write(vcpu, msr, data); case MSR_IA32_TSCDEADLINE: kvm_set_lapic_tscdeadline_msr(vcpu, data); @@ -3196,7 +3196,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_APICBASE: msr_info->data = kvm_get_apic_base(vcpu); break; - case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff: + case APIC_BASE_MSR ... APIC_BASE_MSR + 0xff: return kvm_x2apic_msr_read(vcpu, msr_info->index, &msr_info->data); case MSR_IA32_TSCDEADLINE: msr_info->data = kvm_get_lapic_tscdeadline_msr(vcpu); -- cgit v1.2.3 From 26769f96e6231095f6b1cc3090c903280d44bb57 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Tue, 16 Jun 2020 08:47:41 -0300 Subject: KVM: x86: allow TSC to differ by NTP correction bounds without TSC scaling The Linux TSC calibration procedure is subject to small variations (its common to see +-1 kHz difference between reboots on a given CPU, for example). So migrating a guest between two hosts with identical processor can fail, in case of a small variation in calibrated TSC between them. Without TSC scaling, the current kernel interface will either return an error (if user_tsc_khz <= tsc_khz) or enable TSC catchup mode. This change enables the following TSC tolerance check to accept KVM_SET_TSC_KHZ within tsc_tolerance_ppm (which is 250ppm by default). /* * Compute the variation in TSC rate which is acceptable * within the range of tolerance and decide if the * rate being applied is within that bounds of the hardware * rate. If so, no scaling or compensation need be done. */ thresh_lo = adjust_tsc_khz(tsc_khz, -tsc_tolerance_ppm); thresh_hi = adjust_tsc_khz(tsc_khz, tsc_tolerance_ppm); if (user_tsc_khz < thresh_lo || user_tsc_khz > thresh_hi) { pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", user_tsc_khz, thresh_lo, thresh_hi); use_scaling = 1; } NTP daemon in the guest can correct this difference (NTP can correct upto 500ppm). Signed-off-by: Marcelo Tosatti Message-Id: <20200616114741.GA298183@fuller.cnet> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 29d9b078ce69..3b92db412335 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4603,7 +4603,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = -EINVAL; user_tsc_khz = (u32)arg; - if (user_tsc_khz >= kvm_max_guest_tsc_khz) + if (kvm_has_tsc_control && + user_tsc_khz >= kvm_max_guest_tsc_khz) goto out; if (user_tsc_khz == 0) -- cgit v1.2.3 From e4553b4976d1178c13da295cb5c7b21f55baf8f9 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 16 Jun 2020 20:41:23 -0700 Subject: KVM: VMX: Remove vcpu_vmx's defunct copy of host_pkru Remove vcpu_vmx.host_pkru, which got left behind when PKRU support was moved to common x86 code. No functional change intended. Fixes: 37486135d3a7b ("KVM: x86: Fix pkru save/restore when guest CR4.PKE=0, move it to x86.c") Signed-off-by: Sean Christopherson Message-Id: <20200617034123.25647-1-sean.j.christopherson@intel.com> Reviewed-by: Vitaly Kuznetsov Reviewed-by: Jim Mattson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 8a83b5edc820..639798e4a6ca 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -288,8 +288,6 @@ struct vcpu_vmx { u64 current_tsc_ratio; - u32 host_pkru; - unsigned long host_debugctlmsr; /* -- cgit v1.2.3 From a3f574cd65487cd993f79ab235d70229d9302c1e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 23 Jun 2020 10:44:08 +0100 Subject: KVM: arm64: vgic-v4: Plug race between non-residency and v4.1 doorbell When making a vPE non-resident because it has hit a blocking WFI, the doorbell can fire at any time after the write to the RD. Crucially, it can fire right between the write to GICR_VPENDBASER and the write to the pending_last field in the its_vpe structure. This means that we would overwrite pending_last with stale data, and potentially not wakeup until some unrelated event (such as a timer interrupt) puts the vPE back on the CPU. GICv4 isn't affected by this as we actively mask the doorbell on entering the guest, while GICv4.1 automatically manages doorbell delivery without any hypervisor-driven masking. Use the vpe_lock to synchronize such update, which solves the problem altogether. Fixes: ae699ad348cdc ("irqchip/gic-v4.1: Move doorbell management to the GICv4 abstraction layer") Reported-by: Zenghui Yu Signed-off-by: Marc Zyngier --- arch/arm64/kvm/vgic/vgic-v4.c | 8 ++++++++ drivers/irqchip/irq-gic-v3-its.c | 8 ++++++++ 2 files changed, 16 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c index 27ac833e5ec7..b5fa73c9fd35 100644 --- a/arch/arm64/kvm/vgic/vgic-v4.c +++ b/arch/arm64/kvm/vgic/vgic-v4.c @@ -90,7 +90,15 @@ static irqreturn_t vgic_v4_doorbell_handler(int irq, void *info) !irqd_irq_disabled(&irq_to_desc(irq)->irq_data)) disable_irq_nosync(irq); + /* + * The v4.1 doorbell can fire concurrently with the vPE being + * made non-resident. Ensure we only update pending_last + * *after* the non-residency sequence has completed. + */ + raw_spin_lock(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vpe_lock); vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last = true; + raw_spin_unlock(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vpe_lock); + kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); kvm_vcpu_kick(vcpu); diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index cd685f521c77..205f69592471 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -4054,16 +4054,24 @@ static void its_vpe_4_1_deschedule(struct its_vpe *vpe, u64 val; if (info->req_db) { + unsigned long flags; + /* * vPE is going to block: make the vPE non-resident with * PendingLast clear and DB set. The GIC guarantees that if * we read-back PendingLast clear, then a doorbell will be * delivered when an interrupt comes. + * + * Note the locking to deal with the concurrent update of + * pending_last from the doorbell interrupt handler that can + * run concurrently. */ + raw_spin_lock_irqsave(&vpe->vpe_lock, flags); val = its_clear_vpend_valid(vlpi_base, GICR_VPENDBASER_PendingLast, GICR_VPENDBASER_4_1_DB); vpe->pending_last = !!(val & GICR_VPENDBASER_PendingLast); + raw_spin_unlock_irqrestore(&vpe->vpe_lock, flags); } else { /* * We're not blocking, so just make the vPE non-resident -- cgit v1.2.3 From e64a1618af8566d20991607913a4d90d39b30118 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Wed, 17 Jun 2020 17:30:28 +0200 Subject: s390: fix system call single stepping When single stepping an svc instruction on s390, the kernel is entered with a PER program check interruption. The program check handler than jumps to the system call handler by reloading the PSW. The code didn't set GPR13 to the thread pointer in struct task_struct. This made the kernel access invalid memory while trying to fetch the syscall function address. Fix this by always assigned GPR13 after .Lsysc_per. Fixes: 0b0ed657fe00 ("s390: remove critical section cleanup from entry.S") Reported-and-tested-by: Christian Borntraeger Signed-off-by: Sven Schnelle Reviewed-by: Heiko Carstens Signed-off-by: Heiko Carstens --- arch/s390/kernel/entry.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 496f74d98473..969b35b177dd 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -378,9 +378,9 @@ ENTRY(system_call) stmg %r8,%r15,__LC_SAVE_AREA_SYNC BPOFF lg %r12,__LC_CURRENT - lghi %r13,__TASK_thread lghi %r14,_PIF_SYSCALL .Lsysc_per: + lghi %r13,__TASK_thread lg %r15,__LC_KERNEL_STACK la %r11,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs UPDATE_VTIME %r8,%r9,__LC_SYNC_ENTER_TIMER -- cgit v1.2.3 From 998f5bbe3dbdab81c1cfb1aef7c3892f5d24f6c7 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 17 Jun 2020 15:05:49 +0200 Subject: s390/kasan: fix early pgm check handler execution Currently if early_pgm_check_handler is called it ends up in pgm check loop. The problem is that early_pgm_check_handler is instrumented by KASAN but executed without DAT flag enabled which leads to addressing exception when KASAN checks try to access shadow memory. Fix that by executing early handlers with DAT flag on under KASAN as expected. Reported-and-tested-by: Alexander Egorenkov Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/kernel/early.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index cd241ee66eff..078277231858 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -170,6 +170,8 @@ static noinline __init void setup_lowcore_early(void) psw_t psw; psw.mask = PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA; + if (IS_ENABLED(CONFIG_KASAN)) + psw.mask |= PSW_MASK_DAT; psw.addr = (unsigned long) s390_base_ext_handler; S390_lowcore.external_new_psw = psw; psw.addr = (unsigned long) s390_base_pgm_handler; -- cgit v1.2.3 From 827c4913923e0b441ba07ba4cc41e01181102303 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 31 Mar 2020 05:57:23 -0400 Subject: s390/debug: avoid kernel warning on too large number of pages When specifying insanely large debug buffers a kernel warning is printed. The debug code does handle the error gracefully, though. Instead of duplicating the check let us silence the warning to avoid crashes when panic_on_warn is used. Signed-off-by: Christian Borntraeger Reviewed-by: Heiko Carstens Signed-off-by: Heiko Carstens --- arch/s390/kernel/debug.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 636446003a06..263075a1af36 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -198,9 +198,10 @@ static debug_entry_t ***debug_areas_alloc(int pages_per_area, int nr_areas) if (!areas) goto fail_malloc_areas; for (i = 0; i < nr_areas; i++) { + /* GFP_NOWARN to avoid user triggerable WARN, we handle fails */ areas[i] = kmalloc_array(pages_per_area, sizeof(debug_entry_t *), - GFP_KERNEL); + GFP_KERNEL | __GFP_NOWARN); if (!areas[i]) goto fail_malloc_areas2; for (j = 0; j < pages_per_area; j++) { -- cgit v1.2.3 From 87676cfca14171fc4c99d96ae2f3e87780488ac4 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 22 Jun 2020 20:24:22 +0100 Subject: arm64: vdso: Disable dwarf unwinding through the sigreturn trampoline Commit 7e9f5e6629f6 ("arm64: vdso: Add --eh-frame-hdr to ldflags") results in a .eh_frame_hdr section for the vDSO, which in turn causes the libgcc unwinder to unwind out of signal handlers using the .eh_frame information populated by our .cfi directives. In conjunction with a4eb355a3fda ("arm64: vdso: Fix CFI directives in sigreturn trampoline"), this has been shown to cause segmentation faults originating from within the unwinder during thread cancellation: | Thread 14 "virtio-net-rx" received signal SIGSEGV, Segmentation fault. | 0x0000000000435e24 in uw_frame_state_for () | (gdb) bt | #0 0x0000000000435e24 in uw_frame_state_for () | #1 0x0000000000436e88 in _Unwind_ForcedUnwind_Phase2 () | #2 0x00000000004374d8 in _Unwind_ForcedUnwind () | #3 0x0000000000428400 in __pthread_unwind (buf=) at unwind.c:121 | #4 0x0000000000429808 in __do_cancel () at ./pthreadP.h:304 | #5 sigcancel_handler (sig=32, si=0xffff33c743f0, ctx=) at nptl-init.c:200 | #6 sigcancel_handler (sig=, si=0xffff33c743f0, ctx=) at nptl-init.c:165 | #7 | #8 futex_wait_cancelable (private=0, expected=0, futex_word=0x3890b708) at ../sysdeps/unix/sysv/linux/futex-internal.h:88 After considerable bashing of heads, it appears that our CFI directives for unwinding out of the sigreturn trampoline are only processed by libgcc when both a .eh_frame_hdr section is present *and* the mysterious NOP is covered by an entry in .eh_frame. With both of these now in place, it has highlighted that our CFI directives are not comprehensive enough to restore the stack pointer of the interrupted context. This results in libgcc falling back to an arm64-specific unwinder after computing a bogus PC value from the unwind tables. The unwinder promptly dereferences this bogus address in an attempt to see if the pointed-to instruction sequence looks like the sigreturn trampoline. Restore the old unwind behaviour, which relied solely on heuristics in the unwinder, by removing the .eh_frame_hdr section from the vDSO and commenting out the insufficient CFI directives for now. Add comments to explain the current, miserable state of affairs. Cc: Tamas Zsoldos Cc: Szabolcs Nagy Cc: Catalin Marinas Cc: Daniel Kiss Acked-by: Dave Martin Reviewed-by: Vincenzo Frascino Reviewed-by: Ard Biesheuvel Reported-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/kernel/vdso/Makefile | 2 +- arch/arm64/kernel/vdso/sigreturn.S | 54 ++++++++++++++++++++++++-------------- 2 files changed, 35 insertions(+), 21 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index 556d424c6f52..1e5a940532da 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -24,7 +24,7 @@ btildflags-$(CONFIG_ARM64_BTI_KERNEL) += -z force-bti # routines, as x86 does (see 6f121e548f83 ("x86, vdso: Reimplement vdso.so # preparation in build-time C")). ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv \ - -Bsymbolic --eh-frame-hdr --build-id -n $(btildflags-y) -T + -Bsymbolic --no-eh-frame-hdr --build-id -n $(btildflags-y) -T ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18 ccflags-y += -DDISABLE_BRANCH_PROFILING diff --git a/arch/arm64/kernel/vdso/sigreturn.S b/arch/arm64/kernel/vdso/sigreturn.S index 620a3ef837b7..0e18729abc3b 100644 --- a/arch/arm64/kernel/vdso/sigreturn.S +++ b/arch/arm64/kernel/vdso/sigreturn.S @@ -18,29 +18,40 @@ .text +/* + * NOTE!!! You may notice that all of the .cfi directives in this file have + * been commented out. This is because they have been shown to trigger segfaults + * in libgcc when unwinding out of a SIGCANCEL handler to invoke pthread + * cleanup handlers during the thread cancellation dance. By omitting the + * directives, we trigger an arm64-specific fallback path in the unwinder which + * recognises the signal frame and restores many of the registers directly from + * the sigcontext. Re-enabling the cfi directives here therefore needs to be + * much more comprehensive to reduce the risk of further regressions. + */ + /* Ensure that the mysterious NOP can be associated with a function. */ - .cfi_startproc +// .cfi_startproc /* - * .cfi_signal_frame causes the corresponding Frame Description Entry in the - * .eh_frame section to be annotated as a signal frame. This allows DWARF - * unwinders (e.g. libstdc++) to implement _Unwind_GetIPInfo(), which permits - * unwinding out of the signal trampoline without the need for the mysterious - * NOP. + * .cfi_signal_frame causes the corresponding Frame Description Entry (FDE) in + * the .eh_frame section to be annotated as a signal frame. This allows DWARF + * unwinders (e.g. libstdc++) to implement _Unwind_GetIPInfo() and identify + * the next frame using the unmodified return address instead of subtracting 1, + * which may yield the wrong FDE. */ - .cfi_signal_frame +// .cfi_signal_frame /* * Tell the unwinder where to locate the frame record linking back to the - * interrupted context. We don't provide unwind info for registers other - * than the frame pointer and the link register here; in practice, this - * is sufficient for unwinding in C/C++ based runtimes and the values in - * the sigcontext may have been modified by this point anyway. Debuggers + * interrupted context. We don't provide unwind info for registers other than + * the frame pointer and the link register here; in practice, this is likely to + * be insufficient for unwinding in C/C++ based runtimes, especially without a + * means to restore the stack pointer. Thankfully, unwinders and debuggers * already have baked-in strategies for attempting to unwind out of signals. */ - .cfi_def_cfa x29, 0 - .cfi_offset x29, 0 * 8 - .cfi_offset x30, 1 * 8 +// .cfi_def_cfa x29, 0 +// .cfi_offset x29, 0 * 8 +// .cfi_offset x30, 1 * 8 /* * This mysterious NOP is required for some unwinders (e.g. libc++) that @@ -51,16 +62,19 @@ nop // Mysterious NOP /* - * GDB relies on being able to identify the sigreturn instruction sequence to - * unwind from signal handlers. We cannot, therefore, use SYM_FUNC_START() - * here, as it will emit a BTI C instruction and break the unwinder. Thankfully, - * this function is only ever called from a RET and so omitting the landing pad - * is perfectly fine. + * GDB, libgcc and libunwind rely on being able to identify the sigreturn + * instruction sequence to unwind from signal handlers. We cannot, therefore, + * use SYM_FUNC_START() here, as it will emit a BTI C instruction and break the + * unwinder. Thankfully, this function is only ever called from a RET and so + * omitting the landing pad is perfectly fine. */ SYM_CODE_START(__kernel_rt_sigreturn) +// PLEASE DO NOT MODIFY mov x8, #__NR_rt_sigreturn +// PLEASE DO NOT MODIFY svc #0 - .cfi_endproc +// PLEASE DO NOT MODIFY +// .cfi_endproc SYM_CODE_END(__kernel_rt_sigreturn) emit_aarch64_feature_1_and -- cgit v1.2.3 From a39060b009ca0b5b5fe0c0dab85ed437531aab52 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 22 Jun 2020 12:35:41 +0100 Subject: arm64: compat: Allow 32-bit vdso and sigpage to co-exist In preparation for removing the signal trampoline from the compat vDSO, allow the sigpage and the compat vDSO to co-exist. For the moment the vDSO signal trampoline will still be used when built. Subsequent patches will move to the sigpage consistently. Acked-by: Dave Martin Reviewed-by: Vincenzo Frascino Reviewed-by: Ard Biesheuvel Reviewed-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/include/asm/mmu.h | 3 +++ arch/arm64/kernel/Makefile | 2 -- arch/arm64/kernel/signal32.c | 2 +- arch/arm64/kernel/vdso.c | 61 ++++++++++++++++++++------------------------ 4 files changed, 32 insertions(+), 36 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 68140fdd89d6..8444df000181 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -19,6 +19,9 @@ typedef struct { atomic64_t id; +#ifdef CONFIG_COMPAT + void *sigpage; +#endif void *vdso; unsigned long flags; } mm_context_t; diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 151f28521f1e..a561cbb91d4d 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -29,9 +29,7 @@ $(obj)/%.stub.o: $(obj)/%.o FORCE obj-$(CONFIG_COMPAT) += sys32.o signal32.o \ sys_compat.o -ifneq ($(CONFIG_COMPAT_VDSO), y) obj-$(CONFIG_COMPAT) += sigreturn32.o -endif obj-$(CONFIG_KUSER_HELPERS) += kuser32.o obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o obj-$(CONFIG_MODULES) += module.o diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index 82feca6f7052..0aa0b33744de 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -371,7 +371,7 @@ static void compat_setup_return(struct pt_regs *regs, struct k_sigaction *ka, if (ka->sa.sa_flags & SA_SIGINFO) idx += 3; - retcode = (unsigned long)current->mm->context.vdso + + retcode = (unsigned long)current->mm->context.sigpage + (idx << 2) + thumb; #endif } diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 4e016574bd91..e546df0efefb 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -191,15 +191,12 @@ enum aarch32_map { #ifdef CONFIG_COMPAT_VDSO AA32_MAP_VVAR, AA32_MAP_VDSO, -#else - AA32_MAP_SIGPAGE #endif + AA32_MAP_SIGPAGE }; static struct page *aarch32_vectors_page __ro_after_init; -#ifndef CONFIG_COMPAT_VDSO static struct page *aarch32_sig_page __ro_after_init; -#endif static struct vm_special_mapping aarch32_vdso_maps[] = { [AA32_MAP_VECTORS] = { @@ -214,12 +211,11 @@ static struct vm_special_mapping aarch32_vdso_maps[] = { .name = "[vdso]", .mremap = aarch32_vdso_mremap, }, -#else +#endif /* CONFIG_COMPAT_VDSO */ [AA32_MAP_SIGPAGE] = { .name = "[sigpage]", /* ABI */ .pages = &aarch32_sig_page, }, -#endif /* CONFIG_COMPAT_VDSO */ }; static int aarch32_alloc_kuser_vdso_page(void) @@ -242,27 +238,11 @@ static int aarch32_alloc_kuser_vdso_page(void) return 0; } -#ifdef CONFIG_COMPAT_VDSO -static int __aarch32_alloc_vdso_pages(void) -{ - int ret; - - vdso_info[VDSO_ABI_AA32].dm = &aarch32_vdso_maps[AA32_MAP_VVAR]; - vdso_info[VDSO_ABI_AA32].cm = &aarch32_vdso_maps[AA32_MAP_VDSO]; - - ret = __vdso_init(VDSO_ABI_AA32); - if (ret) - return ret; - - return aarch32_alloc_kuser_vdso_page(); -} -#else -static int __aarch32_alloc_vdso_pages(void) +static int aarch32_alloc_sigpage(void) { extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[]; int sigret_sz = __aarch32_sigret_code_end - __aarch32_sigret_code_start; unsigned long sigpage; - int ret; sigpage = get_zeroed_page(GFP_ATOMIC); if (!sigpage) @@ -271,18 +251,34 @@ static int __aarch32_alloc_vdso_pages(void) memcpy((void *)sigpage, __aarch32_sigret_code_start, sigret_sz); aarch32_sig_page = virt_to_page(sigpage); flush_dcache_page(aarch32_sig_page); + return 0; +} - ret = aarch32_alloc_kuser_vdso_page(); - if (ret) - free_page(sigpage); +#ifdef CONFIG_COMPAT_VDSO +static int __aarch32_alloc_vdso_pages(void) +{ + vdso_info[VDSO_ABI_AA32].dm = &aarch32_vdso_maps[AA32_MAP_VVAR]; + vdso_info[VDSO_ABI_AA32].cm = &aarch32_vdso_maps[AA32_MAP_VDSO]; - return ret; + return __vdso_init(VDSO_ABI_AA32); } #endif /* CONFIG_COMPAT_VDSO */ static int __init aarch32_alloc_vdso_pages(void) { - return __aarch32_alloc_vdso_pages(); + int ret; + +#ifdef CONFIG_COMPAT_VDSO + ret = __aarch32_alloc_vdso_pages(); + if (ret) + return ret; +#endif + + ret = aarch32_alloc_sigpage(); + if (ret) + return ret; + + return aarch32_alloc_kuser_vdso_page(); } arch_initcall(aarch32_alloc_vdso_pages); @@ -305,7 +301,6 @@ static int aarch32_kuser_helpers_setup(struct mm_struct *mm) return PTR_ERR_OR_ZERO(ret); } -#ifndef CONFIG_COMPAT_VDSO static int aarch32_sigreturn_setup(struct mm_struct *mm) { unsigned long addr; @@ -328,12 +323,11 @@ static int aarch32_sigreturn_setup(struct mm_struct *mm) if (IS_ERR(ret)) goto out; - mm->context.vdso = (void *)addr; + mm->context.sigpage = (void *)addr; out: return PTR_ERR_OR_ZERO(ret); } -#endif /* !CONFIG_COMPAT_VDSO */ int aarch32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { @@ -352,10 +346,11 @@ int aarch32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) mm, bprm, uses_interp); -#else - ret = aarch32_sigreturn_setup(mm); + if (ret) + goto out; #endif /* CONFIG_COMPAT_VDSO */ + ret = aarch32_sigreturn_setup(mm); out: mmap_write_unlock(mm); return ret; -- cgit v1.2.3 From 8e411be6aad1387f40d60cb2c11d3260222c590b Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 22 Jun 2020 13:09:49 +0100 Subject: arm64: compat: Always use sigpage for sigreturn trampoline The 32-bit sigreturn trampoline in the compat sigpage matches the binary representation of the arch/arm/ sigpage exactly. This is important for debuggers (e.g. GDB) and unwinders (e.g. libunwind) since they rely on matching the instruction sequence in order to identify that they are unwinding through a signal. The same cannot be said for the sigreturn trampoline in the compat vDSO, which defeats the unwinder heuristics and instead attempts to use unwind directives for the unwinding. This is in contrast to arch/arm/, which never uses the vDSO for sigreturn. Ensure compatibility with arch/arm/ and existing unwinders by always using the sigpage for the sigreturn trampoline, regardless of the presence of the compat vDSO. Reviewed-by: Vincenzo Frascino Reviewed-by: Ard Biesheuvel Reviewed-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/kernel/signal32.c | 25 ------------------------- 1 file changed, 25 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index 0aa0b33744de..2f507f565c48 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -342,30 +342,6 @@ static void compat_setup_return(struct pt_regs *regs, struct k_sigaction *ka, retcode = ptr_to_compat(ka->sa.sa_restorer); } else { /* Set up sigreturn pointer */ -#ifdef CONFIG_COMPAT_VDSO - void *vdso_base = current->mm->context.vdso; - void *vdso_trampoline; - - if (ka->sa.sa_flags & SA_SIGINFO) { - if (thumb) { - vdso_trampoline = VDSO_SYMBOL(vdso_base, - compat_rt_sigreturn_thumb); - } else { - vdso_trampoline = VDSO_SYMBOL(vdso_base, - compat_rt_sigreturn_arm); - } - } else { - if (thumb) { - vdso_trampoline = VDSO_SYMBOL(vdso_base, - compat_sigreturn_thumb); - } else { - vdso_trampoline = VDSO_SYMBOL(vdso_base, - compat_sigreturn_arm); - } - } - - retcode = ptr_to_compat(vdso_trampoline) + thumb; -#else unsigned int idx = thumb << 1; if (ka->sa.sa_flags & SA_SIGINFO) @@ -373,7 +349,6 @@ static void compat_setup_return(struct pt_regs *regs, struct k_sigaction *ka, retcode = (unsigned long)current->mm->context.sigpage + (idx << 2) + thumb; -#endif } regs->regs[0] = usig; -- cgit v1.2.3 From 2d071968a4052e58681ace6488e2625b2a30a7f7 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 22 Jun 2020 13:13:58 +0100 Subject: arm64: compat: Remove 32-bit sigreturn code from the vDSO The sigreturn code in the compat vDSO is unused. Remove it. Reviewed-by: Vincenzo Frascino Reviewed-by: Ard Biesheuvel Reviewed-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/kernel/vdso32/Makefile | 1 - arch/arm64/kernel/vdso32/sigreturn.S | 58 ------------------------------------ arch/arm64/kernel/vdso32/vdso.lds.S | 12 -------- 3 files changed, 71 deletions(-) delete mode 100644 arch/arm64/kernel/vdso32/sigreturn.S (limited to 'arch') diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index 7ea1e827e505..d88148bef6b0 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -140,7 +140,6 @@ hostprogs := $(munge) c-obj-vdso := note.o c-obj-vdso-gettimeofday := vgettimeofday.o -asm-obj-vdso := sigreturn.o ifneq ($(c-gettimeofday-y),) VDSO_CFLAGS_gettimeofday_o += -include $(c-gettimeofday-y) diff --git a/arch/arm64/kernel/vdso32/sigreturn.S b/arch/arm64/kernel/vdso32/sigreturn.S deleted file mode 100644 index b0091064c3d6..000000000000 --- a/arch/arm64/kernel/vdso32/sigreturn.S +++ /dev/null @@ -1,58 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * This file provides both A32 and T32 versions, in accordance with the - * arm sigreturn code. - * - * Please read the comments in arch/arm64/kernel/vdso/sigreturn.S to - * understand some of the craziness in here. - * - * Copyright (C) 2018 ARM Limited - */ - -#include -#include -#include - - .text - - .arm - .fnstart - .save {r0-r15} - .pad #COMPAT_SIGFRAME_REGS_OFFSET - nop -SYM_CODE_START(__kernel_sigreturn_arm) - mov r7, #__NR_compat_sigreturn - svc #0 - .fnend -SYM_CODE_END(__kernel_sigreturn_arm) - - .fnstart - .save {r0-r15} - .pad #COMPAT_RT_SIGFRAME_REGS_OFFSET - nop -SYM_CODE_START(__kernel_rt_sigreturn_arm) - mov r7, #__NR_compat_rt_sigreturn - svc #0 - .fnend -SYM_CODE_END(__kernel_rt_sigreturn_arm) - - .thumb - .fnstart - .save {r0-r15} - .pad #COMPAT_SIGFRAME_REGS_OFFSET - nop -SYM_CODE_START(__kernel_sigreturn_thumb) - mov r7, #__NR_compat_sigreturn - svc #0 - .fnend -SYM_CODE_END(__kernel_sigreturn_thumb) - - .fnstart - .save {r0-r15} - .pad #COMPAT_RT_SIGFRAME_REGS_OFFSET - nop -SYM_CODE_START(__kernel_rt_sigreturn_thumb) - mov r7, #__NR_compat_rt_sigreturn - svc #0 - .fnend -SYM_CODE_END(__kernel_rt_sigreturn_thumb) diff --git a/arch/arm64/kernel/vdso32/vdso.lds.S b/arch/arm64/kernel/vdso32/vdso.lds.S index a3944927eaeb..337d03522048 100644 --- a/arch/arm64/kernel/vdso32/vdso.lds.S +++ b/arch/arm64/kernel/vdso32/vdso.lds.S @@ -64,19 +64,7 @@ VERSION __vdso_clock_gettime; __vdso_gettimeofday; __vdso_clock_getres; - __kernel_sigreturn_arm; - __kernel_sigreturn_thumb; - __kernel_rt_sigreturn_arm; - __kernel_rt_sigreturn_thumb; __vdso_clock_gettime64; local: *; }; } - -/* - * Make the sigreturn code visible to the kernel. - */ -VDSO_compat_sigreturn_arm = __kernel_sigreturn_arm; -VDSO_compat_sigreturn_thumb = __kernel_sigreturn_thumb; -VDSO_compat_rt_sigreturn_arm = __kernel_rt_sigreturn_arm; -VDSO_compat_rt_sigreturn_thumb = __kernel_rt_sigreturn_thumb; -- cgit v1.2.3 From 4dc9b282bf5fc80b1761bac467adf78cd417b777 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 19 Jun 2020 13:35:50 +0100 Subject: arm64: Depend on newer binutils when building PAC Versions of binutils prior to 2.33.1 don't understand the ELF notes that are added by modern compilers to indicate the PAC and BTI options used to build the code. This causes them to emit large numbers of warnings in the form: aarch64-linux-gnu-nm: warning: .tmp_vmlinux.kallsyms2: unsupported GNU_PROPERTY_TYPE (5) type: 0xc0000000 during the kernel build which is currently causing quite a bit of disruption for automated build testing using clang. In commit 15cd0e675f3f76b (arm64: Kconfig: ptrauth: Add binutils version check to fix mismatch) we added a dependency on binutils to avoid this issue when building with versions of GCC that emit the notes but did not do so for clang as it was believed that the existing check for .cfi_negate_ra_state was already requiring a new enough binutils. This does not appear to be the case for some versions of binutils (eg, the binutils in Debian 10) so instead refactor so we require a new enough GNU binutils in all cases other than when we are using an old GCC version that does not emit notes. Other, more exotic, combinations of tools are possible such as using clang, lld and gas together are possible and may have further problems but rather than adding further version checks it looks like the most robust thing will be to just test that we can build cleanly with the configured tools but that will require more review and discussion so do this for now to address the immediate problem disrupting build testing. Reported-by: KernelCI Reported-by: Nick Desaulniers Signed-off-by: Mark Brown Reviewed-by: Nick Desaulniers Link: https://github.com/ClangBuiltLinux/linux/issues/1054 Link: https://lore.kernel.org/r/20200619123550.48098-1-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 4ae2419c14a8..e391e6580bf7 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1518,9 +1518,9 @@ config ARM64_PTR_AUTH default y depends on !KVM || ARM64_VHE depends on (CC_HAS_SIGN_RETURN_ADDRESS || CC_HAS_BRANCH_PROT_PAC_RET) && AS_HAS_PAC - # GCC 9.1 and later inserts a .note.gnu.property section note for PAC + # Modern compilers insert a .note.gnu.property section note for PAC # which is only understood by binutils starting with version 2.33.1. - depends on !CC_IS_GCC || GCC_VERSION < 90100 || LD_VERSION >= 233010000 + depends on LD_IS_LLD || LD_VERSION >= 233010000 || (CC_IS_GCC && GCC_VERSION < 90100) depends on !CC_IS_CLANG || AS_HAS_CFI_NEGATE_RA_STATE depends on (!FUNCTION_GRAPH_TRACER || DYNAMIC_FTRACE_WITH_REGS) help -- cgit v1.2.3 From 586745f1598ccf71b0a5a6df2222dee0a865954e Mon Sep 17 00:00:00 2001 From: yu kuai Date: Thu, 4 Jun 2020 20:42:06 +0800 Subject: ARM: imx5: add missing put_device() call in imx_suspend_alloc_ocram() if of_find_device_by_node() succeed, imx_suspend_alloc_ocram() doesn't have a corresponding put_device(). Thus add a jump target to fix the exception handling for this function implementation. Fixes: 1579c7b9fe01 ("ARM: imx53: Set DDR pins to high impedance when in suspend to RAM.") Signed-off-by: yu kuai Signed-off-by: Shawn Guo --- arch/arm/mach-imx/pm-imx5.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-imx/pm-imx5.c b/arch/arm/mach-imx/pm-imx5.c index f057df813f83..e9962b48e30c 100644 --- a/arch/arm/mach-imx/pm-imx5.c +++ b/arch/arm/mach-imx/pm-imx5.c @@ -295,14 +295,14 @@ static int __init imx_suspend_alloc_ocram( if (!ocram_pool) { pr_warn("%s: ocram pool unavailable!\n", __func__); ret = -ENODEV; - goto put_node; + goto put_device; } ocram_base = gen_pool_alloc(ocram_pool, size); if (!ocram_base) { pr_warn("%s: unable to alloc ocram!\n", __func__); ret = -ENOMEM; - goto put_node; + goto put_device; } phys = gen_pool_virt_to_phys(ocram_pool, ocram_base); @@ -312,6 +312,8 @@ static int __init imx_suspend_alloc_ocram( if (virt_out) *virt_out = virt; +put_device: + put_device(&pdev->dev); put_node: of_node_put(node); -- cgit v1.2.3 From 4845446036fc9c13f43b54a65c9b757c14f5141b Mon Sep 17 00:00:00 2001 From: yu kuai Date: Thu, 4 Jun 2020 20:54:49 +0800 Subject: ARM: imx6: add missing put_device() call in imx6q_suspend_init() if of_find_device_by_node() succeed, imx6q_suspend_init() doesn't have a corresponding put_device(). Thus add a jump target to fix the exception handling for this function implementation. Signed-off-by: yu kuai Signed-off-by: Shawn Guo --- arch/arm/mach-imx/pm-imx6.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-imx/pm-imx6.c b/arch/arm/mach-imx/pm-imx6.c index dd34dff13762..40c74b4c4d73 100644 --- a/arch/arm/mach-imx/pm-imx6.c +++ b/arch/arm/mach-imx/pm-imx6.c @@ -493,14 +493,14 @@ static int __init imx6q_suspend_init(const struct imx6_pm_socdata *socdata) if (!ocram_pool) { pr_warn("%s: ocram pool unavailable!\n", __func__); ret = -ENODEV; - goto put_node; + goto put_device; } ocram_base = gen_pool_alloc(ocram_pool, MX6Q_SUSPEND_OCRAM_SIZE); if (!ocram_base) { pr_warn("%s: unable to alloc ocram!\n", __func__); ret = -ENOMEM; - goto put_node; + goto put_device; } ocram_pbase = gen_pool_virt_to_phys(ocram_pool, ocram_base); @@ -523,7 +523,7 @@ static int __init imx6q_suspend_init(const struct imx6_pm_socdata *socdata) ret = imx6_pm_get_base(&pm_info->mmdc_base, socdata->mmdc_compat); if (ret) { pr_warn("%s: failed to get mmdc base %d!\n", __func__, ret); - goto put_node; + goto put_device; } ret = imx6_pm_get_base(&pm_info->src_base, socdata->src_compat); @@ -570,7 +570,7 @@ static int __init imx6q_suspend_init(const struct imx6_pm_socdata *socdata) &imx6_suspend, MX6Q_SUSPEND_OCRAM_SIZE - sizeof(*pm_info)); - goto put_node; + goto put_device; pl310_cache_map_failed: iounmap(pm_info->gpc_base.vbase); @@ -580,6 +580,8 @@ iomuxc_map_failed: iounmap(pm_info->src_base.vbase); src_map_failed: iounmap(pm_info->mmdc_base.vbase); +put_device: + put_device(&pdev->dev); put_node: of_node_put(node); -- cgit v1.2.3 From 49a3b0e1c05ab3601100a723f7ea207dc99a492a Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 24 Jun 2020 13:23:10 +0100 Subject: arm64: vdso: Only pass --no-eh-frame-hdr when linker supports it Commit 87676cfca141 ("arm64: vdso: Disable dwarf unwinding through the sigreturn trampoline") unconditionally passes the '--no-eh-frame-hdr' option to the linker when building the native vDSO in an attempt to prevent generation of the .eh_frame_hdr section, the presence of which has been implicated in segfaults originating from the libgcc unwinder. Unfortunately, not all versions of binutils support this option, which has been shown to cause build failures in linux-next: | CALL scripts/atomic/check-atomics.sh | CALL scripts/checksyscalls.sh | LD arch/arm64/kernel/vdso/vdso.so.dbg | ld: unrecognized option '--no-eh-frame-hdr' | ld: use the --help option for usage information | arch/arm64/kernel/vdso/Makefile:64: recipe for target | 'arch/arm64/kernel/vdso/vdso.so.dbg' failed | make[1]: *** [arch/arm64/kernel/vdso/vdso.so.dbg] Error 1 | arch/arm64/Makefile:175: recipe for target 'vdso_prepare' failed | make: *** [vdso_prepare] Error 2 Only link the vDSO with '--no-eh-frame-hdr' when the linker supports it. If we end up with the section due to linker defaults, the absence of CFI information in the sigreturn trampoline will prevent the unwinder from breaking. Link: https://lore.kernel.org/r/7a7e31a8-9a7b-2428-ad83-2264f20bdc2d@hisilicon.com Fixes: 87676cfca141 ("arm64: vdso: Disable dwarf unwinding through the sigreturn trampoline") Reported-by: Shaokun Zhang Tested-by: Jon Hunter Signed-off-by: Will Deacon --- arch/arm64/kernel/vdso/Makefile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index 1e5a940532da..97d3d3632093 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -23,8 +23,9 @@ btildflags-$(CONFIG_ARM64_BTI_KERNEL) += -z force-bti # potential future proofing if we end up with internal calls to the exported # routines, as x86 does (see 6f121e548f83 ("x86, vdso: Reimplement vdso.so # preparation in build-time C")). -ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv \ - -Bsymbolic --no-eh-frame-hdr --build-id -n $(btildflags-y) -T +ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv \ + -Bsymbolic $(call ld-option, --no-eh-frame-hdr) --build-id -n \ + $(btildflags-y) -T ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18 ccflags-y += -DDISABLE_BRANCH_PROFILING -- cgit v1.2.3 From e56404e8e475c91489b2cca57f2c1b2bc5edf6b2 Mon Sep 17 00:00:00 2001 From: Alexander Popov Date: Wed, 24 Jun 2020 15:33:28 +0300 Subject: arm64: vdso: Don't use gcc plugins for building vgettimeofday.c Don't use gcc plugins for building arch/arm64/kernel/vdso/vgettimeofday.c to avoid unneeded instrumentation. Signed-off-by: Alexander Popov Link: https://lore.kernel.org/r/20200624123330.83226-4-alex.popov@linux.com Signed-off-by: Will Deacon --- arch/arm64/kernel/vdso/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index 97d3d3632093..45d5cfe46429 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -30,7 +30,7 @@ ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv \ ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18 ccflags-y += -DDISABLE_BRANCH_PROFILING -CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) +CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) $(GCC_PLUGINS_CFLAGS) KBUILD_CFLAGS += $(DISABLE_LTO) KASAN_SANITIZE := n UBSAN_SANITIZE := n -- cgit v1.2.3 From 2d21889f8b5c50f65f5162bc972b0b1626b97be2 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 24 Jun 2020 13:22:54 +0200 Subject: arm64: Don't insert a BTI instruction at inner labels Some ftrace features are broken since commit 714a8d02ca4d ("arm64: asm: Override SYM_FUNC_START when building the kernel with BTI"). For example the function_graph tracer: $ echo function_graph > /sys/kernel/debug/tracing/current_tracer [ 36.107016] WARNING: CPU: 0 PID: 115 at kernel/trace/ftrace.c:2691 ftrace_modify_all_code+0xc8/0x14c When ftrace_modify_graph_caller() attempts to write a branch at ftrace_graph_call, it finds the "BTI J" instruction inserted by SYM_INNER_LABEL() instead of a NOP, and aborts. It turns out we don't currently need the BTI landing pads inserted by SYM_INNER_LABEL: * ftrace_call and ftrace_graph_call are only used for runtime patching of the active tracer. The patched code is not reached from a branch. * install_el2_stub is reached from a CBZ instruction, which doesn't change PSTATE.BTYPE. * __guest_exit is reached from B instructions in the hyp-entry vectors, which aren't subject to BTI checks either. Remove the BTI annotation from SYM_INNER_LABEL. Fixes: 714a8d02ca4d ("arm64: asm: Override SYM_FUNC_START when building the kernel with BTI") Signed-off-by: Jean-Philippe Brucker Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20200624112253.1602786-1-jean-philippe@linaro.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/linkage.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/linkage.h b/arch/arm64/include/asm/linkage.h index 81fefd2a1d02..ba89a9af820a 100644 --- a/arch/arm64/include/asm/linkage.h +++ b/arch/arm64/include/asm/linkage.h @@ -12,7 +12,6 @@ * instead. */ #define BTI_C hint 34 ; -#define BTI_J hint 36 ; /* * When using in-kernel BTI we need to ensure that PCS-conformant assembly @@ -43,11 +42,6 @@ SYM_START(name, SYM_L_WEAK, SYM_A_NONE) \ BTI_C -#define SYM_INNER_LABEL(name, linkage) \ - .type name SYM_T_NONE ASM_NL \ - SYM_ENTRY(name, linkage, SYM_A_NONE) \ - BTI_J - #endif /* -- cgit v1.2.3 From f4617be35b4b547e82d30993f56d631dfc2d5f88 Mon Sep 17 00:00:00 2001 From: Sai Prakash Ranjan Date: Wed, 24 Jun 2020 18:04:06 +0530 Subject: arm64: kpti: Add KRYO{3, 4}XX silver CPU cores to kpti safelist QCOM KRYO{3,4}XX silver/LITTLE CPU cores are based on Cortex-A55 and are meltdown safe, hence add them to kpti_safe_list[]. Signed-off-by: Sai Prakash Ranjan Link: https://lore.kernel.org/r/20200624123406.3472-1-saiprakash.ranjan@codeaurora.org Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 4ae41670c2e6..9f63053a63a9 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1290,6 +1290,8 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), MIDR_ALL_VERSIONS(MIDR_HISI_TSV110), MIDR_ALL_VERSIONS(MIDR_NVIDIA_CARMEL), + MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_3XX_SILVER), + MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_SILVER), { /* sentinel */ } }; char const *str = "kpti command line option"; -- cgit v1.2.3 From 9fbbb7ddd96ed68e75a4e8f194225b5297f62bd4 Mon Sep 17 00:00:00 2001 From: "João H. Spies" Date: Tue, 23 Jun 2020 18:19:45 -0300 Subject: MIPS: ingenic: gcw0: Fix HP detection GPIO. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously marked as active high, but is in reality active low. Cc: stable@vger.kernel.org Fixes: b1bfdb660516 ("MIPS: ingenic: DTS: Update GCW0 support") Signed-off-by: João H. Spies Tested-by: Paul Cercueil Reviewed-by: Paul Cercueil Signed-off-by: Thomas Bogendoerfer --- arch/mips/boot/dts/ingenic/gcw0.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/boot/dts/ingenic/gcw0.dts b/arch/mips/boot/dts/ingenic/gcw0.dts index 8d22828787d8..bc72304a2440 100644 --- a/arch/mips/boot/dts/ingenic/gcw0.dts +++ b/arch/mips/boot/dts/ingenic/gcw0.dts @@ -92,7 +92,7 @@ "MIC1N", "Built-in Mic"; simple-audio-card,pin-switches = "Speaker", "Headphones"; - simple-audio-card,hp-det-gpio = <&gpf 21 GPIO_ACTIVE_HIGH>; + simple-audio-card,hp-det-gpio = <&gpf 21 GPIO_ACTIVE_LOW>; simple-audio-card,aux-devs = <&speaker_amp>, <&headphones_amp>; simple-audio-card,bitclock-master = <&dai_codec>; -- cgit v1.2.3 From e3a9e681adb779b39565a28b3252c3be1033f994 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 17 Jun 2020 18:21:16 +0200 Subject: x86/entry: Fixup bad_iret vs noinstr vmlinux.o: warning: objtool: fixup_bad_iret()+0x8e: call to memcpy() leaves .noinstr.text section Worse, when KASAN there is no telling what memcpy() actually is. Force the use of __memcpy() which is our assmebly implementation. Reported-by: Marco Elver Suggested-by: Marco Elver Signed-off-by: Peter Zijlstra (Intel) Tested-by: Marco Elver Link: https://lkml.kernel.org/r/20200618144801.760070502@infradead.org --- arch/x86/kernel/traps.c | 6 +++--- arch/x86/lib/memcpy_64.S | 4 ++++ 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index af75109485c2..a7d157090572 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -690,13 +690,13 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s) (struct bad_iret_stack *)__this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1; /* Copy the IRET target to the temporary storage. */ - memcpy(&tmp.regs.ip, (void *)s->regs.sp, 5*8); + __memcpy(&tmp.regs.ip, (void *)s->regs.sp, 5*8); /* Copy the remainder of the stack from the current stack. */ - memcpy(&tmp, s, offsetof(struct bad_iret_stack, regs.ip)); + __memcpy(&tmp, s, offsetof(struct bad_iret_stack, regs.ip)); /* Update the entry stack */ - memcpy(new_stack, &tmp, sizeof(tmp)); + __memcpy(new_stack, &tmp, sizeof(tmp)); BUG_ON(!user_mode(&new_stack->regs)); return new_stack; diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 56b243b14c3a..bbcc05bcefad 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -8,6 +8,8 @@ #include #include +.pushsection .noinstr.text, "ax" + /* * We build a jump to memcpy_orig by default which gets NOPped out on * the majority of x86 CPUs which set REP_GOOD. In addition, CPUs which @@ -184,6 +186,8 @@ SYM_FUNC_START_LOCAL(memcpy_orig) retq SYM_FUNC_END(memcpy_orig) +.popsection + #ifndef CONFIG_UML MCSAFE_TEST_CTL -- cgit v1.2.3 From c7aadc09321d8f9a1d3bd1e6d8a47222ecddf6c5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 17 Jun 2020 18:25:57 +0200 Subject: x86/entry: Increase entry_stack size to a full page Marco crashed in bad_iret with a Clang11/KCSAN build due to overflowing the stack. Now that we run C code on it, expand it to a full page. Suggested-by: Andy Lutomirski Reported-by: Marco Elver Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Lai Jiangshan Tested-by: Marco Elver Link: https://lkml.kernel.org/r/20200618144801.819246178@infradead.org --- arch/x86/include/asm/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 42cd333616c4..03b7c4ca425a 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -370,7 +370,7 @@ struct x86_hw_tss { #define IO_BITMAP_OFFSET_INVALID (__KERNEL_TSS_LIMIT + 1) struct entry_stack { - unsigned long words[64]; + char stack[PAGE_SIZE]; }; struct entry_stack_page { -- cgit v1.2.3 From 145a773aef83181d47ebab21bb33c89233aadb1e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 16 Jun 2020 13:28:36 +0200 Subject: x86/entry: Fix #UD vs WARN more vmlinux.o: warning: objtool: exc_invalid_op()+0x47: call to probe_kernel_read() leaves .noinstr.text section Since we use UD2 as a short-cut for 'CALL __WARN', treat it as such. Have the bare exception handler do the report_bug() thing. Fixes: 15a416e8aaa7 ("x86/entry: Treat BUG/WARN as NMI-like entries") Signed-off-by: Peter Zijlstra (Intel) Acked-by: Andy Lutomirski Link: https://lkml.kernel.org/r/20200622114713.GE577403@hirez.programming.kicks-ass.net --- arch/x86/kernel/traps.c | 72 ++++++++++++++++++++++++++----------------------- 1 file changed, 38 insertions(+), 34 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index a7d157090572..1d9ea2101b97 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -84,17 +84,16 @@ static inline void cond_local_irq_disable(struct pt_regs *regs) local_irq_disable(); } -int is_valid_bugaddr(unsigned long addr) +__always_inline int is_valid_bugaddr(unsigned long addr) { - unsigned short ud; - if (addr < TASK_SIZE_MAX) return 0; - if (probe_kernel_address((unsigned short *)addr, ud)) - return 0; - - return ud == INSN_UD0 || ud == INSN_UD2; + /* + * We got #UD, if the text isn't readable we'd have gotten + * a different exception. + */ + return *(unsigned short *)addr == INSN_UD2; } static nokprobe_inline int @@ -216,40 +215,45 @@ static inline void handle_invalid_op(struct pt_regs *regs) ILL_ILLOPN, error_get_trap_addr(regs)); } -DEFINE_IDTENTRY_RAW(exc_invalid_op) +static noinstr bool handle_bug(struct pt_regs *regs) { - bool rcu_exit; + bool handled = false; + + if (!is_valid_bugaddr(regs->ip)) + return handled; /* - * Handle BUG/WARN like NMIs instead of like normal idtentries: - * if we bugged/warned in a bad RCU context, for example, the last - * thing we want is to BUG/WARN again in the idtentry code, ad - * infinitum. + * All lies, just get the WARN/BUG out. */ - if (!user_mode(regs) && is_valid_bugaddr(regs->ip)) { - enum bug_trap_type type; + instrumentation_begin(); + /* + * Since we're emulating a CALL with exceptions, restore the interrupt + * state to what it was at the exception site. + */ + if (regs->flags & X86_EFLAGS_IF) + raw_local_irq_enable(); + if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN) { + regs->ip += LEN_UD2; + handled = true; + } + if (regs->flags & X86_EFLAGS_IF) + raw_local_irq_disable(); + instrumentation_end(); - nmi_enter(); - instrumentation_begin(); - trace_hardirqs_off_finish(); - type = report_bug(regs->ip, regs); - if (regs->flags & X86_EFLAGS_IF) - trace_hardirqs_on_prepare(); - instrumentation_end(); - nmi_exit(); + return handled; +} - if (type == BUG_TRAP_TYPE_WARN) { - /* Skip the ud2. */ - regs->ip += LEN_UD2; - return; - } +DEFINE_IDTENTRY_RAW(exc_invalid_op) +{ + bool rcu_exit; - /* - * Else, if this was a BUG and report_bug returns or if this - * was just a normal #UD, we want to continue onward and - * crash. - */ - } + /* + * We use UD2 as a short encoding for 'CALL __WARN', as such + * handle it before exception entry to avoid recursive WARN + * in case exception entry is the one triggering WARNs. + */ + if (!user_mode(regs) && handle_bug(regs)) + return; rcu_exit = idtentry_enter_cond_rcu(regs); instrumentation_begin(); -- cgit v1.2.3 From 8dfe804a4031ca6ba3a3efb2048534249b64f3a5 Mon Sep 17 00:00:00 2001 From: Jiping Ma Date: Mon, 11 May 2020 10:52:07 +0800 Subject: arm64: perf: Report the PC value in REGS_ABI_32 mode A 32-bit perf querying the registers of a compat task using REGS_ABI_32 will receive zeroes from w15, when it expects to find the PC. Return the PC value for register dwarf register 15 when returning register values for a compat task to perf. Cc: Acked-by: Mark Rutland Signed-off-by: Jiping Ma Link: https://lore.kernel.org/r/1589165527-188401-1-git-send-email-jiping.ma2@windriver.com [will: Shuffled code and added a comment] Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_regs.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/perf_regs.c b/arch/arm64/kernel/perf_regs.c index 0bbac612146e..666b225aeb3a 100644 --- a/arch/arm64/kernel/perf_regs.c +++ b/arch/arm64/kernel/perf_regs.c @@ -15,15 +15,34 @@ u64 perf_reg_value(struct pt_regs *regs, int idx) return 0; /* - * Compat (i.e. 32 bit) mode: - * - PC has been set in the pt_regs struct in kernel_entry, - * - Handle SP and LR here. + * Our handling of compat tasks (PERF_SAMPLE_REGS_ABI_32) is weird, but + * we're stuck with it for ABI compatability reasons. + * + * For a 32-bit consumer inspecting a 32-bit task, then it will look at + * the first 16 registers (see arch/arm/include/uapi/asm/perf_regs.h). + * These correspond directly to a prefix of the registers saved in our + * 'struct pt_regs', with the exception of the PC, so we copy that down + * (x15 corresponds to SP_hyp in the architecture). + * + * So far, so good. + * + * The oddity arises when a 64-bit consumer looks at a 32-bit task and + * asks for registers beyond PERF_REG_ARM_MAX. In this case, we return + * SP_usr, LR_usr and PC in the positions where the AArch64 SP, LR and + * PC registers would normally live. The initial idea was to allow a + * 64-bit unwinder to unwind a 32-bit task and, although it's not clear + * how well that works in practice, somebody might be relying on it. + * + * At the time we make a sample, we don't know whether the consumer is + * 32-bit or 64-bit, so we have to cater for both possibilities. */ if (compat_user_mode(regs)) { if ((u32)idx == PERF_REG_ARM64_SP) return regs->compat_sp; if ((u32)idx == PERF_REG_ARM64_LR) return regs->compat_lr; + if (idx == 15) + return regs->pc; } if ((u32)idx == PERF_REG_ARM64_SP) -- cgit v1.2.3 From 108447fd0d1a34b0929cd26dc637c917a734ebab Mon Sep 17 00:00:00 2001 From: Sai Prakash Ranjan Date: Thu, 25 Jun 2020 16:01:23 +0530 Subject: arm64: Add KRYO{3,4}XX silver CPU cores to SSB safelist QCOM KRYO{3,4}XX silver/LITTLE CPU cores are based on Cortex-A55 and are SSB safe, hence add them to SSB safelist -> arm64_ssb_cpus[]. Reported-by: Stephen Boyd Signed-off-by: Sai Prakash Ranjan Reviewed-by: Douglas Anderson Link: https://lore.kernel.org/r/20200625103123.7240-1-saiprakash.ranjan@codeaurora.org Signed-off-by: Will Deacon --- arch/arm64/kernel/cpu_errata.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index ad06d6802d2e..cf50c53e9357 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -460,6 +460,8 @@ static const struct midr_range arm64_ssb_cpus[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A53), MIDR_ALL_VERSIONS(MIDR_CORTEX_A55), MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53), + MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_3XX_SILVER), + MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_SILVER), {}, }; -- cgit v1.2.3 From a0fc3b32893b29a7b3a2771b6d63bae16cb1e8de Mon Sep 17 00:00:00 2001 From: Vincent Chen Date: Tue, 23 Jun 2020 09:13:22 +0800 Subject: riscv: Add -fPIC option to CFLAGS_vgettimeofday.o The time related vDSO functions use a variable, vdso_data, to access the vDSO data page to get the system time information. Because the vdso_data for CFLAGS_vgettimeofday.o is an external variable defined in vdso.o, the CFLAGS_vgettimeofday.o should be compiled with -fPIC to ensure that vdso_data is addressable. Reported-by: kernel test robot Signed-off-by: Vincent Chen Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/vdso/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 38ba55b0eb9d..29cf052f6541 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -17,7 +17,7 @@ vdso-syms += flush_icache obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o ifneq ($(c-gettimeofday-y),) - CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y) + CFLAGS_vgettimeofday.o += -fPIC -include $(c-gettimeofday-y) endif # Build rules -- cgit v1.2.3 From e93b327dbf3d37f0dfb123b58f9627ad17be652e Mon Sep 17 00:00:00 2001 From: Vincent Chen Date: Tue, 23 Jun 2020 13:40:21 +0800 Subject: riscv: Add extern declarations for vDSO time-related functions Add extern declarations for vDSO time-related functions to notify the compiler these functions will be used in somewhere to avoid "no previous prototype" compile warning. Reported-by: kernel test robot Signed-off-by: Vincent Chen Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/vdso/vgettimeofday.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/riscv/kernel/vdso/vgettimeofday.c b/arch/riscv/kernel/vdso/vgettimeofday.c index d264943e2e47..cc0d80699c31 100644 --- a/arch/riscv/kernel/vdso/vgettimeofday.c +++ b/arch/riscv/kernel/vdso/vgettimeofday.c @@ -9,16 +9,22 @@ #include #include +extern +int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts); int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts) { return __cvdso_clock_gettime(clock, ts); } +extern +int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz); int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) { return __cvdso_gettimeofday(tv, tz); } +extern +int __vdso_clock_getres(clockid_t clock_id, struct __kernel_timespec *res); int __vdso_clock_getres(clockid_t clock_id, struct __kernel_timespec *res) { return __cvdso_clock_getres(clock_id, res); -- cgit v1.2.3 From e05d57dcb8c71492268ff46ba9bfe9a9cfb1f95d Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Tue, 23 Jun 2020 09:50:54 +0000 Subject: riscv: Fixup __vdso_gettimeofday broke dynamic ftrace For linux-5.8-rc1, enable ftrace of riscv will cause boot panic: [ 2.388980] Run /sbin/init as init process [ 2.529938] init[39]: unhandled signal 4 code 0x1 at 0x0000003ff449e000 [ 2.531078] CPU: 0 PID: 39 Comm: init Not tainted 5.8.0-rc1-dirty #13 [ 2.532719] epc: 0000003ff449e000 ra : 0000003ff449e954 sp : 0000003fffedb900 [ 2.534005] gp : 00000000000e8528 tp : 0000003ff449d800 t0 : 000000000000001e [ 2.534965] t1 : 000000000000000a t2 : 0000003fffedb89e s0 : 0000003fffedb920 [ 2.536279] s1 : 0000003fffedb940 a0 : 0000003ff43d4b2c a1 : 0000000000000000 [ 2.537334] a2 : 0000000000000001 a3 : 0000000000000000 a4 : fffffffffbad8000 [ 2.538466] a5 : 0000003ff449e93a a6 : 0000000000000000 a7 : 0000000000000000 [ 2.539511] s2 : 0000000000000000 s3 : 0000003ff448412c s4 : 0000000000000010 [ 2.541260] s5 : 0000000000000016 s6 : 00000000000d0a30 s7 : 0000003fffedba70 [ 2.542152] s8 : 0000000000000000 s9 : 0000000000000000 s10: 0000003fffedb960 [ 2.543335] s11: 0000000000000000 t3 : 0000000000000000 t4 : 0000003fffedb8a0 [ 2.544471] t5 : 0000000000000000 t6 : 0000000000000000 [ 2.545730] status: 0000000000004020 badaddr: 00000000464c457f cause: 0000000000000002 [ 2.549867] Kernel panic - not syncing: Attempted to kill init! exitcode=0x00000004 [ 2.551267] CPU: 0 PID: 1 Comm: init Not tainted 5.8.0-rc1-dirty #13 [ 2.552061] Call Trace: [ 2.552626] [] walk_stackframe+0x0/0xc4 [ 2.553486] [] show_stack+0x40/0x4c [ 2.553995] [] dump_stack+0x7a/0x98 [ 2.554615] [] panic+0x114/0x2f4 [ 2.555395] [] do_exit+0x89c/0x8c2 [ 2.555949] [] do_group_exit+0x3a/0x90 [ 2.556715] [] get_signal+0xe2/0x6e6 [ 2.557388] [] do_notify_resume+0x6a/0x37a [ 2.558089] [] ret_from_exception+0x0/0xc "ra:0x3ff449e954" is the return address of "call _mcount" in the prologue of __vdso_gettimeofday(). Without proper relocate, pc jmp to 0x0000003ff449e000 (vdso map base) with a illegal instruction trap. The solution comes from arch/arm64/kernel/vdso/Makefile: CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) - CC_FLAGS_SCS is ShadowCallStack feature in Clang and only implemented for arm64, no use for riscv. Fixes: ad5d1122b82f ("riscv: use vDSO common flow to reduce the latency of the time-related functions") Cc: stable@vger.kernel.org Signed-off-by: Guo Ren Reviewed-by: Vincent Chen Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/vdso/Makefile | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 29cf052f6541..e4c7c2c8a02f 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -27,6 +27,9 @@ obj-vdso := $(addprefix $(obj)/, $(obj-vdso)) obj-y += vdso.o vdso-syms.o CPPFLAGS_vdso.lds += -P -C -U$(ARCH) +# Disable -pg to prevent insert call site +CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os + # Disable gcov profiling for VDSO code GCOV_PROFILE := n -- cgit v1.2.3 From 313a5257b84c26b7f080c5d294aabe7d38ca439c Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Thu, 25 Jun 2020 20:29:17 -0700 Subject: openrisc: fix boot oops when DEBUG_VM is enabled Since v5.8-rc1 OpenRISC Linux fails to boot when DEBUG_VM is enabled. This has been bisected to commit 42fc541404f2 ("mmap locking API: add mmap_assert_locked() and mmap_assert_write_locked()"). The added locking checks exposed the issue that OpenRISC was not taking this mmap lock when during page walks for DMA operations. This patch locks and unlocks the mmap lock for page walking. Link: http://lkml.kernel.org/r/20200617090247.1680188-1-shorne@gmail.com Fixes: 42fc541404f2 ("mmap locking API: add mmap_assert_locked() and mmap_assert_write_locked()" Signed-off-by: Stafford Horne Reviewed-by: Michel Lespinasse Cc: Jonas Bonn Cc: Stefan Kristiansson Cc: Jason Gunthorpe Cc: Steven Price Cc: Thomas Hellstrom Cc: Robin Murphy Cc: Vlastimil Babka Cc: Daniel Jordan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/openrisc/kernel/dma.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/openrisc/kernel/dma.c b/arch/openrisc/kernel/dma.c index c152a68811dd..345727638d52 100644 --- a/arch/openrisc/kernel/dma.c +++ b/arch/openrisc/kernel/dma.c @@ -74,8 +74,11 @@ void *arch_dma_set_uncached(void *cpu_addr, size_t size) * We need to iterate through the pages, clearing the dcache for * them and setting the cache-inhibit bit. */ + mmap_read_lock(&init_mm); error = walk_page_range(&init_mm, va, va + size, &set_nocache_walk_ops, NULL); + mmap_read_unlock(&init_mm); + if (error) return ERR_PTR(error); return cpu_addr; @@ -85,9 +88,11 @@ void arch_dma_clear_uncached(void *cpu_addr, size_t size) { unsigned long va = (unsigned long)cpu_addr; + mmap_read_lock(&init_mm); /* walk_page_range shouldn't be able to fail here */ WARN_ON(walk_page_range(&init_mm, va, va + size, &clear_nocache_walk_ops, NULL)); + mmap_read_unlock(&init_mm); } void arch_sync_dma_for_device(phys_addr_t addr, size_t size, -- cgit v1.2.3 From 800e26b81311dcc0080b8784f80620bb8f2baaa5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 25 Jun 2020 20:30:40 -0700 Subject: x86/hyperv: allocate the hypercall page with only read and execute bits Patch series "fix a hyperv W^X violation and remove vmalloc_exec" Dexuan reported a W^X violation due to the fact that the hyper hypercall page due switching it to be allocated using vmalloc_exec. The problem is that PAGE_KERNEL_EXEC as used by vmalloc_exec actually sets writable permissions in the pte. This series fixes the issue by switching to the low-level __vmalloc_node_range interface that allows specifing more detailed permissions instead. It then also open codes the other two callers and removes the somewhat confusing vmalloc_exec interface. Peter noted that the hyper hypercall page allocation also has another long standing issue in that it shouldn't use the full vmalloc but just the module space. This issue is so far theoretical as the allocation is done early in the boot process. I plan to fix it with another bigger series for 5.9. This patch (of 3): Avoid a W^X violation cause by the fact that PAGE_KERNEL_EXEC includes the writable bit. For this resurrect the removed PAGE_KERNEL_RX definition, but as PAGE_KERNEL_ROX to match arm64 and powerpc. Link: http://lkml.kernel.org/r/20200618064307.32739-2-hch@lst.de Fixes: 78bb17f76edc ("x86/hyperv: use vmalloc_exec for the hypercall page") Signed-off-by: Christoph Hellwig Reported-by: Dexuan Cui Tested-by: Vitaly Kuznetsov Acked-by: Wei Liu Acked-by: Peter Zijlstra (Intel) Cc: Catalin Marinas Cc: Will Deacon Cc: Jessica Yu Cc: David Hildenbrand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/hyperv/hv_init.c | 4 +++- arch/x86/include/asm/pgtable_types.h | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index a54c6a401581..2bdc72e6890e 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -375,7 +375,9 @@ void __init hyperv_init(void) guest_id = generate_guest_id(0, LINUX_VERSION_CODE, 0); wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id); - hv_hypercall_pg = vmalloc_exec(PAGE_SIZE); + hv_hypercall_pg = __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, + VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_ROX, + VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, __func__); if (hv_hypercall_pg == NULL) { wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0); goto remove_cpuhp_state; diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 2da1f95b88d7..816b31c68550 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -194,6 +194,7 @@ enum page_cache_mode { #define _PAGE_TABLE_NOENC (__PP|__RW|_USR|___A| 0|___D| 0| 0) #define _PAGE_TABLE (__PP|__RW|_USR|___A| 0|___D| 0| 0| _ENC) #define __PAGE_KERNEL_RO (__PP| 0| 0|___A|__NX|___D| 0|___G) +#define __PAGE_KERNEL_ROX (__PP| 0| 0|___A| 0|___D| 0|___G) #define __PAGE_KERNEL_NOCACHE (__PP|__RW| 0|___A|__NX|___D| 0|___G| __NC) #define __PAGE_KERNEL_VVAR (__PP| 0|_USR|___A|__NX|___D| 0|___G) #define __PAGE_KERNEL_LARGE (__PP|__RW| 0|___A|__NX|___D|_PSE|___G) @@ -219,6 +220,7 @@ enum page_cache_mode { #define PAGE_KERNEL_RO __pgprot_mask(__PAGE_KERNEL_RO | _ENC) #define PAGE_KERNEL_EXEC __pgprot_mask(__PAGE_KERNEL_EXEC | _ENC) #define PAGE_KERNEL_EXEC_NOENC __pgprot_mask(__PAGE_KERNEL_EXEC | 0) +#define PAGE_KERNEL_ROX __pgprot_mask(__PAGE_KERNEL_ROX | _ENC) #define PAGE_KERNEL_NOCACHE __pgprot_mask(__PAGE_KERNEL_NOCACHE | _ENC) #define PAGE_KERNEL_LARGE __pgprot_mask(__PAGE_KERNEL_LARGE | _ENC) #define PAGE_KERNEL_LARGE_EXEC __pgprot_mask(__PAGE_KERNEL_LARGE_EXEC | _ENC) -- cgit v1.2.3 From 10d5e97c1bf816facbc7c431c6caf47ee35fc1ed Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 25 Jun 2020 20:30:43 -0700 Subject: arm64: use PAGE_KERNEL_ROX directly in alloc_insn_page Use PAGE_KERNEL_ROX directly instead of allocating RWX and setting the page read-only just after the allocation. Link: http://lkml.kernel.org/r/20200618064307.32739-3-hch@lst.de Signed-off-by: Christoph Hellwig Acked-by: David Hildenbrand Acked-by: Peter Zijlstra (Intel) Cc: Catalin Marinas Cc: Dexuan Cui Cc: Jessica Yu Cc: Vitaly Kuznetsov Cc: Wei Liu Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/kernel/probes/kprobes.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index d1c95dcf1d78..cbe49cd117cf 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -120,15 +120,9 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) void *alloc_insn_page(void) { - void *page; - - page = vmalloc_exec(PAGE_SIZE); - if (page) { - set_memory_ro((unsigned long)page, 1); - set_vm_flush_reset_perms(page); - } - - return page; + return __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END, + GFP_KERNEL, PAGE_KERNEL_ROX, VM_FLUSH_RESET_PERMS, + NUMA_NO_NODE, __func__); } /* arm kprobe: install breakpoint in text */ -- cgit v1.2.3 From 0f77ce26ebcf6ea384421d2dd47b924b83649692 Mon Sep 17 00:00:00 2001 From: Patrice Chotard Date: Thu, 18 Jun 2020 19:24:56 +0200 Subject: Revert "ARM: sti: Implement dummy L2 cache's write_sec" This reverts commit 7b8e0188fa717cd9abc4fb52587445b421835c2a. Initially, STiH410-B2260 was supposed to be secured, that's why l2c_write_sec was stubbed to avoid secure register access from non secure world. But by default, STiH410-B2260 is running in non secure mode, so L2 cache register accesses are authorized, l2c_write_sec stub is not needed. With this patch, L2 cache is configured and performance are enhanced. Link: https://lore.kernel.org/r/20200618172456.29475-1-patrice.chotard@st.com Signed-off-by: Patrice Chotard Cc: Alain Volmat Signed-off-by: Arnd Bergmann --- arch/arm/mach-sti/board-dt.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-sti/board-dt.c b/arch/arm/mach-sti/board-dt.c index dcb98937fcf5..ffecbf29646f 100644 --- a/arch/arm/mach-sti/board-dt.c +++ b/arch/arm/mach-sti/board-dt.c @@ -20,14 +20,6 @@ static const char *const stih41x_dt_match[] __initconst = { NULL }; -static void sti_l2_write_sec(unsigned long val, unsigned reg) -{ - /* - * We can't write to secure registers as we are in non-secure - * mode, until we have some SMI service available. - */ -} - DT_MACHINE_START(STM, "STi SoC with Flattened Device Tree") .dt_compat = stih41x_dt_match, .l2c_aux_val = L2C_AUX_CTRL_SHARED_OVERRIDE | @@ -36,5 +28,4 @@ DT_MACHINE_START(STM, "STi SoC with Flattened Device Tree") L2C_AUX_CTRL_WAY_SIZE(4), .l2c_aux_mask = 0xc0000fff, .smp = smp_ops(sti_smp_ops), - .l2c_write_sec = sti_l2_write_sec, MACHINE_END -- cgit v1.2.3 From 19ab500edb5d6020010caba48ce3b4ce4182ab63 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Sat, 27 Jun 2020 12:31:46 +0530 Subject: powerpc/mm/pkeys: Make pkey access check work on execute_only_key Jan reported that LTP mmap03 was getting stuck in a page fault loop after commit c46241a370a6 ("powerpc/pkeys: Check vma before returning key fault error to the user"), as well as a minimised reproducer: #include #include #include #include #include int main(int ac, char **av) { int page_sz = getpagesize(); int fildes; char *addr; fildes = open("tempfile", O_WRONLY | O_CREAT, 0666); write(fildes, &fildes, sizeof(fildes)); close(fildes); fildes = open("tempfile", O_RDONLY); unlink("tempfile"); addr = mmap(0, page_sz, PROT_EXEC, MAP_FILE | MAP_PRIVATE, fildes, 0); printf("%d\n", *addr); return 0; } And noticed that access_pkey_error() in page fault handler now always seem to return false: __do_page_fault access_pkey_error(is_pkey: 1, is_exec: 0, is_write: 0) arch_vma_access_permitted pkey_access_permitted if (!is_pkey_enabled(pkey)) return true return false pkey_access_permitted() should not check if the pkey is available in UAMOR (using is_pkey_enabled()). The kernel needs to do that check only when allocating keys. This also makes sure the execute_only_key which is marked as non-manageable via UAMOR is handled correctly in pkey_access_permitted(), and fixes the bug. Fixes: c46241a370a6 ("powerpc/pkeys: Check vma before returning key fault error to the user") Reported-by: Jan Stancek Signed-off-by: Aneesh Kumar K.V [mpe: Include bug report details etc. in the change log] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200627070147.297535-1-aneesh.kumar@linux.ibm.com --- arch/powerpc/mm/book3s64/pkeys.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c index 1199fc2bfaec..ca5fcb4bff32 100644 --- a/arch/powerpc/mm/book3s64/pkeys.c +++ b/arch/powerpc/mm/book3s64/pkeys.c @@ -353,9 +353,6 @@ static bool pkey_access_permitted(int pkey, bool write, bool execute) int pkey_shift; u64 amr; - if (!is_pkey_enabled(pkey)) - return true; - pkey_shift = pkeyshift(pkey); if (execute && !(read_iamr() & (IAMR_EX_BIT << pkey_shift))) return true; -- cgit v1.2.3 From 7ad816762f9bf89e940e618ea40c43138b479e10 Mon Sep 17 00:00:00 2001 From: Petteri Aimonen Date: Tue, 16 Jun 2020 11:12:57 +0200 Subject: x86/fpu: Reset MXCSR to default in kernel_fpu_begin() Previously, kernel floating point code would run with the MXCSR control register value last set by userland code by the thread that was active on the CPU core just before kernel call. This could affect calculation results if rounding mode was changed, or a crash if a FPU/SIMD exception was unmasked. Restore MXCSR to the kernel's default value. [ bp: Carve out from a bigger patch by Petteri, add feature check, add FNINIT call too (amluto). ] Signed-off-by: Petteri Aimonen Signed-off-by: Borislav Petkov Link: https://bugzilla.kernel.org/show_bug.cgi?id=207979 Link: https://lkml.kernel.org/r/20200624114646.28953-2-bp@alien8.de --- arch/x86/include/asm/fpu/internal.h | 5 +++++ arch/x86/kernel/fpu/core.c | 6 ++++++ 2 files changed, 11 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 42159f45bf9c..845e7481ab77 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -623,6 +623,11 @@ static inline void switch_fpu_finish(struct fpu *new_fpu) * MXCSR and XCR definitions: */ +static inline void ldmxcsr(u32 mxcsr) +{ + asm volatile("ldmxcsr %0" :: "m" (mxcsr)); +} + extern unsigned int mxcsr_feature_mask; #define XCR_XFEATURE_ENABLED_MASK 0x00000000 diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 06c818967bb6..15247b96c6ea 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -101,6 +101,12 @@ void kernel_fpu_begin(void) copy_fpregs_to_fpstate(¤t->thread.fpu); } __cpu_invalidate_fpregs_state(); + + if (boot_cpu_has(X86_FEATURE_XMM)) + ldmxcsr(MXCSR_DEFAULT); + + if (boot_cpu_has(X86_FEATURE_FPU)) + asm volatile ("fninit"); } EXPORT_SYMBOL_GPL(kernel_fpu_begin); -- cgit v1.2.3 From d63bd8c81d8ab64db506ffde569cc8ff197516e2 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 17 Jun 2020 09:53:40 +0300 Subject: m68k: nommu: register start of the memory with memblock The m68k nommu setup code didn't register the beginning of the physical memory with memblock because it was anyway occupied by the kernel. However, commit fa3354e4ea39 ("mm: free_area_init: use maximal zone PFNs rather than zone sizes") changed zones initialization to use memblock.memory to detect the zone extents and this caused inconsistency between zone PFNs and the actual PFNs: BUG: Bad page state in process swapper pfn:20165 page:41fe0ca0 refcount:0 mapcount:1 mapping:00000000 index:0x0 flags: 0x0() raw: 00000000 00000100 00000122 00000000 00000000 00000000 00000000 00000000 page dumped because: nonzero mapcount CPU: 0 PID: 1 Comm: swapper Not tainted 5.8.0-rc1-00001-g3a38f8a60c65-dirty #1 Stack from 404c9ebc: 404c9ebc 4029ab28 4029ab28 40088470 41fe0ca0 40299e21 40299df1 404ba2a4 00020165 00000000 41fd2c10 402c7ba0 41fd2c04 40088504 41fe0ca0 40299e21 00000000 40088a12 41fe0ca0 41fe0ca4 0000020a 00000000 00000001 402ca000 00000000 41fe0ca0 41fd2c10 41fd2c10 00000000 00000000 402b2388 00000001 400a0934 40091056 404c9f44 404c9f44 40088db4 402c7ba0 00000001 41fd2c04 41fe0ca0 41fd2000 41fe0ca0 40089e02 4026ecf4 40089e4e 41fe0ca0 ffffffff Call Trace: [<40088470>] 0x40088470 [<40088504>] 0x40088504 [<40088a12>] 0x40088a12 [<402ca000>] 0x402ca000 [<400a0934>] 0x400a0934 Adjust the memory registration with memblock to include the beginning of the physical memory and make sure that the area occupied by the kernel is marked as reserved. Signed-off-by: Mike Rapoport Signed-off-by: Greg Ungerer --- arch/m68k/kernel/setup_no.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/m68k/kernel/setup_no.c b/arch/m68k/kernel/setup_no.c index e779b19e0193..f66f4b1d062e 100644 --- a/arch/m68k/kernel/setup_no.c +++ b/arch/m68k/kernel/setup_no.c @@ -138,7 +138,8 @@ void __init setup_arch(char **cmdline_p) pr_debug("MEMORY -> ROMFS=0x%p-0x%06lx MEM=0x%06lx-0x%06lx\n ", __bss_stop, memory_start, memory_start, memory_end); - memblock_add(memory_start, memory_end - memory_start); + memblock_add(_rambase, memory_end - _rambase); + memblock_reserve(_rambase, memory_start - _rambase); /* Keep a copy of command line */ *cmdline_p = &command_line[0]; -- cgit v1.2.3 From c43e55796dd4d13f4855971a4d7970ce2cd94db4 Mon Sep 17 00:00:00 2001 From: Angelo Dureghello Date: Wed, 17 Jun 2020 09:53:41 +0300 Subject: m68k: mm: fix node memblock init After pulling 5.7.0 (linux-next merge), mcf5441x mmu boot was hanging silently. memblock_add() seems not appropriate, since using MAX_NUMNODES as node id, while memblock_add_node() sets up memory for node id 0. Signed-off-by: Angelo Dureghello Signed-off-by: Mike Rapoport Signed-off-by: Greg Ungerer --- arch/m68k/mm/mcfmmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/m68k/mm/mcfmmu.c b/arch/m68k/mm/mcfmmu.c index 29f47923aa46..7d04210d34f0 100644 --- a/arch/m68k/mm/mcfmmu.c +++ b/arch/m68k/mm/mcfmmu.c @@ -174,7 +174,7 @@ void __init cf_bootmem_alloc(void) m68k_memory[0].addr = _rambase; m68k_memory[0].size = _ramend - _rambase; - memblock_add(m68k_memory[0].addr, m68k_memory[0].size); + memblock_add_node(m68k_memory[0].addr, m68k_memory[0].size, 0); /* compute total pages in system */ num_pages = PFN_DOWN(_ramend - _rambase); -- cgit v1.2.3 From 3047766bc6ec9c6bc9ece85b45a41ff401e8d988 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Thu, 18 Jun 2020 17:16:27 +0200 Subject: s390/pci: fix enabling a reserved PCI function In usual IPL or hot plug scenarios a zPCI function transitions directly from reserved (invisible to Linux) to configured state or is configured by Linux itself using an SCLP, however it can also first go from reserved to standby and then from standby to configured without Linux initiative. In this scenario we first get a PEC event 0x302 and then 0x301. This may happen for example when the device is deconfigured at another LPAR and made available for this LPAR. It may also happen under z/VM when a device is attached while in some inconsistent state. However when we get the 0x301 the device is already known to zPCI so calling zpci_create() will add it twice resulting in the below BUG. Instead we should only enable the existing device and finally scan it through the PCI subsystem. list_add double add: new=00000000ed5a9008, prev=00000000ed5a9008, next=0000000083502300. kernel BUG at lib/list_debug.c:31! Krnl PSW : 0704c00180000000 0000000082dc2db8 (__list_add_valid+0x70/0xa8) Call Trace: [<0000000082dc2db8>] __list_add_valid+0x70/0xa8 ([<0000000082dc2db4>] __list_add_valid+0x6c/0xa8) [<00000000828ea920>] zpci_create_device+0x60/0x1b0 [<00000000828ef04a>] zpci_event_availability+0x282/0x2f0 [<000000008315f848>] chsc_process_crw+0x2b8/0xa18 [<000000008316735c>] crw_collect_info+0x254/0x348 [<00000000829226ea>] kthread+0x14a/0x168 [<000000008319d5c0>] ret_from_fork+0x24/0x2c Fixes: f606b3ef47c9 ("s390/pci: adapt events for zbus") Reported-by: Alexander Egorenkov Tested-by: Alexander Egorenkov Signed-off-by: Niklas Schnelle Signed-off-by: Heiko Carstens --- arch/s390/pci/pci_event.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index 08e1d619398e..fdebd286f402 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -94,7 +94,18 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) } zdev->fh = ccdf->fh; zdev->state = ZPCI_FN_STATE_CONFIGURED; - zpci_create_device(zdev); + ret = zpci_enable_device(zdev); + if (ret) + break; + + pdev = pci_scan_single_device(zdev->zbus->bus, zdev->devfn); + if (!pdev) + break; + + pci_bus_add_device(pdev); + pci_lock_rescan_remove(); + pci_bus_add_devices(zdev->zbus->bus); + pci_unlock_rescan_remove(); break; case 0x0302: /* Reserved -> Standby */ if (!zdev) { -- cgit v1.2.3 From d6df52e9996dcc2062c3d9c9123288468bb95b52 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 24 Jun 2020 17:39:14 +0200 Subject: s390/maccess: add no DAT mode to kernel_write To be able to patch kernel code before paging is initialized do plain memcpy if DAT is off. This is required to enable early jump label initialization. Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/mm/maccess.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index 22a0be655f27..1d17413b319a 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -62,11 +62,15 @@ notrace void *s390_kernel_write(void *dst, const void *src, size_t size) long copied; spin_lock_irqsave(&s390_kernel_write_lock, flags); - while (size) { - copied = s390_kernel_write_odd(tmp, src, size); - tmp += copied; - src += copied; - size -= copied; + if (!(flags & PSW_MASK_DAT)) { + memcpy(dst, src, size); + } else { + while (size) { + copied = s390_kernel_write_odd(tmp, src, size); + tmp += copied; + src += copied; + size -= copied; + } } spin_unlock_irqrestore(&s390_kernel_write_lock, flags); -- cgit v1.2.3 From 95e61b1b5d6394b53d147c0fcbe2ae70fbe09446 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 18 Jun 2020 17:17:19 +0200 Subject: s390/setup: init jump labels before command line parsing Command line parameters might set static keys. This is true for s390 at least since commit 6471384af2a6 ("mm: security: introduce init_on_alloc=1 and init_on_free=1 boot options"). To avoid the following WARN: static_key_enable_cpuslocked(): static key 'init_on_alloc+0x0/0x40' used before call to jump_label_init() call jump_label_init() just before parse_early_param(). jump_label_init() is safe to call multiple times (x86 does that), doesn't do any memory allocations and hence should be safe to call that early. Fixes: 6471384af2a6 ("mm: security: introduce init_on_alloc=1 and init_on_free=1 boot options") Cc: # 5.3: d6df52e9996d: s390/maccess: add no DAT mode to kernel_write Cc: # 5.3 Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/kernel/setup.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 5853c9872dfe..07aa15ba43b3 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -1100,6 +1100,7 @@ void __init setup_arch(char **cmdline_p) if (IS_ENABLED(CONFIG_EXPOLINE_AUTO)) nospec_auto_detect(); + jump_label_init(); parse_early_param(); #ifdef CONFIG_CRASH_DUMP /* Deactivate elfcorehdr= kernel parameter */ -- cgit v1.2.3 From 9d3c447c72fb2337ca39f245c6ae89f2369de216 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 29 Jun 2020 18:26:31 +0800 Subject: KVM: X86: Fix async pf caused null-ptr-deref Syzbot reported that: CPU: 1 PID: 6780 Comm: syz-executor153 Not tainted 5.7.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:__apic_accept_irq+0x46/0xb80 Call Trace: kvm_arch_async_page_present+0x7de/0x9e0 kvm_check_async_pf_completion+0x18d/0x400 kvm_arch_vcpu_ioctl_run+0x18bf/0x69f0 kvm_vcpu_ioctl+0x46a/0xe20 ksys_ioctl+0x11a/0x180 __x64_sys_ioctl+0x6f/0xb0 do_syscall_64+0xf6/0x7d0 entry_SYSCALL_64_after_hwframe+0x49/0xb3 The testcase enables APF mechanism in MSR_KVM_ASYNC_PF_EN with ASYNC_PF_INT enabled w/o setting MSR_KVM_ASYNC_PF_INT before, what's worse, interrupt based APF 'page ready' event delivery depends on in kernel lapic, however, we didn't bail out when lapic is not in kernel during guest setting MSR_KVM_ASYNC_PF_EN which causes the null-ptr-deref in host later. This patch fixes it. Reported-by: syzbot+1bf777dfdde86d64b89b@syzkaller.appspotmail.com Fixes: 2635b5c4a0 (KVM: x86: interrupt based APF 'page ready' event delivery) Signed-off-by: Wanpeng Li Message-Id: <1593426391-8231-1-git-send-email-wanpengli@tencent.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3b92db412335..a026d926072c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2693,6 +2693,9 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data) if (data & 0x30) return 1; + if (!lapic_in_kernel(vcpu)) + return 1; + vcpu->arch.apf.msr_en_val = data; if (!kvm_pv_async_pf_enabled(vcpu)) { -- cgit v1.2.3 From 95ca6f06dd4827ff63be5154120c7a8511cd9a41 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Wed, 17 Jun 2020 14:53:46 +0200 Subject: arm64: dts: meson: add missing gxl rng clock The peripheral clock of the RNG is missing for gxl while it is present for gxbb. Fixes: 1b3f6d148692 ("ARM64: dts: meson-gx: add clock CLKID_RNG0 to hwrng node") Signed-off-by: Jerome Brunet Signed-off-by: Kevin Hilman Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20200617125346.1163527-1-jbrunet@baylibre.com --- arch/arm64/boot/dts/amlogic/meson-gxl.dtsi | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi index fc59c8534c0f..6c8b189884ca 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi @@ -337,6 +337,11 @@ }; }; +&hwrng { + clocks = <&clkc CLKID_RNG0>; + clock-names = "core"; +}; + &i2c_A { clocks = <&clkc CLKID_I2C>; }; -- cgit v1.2.3 From b2037dafcf082cd24b88ae9283af628235df36e1 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Thu, 18 Jun 2020 15:27:37 +0200 Subject: arm64: dts: meson-gxl-s805x: reduce initial Mali450 core frequency When starting at 744MHz, the Mali 450 core crashes on S805X based boards: lima d00c0000.gpu: IRQ ppmmu3 not found lima d00c0000.gpu: IRQ ppmmu4 not found lima d00c0000.gpu: IRQ ppmmu5 not found lima d00c0000.gpu: IRQ ppmmu6 not found lima d00c0000.gpu: IRQ ppmmu7 not found Internal error: synchronous external abort: 96000210 [#1] PREEMPT SMP Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.7.2+ #492 Hardware name: Libre Computer AML-S805X-AC (DT) pstate: 40000005 (nZcv daif -PAN -UAO) pc : lima_gp_init+0x28/0x188 ... Call trace: lima_gp_init+0x28/0x188 lima_device_init+0x334/0x534 lima_pdev_probe+0xa4/0xe4 ... Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b Reverting to a safer 666Mhz frequency on the S805X that doesn't use the GP0 PLL makes it more stable. Fixes: fd47716479f5 ("ARM64: dts: add S805X based P241 board") Fixes: 0449b8e371ac ("arm64: dts: meson: add libretech aml-s805x-ac board") Signed-off-by: Neil Armstrong Signed-off-by: Kevin Hilman Link: https://lore.kernel.org/r/20200618132737.14243-1-narmstrong@baylibre.com --- .../dts/amlogic/meson-gxl-s805x-libretech-ac.dts | 2 +- .../boot/dts/amlogic/meson-gxl-s805x-p241.dts | 2 +- arch/arm64/boot/dts/amlogic/meson-gxl-s805x.dtsi | 24 ++++++++++++++++++++++ 3 files changed, 26 insertions(+), 2 deletions(-) create mode 100644 arch/arm64/boot/dts/amlogic/meson-gxl-s805x.dtsi (limited to 'arch') diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s805x-libretech-ac.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s805x-libretech-ac.dts index 6a226faab183..9e43f4dca90d 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-s805x-libretech-ac.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s805x-libretech-ac.dts @@ -10,7 +10,7 @@ #include #include -#include "meson-gxl-s905x.dtsi" +#include "meson-gxl-s805x.dtsi" / { compatible = "libretech,aml-s805x-ac", "amlogic,s805x", diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s805x-p241.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s805x-p241.dts index 867e30f1d62b..eb7f5a3fefd4 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-s805x-p241.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s805x-p241.dts @@ -9,7 +9,7 @@ #include -#include "meson-gxl-s905x.dtsi" +#include "meson-gxl-s805x.dtsi" / { compatible = "amlogic,p241", "amlogic,s805x", "amlogic,meson-gxl"; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s805x.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl-s805x.dtsi new file mode 100644 index 000000000000..f9d705648426 --- /dev/null +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s805x.dtsi @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: (GPL-2.0+ OR MIT) +/* + * Copyright (c) 2020 BayLibre SAS + * Author: Neil Armstrong + */ + +#include "meson-gxl-s905x.dtsi" + +/ { + compatible = "amlogic,s805x", "amlogic,meson-gxl"; +}; + +/* The S805X Package doesn't seem to handle the 744MHz OPP correctly */ +&mali { + assigned-clocks = <&clkc CLKID_MALI_0_SEL>, + <&clkc CLKID_MALI_0>, + <&clkc CLKID_MALI>; /* Glitch free mux */ + assigned-clock-parents = <&clkc CLKID_FCLK_DIV3>, + <0>, /* Do Nothing */ + <&clkc CLKID_MALI_0>; + assigned-clock-rates = <0>, /* Do Nothing */ + <666666666>, + <0>; /* Do Nothing */ +}; -- cgit v1.2.3 From 54320dcaa2522db3222c02d68b52cfed32a2e95b Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 26 Jun 2020 10:06:26 +0200 Subject: ARM: dts: meson: Align L2 cache-controller nodename with dtschema Fix dtschema validator warnings like: l2-cache-controller@c4200000: $nodename:0: 'l2-cache-controller@c4200000' does not match '^(cache-controller|cpu)(@[0-9a-f,]+)*$' Signed-off-by: Krzysztof Kozlowski Signed-off-by: Kevin Hilman Reviewed-by: Martin Blumenstingl Link: https://lore.kernel.org/r/20200626080626.4080-1-krzk@kernel.org --- arch/arm/boot/dts/meson.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/meson.dtsi b/arch/arm/boot/dts/meson.dtsi index ae89deaa8c9c..91129dc70d83 100644 --- a/arch/arm/boot/dts/meson.dtsi +++ b/arch/arm/boot/dts/meson.dtsi @@ -11,7 +11,7 @@ #size-cells = <1>; interrupt-parent = <&gic>; - L2: l2-cache-controller@c4200000 { + L2: cache-controller@c4200000 { compatible = "arm,pl310-cache"; reg = <0xc4200000 0x1000>; cache-unified; -- cgit v1.2.3 From caef73cf207074a62701c95c317a97b2f8c1e04e Mon Sep 17 00:00:00 2001 From: Xiaofei Tan Date: Sun, 28 Jun 2020 08:57:06 +0800 Subject: arm/xen: remove the unused macro GRANT_TABLE_PHYSADDR Fix the following sparse warning: arch/arm64/xen/../../arm/xen/enlighten.c:244: warning: macro "GRANT_TABLE_PHYSADDR" is not used [-Wunused-macros] It is an isolated macro, and should be removed when its last user was deleted in the following commit 3cf4095d7446 ("arm/xen: Use xen_xlate_map_ballooned_pages to setup grant table") Signed-off-by: Xiaofei Tan Reviewed-by: Stefano Stabellini Signed-off-by: Stefano Stabellini --- arch/arm/xen/enlighten.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index fd4e1ce1daf9..e93145d72c26 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -241,7 +241,6 @@ static int __init fdt_find_hyper_node(unsigned long node, const char *uname, * see Documentation/devicetree/bindings/arm/xen.txt for the * documentation of the Xen Device Tree format. */ -#define GRANT_TABLE_PHYSADDR 0 void __init xen_early_init(void) { of_scan_flat_dt(fdt_find_hyper_node, NULL); -- cgit v1.2.3 From 5ecad245de2ae23dc4e2dbece92f8ccfbaed2fa7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 30 Jun 2020 07:07:20 -0400 Subject: KVM: x86: bit 8 of non-leaf PDPEs is not reserved Bit 8 would be the "global" bit, which does not quite make sense for non-leaf page table entries. Intel ignores it; AMD ignores it in PDEs and PDPEs, but reserves it in PML4Es. Probably, earlier versions of the AMD manual documented it as reserved in PDPEs as well, and that behavior made it into KVM as well as kvm-unit-tests; fix it. Cc: stable@vger.kernel.org Reported-by: Nadav Amit Fixes: a0c0feb57992 ("KVM: x86: reserve bit 8 of non-leaf PDPEs and PML4Es in 64-bit mode on AMD", 2014-09-03) Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 76817d13c86e..6d6a0ae7800c 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -4449,7 +4449,7 @@ __reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, nonleaf_bit8_rsvd | rsvd_bits(7, 7) | rsvd_bits(maxphyaddr, 51); rsvd_check->rsvd_bits_mask[0][2] = exb_bit_rsvd | - nonleaf_bit8_rsvd | gbpages_bit_rsvd | + gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51); rsvd_check->rsvd_bits_mask[0][1] = exb_bit_rsvd | rsvd_bits(maxphyaddr, 51); -- cgit v1.2.3 From 009bce1df0bb5eb970b9eb98d963861f7fe353c7 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 5 Jun 2020 12:26:05 -0700 Subject: x86/split_lock: Don't write MSR_TEST_CTRL on CPUs that aren't whitelisted Choo! Choo! All aboard the Split Lock Express, with direct service to Wreckage! Skip split_lock_verify_msr() if the CPU isn't whitelisted as a possible SLD-enabled CPU model to avoid writing MSR_TEST_CTRL. MSR_TEST_CTRL exists, and is writable, on many generations of CPUs. Writing the MSR, even with '0', can result in bizarre, undocumented behavior. This fixes a crash on Haswell when resuming from suspend with a live KVM guest. Because APs use the standard SMP boot flow for resume, they will go through split_lock_init() and the subsequent RDMSR/WRMSR sequence, which runs even when sld_state==sld_off to ensure SLD is disabled. On Haswell (at least, my Haswell), writing MSR_TEST_CTRL with '0' will succeed and _may_ take the SMT _sibling_ out of VMX root mode. When KVM has an active guest, KVM performs VMXON as part of CPU onlining (see kvm_starting_cpu()). Because SMP boot is serialized, the resulting flow is effectively: on_each_ap_cpu() { WRMSR(MSR_TEST_CTRL, 0) VMXON } As a result, the WRMSR can disable VMX on a different CPU that has already done VMXON. This ultimately results in a #UD on VMPTRLD when KVM regains control and attempt run its vCPUs. The above voodoo was confirmed by reworking KVM's VMXON flow to write MSR_TEST_CTRL prior to VMXON, and to serialize the sequence as above. Further verification of the insanity was done by redoing VMXON on all APs after the initial WRMSR->VMXON sequence. The additional VMXON, which should VM-Fail, occasionally succeeded, and also eliminated the unexpected #UD on VMPTRLD. The damage done by writing MSR_TEST_CTRL doesn't appear to be limited to VMX, e.g. after suspend with an active KVM guest, subsequent reboots almost always hang (even when fudging VMXON), a #UD on a random Jcc was observed, suspend/resume stability is qualitatively poor, and so on and so forth. kernel BUG at arch/x86/kvm/x86.c:386! CPU: 1 PID: 2592 Comm: CPU 6/KVM Tainted: G D Hardware name: ASUS Q87M-E/Q87M-E, BIOS 1102 03/03/2014 RIP: 0010:kvm_spurious_fault+0xf/0x20 Call Trace: vmx_vcpu_load_vmcs+0x1fb/0x2b0 vmx_vcpu_load+0x3e/0x160 kvm_arch_vcpu_load+0x48/0x260 finish_task_switch+0x140/0x260 __schedule+0x460/0x720 _cond_resched+0x2d/0x40 kvm_arch_vcpu_ioctl_run+0x82e/0x1ca0 kvm_vcpu_ioctl+0x363/0x5c0 ksys_ioctl+0x88/0xa0 __x64_sys_ioctl+0x16/0x20 do_syscall_64+0x4c/0x170 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: dbaba47085b0c ("x86/split_lock: Rework the initialization flow of split lock detection") Signed-off-by: Sean Christopherson Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20200605192605.7439-1-sean.j.christopherson@intel.com --- arch/x86/kernel/cpu/intel.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index c25a67a34bd3..0ab48f1cdf84 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -49,6 +49,13 @@ enum split_lock_detect_state { static enum split_lock_detect_state sld_state __ro_after_init = sld_off; static u64 msr_test_ctrl_cache __ro_after_init; +/* + * With a name like MSR_TEST_CTL it should go without saying, but don't touch + * MSR_TEST_CTL unless the CPU is one of the whitelisted models. Writing it + * on CPUs that do not support SLD can cause fireworks, even when writing '0'. + */ +static bool cpu_model_supports_sld __ro_after_init; + /* * Processors which have self-snooping capability can handle conflicting * memory type across CPUs by snooping its own cache. However, there exists @@ -1071,7 +1078,8 @@ static void sld_update_msr(bool on) static void split_lock_init(void) { - split_lock_verify_msr(sld_state != sld_off); + if (cpu_model_supports_sld) + split_lock_verify_msr(sld_state != sld_off); } static void split_lock_warn(unsigned long ip) @@ -1177,5 +1185,6 @@ void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c) return; } + cpu_model_supports_sld = true; split_lock_setup(); } -- cgit v1.2.3 From ed3e98e919aaaa47e9d9f8a40c3f6f4a22577842 Mon Sep 17 00:00:00 2001 From: Merlijn Wajer Date: Tue, 30 Jun 2020 11:47:40 -0700 Subject: ARM: dts: n900: remove mmc1 card detect gpio Instead, expose the key via the input framework, as SW_MACHINE_COVER The chip-detect GPIO is actually detecting if the cover is closed. Technically it's possible to use the SD card with open cover. The only downside is risk of battery falling out and user being able to physically remove the card. The behaviour of SD card not being available when the device is open is unexpected and creates more problems than it solves. There is a high chance, that more people accidentally break their rootfs by opening the case without physically removing the card. Reviewed-by: Sebastian Reichel Acked-by: Tony Lindgren Signed-off-by: Merlijn Wajer Link: https://lore.kernel.org/r/20200612125402.18393-3-merlijn@wizzup.org Signed-off-by: Dmitry Torokhov --- arch/arm/boot/dts/omap3-n900.dts | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/omap3-n900.dts b/arch/arm/boot/dts/omap3-n900.dts index 4089d97405c9..3dbcae3d60d2 100644 --- a/arch/arm/boot/dts/omap3-n900.dts +++ b/arch/arm/boot/dts/omap3-n900.dts @@ -105,6 +105,14 @@ linux,code = ; linux,can-disable; }; + + machine_cover { + label = "Machine Cover"; + gpios = <&gpio6 0 GPIO_ACTIVE_LOW>; /* 160 */ + linux,input-type = ; + linux,code = ; + linux,can-disable; + }; }; isp1707: isp1707 { @@ -819,10 +827,6 @@ pinctrl-0 = <&mmc1_pins>; vmmc-supply = <&vmmc1>; bus-width = <4>; - /* For debugging, it is often good idea to remove this GPIO. - It means you can remove back cover (to reboot by removing - battery) and still use the MMC card. */ - cd-gpios = <&gpio6 0 GPIO_ACTIVE_LOW>; /* 160 */ }; /* most boards use vaux3, only some old versions use vmmc2 instead */ -- cgit v1.2.3 From c9c26150e61de441ab58b25c1f64afc049ee0fee Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 26 Jun 2020 10:21:11 -0700 Subject: x86/entry: Assert that syscalls are on the right stack Now that the entry stack is a full page, it's too easy to regress the system call entry code and end up on the wrong stack without noticing. Assert that all system calls (SYSCALL64, SYSCALL32, SYSENTER, and INT80) are on the right stack and have pt_regs in the right place. Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/52059e42bb0ab8551153d012d68f7be18d72ff8e.1593191971.git.luto@kernel.org --- arch/x86/entry/common.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index bd3f14175193..ed8ccc820995 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -45,6 +45,15 @@ #define CREATE_TRACE_POINTS #include +/* Check that the stack and regs on entry from user mode are sane. */ +static void check_user_regs(struct pt_regs *regs) +{ + if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) { + WARN_ON_ONCE(!on_thread_stack()); + WARN_ON_ONCE(regs != task_pt_regs(current)); + } +} + #ifdef CONFIG_CONTEXT_TRACKING /** * enter_from_user_mode - Establish state when coming from user mode @@ -127,9 +136,6 @@ static long syscall_trace_enter(struct pt_regs *regs) unsigned long ret = 0; u32 work; - if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) - BUG_ON(regs != task_pt_regs(current)); - work = READ_ONCE(ti->flags); if (work & (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU)) { @@ -346,6 +352,8 @@ __visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs) { struct thread_info *ti; + check_user_regs(regs); + enter_from_user_mode(); instrumentation_begin(); @@ -409,6 +417,8 @@ static void do_syscall_32_irqs_on(struct pt_regs *regs) /* Handles int $0x80 */ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs) { + check_user_regs(regs); + enter_from_user_mode(); instrumentation_begin(); @@ -460,6 +470,8 @@ __visible noinstr long do_fast_syscall_32(struct pt_regs *regs) vdso_image_32.sym_int80_landing_pad; bool success; + check_user_regs(regs); + /* * SYSENTER loses EIP, and even SYSCALL32 needs us to skip forward * so that 'regs->ip -= 2' lands back on an int $0x80 instruction. -- cgit v1.2.3 From d1721250f3ffed9afba3e1fb729947cec64c5a8a Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 26 Jun 2020 10:21:12 -0700 Subject: x86/entry: Move SYSENTER's regs->sp and regs->flags fixups into C The SYSENTER asm (32-bit and compat) contains fixups for regs->sp and regs->flags. Move the fixups into C and fix some comments while at it. This is a valid cleanup all by itself, and it also simplifies the subsequent patch that will fix Xen PV SYSENTER. Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/fe62bef67eda7fac75b8f3dbafccf571dc4ece6b.1593191971.git.luto@kernel.org --- arch/x86/entry/common.c | 12 ++++++++++++ arch/x86/entry/entry_32.S | 5 ++--- arch/x86/entry/entry_64_compat.S | 11 +++++------ 3 files changed, 19 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index ed8ccc820995..f392a8bcd1c3 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -522,6 +522,18 @@ __visible noinstr long do_fast_syscall_32(struct pt_regs *regs) (regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF | X86_EFLAGS_VM)) == 0; #endif } + +/* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */ +__visible noinstr long do_SYSENTER_32(struct pt_regs *regs) +{ + /* SYSENTER loses RSP, but the vDSO saved it in RBP. */ + regs->sp = regs->bp; + + /* SYSENTER clobbers EFLAGS.IF. Assume it was set in usermode. */ + regs->flags |= X86_EFLAGS_IF; + + return do_fast_syscall_32(regs); +} #endif SYSCALL_DEFINE0(ni_syscall) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 024d7d276cd4..2d0bd5d5f032 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -933,9 +933,8 @@ SYM_FUNC_START(entry_SYSENTER_32) .Lsysenter_past_esp: pushl $__USER_DS /* pt_regs->ss */ - pushl %ebp /* pt_regs->sp (stashed in bp) */ + pushl $0 /* pt_regs->sp (placeholder) */ pushfl /* pt_regs->flags (except IF = 0) */ - orl $X86_EFLAGS_IF, (%esp) /* Fix IF */ pushl $__USER_CS /* pt_regs->cs */ pushl $0 /* pt_regs->ip = 0 (placeholder) */ pushl %eax /* pt_regs->orig_ax */ @@ -965,7 +964,7 @@ SYM_FUNC_START(entry_SYSENTER_32) .Lsysenter_flags_fixed: movl %esp, %eax - call do_fast_syscall_32 + call do_SYSENTER_32 /* XEN PV guests always use IRET path */ ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \ "jmp .Lsyscall_32_done", X86_FEATURE_XENPV diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 0f974ae01e62..7b9d8150f652 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -68,16 +68,15 @@ SYM_CODE_START(entry_SYSENTER_compat) /* Construct struct pt_regs on stack */ pushq $__USER32_DS /* pt_regs->ss */ - pushq %rbp /* pt_regs->sp (stashed in bp) */ + pushq $0 /* pt_regs->sp = 0 (placeholder) */ /* * Push flags. This is nasty. First, interrupts are currently - * off, but we need pt_regs->flags to have IF set. Second, even - * if TF was set when SYSENTER started, it's clear by now. We fix - * that later using TIF_SINGLESTEP. + * off, but we need pt_regs->flags to have IF set. Second, if TS + * was set in usermode, it's still set, and we're singlestepping + * through this code. do_SYSENTER_32() will fix up IF. */ pushfq /* pt_regs->flags (except IF = 0) */ - orl $X86_EFLAGS_IF, (%rsp) /* Fix saved flags */ pushq $__USER32_CS /* pt_regs->cs */ pushq $0 /* pt_regs->ip = 0 (placeholder) */ pushq %rax /* pt_regs->orig_ax */ @@ -135,7 +134,7 @@ SYM_CODE_START(entry_SYSENTER_compat) .Lsysenter_flags_fixed: movq %rsp, %rdi - call do_fast_syscall_32 + call do_SYSENTER_32 /* XEN PV guests always use IRET path */ ALTERNATIVE "testl %eax, %eax; jz swapgs_restore_regs_and_return_to_usermode", \ "jmp swapgs_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV -- cgit v1.2.3 From ffae641f57476369b4d503402b37ebe489d23395 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 26 Jun 2020 10:21:13 -0700 Subject: x86/entry/64/compat: Fix Xen PV SYSENTER frame setup The SYSENTER frame setup was nonsense. It worked by accident because the normal code into which the Xen asm jumped (entry_SYSENTER_32/compat) threw away SP without touching the stack. entry_SYSENTER_compat was recently modified such that it relied on having a valid stack pointer, so now the Xen asm needs to invoke it with a valid stack. Fix it up like SYSCALL: use the Xen-provided frame and skip the bare metal prologue. Fixes: 1c3e5d3f60e2 ("x86/entry: Make entry_64_compat.S objtool clean") Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Reviewed-by: Boris Ostrovsky Link: https://lkml.kernel.org/r/947880c41ade688ff4836f665d0c9fcaa9bd1201.1593191971.git.luto@kernel.org --- arch/x86/entry/entry_64_compat.S | 1 + arch/x86/xen/xen-asm_64.S | 20 ++++++++++++++++---- 2 files changed, 17 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 7b9d8150f652..381a6de7de9c 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -79,6 +79,7 @@ SYM_CODE_START(entry_SYSENTER_compat) pushfq /* pt_regs->flags (except IF = 0) */ pushq $__USER32_CS /* pt_regs->cs */ pushq $0 /* pt_regs->ip = 0 (placeholder) */ +SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL) pushq %rax /* pt_regs->orig_ax */ pushq %rdi /* pt_regs->di */ pushq %rsi /* pt_regs->si */ diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 5d252aaeade8..e1e1c7eafa60 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -161,10 +161,22 @@ SYM_FUNC_END(xen_syscall32_target) /* 32-bit compat sysenter target */ SYM_FUNC_START(xen_sysenter_target) - mov 0*8(%rsp), %rcx - mov 1*8(%rsp), %r11 - mov 5*8(%rsp), %rsp - jmp entry_SYSENTER_compat + /* + * NB: Xen is polite and clears TF from EFLAGS for us. This means + * that we don't need to guard against single step exceptions here. + */ + popq %rcx + popq %r11 + + /* + * Neither Xen nor the kernel really knows what the old SS and + * CS were. The kernel expects __USER32_DS and __USER32_CS, so + * report those values even though Xen will guess its own values. + */ + movq $__USER32_DS, 4*8(%rsp) + movq $__USER32_CS, 1*8(%rsp) + + jmp entry_SYSENTER_compat_after_hwframe SYM_FUNC_END(xen_sysenter_target) #else /* !CONFIG_IA32_EMULATION */ -- cgit v1.2.3 From 5aa98879efe77d33d1639e006d4b0c1579cde9f6 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Fri, 26 Jun 2020 11:24:34 +0200 Subject: s390/cpum_sf: prohibit callchain data collection CPU Measurement sampling facility on s390 does not support perf tool collection of callchain data using --call-graph option. The sampling facility collects samples in a ring buffer which includes only the instruction address the samples were taken. When the ring buffer hits a watermark, a measurement alert interrupt is triggered and handled by the performance measurement unit (PMU) device driver. It collects the samples and feeds each sample to the perf ring buffer in the common code via functions perf_prepare_sample()/perf_output_sample(). When function perf_prepare_sample() is called to collect sample data's callchain, user register values or stack area, invalid data is picked, because the context of the collected information does not match the context when the sample was taken. There is currently no way to provide the callchain and other information, because the hardware sampler does not collect this information. Therefore prohibit sampling when the user requests a callchain graph from the hardware sampler. Return -EOPNOTSUPP to the user in this case. If call chains are really wanted, users need to specify software event cpu-clock to get the callchain information from a software event. Signed-off-by: Thomas Richter Reviewed-by: Sumanth Korikkar Signed-off-by: Heiko Carstens --- arch/s390/kernel/perf_cpum_sf.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 85a711d783eb..4f9e4626df55 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -881,12 +881,21 @@ out: return err; } +static bool is_callchain_event(struct perf_event *event) +{ + u64 sample_type = event->attr.sample_type; + + return sample_type & (PERF_SAMPLE_CALLCHAIN | PERF_SAMPLE_REGS_USER | + PERF_SAMPLE_STACK_USER); +} + static int cpumsf_pmu_event_init(struct perf_event *event) { int err; /* No support for taken branch sampling */ - if (has_branch_stack(event)) + /* No support for callchain, stacks and registers */ + if (has_branch_stack(event) || is_callchain_event(event)) return -EOPNOTSUPP; switch (event->attr.type) { -- cgit v1.2.3 From 9e9f85e029a2ee4167aacf3ff04e4288a5e5c74e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 1 Jul 2020 07:32:13 +0200 Subject: s390: update defconfigs Signed-off-by: Heiko Carstens --- arch/s390/configs/debug_defconfig | 44 ++++++++++++++++++++++++++++-------- arch/s390/configs/defconfig | 43 +++++++++++++++++++++++++++-------- arch/s390/configs/zfcpdump_defconfig | 5 ++++ 3 files changed, 73 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 46038bc58c9e..0cf9a82326a8 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -1,5 +1,6 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_WATCH_QUEUE=y CONFIG_AUDIT=y CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y @@ -14,7 +15,6 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_NUMA_BALANCING=y CONFIG_MEMCG=y -CONFIG_MEMCG_SWAP=y CONFIG_BLK_CGROUP=y CONFIG_CFS_BANDWIDTH=y CONFIG_RT_GROUP_SCHED=y @@ -31,9 +31,9 @@ CONFIG_NAMESPACES=y CONFIG_USER_NS=y CONFIG_CHECKPOINT_RESTORE=y CONFIG_SCHED_AUTOGROUP=y -CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y # CONFIG_SYSFS_SYSCALL is not set +CONFIG_BPF_LSM=y CONFIG_BPF_SYSCALL=y CONFIG_USERFAULTFD=y # CONFIG_COMPAT_BRK is not set @@ -51,14 +51,11 @@ CONFIG_CHSC_SCH=y CONFIG_VFIO_CCW=m CONFIG_VFIO_AP=m CONFIG_CRASH_DUMP=y -CONFIG_HIBERNATION=y -CONFIG_PM_DEBUG=y CONFIG_PROTECTED_VIRTUALIZATION_GUEST=y CONFIG_CMM=m CONFIG_APPLDATA_BASE=y CONFIG_KVM=m -CONFIG_VHOST_NET=m -CONFIG_VHOST_VSOCK=m +CONFIG_S390_UNWIND_SELFTEST=y CONFIG_OPROFILE=m CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y @@ -77,6 +74,8 @@ CONFIG_BLK_DEV_THROTTLING=y CONFIG_BLK_WBT=y CONFIG_BLK_CGROUP_IOLATENCY=y CONFIG_BLK_CGROUP_IOCOST=y +CONFIG_BLK_INLINE_ENCRYPTION=y +CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK=y CONFIG_PARTITION_ADVANCED=y CONFIG_IBM_PARTITION=y CONFIG_BSD_DISKLABEL=y @@ -96,7 +95,6 @@ CONFIG_CMA_DEBUG=y CONFIG_CMA_DEBUGFS=y CONFIG_MEM_SOFT_DIRTY=y CONFIG_ZSWAP=y -CONFIG_ZBUD=m CONFIG_ZSMALLOC=m CONFIG_ZSMALLOC_STAT=y CONFIG_DEFERRED_STRUCT_PAGE_INIT=y @@ -130,6 +128,7 @@ CONFIG_SYN_COOKIES=y CONFIG_NET_IPVTI=m CONFIG_INET_AH=m CONFIG_INET_ESP=m +CONFIG_INET_ESPINTCP=y CONFIG_INET_IPCOMP=m CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m @@ -144,6 +143,7 @@ CONFIG_TCP_CONG_ILLINOIS=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m +CONFIG_INET6_ESPINTCP=y CONFIG_INET6_IPCOMP=m CONFIG_IPV6_MIP6=m CONFIG_IPV6_VTI=m @@ -151,7 +151,10 @@ CONFIG_IPV6_SIT=m CONFIG_IPV6_GRE=m CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_IPV6_SUBTREES=y +CONFIG_IPV6_RPL_LWTUNNEL=y +CONFIG_MPTCP=y CONFIG_NETFILTER=y +CONFIG_BRIDGE_NETFILTER=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_SECMARK=y CONFIG_NF_CONNTRACK_EVENTS=y @@ -317,6 +320,7 @@ CONFIG_L2TP_V3=y CONFIG_L2TP_IP=m CONFIG_L2TP_ETH=m CONFIG_BRIDGE=m +CONFIG_BRIDGE_MRP=y CONFIG_VLAN_8021Q=m CONFIG_VLAN_8021Q_GVRP=y CONFIG_NET_SCHED=y @@ -341,6 +345,7 @@ CONFIG_NET_SCH_CODEL=m CONFIG_NET_SCH_FQ_CODEL=m CONFIG_NET_SCH_INGRESS=m CONFIG_NET_SCH_PLUG=m +CONFIG_NET_SCH_ETS=m CONFIG_NET_CLS_BASIC=m CONFIG_NET_CLS_TCINDEX=m CONFIG_NET_CLS_ROUTE4=m @@ -364,6 +369,7 @@ CONFIG_NET_ACT_PEDIT=m CONFIG_NET_ACT_SIMP=m CONFIG_NET_ACT_SKBEDIT=m CONFIG_NET_ACT_CSUM=m +CONFIG_NET_ACT_GATE=m CONFIG_DNS_RESOLVER=y CONFIG_OPENVSWITCH=m CONFIG_VSOCKETS=m @@ -374,6 +380,7 @@ CONFIG_BPF_JIT=y CONFIG_NET_PKTGEN=m # CONFIG_NET_DROP_MONITOR is not set CONFIG_PCI=y +# CONFIG_PCIEASPM is not set CONFIG_PCI_DEBUG=y CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_S390=y @@ -435,6 +442,7 @@ CONFIG_DM_ZERO=m CONFIG_DM_MULTIPATH=m CONFIG_DM_MULTIPATH_QL=m CONFIG_DM_MULTIPATH_ST=m +CONFIG_DM_MULTIPATH_HST=m CONFIG_DM_DELAY=m CONFIG_DM_UEVENT=y CONFIG_DM_FLAKEY=m @@ -448,6 +456,8 @@ CONFIG_EQUALIZER=m CONFIG_IFB=m CONFIG_MACVLAN=m CONFIG_MACVTAP=m +CONFIG_VXLAN=m +CONFIG_BAREUDP=m CONFIG_TUN=m CONFIG_VETH=m CONFIG_VIRTIO_NET=m @@ -481,7 +491,6 @@ CONFIG_NLMON=m CONFIG_MLX4_EN=m CONFIG_MLX5_CORE=m CONFIG_MLX5_CORE_EN=y -# CONFIG_MLXFW is not set # CONFIG_NET_VENDOR_MICREL is not set # CONFIG_NET_VENDOR_MICROCHIP is not set # CONFIG_NET_VENDOR_MICROSEMI is not set @@ -514,6 +523,7 @@ CONFIG_MLX5_CORE_EN=y # CONFIG_NET_VENDOR_TI is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set +# CONFIG_NET_VENDOR_XILINX is not set CONFIG_PPP=m CONFIG_PPP_BSDCOMP=m CONFIG_PPP_DEFLATE=m @@ -561,6 +571,8 @@ CONFIG_VFIO_MDEV_DEVICE=m CONFIG_VIRTIO_PCI=m CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_INPUT=y +CONFIG_VHOST_NET=m +CONFIG_VHOST_VSOCK=m CONFIG_S390_CCW_IOMMU=y CONFIG_S390_AP_IOMMU=y CONFIG_EXT4_FS=y @@ -608,6 +620,7 @@ CONFIG_ZISOFS=y CONFIG_UDF_FS=m CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=m +CONFIG_EXFAT_FS=m CONFIG_NTFS_FS=m CONFIG_NTFS_RW=y CONFIG_PROC_KCORE=y @@ -650,8 +663,8 @@ CONFIG_NLS_UTF8=m CONFIG_DLM=m CONFIG_UNICODE=y CONFIG_PERSISTENT_KEYRINGS=y -CONFIG_BIG_KEYS=y CONFIG_ENCRYPTED_KEYS=m +CONFIG_KEY_NOTIFICATIONS=y CONFIG_SECURITY=y CONFIG_SECURITY_NETWORK=y CONFIG_FORTIFY_SOURCE=y @@ -675,8 +688,11 @@ CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m +CONFIG_CRYPTO_CURVE25519=m +CONFIG_CRYPTO_GCM=y CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m +CONFIG_CRYPTO_SEQIV=y CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m @@ -685,6 +701,7 @@ CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_CRC32=m +CONFIG_CRYPTO_BLAKE2S=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD128=m CONFIG_CRYPTO_RMD160=m @@ -701,6 +718,7 @@ CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAMELLIA=m CONFIG_CRYPTO_CAST5=m CONFIG_CRYPTO_CAST6=m +CONFIG_CRYPTO_DES=m CONFIG_CRYPTO_FCRYPT=m CONFIG_CRYPTO_KHAZAD=m CONFIG_CRYPTO_SALSA20=m @@ -719,6 +737,9 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_CRYPTO_STATS=y +CONFIG_CRYPTO_LIB_BLAKE2S=m +CONFIG_CRYPTO_LIB_CURVE25519=m +CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m CONFIG_ZCRYPT=m CONFIG_PKEY=m CONFIG_CRYPTO_PAES_S390=m @@ -774,6 +795,7 @@ CONFIG_DEBUG_SHIRQ=y CONFIG_PANIC_ON_OOPS=y CONFIG_DETECT_HUNG_TASK=y CONFIG_WQ_WATCHDOG=y +CONFIG_TEST_LOCKUP=m CONFIG_DEBUG_TIMEKEEPING=y CONFIG_PROVE_LOCKING=y CONFIG_LOCK_STAT=y @@ -786,7 +808,9 @@ CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_DEBUG_CREDENTIALS=y CONFIG_RCU_TORTURE_TEST=m CONFIG_RCU_CPU_STALL_TIMEOUT=300 +# CONFIG_RCU_TRACE is not set CONFIG_LATENCYTOP=y +CONFIG_BOOTTIME_TRACING=y CONFIG_FUNCTION_PROFILER=y CONFIG_STACK_TRACER=y CONFIG_IRQSOFF_TRACER=y @@ -808,10 +832,12 @@ CONFIG_FAULT_INJECTION_DEBUG_FS=y CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y CONFIG_LKDTM=m CONFIG_TEST_LIST_SORT=y +CONFIG_TEST_MIN_HEAP=y CONFIG_TEST_SORT=y CONFIG_KPROBES_SANITY_TEST=y CONFIG_RBTREE_TEST=y CONFIG_INTERVAL_TREE_TEST=m CONFIG_PERCPU_TEST=m CONFIG_ATOMIC64_SELFTEST=y +CONFIG_TEST_BITOPS=m CONFIG_TEST_BPF=m diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 7cd0648c1f4e..5df9759e8ff6 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -1,5 +1,6 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_WATCH_QUEUE=y CONFIG_AUDIT=y CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y @@ -13,7 +14,6 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_NUMA_BALANCING=y CONFIG_MEMCG=y -CONFIG_MEMCG_SWAP=y CONFIG_BLK_CGROUP=y CONFIG_CFS_BANDWIDTH=y CONFIG_RT_GROUP_SCHED=y @@ -30,9 +30,9 @@ CONFIG_NAMESPACES=y CONFIG_USER_NS=y CONFIG_CHECKPOINT_RESTORE=y CONFIG_SCHED_AUTOGROUP=y -CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y # CONFIG_SYSFS_SYSCALL is not set +CONFIG_BPF_LSM=y CONFIG_BPF_SYSCALL=y CONFIG_USERFAULTFD=y # CONFIG_COMPAT_BRK is not set @@ -41,7 +41,6 @@ CONFIG_LIVEPATCH=y CONFIG_TUNE_ZEC12=y CONFIG_NR_CPUS=512 CONFIG_NUMA=y -# CONFIG_NUMA_EMU is not set CONFIG_HZ_100=y CONFIG_KEXEC_FILE=y CONFIG_KEXEC_SIG=y @@ -51,14 +50,11 @@ CONFIG_CHSC_SCH=y CONFIG_VFIO_CCW=m CONFIG_VFIO_AP=m CONFIG_CRASH_DUMP=y -CONFIG_HIBERNATION=y -CONFIG_PM_DEBUG=y CONFIG_PROTECTED_VIRTUALIZATION_GUEST=y CONFIG_CMM=m CONFIG_APPLDATA_BASE=y CONFIG_KVM=m -CONFIG_VHOST_NET=m -CONFIG_VHOST_VSOCK=m +CONFIG_S390_UNWIND_SELFTEST=m CONFIG_OPROFILE=m CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y @@ -74,6 +70,8 @@ CONFIG_BLK_DEV_THROTTLING=y CONFIG_BLK_WBT=y CONFIG_BLK_CGROUP_IOLATENCY=y CONFIG_BLK_CGROUP_IOCOST=y +CONFIG_BLK_INLINE_ENCRYPTION=y +CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK=y CONFIG_PARTITION_ADVANCED=y CONFIG_IBM_PARTITION=y CONFIG_BSD_DISKLABEL=y @@ -91,7 +89,6 @@ CONFIG_CLEANCACHE=y CONFIG_FRONTSWAP=y CONFIG_MEM_SOFT_DIRTY=y CONFIG_ZSWAP=y -CONFIG_ZBUD=m CONFIG_ZSMALLOC=m CONFIG_ZSMALLOC_STAT=y CONFIG_DEFERRED_STRUCT_PAGE_INIT=y @@ -125,6 +122,7 @@ CONFIG_SYN_COOKIES=y CONFIG_NET_IPVTI=m CONFIG_INET_AH=m CONFIG_INET_ESP=m +CONFIG_INET_ESPINTCP=y CONFIG_INET_IPCOMP=m CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m @@ -139,6 +137,7 @@ CONFIG_TCP_CONG_ILLINOIS=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m +CONFIG_INET6_ESPINTCP=y CONFIG_INET6_IPCOMP=m CONFIG_IPV6_MIP6=m CONFIG_IPV6_VTI=m @@ -146,7 +145,10 @@ CONFIG_IPV6_SIT=m CONFIG_IPV6_GRE=m CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_IPV6_SUBTREES=y +CONFIG_IPV6_RPL_LWTUNNEL=y +CONFIG_MPTCP=y CONFIG_NETFILTER=y +CONFIG_BRIDGE_NETFILTER=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_SECMARK=y CONFIG_NF_CONNTRACK_EVENTS=y @@ -311,6 +313,7 @@ CONFIG_L2TP_V3=y CONFIG_L2TP_IP=m CONFIG_L2TP_ETH=m CONFIG_BRIDGE=m +CONFIG_BRIDGE_MRP=y CONFIG_VLAN_8021Q=m CONFIG_VLAN_8021Q_GVRP=y CONFIG_NET_SCHED=y @@ -335,6 +338,7 @@ CONFIG_NET_SCH_CODEL=m CONFIG_NET_SCH_FQ_CODEL=m CONFIG_NET_SCH_INGRESS=m CONFIG_NET_SCH_PLUG=m +CONFIG_NET_SCH_ETS=m CONFIG_NET_CLS_BASIC=m CONFIG_NET_CLS_TCINDEX=m CONFIG_NET_CLS_ROUTE4=m @@ -358,6 +362,7 @@ CONFIG_NET_ACT_PEDIT=m CONFIG_NET_ACT_SIMP=m CONFIG_NET_ACT_SKBEDIT=m CONFIG_NET_ACT_CSUM=m +CONFIG_NET_ACT_GATE=m CONFIG_DNS_RESOLVER=y CONFIG_OPENVSWITCH=m CONFIG_VSOCKETS=m @@ -368,6 +373,7 @@ CONFIG_BPF_JIT=y CONFIG_NET_PKTGEN=m # CONFIG_NET_DROP_MONITOR is not set CONFIG_PCI=y +# CONFIG_PCIEASPM is not set CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_S390=y CONFIG_UEVENT_HELPER=y @@ -430,6 +436,7 @@ CONFIG_DM_ZERO=m CONFIG_DM_MULTIPATH=m CONFIG_DM_MULTIPATH_QL=m CONFIG_DM_MULTIPATH_ST=m +CONFIG_DM_MULTIPATH_HST=m CONFIG_DM_DELAY=m CONFIG_DM_UEVENT=y CONFIG_DM_FLAKEY=m @@ -444,6 +451,8 @@ CONFIG_EQUALIZER=m CONFIG_IFB=m CONFIG_MACVLAN=m CONFIG_MACVTAP=m +CONFIG_VXLAN=m +CONFIG_BAREUDP=m CONFIG_TUN=m CONFIG_VETH=m CONFIG_VIRTIO_NET=m @@ -477,7 +486,6 @@ CONFIG_NLMON=m CONFIG_MLX4_EN=m CONFIG_MLX5_CORE=m CONFIG_MLX5_CORE_EN=y -# CONFIG_MLXFW is not set # CONFIG_NET_VENDOR_MICREL is not set # CONFIG_NET_VENDOR_MICROCHIP is not set # CONFIG_NET_VENDOR_MICROSEMI is not set @@ -510,6 +518,7 @@ CONFIG_MLX5_CORE_EN=y # CONFIG_NET_VENDOR_TI is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set +# CONFIG_NET_VENDOR_XILINX is not set CONFIG_PPP=m CONFIG_PPP_BSDCOMP=m CONFIG_PPP_DEFLATE=m @@ -557,6 +566,8 @@ CONFIG_VFIO_MDEV_DEVICE=m CONFIG_VIRTIO_PCI=m CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_INPUT=y +CONFIG_VHOST_NET=m +CONFIG_VHOST_VSOCK=m CONFIG_S390_CCW_IOMMU=y CONFIG_S390_AP_IOMMU=y CONFIG_EXT4_FS=y @@ -600,6 +611,7 @@ CONFIG_ZISOFS=y CONFIG_UDF_FS=m CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=m +CONFIG_EXFAT_FS=m CONFIG_NTFS_FS=m CONFIG_NTFS_RW=y CONFIG_PROC_KCORE=y @@ -642,8 +654,8 @@ CONFIG_NLS_UTF8=m CONFIG_DLM=m CONFIG_UNICODE=y CONFIG_PERSISTENT_KEYRINGS=y -CONFIG_BIG_KEYS=y CONFIG_ENCRYPTED_KEYS=m +CONFIG_KEY_NOTIFICATIONS=y CONFIG_SECURITY=y CONFIG_SECURITY_NETWORK=y CONFIG_SECURITY_SELINUX=y @@ -667,8 +679,11 @@ CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m +CONFIG_CRYPTO_CURVE25519=m +CONFIG_CRYPTO_GCM=y CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m +CONFIG_CRYPTO_SEQIV=y CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_OFB=m @@ -678,6 +693,7 @@ CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_CRC32=m +CONFIG_CRYPTO_BLAKE2S=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD128=m CONFIG_CRYPTO_RMD160=m @@ -694,6 +710,7 @@ CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAMELLIA=m CONFIG_CRYPTO_CAST5=m CONFIG_CRYPTO_CAST6=m +CONFIG_CRYPTO_DES=m CONFIG_CRYPTO_FCRYPT=m CONFIG_CRYPTO_KHAZAD=m CONFIG_CRYPTO_SALSA20=m @@ -712,6 +729,9 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_CRYPTO_STATS=y +CONFIG_CRYPTO_LIB_BLAKE2S=m +CONFIG_CRYPTO_LIB_CURVE25519=m +CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m CONFIG_ZCRYPT=m CONFIG_PKEY=m CONFIG_CRYPTO_PAES_S390=m @@ -725,6 +745,7 @@ CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_GHASH_S390=m CONFIG_CRYPTO_CRC32_S390=y CONFIG_CORDIC=m +CONFIG_PRIME_NUMBERS=m CONFIG_CRC4=m CONFIG_CRC7=m CONFIG_CRC8=m @@ -739,10 +760,12 @@ CONFIG_DEBUG_SECTION_MISMATCH=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_PANIC_ON_OOPS=y +CONFIG_TEST_LOCKUP=m CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_RCU_TORTURE_TEST=m CONFIG_RCU_CPU_STALL_TIMEOUT=60 CONFIG_LATENCYTOP=y +CONFIG_BOOTTIME_TRACING=y CONFIG_FUNCTION_PROFILER=y CONFIG_STACK_TRACER=y CONFIG_SCHED_TRACER=y diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index 20c51e5d9353..4091c50449cd 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -30,6 +30,7 @@ CONFIG_IBM_PARTITION=y # CONFIG_BOUNCE is not set CONFIG_NET=y # CONFIG_IUCV is not set +# CONFIG_ETHTOOL_NETLINK is not set CONFIG_DEVTMPFS=y CONFIG_BLK_DEV_RAM=y # CONFIG_BLK_DEV_XPRAM is not set @@ -55,6 +56,8 @@ CONFIG_RAW_DRIVER=y # CONFIG_MONWRITER is not set # CONFIG_S390_VMUR is not set # CONFIG_HID is not set +# CONFIG_VIRTIO_MENU is not set +# CONFIG_VHOST_MENU is not set # CONFIG_IOMMU_SUPPORT is not set # CONFIG_DNOTIFY is not set # CONFIG_INOTIFY_USER is not set @@ -62,7 +65,9 @@ CONFIG_CONFIGFS_FS=y # CONFIG_MISC_FILESYSTEMS is not set # CONFIG_NETWORK_FILESYSTEMS is not set CONFIG_LSM="yama,loadpin,safesetid,integrity" +# CONFIG_ZLIB_DFLTCC is not set CONFIG_PRINTK_TIME=y +# CONFIG_SYMBOLIC_ERRNAME is not set CONFIG_DEBUG_INFO=y CONFIG_DEBUG_FS=y CONFIG_DEBUG_KERNEL=y -- cgit v1.2.3 From f7b93d42945cc71e1346dd5ae07c59061d56745e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 30 Jun 2020 10:19:21 +0200 Subject: arm64/alternatives: use subsections for replacement sequences When building very large kernels, the logic that emits replacement sequences for alternatives fails when relative branches are present in the code that is emitted into the .altinstr_replacement section and patched in at the original site and fixed up. The reason is that the linker will insert veneers if relative branches go out of range, and due to the relative distance of the .altinstr_replacement from the .text section where its branch targets usually live, veneers may be emitted at the end of the .altinstr_replacement section, with the relative branches in the sequence pointed at the veneers instead of the actual target. The alternatives patching logic will attempt to fix up the branch to point to its original target, which will be the veneer in this case, but given that the patch site is likely to be far away as well, it will be out of range and so patching will fail. There are other cases where these veneers are problematic, e.g., when the target of the branch is in .text while the patch site is in .init.text, in which case putting the replacement sequence inside .text may not help either. So let's use subsections to emit the replacement code as closely as possible to the patch site, to ensure that veneers are only likely to be emitted if they are required at the patch site as well, in which case they will be in range for the replacement sequence both before and after it is transported to the patch site. This will prevent alternative sequences in non-init code from being released from memory after boot, but this is tolerable given that the entire section is only 512 KB on an allyesconfig build (which weighs in at 500+ MB for the entire Image). Also, note that modules today carry the replacement sequences in non-init sections as well, and any of those that target init code will be emitted into init sections after this change. This fixes an early crash when booting an allyesconfig kernel on a system where any of the alternatives sequences containing relative branches are activated at boot (e.g., ARM64_HAS_PAN on TX2) Signed-off-by: Ard Biesheuvel Cc: Suzuki K Poulose Cc: James Morse Cc: Andre Przywara Cc: Dave P Martin Link: https://lore.kernel.org/r/20200630081921.13443-1-ardb@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/alternative.h | 16 ++++++++-------- arch/arm64/kernel/vmlinux.lds.S | 3 --- 2 files changed, 8 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index 5e5dc05d63a0..12f0eb56a1cc 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -73,11 +73,11 @@ static inline void apply_alternatives_module(void *start, size_t length) { } ".pushsection .altinstructions,\"a\"\n" \ ALTINSTR_ENTRY(feature) \ ".popsection\n" \ - ".pushsection .altinstr_replacement, \"a\"\n" \ + ".subsection 1\n" \ "663:\n\t" \ newinstr "\n" \ "664:\n\t" \ - ".popsection\n\t" \ + ".previous\n\t" \ ".org . - (664b-663b) + (662b-661b)\n\t" \ ".org . - (662b-661b) + (664b-663b)\n" \ ".endif\n" @@ -117,9 +117,9 @@ static inline void apply_alternatives_module(void *start, size_t length) { } 662: .pushsection .altinstructions, "a" altinstruction_entry 661b, 663f, \cap, 662b-661b, 664f-663f .popsection - .pushsection .altinstr_replacement, "ax" + .subsection 1 663: \insn2 -664: .popsection +664: .previous .org . - (664b-663b) + (662b-661b) .org . - (662b-661b) + (664b-663b) .endif @@ -160,7 +160,7 @@ static inline void apply_alternatives_module(void *start, size_t length) { } .pushsection .altinstructions, "a" altinstruction_entry 663f, 661f, \cap, 664f-663f, 662f-661f .popsection - .pushsection .altinstr_replacement, "ax" + .subsection 1 .align 2 /* So GAS knows label 661 is suitably aligned */ 661: .endm @@ -179,9 +179,9 @@ static inline void apply_alternatives_module(void *start, size_t length) { } .macro alternative_else 662: .if .Lasm_alt_mode==0 - .pushsection .altinstr_replacement, "ax" + .subsection 1 .else - .popsection + .previous .endif 663: .endm @@ -192,7 +192,7 @@ static inline void apply_alternatives_module(void *start, size_t length) { } .macro alternative_endif 664: .if .Lasm_alt_mode==0 - .popsection + .previous .endif .org . - (664b-663b) + (662b-661b) .org . - (662b-661b) + (664b-663b) diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 6827da7f3aa5..5423ffe0a987 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -165,9 +165,6 @@ SECTIONS *(.altinstructions) __alt_instructions_end = .; } - .altinstr_replacement : { - *(.altinstr_replacement) - } . = ALIGN(SEGMENT_ALIGN); __inittext_end = .; -- cgit v1.2.3 From 73f9941306d5ce030f3ffc7db425c7b2a798cf8e Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Thu, 4 Jun 2020 13:37:10 -0700 Subject: xtensa: fix __sync_fetch_and_{and,or}_4 declarations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Building xtensa kernel with gcc-10 produces the following warnings: arch/xtensa/kernel/xtensa_ksyms.c:90:15: warning: conflicting types for built-in function ‘__sync_fetch_and_and_4’; expected ‘unsigned int(volatile void *, unsigned int)’ [-Wbuiltin-declaration-mismatch] arch/xtensa/kernel/xtensa_ksyms.c:96:15: warning: conflicting types for built-in function ‘__sync_fetch_and_or_4’; expected ‘unsigned int(volatile void *, unsigned int)’ [-Wbuiltin-declaration-mismatch] Fix declarations of these functions to avoid the warning. Signed-off-by: Max Filippov --- arch/xtensa/kernel/xtensa_ksyms.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/xtensa/kernel/xtensa_ksyms.c b/arch/xtensa/kernel/xtensa_ksyms.c index 4092555828b1..24cf6972eace 100644 --- a/arch/xtensa/kernel/xtensa_ksyms.c +++ b/arch/xtensa/kernel/xtensa_ksyms.c @@ -87,13 +87,13 @@ void __xtensa_libgcc_window_spill(void) } EXPORT_SYMBOL(__xtensa_libgcc_window_spill); -unsigned long __sync_fetch_and_and_4(unsigned long *p, unsigned long v) +unsigned int __sync_fetch_and_and_4(volatile void *p, unsigned int v) { BUG(); } EXPORT_SYMBOL(__sync_fetch_and_and_4); -unsigned long __sync_fetch_and_or_4(unsigned long *p, unsigned long v) +unsigned int __sync_fetch_and_or_4(volatile void *p, unsigned int v) { BUG(); } -- cgit v1.2.3 From 0d5ab144429e8bd80889b856a44d56ab4a5cd59b Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Thu, 2 Jul 2020 08:32:25 -0700 Subject: xtensa: update *pos in cpuinfo_op.next Increment *pos in the cpuinfo_op.next to fix the following warning triggered by cat /proc/cpuinfo: seq_file: buggy .next function c_next did not update position index Signed-off-by: Max Filippov --- arch/xtensa/kernel/setup.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index d9204dc2656e..be2c78f71695 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -724,7 +724,8 @@ c_start(struct seq_file *f, loff_t *pos) static void * c_next(struct seq_file *f, void *v, loff_t *pos) { - return NULL; + ++*pos; + return c_start(f, pos); } static void -- cgit v1.2.3 From dce4f2807f6920bb907eb4cd4f95e7f42d918bc6 Mon Sep 17 00:00:00 2001 From: Sai Prakash Ranjan Date: Tue, 30 Jun 2020 23:30:53 +0530 Subject: arm64: Add MIDR value for KRYO4XX gold CPU cores Add MIDR value for KRYO4XX gold/big CPU cores which are used in Qualcomm Technologies, Inc. SoCs. This will be used to identify and apply erratum which are applicable for these CPU cores. Signed-off-by: Sai Prakash Ranjan Link: https://lore.kernel.org/r/9093fb82e22441076280ca1b729242ffde80c432.1593539394.git.saiprakash.ranjan@codeaurora.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/cputype.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index a87a93f67671..7219cddeba66 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -86,6 +86,7 @@ #define QCOM_CPU_PART_FALKOR 0xC00 #define QCOM_CPU_PART_KRYO 0x200 #define QCOM_CPU_PART_KRYO_3XX_SILVER 0x803 +#define QCOM_CPU_PART_KRYO_4XX_GOLD 0x804 #define QCOM_CPU_PART_KRYO_4XX_SILVER 0x805 #define NVIDIA_CPU_PART_DENVER 0x003 @@ -114,6 +115,7 @@ #define MIDR_QCOM_FALKOR MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR) #define MIDR_QCOM_KRYO MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO) #define MIDR_QCOM_KRYO_3XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_3XX_SILVER) +#define MIDR_QCOM_KRYO_4XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_GOLD) #define MIDR_QCOM_KRYO_4XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_SILVER) #define MIDR_NVIDIA_DENVER MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_DENVER) #define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL) -- cgit v1.2.3 From a9e821b89daa55cc940c546b124101939d3f0451 Mon Sep 17 00:00:00 2001 From: Sai Prakash Ranjan Date: Tue, 30 Jun 2020 23:30:54 +0530 Subject: arm64: Add KRYO4XX gold CPU cores to erratum list 1463225 and 1418040 KRYO4XX gold/big CPU core revisions r0p0 to r3p1 are affected by erratum 1463225 and 1418040, so add them to the respective list. The variant and revision bits are implementation defined and are different from the their Cortex CPU counterparts on which they are based on, i.e., (r0p0 to r3p1) is equivalent to (rcpe to rfpf). Signed-off-by: Sai Prakash Ranjan Link: https://lore.kernel.org/r/83780e80c6377c12ca51b5d53186b61241685e49.1593539394.git.saiprakash.ranjan@codeaurora.org Signed-off-by: Will Deacon --- Documentation/arm64/silicon-errata.rst | 4 ++++ arch/arm64/kernel/cpu_errata.c | 19 +++++++++++++------ 2 files changed, 17 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index 936cf2a59ca4..f3c0c4393e7e 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -147,6 +147,10 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | Qualcomm Tech. | Falkor v{1,2} | E1041 | QCOM_FALKOR_ERRATUM_1041 | +----------------+-----------------+-----------------+-----------------------------+ +| Qualcomm Tech. | Kryo4xx Gold | N/A | ARM64_ERRATUM_1463225 | ++----------------+-----------------+-----------------+-----------------------------+ +| Qualcomm Tech. | Kryo4xx Gold | N/A | ARM64_ERRATUM_1418040 | ++----------------+-----------------+-----------------+-----------------------------+ +----------------+-----------------+-----------------+-----------------------------+ | Fujitsu | A64FX | E#010001 | FUJITSU_ERRATUM_010001 | +----------------+-----------------+-----------------+-----------------------------+ diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index cf50c53e9357..044f1d7aebdf 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -472,12 +472,7 @@ static bool has_cortex_a76_erratum_1463225(const struct arm64_cpu_capabilities *entry, int scope) { - u32 midr = read_cpuid_id(); - /* Cortex-A76 r0p0 - r3p1 */ - struct midr_range range = MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 3, 1); - - WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); - return is_midr_in_range(midr, &range) && is_kernel_in_hyp_mode(); + return is_affected_midr_range_list(entry, scope) && is_kernel_in_hyp_mode(); } #endif @@ -728,6 +723,8 @@ static const struct midr_range erratum_1418040_list[] = { MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 3, 1), /* Neoverse-N1 r0p0 to r3p1 */ MIDR_RANGE(MIDR_NEOVERSE_N1, 0, 0, 3, 1), + /* Kryo4xx Gold (rcpe to rfpf) => (r0p0 to r3p1) */ + MIDR_RANGE(MIDR_QCOM_KRYO_4XX_GOLD, 0xc, 0xe, 0xf, 0xf), {}, }; #endif @@ -777,6 +774,15 @@ static const struct midr_range erratum_speculative_at_list[] = { }; #endif +#ifdef CONFIG_ARM64_ERRATUM_1463225 +static const struct midr_range erratum_1463225[] = { + /* Cortex-A76 r0p0 - r3p1 */ + MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 3, 1), + /* Kryo4xx Gold (rcpe to rfpf) => (r0p0 to r3p1) */ + MIDR_RANGE(MIDR_QCOM_KRYO_4XX_GOLD, 0xc, 0xe, 0xf, 0xf), +}; +#endif + const struct arm64_cpu_capabilities arm64_errata[] = { #ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE { @@ -916,6 +922,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .capability = ARM64_WORKAROUND_1463225, .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, .matches = has_cortex_a76_erratum_1463225, + .midr_range_list = erratum_1463225, }, #endif #ifdef CONFIG_CAVIUM_TX2_ERRATUM_219 -- cgit v1.2.3 From 9b23d95c539ebc5d6d6b5d6f20d2d7922384e76e Mon Sep 17 00:00:00 2001 From: Sai Prakash Ranjan Date: Tue, 30 Jun 2020 23:30:55 +0530 Subject: arm64: Add KRYO4XX silver CPU cores to erratum list 1530923 and 1024718 KRYO4XX silver/LITTLE CPU cores with revision r1p0 are affected by erratum 1530923 and 1024718, so add them to the respective list. The variant and revision bits are implementation defined and are different from the their Cortex CPU counterparts on which they are based on, i.e., r1p0 is equivalent to rdpe. Signed-off-by: Sai Prakash Ranjan Link: https://lore.kernel.org/r/7013e8a3f857ca7e82863cc9e34a614293d7f80c.1593539394.git.saiprakash.ranjan@codeaurora.org Signed-off-by: Will Deacon --- Documentation/arm64/silicon-errata.rst | 4 ++++ arch/arm64/kernel/cpu_errata.c | 2 ++ arch/arm64/kernel/cpufeature.c | 2 ++ 3 files changed, 8 insertions(+) (limited to 'arch') diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index f3c0c4393e7e..3f7c3a7e8a2b 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -151,6 +151,10 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | Qualcomm Tech. | Kryo4xx Gold | N/A | ARM64_ERRATUM_1418040 | +----------------+-----------------+-----------------+-----------------------------+ +| Qualcomm Tech. | Kryo4xx Silver | N/A | ARM64_ERRATUM_1530923 | ++----------------+-----------------+-----------------+-----------------------------+ +| Qualcomm Tech. | Kryo4xx Silver | N/A | ARM64_ERRATUM_1024718 | ++----------------+-----------------+-----------------+-----------------------------+ +----------------+-----------------+-----------------+-----------------------------+ | Fujitsu | A64FX | E#010001 | FUJITSU_ERRATUM_010001 | +----------------+-----------------+-----------------+-----------------------------+ diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 044f1d7aebdf..8e302dc093d0 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -769,6 +769,8 @@ static const struct midr_range erratum_speculative_at_list[] = { #ifdef CONFIG_ARM64_ERRATUM_1530923 /* Cortex A55 r0p0 to r2p0 */ MIDR_RANGE(MIDR_CORTEX_A55, 0, 0, 2, 0), + /* Kryo4xx Silver (rdpe => r1p0) */ + MIDR_REV(MIDR_QCOM_KRYO_4XX_SILVER, 0xd, 0xe), #endif {}, }; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 9f63053a63a9..9fae0efc80c1 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1408,6 +1408,8 @@ static bool cpu_has_broken_dbm(void) static const struct midr_range cpus[] = { #ifdef CONFIG_ARM64_ERRATUM_1024718 MIDR_RANGE(MIDR_CORTEX_A55, 0, 0, 1, 0), // A55 r0p0 -r1p0 + /* Kryo4xx Silver (rdpe => r1p0) */ + MIDR_REV(MIDR_QCOM_KRYO_4XX_SILVER, 0xd, 0xe), #endif {}, }; -- cgit v1.2.3 From d74fcfc1f0ff4b6c26ecef1f9e48d8089ab4eaac Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 2 Jul 2020 19:17:14 -0700 Subject: KVM: x86: Inject #GP if guest attempts to toggle CR4.LA57 in 64-bit mode Inject a #GP on MOV CR4 if CR4.LA57 is toggled in 64-bit mode, which is illegal per Intel's SDM: CR4.LA57 57-bit linear addresses (bit 12 of CR4) ... blah blah blah ... This bit cannot be modified in IA-32e mode. Note, the pseudocode for MOV CR doesn't call out the fault condition, which is likely why the check was missed during initial development. This is arguably an SDM bug and will hopefully be fixed in future release of the SDM. Fixes: fd8cb433734ee ("KVM: MMU: Expose the LA57 feature to VM.") Cc: stable@vger.kernel.org Reported-by: Sebastien Boeuf Signed-off-by: Sean Christopherson Message-Id: <20200703021714.5549-1-sean.j.christopherson@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a026d926072c..88c593f83b28 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -975,6 +975,8 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) if (is_long_mode(vcpu)) { if (!(cr4 & X86_CR4_PAE)) return 1; + if ((cr4 ^ old_cr4) & X86_CR4_LA57) + return 1; } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE) && ((cr4 ^ old_cr4) & pdptr_bits) && !load_pdptrs(vcpu, vcpu->arch.walk_mmu, -- cgit v1.2.3 From 7c83d096aed055a7763a03384f92115363448b71 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 2 Jul 2020 21:04:21 -0700 Subject: KVM: x86: Mark CR4.TSD as being possibly owned by the guest Mark CR4.TSD as being possibly owned by the guest as that is indeed the case on VMX. Without TSD being tagged as possibly owned by the guest, a targeted read of CR4 to get TSD could observe a stale value. This bug is benign in the current code base as the sole consumer of TSD is the emulator (for RDTSC) and the emulator always "reads" the entirety of CR4 when grabbing bits. Add a build-time assertion in to ensure VMX doesn't hand over more CR4 bits without also updating x86. Fixes: 52ce3c21aec3 ("x86,kvm,vmx: Don't trap writes to CR4.TSD") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20200703040422.31536-2-sean.j.christopherson@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/kvm_cache_regs.h | 2 +- arch/x86/kvm/vmx/vmx.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index ff2d0e9ca3bc..cfe83d4ae625 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h @@ -7,7 +7,7 @@ #define KVM_POSSIBLE_CR0_GUEST_BITS X86_CR0_TS #define KVM_POSSIBLE_CR4_GUEST_BITS \ (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ - | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_PGE) + | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_PGE | X86_CR4_TSD) #define BUILD_KVM_GPR_ACCESSORS(lname, uname) \ static __always_inline unsigned long kvm_##lname##_read(struct kvm_vcpu *vcpu)\ diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index cb22f33bf1d8..5c9bfc0b9ab9 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4034,6 +4034,8 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx) void set_cr4_guest_host_mask(struct vcpu_vmx *vmx) { + BUILD_BUG_ON(KVM_CR4_GUEST_OWNED_BITS & ~KVM_POSSIBLE_CR4_GUEST_BITS); + vmx->vcpu.arch.cr4_guest_owned_bits = KVM_CR4_GUEST_OWNED_BITS; if (enable_ept) vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE; -- cgit v1.2.3 From fa71e9527f6a0153ae6a880031b902818af1bdaf Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 2 Jul 2020 21:04:22 -0700 Subject: KVM: VMX: Use KVM_POSSIBLE_CR*_GUEST_BITS to initialize guest/host masks Use the "common" KVM_POSSIBLE_CR*_GUEST_BITS defines to initialize the CR0/CR4 guest host masks instead of duplicating most of the CR4 mask and open coding the CR0 mask. SVM doesn't utilize the masks, i.e. the masks are effectively VMX specific even if they're not named as such. This avoids duplicate code, better documents the guest owned CR0 bit, and eliminates the need for a build-time assertion to keep VMX and x86 synchronized. Signed-off-by: Sean Christopherson Message-Id: <20200703040422.31536-3-sean.j.christopherson@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 4 ++-- arch/x86/kvm/vmx/vmx.c | 15 +++++---------- 2 files changed, 7 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index d1af20b050a8..b26655104d4a 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -4109,7 +4109,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, * CR0_GUEST_HOST_MASK is already set in the original vmcs01 * (KVM doesn't change it); */ - vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS; + vcpu->arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS; vmx_set_cr0(vcpu, vmcs12->host_cr0); /* Same as above - no reason to call set_cr4_guest_host_mask(). */ @@ -4259,7 +4259,7 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu) */ vmx_set_efer(vcpu, nested_vmx_get_vmcs01_guest_efer(vmx)); - vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS; + vcpu->arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS; vmx_set_cr0(vcpu, vmcs_readl(CR0_READ_SHADOW)); vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 5c9bfc0b9ab9..13745f2a5ecd 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -133,9 +133,6 @@ module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO); #define KVM_VM_CR0_ALWAYS_ON \ (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | \ X86_CR0_WP | X86_CR0_PG | X86_CR0_PE) -#define KVM_CR4_GUEST_OWNED_BITS \ - (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ - | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_TSD) #define KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR4_VMXE #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE) @@ -4034,11 +4031,9 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx) void set_cr4_guest_host_mask(struct vcpu_vmx *vmx) { - BUILD_BUG_ON(KVM_CR4_GUEST_OWNED_BITS & ~KVM_POSSIBLE_CR4_GUEST_BITS); - - vmx->vcpu.arch.cr4_guest_owned_bits = KVM_CR4_GUEST_OWNED_BITS; - if (enable_ept) - vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE; + vmx->vcpu.arch.cr4_guest_owned_bits = KVM_POSSIBLE_CR4_GUEST_BITS; + if (!enable_ept) + vmx->vcpu.arch.cr4_guest_owned_bits &= ~X86_CR4_PGE; if (is_guest_mode(&vmx->vcpu)) vmx->vcpu.arch.cr4_guest_owned_bits &= ~get_vmcs12(&vmx->vcpu)->cr4_guest_host_mask; @@ -4335,8 +4330,8 @@ static void init_vmcs(struct vcpu_vmx *vmx) /* 22.2.1, 20.8.1 */ vm_entry_controls_set(vmx, vmx_vmentry_ctrl()); - vmx->vcpu.arch.cr0_guest_owned_bits = X86_CR0_TS; - vmcs_writel(CR0_GUEST_HOST_MASK, ~X86_CR0_TS); + vmx->vcpu.arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS; + vmcs_writel(CR0_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr0_guest_owned_bits); set_cr4_guest_host_mask(vmx); -- cgit v1.2.3 From a3a66c3822e03692ed7c5888e8f2d384cc698d34 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 3 Jul 2020 15:15:27 -0700 Subject: vmalloc: fix the owner argument for the new __vmalloc_node_range callers Fix the recently added new __vmalloc_node_range callers to pass the correct values as the owner for display in /proc/vmallocinfo. Fixes: 800e26b81311 ("x86/hyperv: allocate the hypercall page with only read and execute bits") Fixes: 10d5e97c1bf8 ("arm64: use PAGE_KERNEL_ROX directly in alloc_insn_page") Fixes: 7a0e27b2a0ce ("mm: remove vmalloc_exec") Reported-by: Ard Biesheuvel Signed-off-by: Christoph Hellwig Signed-off-by: Andrew Morton Link: http://lkml.kernel.org/r/20200627075649.2455097-1-hch@lst.de Signed-off-by: Linus Torvalds --- arch/arm64/kernel/probes/kprobes.c | 2 +- arch/x86/hyperv/hv_init.c | 3 ++- kernel/module.c | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index cbe49cd117cf..5290f17a4d80 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -122,7 +122,7 @@ void *alloc_insn_page(void) { return __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_ROX, VM_FLUSH_RESET_PERMS, - NUMA_NO_NODE, __func__); + NUMA_NO_NODE, __builtin_return_address(0)); } /* arm kprobe: install breakpoint in text */ diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 2bdc72e6890e..6035df1b49e1 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -377,7 +377,8 @@ void __init hyperv_init(void) hv_hypercall_pg = __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_ROX, - VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, __func__); + VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, + __builtin_return_address(0)); if (hv_hypercall_pg == NULL) { wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0); goto remove_cpuhp_state; diff --git a/kernel/module.c b/kernel/module.c index 0c6573b98c36..bee1c25ca5c5 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2785,7 +2785,7 @@ void * __weak module_alloc(unsigned long size) { return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, - NUMA_NO_NODE, __func__); + NUMA_NO_NODE, __builtin_return_address(0)); } bool __weak module_init_section(const char *name) -- cgit v1.2.3 From db5b2c5a90a111618f071d231a8b945cf522313e Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 3 Jul 2020 10:02:53 -0700 Subject: x86/entry/compat: Clear RAX high bits on Xen PV SYSENTER Move the clearing of the high bits of RAX after Xen PV joins the SYSENTER path so that Xen PV doesn't skip it. Arguably this code should be deleted instead, but that would belong in the merge window. Fixes: ffae641f5747 ("x86/entry/64/compat: Fix Xen PV SYSENTER frame setup") Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/9d33b3f3216dcab008070f1c28b6091ae7199969.1593795633.git.luto@kernel.org --- arch/x86/entry/entry_64_compat.S | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 381a6de7de9c..541fdaf64045 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -57,15 +57,6 @@ SYM_CODE_START(entry_SYSENTER_compat) movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp - /* - * User tracing code (ptrace or signal handlers) might assume that - * the saved RAX contains a 32-bit number when we're invoking a 32-bit - * syscall. Just in case the high bits are nonzero, zero-extend - * the syscall number. (This could almost certainly be deleted - * with no ill effects.) - */ - movl %eax, %eax - /* Construct struct pt_regs on stack */ pushq $__USER32_DS /* pt_regs->ss */ pushq $0 /* pt_regs->sp = 0 (placeholder) */ @@ -80,6 +71,16 @@ SYM_CODE_START(entry_SYSENTER_compat) pushq $__USER32_CS /* pt_regs->cs */ pushq $0 /* pt_regs->ip = 0 (placeholder) */ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL) + + /* + * User tracing code (ptrace or signal handlers) might assume that + * the saved RAX contains a 32-bit number when we're invoking a 32-bit + * syscall. Just in case the high bits are nonzero, zero-extend + * the syscall number. (This could almost certainly be deleted + * with no ill effects.) + */ + movl %eax, %eax + pushq %rax /* pt_regs->orig_ax */ pushq %rdi /* pt_regs->di */ pushq %rsi /* pt_regs->si */ -- cgit v1.2.3 From 3c73b81a9164d0c1b6379d6672d2772a9e95168e Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 3 Jul 2020 10:02:54 -0700 Subject: x86/entry, selftests: Further improve user entry sanity checks Chasing down a Xen bug caused me to realize that the new entry sanity checks are still fairly weak. Add some more checks. Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/881de09e786ab93ce56ee4a2437ba2c308afe7a9.1593795633.git.luto@kernel.org --- arch/x86/entry/common.c | 19 +++++++++++++++++++ tools/testing/selftests/x86/syscall_nt.c | 11 +++++++++++ 2 files changed, 30 insertions(+) (limited to 'arch') diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index f392a8bcd1c3..e83b3f14897c 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -49,6 +49,23 @@ static void check_user_regs(struct pt_regs *regs) { if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) { + /* + * Make sure that the entry code gave us a sensible EFLAGS + * register. Native because we want to check the actual CPU + * state, not the interrupt state as imagined by Xen. + */ + unsigned long flags = native_save_fl(); + WARN_ON_ONCE(flags & (X86_EFLAGS_AC | X86_EFLAGS_DF | + X86_EFLAGS_NT)); + + /* We think we came from user mode. Make sure pt_regs agrees. */ + WARN_ON_ONCE(!user_mode(regs)); + + /* + * All entries from user mode (except #DF) should be on the + * normal thread stack and should have user pt_regs in the + * correct location. + */ WARN_ON_ONCE(!on_thread_stack()); WARN_ON_ONCE(regs != task_pt_regs(current)); } @@ -577,6 +594,7 @@ SYSCALL_DEFINE0(ni_syscall) bool noinstr idtentry_enter_cond_rcu(struct pt_regs *regs) { if (user_mode(regs)) { + check_user_regs(regs); enter_from_user_mode(); return false; } @@ -710,6 +728,7 @@ void noinstr idtentry_exit_cond_rcu(struct pt_regs *regs, bool rcu_exit) */ void noinstr idtentry_enter_user(struct pt_regs *regs) { + check_user_regs(regs); enter_from_user_mode(); } diff --git a/tools/testing/selftests/x86/syscall_nt.c b/tools/testing/selftests/x86/syscall_nt.c index 970e5e14d96d..a108b80dd082 100644 --- a/tools/testing/selftests/x86/syscall_nt.c +++ b/tools/testing/selftests/x86/syscall_nt.c @@ -81,5 +81,16 @@ int main(void) printf("[RUN]\tSet NT|AC|TF and issue a syscall\n"); do_it(X86_EFLAGS_NT | X86_EFLAGS_AC | X86_EFLAGS_TF); + /* + * Now try DF. This is evil and it's plausible that we will crash + * glibc, but glibc would have to do something rather surprising + * for this to happen. + */ + printf("[RUN]\tSet DF and issue a syscall\n"); + do_it(X86_EFLAGS_DF); + + printf("[RUN]\tSet TF|DF and issue a syscall\n"); + do_it(X86_EFLAGS_TF | X86_EFLAGS_DF); + return nerrs == 0 ? 0 : 1; } -- cgit v1.2.3 From f41f0824224eb12ad84de8972962dd54be5abe3b Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 3 Jul 2020 10:02:55 -0700 Subject: x86/entry/xen: Route #DB correctly on Xen PV On Xen PV, #DB doesn't use IST. It still needs to be correctly routed depending on whether it came from user or kernel mode. Get rid of DECLARE/DEFINE_IDTENTRY_XEN -- it was too hard to follow the logic. Instead, route #DB and NMI through DECLARE/DEFINE_IDTENTRY_RAW on Xen, and do the right thing for #DB. Also add more warnings to the exc_debug* handlers to make this type of failure more obvious. This fixes various forms of corruption that happen when usermode triggers #DB on Xen PV. Fixes: 4c0dcd8350a0 ("x86/entry: Implement user mode C entry points for #DB and #MCE") Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/4163e733cce0b41658e252c6c6b3464f33fdff17.1593795633.git.luto@kernel.org --- arch/x86/include/asm/idtentry.h | 24 ++++++------------------ arch/x86/kernel/traps.c | 12 ++++++++++++ arch/x86/xen/enlighten_pv.c | 28 ++++++++++++++++++++++++---- arch/x86/xen/xen-asm_64.S | 5 ++--- 4 files changed, 44 insertions(+), 25 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index cf51c50eb356..94333ac3092b 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -398,18 +398,6 @@ __visible noinstr void func(struct pt_regs *regs, \ #define DEFINE_IDTENTRY_DEBUG DEFINE_IDTENTRY_IST #define DEFINE_IDTENTRY_DEBUG_USER DEFINE_IDTENTRY_NOIST -/** - * DECLARE_IDTENTRY_XEN - Declare functions for XEN redirect IDT entry points - * @vector: Vector number (ignored for C) - * @func: Function name of the entry point - * - * Used for xennmi and xendebug redirections. No DEFINE as this is all ASM - * indirection magic. - */ -#define DECLARE_IDTENTRY_XEN(vector, func) \ - asmlinkage void xen_asm_exc_xen##func(void); \ - asmlinkage void asm_exc_xen##func(void) - #else /* !__ASSEMBLY__ */ /* @@ -469,10 +457,6 @@ __visible noinstr void func(struct pt_regs *regs, \ /* No ASM code emitted for NMI */ #define DECLARE_IDTENTRY_NMI(vector, func) -/* XEN NMI and DB wrapper */ -#define DECLARE_IDTENTRY_XEN(vector, func) \ - idtentry vector asm_exc_xen##func exc_##func has_error_code=0 - /* * ASM code to emit the common vector entry stubs where each stub is * packed into 8 bytes. @@ -570,11 +554,15 @@ DECLARE_IDTENTRY_MCE(X86_TRAP_MC, exc_machine_check); /* NMI */ DECLARE_IDTENTRY_NMI(X86_TRAP_NMI, exc_nmi); -DECLARE_IDTENTRY_XEN(X86_TRAP_NMI, nmi); +#ifdef CONFIG_XEN_PV +DECLARE_IDTENTRY_RAW(X86_TRAP_NMI, xenpv_exc_nmi); +#endif /* #DB */ DECLARE_IDTENTRY_DEBUG(X86_TRAP_DB, exc_debug); -DECLARE_IDTENTRY_XEN(X86_TRAP_DB, debug); +#ifdef CONFIG_XEN_PV +DECLARE_IDTENTRY_RAW(X86_TRAP_DB, xenpv_exc_debug); +#endif /* #DF */ DECLARE_IDTENTRY_DF(X86_TRAP_DF, exc_double_fault); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index f9727b96961f..c17f9b57171f 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -865,6 +865,12 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs, instrumentation_begin(); trace_hardirqs_off_finish(); + /* + * If something gets miswired and we end up here for a user mode + * #DB, we will malfunction. + */ + WARN_ON_ONCE(user_mode(regs)); + /* * Catch SYSENTER with TF set and clear DR_STEP. If this hit a * watchpoint at the same time then that will still be handled. @@ -883,6 +889,12 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs, static __always_inline void exc_debug_user(struct pt_regs *regs, unsigned long dr6) { + /* + * If something gets miswired and we end up here for a kernel mode + * #DB, we will malfunction. + */ + WARN_ON_ONCE(!user_mode(regs)); + idtentry_enter_user(regs); instrumentation_begin(); diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index acc49fa6a097..0d68948c82ad 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -598,6 +598,26 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum, } #ifdef CONFIG_X86_64 +void noist_exc_debug(struct pt_regs *regs); + +DEFINE_IDTENTRY_RAW(xenpv_exc_nmi) +{ + /* On Xen PV, NMI doesn't use IST. The C part is the sane as native. */ + exc_nmi(regs); +} + +DEFINE_IDTENTRY_RAW(xenpv_exc_debug) +{ + /* + * There's no IST on Xen PV, but we still need to dispatch + * to the correct handler. + */ + if (user_mode(regs)) + noist_exc_debug(regs); + else + exc_debug(regs); +} + struct trap_array_entry { void (*orig)(void); void (*xen)(void); @@ -609,18 +629,18 @@ struct trap_array_entry { .xen = xen_asm_##func, \ .ist_okay = ist_ok } -#define TRAP_ENTRY_REDIR(func, xenfunc, ist_ok) { \ +#define TRAP_ENTRY_REDIR(func, ist_ok) { \ .orig = asm_##func, \ - .xen = xen_asm_##xenfunc, \ + .xen = xen_asm_xenpv_##func, \ .ist_okay = ist_ok } static struct trap_array_entry trap_array[] = { - TRAP_ENTRY_REDIR(exc_debug, exc_xendebug, true ), + TRAP_ENTRY_REDIR(exc_debug, true ), TRAP_ENTRY(exc_double_fault, true ), #ifdef CONFIG_X86_MCE TRAP_ENTRY(exc_machine_check, true ), #endif - TRAP_ENTRY_REDIR(exc_nmi, exc_xennmi, true ), + TRAP_ENTRY_REDIR(exc_nmi, true ), TRAP_ENTRY(exc_int3, false ), TRAP_ENTRY(exc_overflow, false ), #ifdef CONFIG_IA32_EMULATION diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index e1e1c7eafa60..aab1d99b2b48 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -29,10 +29,9 @@ _ASM_NOKPROBE(xen_\name) .endm xen_pv_trap asm_exc_divide_error -xen_pv_trap asm_exc_debug -xen_pv_trap asm_exc_xendebug +xen_pv_trap asm_xenpv_exc_debug xen_pv_trap asm_exc_int3 -xen_pv_trap asm_exc_xennmi +xen_pv_trap asm_xenpv_exc_nmi xen_pv_trap asm_exc_overflow xen_pv_trap asm_exc_bounds xen_pv_trap asm_exc_invalid_op -- cgit v1.2.3 From 13cbc0cd4a30c815984ad88e3a2e5976493516a3 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 3 Jul 2020 10:02:56 -0700 Subject: x86/entry/32: Fix #MC and #DB wiring on x86_32 DEFINE_IDTENTRY_MCE and DEFINE_IDTENTRY_DEBUG were wired up as non-RAW on x86_32, but the code expected them to be RAW. Get rid of all the macro indirection for them on 32-bit and just use DECLARE_IDTENTRY_RAW and DEFINE_IDTENTRY_RAW directly. Also add a warning to make sure that we only hit the _kernel paths in kernel mode. Reported-by: Naresh Kamboju Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/9e90a7ee8e72fd757db6d92e1e5ff16339c1ecf9.1593795633.git.luto@kernel.org --- arch/x86/include/asm/idtentry.h | 23 +++++++++++++---------- arch/x86/kernel/cpu/mce/core.c | 4 +++- arch/x86/kernel/traps.c | 2 +- 3 files changed, 17 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 94333ac3092b..eeac6dc2adaa 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -353,10 +353,6 @@ static __always_inline void __##func(struct pt_regs *regs) #else /* CONFIG_X86_64 */ -/* Maps to a regular IDTENTRY on 32bit for now */ -# define DECLARE_IDTENTRY_IST DECLARE_IDTENTRY -# define DEFINE_IDTENTRY_IST DEFINE_IDTENTRY - /** * DECLARE_IDTENTRY_DF - Declare functions for double fault 32bit variant * @vector: Vector number (ignored for C) @@ -387,16 +383,18 @@ __visible noinstr void func(struct pt_regs *regs, \ #endif /* !CONFIG_X86_64 */ /* C-Code mapping */ +#define DECLARE_IDTENTRY_NMI DECLARE_IDTENTRY_RAW +#define DEFINE_IDTENTRY_NMI DEFINE_IDTENTRY_RAW + +#ifdef CONFIG_X86_64 #define DECLARE_IDTENTRY_MCE DECLARE_IDTENTRY_IST #define DEFINE_IDTENTRY_MCE DEFINE_IDTENTRY_IST #define DEFINE_IDTENTRY_MCE_USER DEFINE_IDTENTRY_NOIST -#define DECLARE_IDTENTRY_NMI DECLARE_IDTENTRY_RAW -#define DEFINE_IDTENTRY_NMI DEFINE_IDTENTRY_RAW - #define DECLARE_IDTENTRY_DEBUG DECLARE_IDTENTRY_IST #define DEFINE_IDTENTRY_DEBUG DEFINE_IDTENTRY_IST #define DEFINE_IDTENTRY_DEBUG_USER DEFINE_IDTENTRY_NOIST +#endif #else /* !__ASSEMBLY__ */ @@ -443,9 +441,6 @@ __visible noinstr void func(struct pt_regs *regs, \ # define DECLARE_IDTENTRY_MCE(vector, func) \ DECLARE_IDTENTRY(vector, func) -# define DECLARE_IDTENTRY_DEBUG(vector, func) \ - DECLARE_IDTENTRY(vector, func) - /* No ASM emitted for DF as this goes through a C shim */ # define DECLARE_IDTENTRY_DF(vector, func) @@ -549,7 +544,11 @@ DECLARE_IDTENTRY_RAW(X86_TRAP_BP, exc_int3); DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_PF, exc_page_fault); #ifdef CONFIG_X86_MCE +#ifdef CONFIG_X86_64 DECLARE_IDTENTRY_MCE(X86_TRAP_MC, exc_machine_check); +#else +DECLARE_IDTENTRY_RAW(X86_TRAP_MC, exc_machine_check); +#endif #endif /* NMI */ @@ -559,7 +558,11 @@ DECLARE_IDTENTRY_RAW(X86_TRAP_NMI, xenpv_exc_nmi); #endif /* #DB */ +#ifdef CONFIG_X86_64 DECLARE_IDTENTRY_DEBUG(X86_TRAP_DB, exc_debug); +#else +DECLARE_IDTENTRY_RAW(X86_TRAP_DB, exc_debug); +#endif #ifdef CONFIG_XEN_PV DECLARE_IDTENTRY_RAW(X86_TRAP_DB, xenpv_exc_debug); #endif diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index ce9120c4f740..a6a90b5d7c83 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1901,6 +1901,8 @@ void (*machine_check_vector)(struct pt_regs *) = unexpected_machine_check; static __always_inline void exc_machine_check_kernel(struct pt_regs *regs) { + WARN_ON_ONCE(user_mode(regs)); + /* * Only required when from kernel mode. See * mce_check_crashing_cpu() for details. @@ -1954,7 +1956,7 @@ DEFINE_IDTENTRY_MCE_USER(exc_machine_check) } #else /* 32bit unified entry point */ -DEFINE_IDTENTRY_MCE(exc_machine_check) +DEFINE_IDTENTRY_RAW(exc_machine_check) { unsigned long dr7; diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index c17f9b57171f..6ed8cc5fbe8f 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -925,7 +925,7 @@ DEFINE_IDTENTRY_DEBUG_USER(exc_debug) } #else /* 32 bit does not have separate entry points. */ -DEFINE_IDTENTRY_DEBUG(exc_debug) +DEFINE_IDTENTRY_RAW(exc_debug) { unsigned long dr6, dr7; -- cgit v1.2.3 From cc801833a171163edb6385425349ba8903bd1b20 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 3 Jul 2020 10:02:57 -0700 Subject: x86/ldt: Disable 16-bit segments on Xen PV Xen PV doesn't implement ESPFIX64, so they don't work right. Disable them. Also print a warning the first time anyone tries to use a 16-bit segment on a Xen PV guest that would otherwise allow it to help people diagnose this change in behavior. This gets us closer to having all x86 selftests pass on Xen PV. Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/92b2975459dfe5929ecf34c3896ad920bd9e3f2d.1593795633.git.luto@kernel.org --- arch/x86/kernel/ldt.c | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 8748321c4486..34e918ad34d4 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -29,6 +29,8 @@ #include #include +#include + /* This is a multiple of PAGE_SIZE. */ #define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE) @@ -543,6 +545,37 @@ static int read_default_ldt(void __user *ptr, unsigned long bytecount) return bytecount; } +static bool allow_16bit_segments(void) +{ + if (!IS_ENABLED(CONFIG_X86_16BIT)) + return false; + +#ifdef CONFIG_XEN_PV + /* + * Xen PV does not implement ESPFIX64, which means that 16-bit + * segments will not work correctly. Until either Xen PV implements + * ESPFIX64 and can signal this fact to the guest or unless someone + * provides compelling evidence that allowing broken 16-bit segments + * is worthwhile, disallow 16-bit segments under Xen PV. + */ + if (xen_pv_domain()) { + static DEFINE_MUTEX(xen_warning); + static bool warned; + + mutex_lock(&xen_warning); + if (!warned) { + pr_info("Warning: 16-bit segments do not work correctly in a Xen PV guest\n"); + warned = true; + } + mutex_unlock(&xen_warning); + + return false; + } +#endif + + return true; +} + static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) { struct mm_struct *mm = current->mm; @@ -574,7 +607,7 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) /* The user wants to clear the entry. */ memset(&ldt, 0, sizeof(ldt)); } else { - if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) { + if (!ldt_info.seg_32bit && !allow_16bit_segments()) { error = -EINVAL; goto out; } -- cgit v1.2.3 From fcec538ef8cca0ad0b84432235dccd9059c8e6f8 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Fri, 3 Jul 2020 00:53:34 +0200 Subject: MIPS: Add missing EHB in mtc0 -> mfc0 sequence for DSPen This resolves the hazard between the mtc0 in the change_c0_status() and the mfc0 in configure_exception_vector(). Without resolving this hazard configure_exception_vector() could read an old value and would restore this old value again. This would revert the changes change_c0_status() did. I checked this by printing out the read_c0_status() at the end of per_cpu_trap_init() and the ST0_MX is not set without this patch. The hazard is documented in the MIPS Architecture Reference Manual Vol. III: MIPS32/microMIPS32 Privileged Resource Architecture (MD00088), rev 6.03 table 8.1 which includes: Producer | Consumer | Hazard ----------|----------|---------------------------- mtc0 | mfc0 | any coprocessor 0 register I saw this hazard on an Atheros AR9344 rev 2 SoC with a MIPS 74Kc CPU. There the change_c0_status() function would activate the DSPen by setting ST0_MX in the c0_status register. This was reverted and then the system got a DSP exception when the DSP registers were saved in save_dsp() in the first process switch. The crash looks like this: [ 0.089999] Mount-cache hash table entries: 1024 (order: 0, 4096 bytes, linear) [ 0.097796] Mountpoint-cache hash table entries: 1024 (order: 0, 4096 bytes, linear) [ 0.107070] Kernel panic - not syncing: Unexpected DSP exception [ 0.113470] Rebooting in 1 seconds.. We saw this problem in OpenWrt only on the MIPS 74Kc based Atheros SoCs, not on the 24Kc based SoCs. We only saw it with kernel 5.4 not with kernel 4.19, in addition we had to use GCC 8.4 or 9.X, with GCC 8.3 it did not happen. In the kernel I bisected this problem to commit 9012d011660e ("compiler: allow all arches to enable CONFIG_OPTIMIZE_INLINING"), but when this was reverted it also happened after commit 172dcd935c34b ("MIPS: Always allocate exception vector for MIPSr2+"). Commit 0b24cae4d535 ("MIPS: Add missing EHB in mtc0 -> mfc0 sequence.") does similar changes to a different file. I am not sure if there are more places affected by this problem. Signed-off-by: Hauke Mehrtens Cc: Signed-off-by: Thomas Bogendoerfer --- arch/mips/kernel/traps.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 7c32c956156a..1234ea21dd8f 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -2169,6 +2169,7 @@ static void configure_status(void) change_c0_status(ST0_CU|ST0_MX|ST0_RE|ST0_FR|ST0_BEV|ST0_TS|ST0_KX|ST0_SX|ST0_UX, status_set); + back_to_back_c0_hazard(); } unsigned int hwrena; -- cgit v1.2.3 From 5868347a192afb99b189d72946ab6a321b6115ac Mon Sep 17 00:00:00 2001 From: Xingxing Su Date: Fri, 3 Jul 2020 12:11:58 +0800 Subject: MIPS: Do not use smp_processor_id() in preemptible code Use preempt_disable() to fix the following bug under CONFIG_DEBUG_PREEMPT. [ 21.915305] BUG: using smp_processor_id() in preemptible [00000000] code: qemu-system-mip/1056 [ 21.923996] caller is do_ri+0x1d4/0x690 [ 21.927921] CPU: 0 PID: 1056 Comm: qemu-system-mip Not tainted 5.8.0-rc2 #3 [ 21.934913] Stack : 0000000000000001 ffffffff81370000 ffffffff8071cd60 a80f926d5ac95694 [ 21.942984] a80f926d5ac95694 0000000000000000 98000007f0043c88 ffffffff80f2fe40 [ 21.951054] 0000000000000000 0000000000000000 0000000000000001 0000000000000000 [ 21.959123] ffffffff802d60cc 98000007f0043dd8 ffffffff81f4b1e8 ffffffff81f60000 [ 21.967192] ffffffff81f60000 ffffffff80fe0000 ffff000000000000 0000000000000000 [ 21.975261] fffffffff500cce1 0000000000000001 0000000000000002 0000000000000000 [ 21.983331] ffffffff80fe1a40 0000000000000006 ffffffff8077f940 0000000000000000 [ 21.991401] ffffffff81460000 98000007f0040000 98000007f0043c80 000000fffba8cf20 [ 21.999471] ffffffff8071cd60 0000000000000000 0000000000000000 0000000000000000 [ 22.007541] 0000000000000000 0000000000000000 ffffffff80212ab4 a80f926d5ac95694 [ 22.015610] ... [ 22.018086] Call Trace: [ 22.020562] [] show_stack+0xa4/0x138 [ 22.025732] [] dump_stack+0xf0/0x150 [ 22.030903] [] check_preemption_disabled+0xf4/0x100 [ 22.037375] [] do_ri+0x1d4/0x690 [ 22.042198] [] handle_ri_int+0x44/0x5c [ 24.359386] BUG: using smp_processor_id() in preemptible [00000000] code: qemu-system-mip/1072 [ 24.368204] caller is do_ri+0x1a8/0x690 [ 24.372169] CPU: 4 PID: 1072 Comm: qemu-system-mip Not tainted 5.8.0-rc2 #3 [ 24.379170] Stack : 0000000000000001 ffffffff81370000 ffffffff8071cd60 a80f926d5ac95694 [ 24.387246] a80f926d5ac95694 0000000000000000 98001007ef06bc88 ffffffff80f2fe40 [ 24.395318] 0000000000000000 0000000000000000 0000000000000001 0000000000000000 [ 24.403389] ffffffff802d60cc 98001007ef06bdd8 ffffffff81f4b818 ffffffff81f60000 [ 24.411461] ffffffff81f60000 ffffffff80fe0000 ffff000000000000 0000000000000000 [ 24.419533] fffffffff500cce1 0000000000000001 0000000000000002 0000000000000000 [ 24.427603] ffffffff80fe0000 0000000000000006 ffffffff8077f940 0000000000000020 [ 24.435673] ffffffff81460020 98001007ef068000 98001007ef06bc80 000000fffbbbb370 [ 24.443745] ffffffff8071cd60 0000000000000000 0000000000000000 0000000000000000 [ 24.451816] 0000000000000000 0000000000000000 ffffffff80212ab4 a80f926d5ac95694 [ 24.459887] ... [ 24.462367] Call Trace: [ 24.464846] [] show_stack+0xa4/0x138 [ 24.470029] [] dump_stack+0xf0/0x150 [ 24.475208] [] check_preemption_disabled+0xf4/0x100 [ 24.481682] [] do_ri+0x1a8/0x690 [ 24.486509] [] handle_ri_int+0x44/0x5c Signed-off-by: Xingxing Su Signed-off-by: Thomas Bogendoerfer --- arch/mips/kernel/traps.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 1234ea21dd8f..f655af68176c 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -723,12 +723,14 @@ static int simulate_loongson3_cpucfg(struct pt_regs *regs, perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0); /* Do not emulate on unsupported core models. */ - if (!loongson3_cpucfg_emulation_enabled(¤t_cpu_data)) + preempt_disable(); + if (!loongson3_cpucfg_emulation_enabled(¤t_cpu_data)) { + preempt_enable(); return -1; - + } regs->regs[rd] = loongson3_cpucfg_read_synthesized( ¤t_cpu_data, sel); - + preempt_enable(); return 0; } -- cgit v1.2.3 From a4c0e91d1d65bc58f928b80ed824e10e165da22c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Jul 2020 21:33:11 +0200 Subject: x86/entry/32: Fix XEN_PV build dependency xenpv_exc_nmi() and xenpv_exc_debug() are only defined on 64-bit kernels, but they snuck into the 32-bit build via , causing the link to fail: ld: arch/x86/entry/entry_32.o: in function `asm_xenpv_exc_nmi': (.entry.text+0x817): undefined reference to `xenpv_exc_nmi' ld: arch/x86/entry/entry_32.o: in function `asm_xenpv_exc_debug': (.entry.text+0x827): undefined reference to `xenpv_exc_debug' Only use them on 64-bit kernels. Fixes: f41f0824224e: ("x86/entry/xen: Route #DB correctly on Xen PV") Cc: Andy Lutomirski Cc: Thomas Gleixner Cc: Peter Zijlstra (Intel) Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/idtentry.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index eeac6dc2adaa..f3d70830bf2a 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -553,7 +553,7 @@ DECLARE_IDTENTRY_RAW(X86_TRAP_MC, exc_machine_check); /* NMI */ DECLARE_IDTENTRY_NMI(X86_TRAP_NMI, exc_nmi); -#ifdef CONFIG_XEN_PV +#if defined(CONFIG_XEN_PV) && defined(CONFIG_X86_64) DECLARE_IDTENTRY_RAW(X86_TRAP_NMI, xenpv_exc_nmi); #endif @@ -563,7 +563,7 @@ DECLARE_IDTENTRY_DEBUG(X86_TRAP_DB, exc_debug); #else DECLARE_IDTENTRY_RAW(X86_TRAP_DB, exc_debug); #endif -#ifdef CONFIG_XEN_PV +#if defined(CONFIG_XEN_PV) && defined(CONFIG_X86_64) DECLARE_IDTENTRY_RAW(X86_TRAP_DB, xenpv_exc_debug); #endif -- cgit v1.2.3 From bb5a93aaf25261321db0c499cde7da6ee9d8b164 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 5 Jul 2020 12:50:20 -0700 Subject: x86/ldt: use "pr_info_once()" instead of open-coding it badly Using a mutex for "print this warning only once" is so overdesigned as to be actively offensive to my sensitive stomach. Just use "pr_info_once()" that already does this, although in a (harmlessly) racy manner that can in theory cause the message to be printed twice if more than one CPU races on that "is this the first time" test. [ If somebody really cares about that harmless data race (which sounds very unlikely indeed), that person can trivially fix printk_once() by using a simple atomic access, preferably with an optimistic non-atomic test first before even bothering to treat the pointless "make sure it is _really_ just once" case. A mutex is most definitely never the right primitive to use for something like this. ] Yes, this is a small and meaningless detail in a code path that hardly matters. But let's keep some code quality standards here, and not accept outrageously bad code. Link: https://lore.kernel.org/lkml/CAHk-=wgV9toS7GU3KmNpj8hCS9SeF+A0voHS8F275_mgLhL4Lw@mail.gmail.com/ Cc: Andy Lutomirski Cc: Thomas Gleixner Cc: Peter Zijlstra (Intel) Signed-off-by: Linus Torvalds --- arch/x86/kernel/ldt.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 34e918ad34d4..b8aee71840ae 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -559,16 +559,7 @@ static bool allow_16bit_segments(void) * is worthwhile, disallow 16-bit segments under Xen PV. */ if (xen_pv_domain()) { - static DEFINE_MUTEX(xen_warning); - static bool warned; - - mutex_lock(&xen_warning); - if (!warned) { - pr_info("Warning: 16-bit segments do not work correctly in a Xen PV guest\n"); - warned = true; - } - mutex_unlock(&xen_warning); - + pr_info_once("Warning: 16-bit segments do not work correctly in a Xen PV guest\n"); return false; } #endif -- cgit v1.2.3 From 146f76cc84b787c4eec6ed73ebeec708a06e4ae4 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 4 Jul 2020 13:30:55 +0100 Subject: KVM: arm64: PMU: Fix per-CPU access in preemptible context Commit 07da1ffaa137 ("KVM: arm64: Remove host_cpu_context member from vcpu structure") has, by removing the host CPU context pointer, exposed that kvm_vcpu_pmu_restore_guest is called in preemptible contexts: [ 266.932442] BUG: using smp_processor_id() in preemptible [00000000] code: qemu-system-aar/779 [ 266.939721] caller is debug_smp_processor_id+0x20/0x30 [ 266.944157] CPU: 2 PID: 779 Comm: qemu-system-aar Tainted: G E 5.8.0-rc3-00015-g8d4aa58b2fe3 #1374 [ 266.954268] Hardware name: amlogic w400/w400, BIOS 2020.04 05/22/2020 [ 266.960640] Call trace: [ 266.963064] dump_backtrace+0x0/0x1e0 [ 266.966679] show_stack+0x20/0x30 [ 266.969959] dump_stack+0xe4/0x154 [ 266.973338] check_preemption_disabled+0xf8/0x108 [ 266.977978] debug_smp_processor_id+0x20/0x30 [ 266.982307] kvm_vcpu_pmu_restore_guest+0x2c/0x68 [ 266.986949] access_pmcr+0xf8/0x128 [ 266.990399] perform_access+0x8c/0x250 [ 266.994108] kvm_handle_sys_reg+0x10c/0x2f8 [ 266.998247] handle_exit+0x78/0x200 [ 267.001697] kvm_arch_vcpu_ioctl_run+0x2ac/0xab8 Note that the bug was always there, it is only the switch to using percpu accessors that made it obvious. The fix is to wrap these accesses in a preempt-disabled section, so that we sample a coherent context on trap from the guest. Fixes: 435e53fb5e21 ("arm64: KVM: Enable VHE support for :G/:H perf event modifiers") Cc:: Andrew Murray Signed-off-by: Marc Zyngier --- arch/arm64/kvm/pmu.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kvm/pmu.c b/arch/arm64/kvm/pmu.c index b5ae3a5d509e..3c224162b3dd 100644 --- a/arch/arm64/kvm/pmu.c +++ b/arch/arm64/kvm/pmu.c @@ -159,7 +159,10 @@ static void kvm_vcpu_pmu_disable_el0(unsigned long events) } /* - * On VHE ensure that only guest events have EL0 counting enabled + * On VHE ensure that only guest events have EL0 counting enabled. + * This is called from both vcpu_{load,put} and the sysreg handling. + * Since the latter is preemptible, special care must be taken to + * disable preemption. */ void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu) { @@ -169,12 +172,14 @@ void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu) if (!has_vhe()) return; + preempt_disable(); host = this_cpu_ptr(&kvm_host_data); events_guest = host->pmu_events.events_guest; events_host = host->pmu_events.events_host; kvm_vcpu_pmu_enable_el0(events_guest); kvm_vcpu_pmu_disable_el0(events_host); + preempt_enable(); } /* -- cgit v1.2.3 From b9e10d4a6c9f5cbe6369ce2c17ebc67d2e5a4be5 Mon Sep 17 00:00:00 2001 From: Andrew Scull Date: Mon, 6 Jul 2020 10:52:59 +0100 Subject: KVM: arm64: Stop clobbering x0 for HVC_SOFT_RESTART HVC_SOFT_RESTART is given values for x0-2 that it should installed before exiting to the new address so should not set x0 to stub HVC success or failure code. Fixes: af42f20480bf1 ("arm64: hyp-stub: Zero x0 on successful stub handling") Cc: stable@vger.kernel.org Signed-off-by: Andrew Scull Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200706095259.1338221-1-ascull@google.com --- arch/arm64/kvm/hyp-init.S | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S index 6e6ed5581eed..e76c0e89d48e 100644 --- a/arch/arm64/kvm/hyp-init.S +++ b/arch/arm64/kvm/hyp-init.S @@ -136,11 +136,15 @@ SYM_CODE_START(__kvm_handle_stub_hvc) 1: cmp x0, #HVC_RESET_VECTORS b.ne 1f -reset: + /* - * Reset kvm back to the hyp stub. Do not clobber x0-x4 in - * case we coming via HVC_SOFT_RESTART. + * Set the HVC_RESET_VECTORS return code before entering the common + * path so that we do not clobber x0-x2 in case we are coming via + * HVC_SOFT_RESTART. */ + mov x0, xzr +reset: + /* Reset kvm back to the hyp stub. */ mrs x5, sctlr_el2 mov_q x6, SCTLR_ELx_FLAGS bic x5, x5, x6 // Clear SCTL_M and etc @@ -151,7 +155,6 @@ reset: /* Install stub vectors */ adr_l x5, __hyp_stub_vectors msr vbar_el2, x5 - mov x0, xzr eret 1: /* Bad stub call */ -- cgit v1.2.3 From ee769ebbe9e5fc7219e979fb7c5ed5bb5722649e Mon Sep 17 00:00:00 2001 From: Xu Wang Date: Wed, 8 Jul 2020 06:20:23 +0000 Subject: xtensa: simplify xtensa_pmu_irq_handler Use for_each_set_bit() instead of open-coding it to simplify the code. Signed-off-by: Xu Wang Message-Id: <20200708062023.7986-1-vulab@iscas.ac.cn> Signed-off-by: Max Filippov --- arch/xtensa/kernel/perf_event.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/xtensa/kernel/perf_event.c b/arch/xtensa/kernel/perf_event.c index 9bae79f70301..99fcd63ce597 100644 --- a/arch/xtensa/kernel/perf_event.c +++ b/arch/xtensa/kernel/perf_event.c @@ -362,9 +362,7 @@ irqreturn_t xtensa_pmu_irq_handler(int irq, void *dev_id) struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events); unsigned i; - for (i = find_first_bit(ev->used_mask, XCHAL_NUM_PERF_COUNTERS); - i < XCHAL_NUM_PERF_COUNTERS; - i = find_next_bit(ev->used_mask, XCHAL_NUM_PERF_COUNTERS, i + 1)) { + for_each_set_bit(i, ev->used_mask, XCHAL_NUM_PERF_COUNTERS) { uint32_t v = get_er(XTENSA_PMU_PMSTAT(i)); struct perf_event *event = ev->event[i]; struct hw_perf_event *hwc = &event->hw; -- cgit v1.2.3 From 4557ac6b344b8cdf948ff8b007e8e1de34832f2e Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 8 Jul 2020 17:49:42 +1000 Subject: powerpc/64s/exception: Fix 0x1500 interrupt handler crash A typo caused the interrupt handler to branch immediately to the common "unknown interrupt" handler and skip the special case test for denormal cause. This does not affect KVM softpatch handling (e.g., for POWER9 TM assist) because the KVM test was moved to common code by commit 9600f261acaa ("powerpc/64s/exception: Move KVM test to common code") just before this bug was introduced. Fixes: 3f7fbd97d07d ("powerpc/64s/exception: Clean up SRR specifiers") Reported-by: Paul Menzel Signed-off-by: Nicholas Piggin Tested-by: Paul Menzel [mpe: Split selftest into a separate patch] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200708074942.1713396-1-npiggin@gmail.com --- arch/powerpc/kernel/exceptions-64s.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index fa080694e581..0fc8bad878b2 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -2551,7 +2551,7 @@ EXC_VIRT_NONE(0x5400, 0x100) INT_DEFINE_BEGIN(denorm_exception) IVEC=0x1500 IHSRR=1 - IBRANCH_COMMON=0 + IBRANCH_TO_COMMON=0 IKVM_REAL=1 INT_DEFINE_END(denorm_exception) -- cgit v1.2.3 From 6ec4476ac82512f09c94aff5972654b70f3772b2 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 8 Jul 2020 10:48:35 -0700 Subject: Raise gcc version requirement to 4.9 I realize that we fairly recently raised it to 4.8, but the fact is, 4.9 is a much better minimum version to target. We have a number of workarounds for actual bugs in pre-4.9 gcc versions (including things like internal compiler errors on ARM), but we also have some syntactic workarounds for lacking features. In particular, raising the minimum to 4.9 means that we can now just assume _Generic() exists, which is likely the much better replacement for a lot of very convoluted built-time magic with conditionals on sizeof and/or __builtin_choose_expr() with same_type() etc. Using _Generic also means that you will need to have a very recent version of 'sparse', but thats easy to build yourself, and much less of a hassle than some old gcc version can be. The latest (in a long string) of reasons for minimum compiler version upgrades was commit 5435f73d5c4a ("efi/x86: Fix build with gcc 4"). Ard points out that RHEL 7 uses gcc-4.8, but the people who stay back on old RHEL versions persumably also don't build their own kernels anyway. And maybe they should cross-built or just have a little side affair with a newer compiler? Acked-by: Ard Biesheuvel Acked-by: Peter Zijlstra Signed-off-by: Linus Torvalds --- arch/arm/kernel/asm-offsets.c | 9 --------- arch/mips/include/asm/unroll.h | 7 +++---- include/linux/bits.h | 3 +-- include/linux/compiler-gcc.h | 2 +- include/linux/compiler_types.h | 27 +-------------------------- mm/migrate.c | 13 +------------ tools/include/linux/bits.h | 3 +-- 7 files changed, 8 insertions(+), 56 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index c036a4a2f8e2..a1570c8bab25 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -31,15 +31,6 @@ #if defined(__APCS_26__) #error Sorry, your compiler targets APCS-26 but this kernel requires APCS-32 #endif -/* - * GCC 4.8.0-4.8.2: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58854 - * miscompiles find_get_entry(), and can result in EXT3 and EXT4 - * filesystem corruption (possibly other FS too). - */ -#if defined(GCC_VERSION) && GCC_VERSION >= 40800 && GCC_VERSION < 40803 -#error Your compiler is too buggy; it is known to miscompile kernels -#error and result in filesystem corruption and oopses. -#endif int main(void) { diff --git a/arch/mips/include/asm/unroll.h b/arch/mips/include/asm/unroll.h index c628747d4ecd..8ed660adc84f 100644 --- a/arch/mips/include/asm/unroll.h +++ b/arch/mips/include/asm/unroll.h @@ -19,14 +19,13 @@ \ /* \ * We can't unroll if the number of iterations isn't \ - * compile-time constant. Unfortunately GCC versions \ - * up until 4.6 tend to miss obvious constants & cause \ + * compile-time constant. Unfortunately clang versions \ + * up until 8.0 tend to miss obvious constants & cause \ * this check to fail, even though they go on to \ * generate reasonable code for the switch statement, \ * so we skip the sanity check for those compilers. \ */ \ - BUILD_BUG_ON((CONFIG_GCC_VERSION >= 40700 || \ - CONFIG_CLANG_VERSION >= 80000) && \ + BUILD_BUG_ON((CONFIG_CLANG_VERSION >= 80000) && \ !__builtin_constant_p(times)); \ \ switch (times) { \ diff --git a/include/linux/bits.h b/include/linux/bits.h index 4671fbf28842..7f475d59a097 100644 --- a/include/linux/bits.h +++ b/include/linux/bits.h @@ -18,8 +18,7 @@ * position @h. For example * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000. */ -#if !defined(__ASSEMBLY__) && \ - (!defined(CONFIG_CC_IS_GCC) || CONFIG_GCC_VERSION >= 49000) +#if !defined(__ASSEMBLY__) #include #define GENMASK_INPUT_CHECK(h, l) \ (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \ diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 1c74464c80c6..0b1dc61f3955 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -11,7 +11,7 @@ + __GNUC_PATCHLEVEL__) /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145 */ -#if GCC_VERSION < 40800 +#if GCC_VERSION < 40900 # error Sorry, your compiler is too old - please upgrade it. #endif diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index c3bf7710f69a..01dd58c74d80 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -252,32 +252,8 @@ struct ftrace_likely_data { * __unqual_scalar_typeof(x) - Declare an unqualified scalar type, leaving * non-scalar types unchanged. */ -#if (defined(CONFIG_CC_IS_GCC) && CONFIG_GCC_VERSION < 40900) || defined(__CHECKER__) /* - * We build this out of a couple of helper macros in a vain attempt to - * help you keep your lunch down while reading it. - */ -#define __pick_scalar_type(x, type, otherwise) \ - __builtin_choose_expr(__same_type(x, type), (type)0, otherwise) - -/* - * 'char' is not type-compatible with either 'signed char' or 'unsigned char', - * so we include the naked type here as well as the signed/unsigned variants. - */ -#define __pick_integer_type(x, type, otherwise) \ - __pick_scalar_type(x, type, \ - __pick_scalar_type(x, unsigned type, \ - __pick_scalar_type(x, signed type, otherwise))) - -#define __unqual_scalar_typeof(x) typeof( \ - __pick_integer_type(x, char, \ - __pick_integer_type(x, short, \ - __pick_integer_type(x, int, \ - __pick_integer_type(x, long, \ - __pick_integer_type(x, long long, x)))))) -#else -/* - * If supported, prefer C11 _Generic for better compile-times. As above, 'char' + * Prefer C11 _Generic for better compile-times and simpler code. Note: 'char' * is not type-compatible with 'signed char', and we define a separate case. */ #define __scalar_type_to_expr_cases(type) \ @@ -293,7 +269,6 @@ struct ftrace_likely_data { __scalar_type_to_expr_cases(long), \ __scalar_type_to_expr_cases(long long), \ default: (x))) -#endif /* Is this type a native word size -- useful for atomic operations */ #define __native_word(t) \ diff --git a/mm/migrate.c b/mm/migrate.c index f37729673558..40cd7016ae6f 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1160,22 +1160,11 @@ out: return rc; } -/* - * gcc 4.7 and 4.8 on arm get an ICEs when inlining unmap_and_move(). Work - * around it. - */ -#if defined(CONFIG_ARM) && \ - defined(GCC_VERSION) && GCC_VERSION < 40900 && GCC_VERSION >= 40700 -#define ICE_noinline noinline -#else -#define ICE_noinline -#endif - /* * Obtain the lock on page, remove all ptes and migrate the page * to the newly allocated page in newpage. */ -static ICE_noinline int unmap_and_move(new_page_t get_new_page, +static int unmap_and_move(new_page_t get_new_page, free_page_t put_new_page, unsigned long private, struct page *page, int force, enum migrate_mode mode, diff --git a/tools/include/linux/bits.h b/tools/include/linux/bits.h index 4671fbf28842..7f475d59a097 100644 --- a/tools/include/linux/bits.h +++ b/tools/include/linux/bits.h @@ -18,8 +18,7 @@ * position @h. For example * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000. */ -#if !defined(__ASSEMBLY__) && \ - (!defined(CONFIG_CC_IS_GCC) || CONFIG_GCC_VERSION >= 49000) +#if !defined(__ASSEMBLY__) #include #define GENMASK_INPUT_CHECK(h, l) \ (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \ -- cgit v1.2.3 From 68cf617309b5f6f3a651165f49f20af1494753ae Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 8 Jul 2020 17:25:46 +0100 Subject: KVM: arm64: Fix definition of PAGE_HYP_DEVICE PAGE_HYP_DEVICE is intended to encode attribute bits for an EL2 stage-1 pte mapping a device. Unfortunately, it includes PROT_DEVICE_nGnRE which encodes attributes for EL1 stage-1 mappings such as UXN and nG, which are RES0 for EL2, and DBM which is meaningless as TCR_EL2.HD is not set. Fix the definition of PAGE_HYP_DEVICE so that it doesn't set RES0 bits at EL2. Acked-by: Marc Zyngier Cc: Marc Zyngier Cc: Catalin Marinas Cc: James Morse Cc: Link: https://lore.kernel.org/r/20200708162546.26176-1-will@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable-prot.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 2e7e0f452301..4d867c6446c4 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -67,7 +67,7 @@ extern bool arm64_use_ng_mappings; #define PAGE_HYP __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_HYP_XN) #define PAGE_HYP_EXEC __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY) #define PAGE_HYP_RO __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY | PTE_HYP_XN) -#define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP) +#define PAGE_HYP_DEVICE __pgprot(_PROT_DEFAULT | PTE_ATTRINDX(MT_DEVICE_nGnRE) | PTE_HYP | PTE_HYP_XN) #define PAGE_S2_MEMATTR(attr) \ ({ \ -- cgit v1.2.3 From b8c1c9fe6a042dfbb169d14ab2000d9163f06d10 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Fri, 17 Apr 2020 18:32:11 +0800 Subject: arm64: entry: Fix the typo in the comment of el1_dbg() The function name should be local_daif_mask(). Signed-off-by: Kevin Hao Acked-by: Mark Rutlamd Link: https://lore.kernel.org/r/20200417103212.45812-2-haokexin@gmail.com Signed-off-by: Will Deacon --- arch/arm64/kernel/entry-common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 3dbdf9752b11..d3be9dbf5490 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -57,7 +57,7 @@ static void notrace el1_dbg(struct pt_regs *regs, unsigned long esr) /* * The CPU masked interrupts, and we are leaving them masked during * do_debug_exception(). Update PMR as if we had called - * local_mask_daif(). + * local_daif_mask(). */ if (system_uses_irq_prio_masking()) gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); -- cgit v1.2.3 From 97884ca8c2925d14c32188e865069f21378b4b4f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 6 Jul 2020 17:37:59 +0100 Subject: arm64: Introduce a way to disable the 32bit vdso We have a class of errata (grouped under the ARM64_WORKAROUND_1418040 banner) that force the trapping of counter access from 32bit EL0. We would normally disable the whole vdso for such defect, except that it would disable it for 64bit userspace as well, which is a shame. Instead, add a new vdso_clock_mode, which signals that the vdso isn't usable for compat tasks. This gets checked in the new vdso_clocksource_ok() helper, now provided for the 32bit vdso. Signed-off-by: Marc Zyngier Acked-by: Mark Rutland Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200706163802.1836732-2-maz@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/vdso/clocksource.h | 7 +++++-- arch/arm64/include/asm/vdso/compat_gettimeofday.h | 8 +++++++- 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/vdso/clocksource.h b/arch/arm64/include/asm/vdso/clocksource.h index df6ea65c1dec..b054d9febfb5 100644 --- a/arch/arm64/include/asm/vdso/clocksource.h +++ b/arch/arm64/include/asm/vdso/clocksource.h @@ -2,7 +2,10 @@ #ifndef __ASM_VDSOCLOCKSOURCE_H #define __ASM_VDSOCLOCKSOURCE_H -#define VDSO_ARCH_CLOCKMODES \ - VDSO_CLOCKMODE_ARCHTIMER +#define VDSO_ARCH_CLOCKMODES \ + /* vdso clocksource for both 32 and 64bit tasks */ \ + VDSO_CLOCKMODE_ARCHTIMER, \ + /* vdso clocksource for 64bit tasks only */ \ + VDSO_CLOCKMODE_ARCHTIMER_NOCOMPAT #endif diff --git a/arch/arm64/include/asm/vdso/compat_gettimeofday.h b/arch/arm64/include/asm/vdso/compat_gettimeofday.h index b6907ae78e53..9a625e8947ff 100644 --- a/arch/arm64/include/asm/vdso/compat_gettimeofday.h +++ b/arch/arm64/include/asm/vdso/compat_gettimeofday.h @@ -111,7 +111,7 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode) * update. Return something. Core will do another round and then * see the mode change and fallback to the syscall. */ - if (clock_mode == VDSO_CLOCKMODE_NONE) + if (clock_mode != VDSO_CLOCKMODE_ARCHTIMER) return 0; /* @@ -152,6 +152,12 @@ static __always_inline const struct vdso_data *__arch_get_vdso_data(void) return ret; } +static inline bool vdso_clocksource_ok(const struct vdso_data *vd) +{ + return vd->clock_mode == VDSO_CLOCKMODE_ARCHTIMER; +} +#define vdso_clocksource_ok vdso_clocksource_ok + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_VDSO_GETTIMEOFDAY_H */ -- cgit v1.2.3 From c1fbec4ac0d701f350a581941d35643d5a9cd184 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 6 Jul 2020 17:38:00 +0100 Subject: arm64: arch_timer: Allow an workaround descriptor to disable compat vdso As we are about to disable the vdso for compat tasks in some circumstances, let's allow a workaround descriptor to express exactly that. Signed-off-by: Marc Zyngier Acked-by: Mark Rutland Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200706163802.1836732-3-maz@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/arch_timer.h | 1 + drivers/clocksource/arm_arch_timer.c | 3 +++ 2 files changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index 7ae54d7d333a..9f0ec21d6327 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -58,6 +58,7 @@ struct arch_timer_erratum_workaround { u64 (*read_cntvct_el0)(void); int (*set_next_event_phys)(unsigned long, struct clock_event_device *); int (*set_next_event_virt)(unsigned long, struct clock_event_device *); + bool disable_compat_vdso; }; DECLARE_PER_CPU(const struct arch_timer_erratum_workaround *, diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index ecf7b7db2d05..a8e4fb429f52 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -566,6 +566,9 @@ void arch_timer_enable_workaround(const struct arch_timer_erratum_workaround *wa if (wa->read_cntvct_el0) { clocksource_counter.vdso_clock_mode = VDSO_CLOCKMODE_NONE; vdso_default = VDSO_CLOCKMODE_NONE; + } else if (wa->disable_compat_vdso && vdso_default != VDSO_CLOCKMODE_NONE) { + vdso_default = VDSO_CLOCKMODE_ARCHTIMER_NOCOMPAT; + clocksource_counter.vdso_clock_mode = vdso_default; } } -- cgit v1.2.3 From dc802f2bc0208f4abca420705a860c5175db4bee Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 6 Jul 2020 17:38:02 +0100 Subject: arm64: Rework ARM_ERRATUM_1414080 handling The current handling of erratum 1414080 has the side effect that cntkctl_el1 can get changed for both 32 and 64bit tasks. This isn't a problem so far, but if we ever need to mitigate another of these errata on the 64bit side, we'd better keep the messing with cntkctl_el1 local to 32bit tasks. For that, make sure that on entering the kernel from a 32bit tasks, userspace access to cntvct gets enabled, and disabled returning to userspace, while it never gets changed for 64bit tasks. Signed-off-by: Marc Zyngier Reviewed-by: Mark Rutland Link: https://lore.kernel.org/r/20200706163802.1836732-5-maz@kernel.org [will: removed branch instructions per Mark's review comments] Signed-off-by: Will Deacon --- arch/arm64/kernel/entry.S | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 5304d193c79d..9757a8d5fd94 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -167,6 +167,17 @@ alternative_cb_end stp x28, x29, [sp, #16 * 14] .if \el == 0 + .if \regsize == 32 + // If we're returning from a 32-bit task on a system affected by + // 1418040 then re-enable userspace access to the virtual counter. +#ifdef CONFIG_ARM64_ERRATUM_1418040 +alternative_if ARM64_WORKAROUND_1418040 + mrs x0, cntkctl_el1 + orr x0, x0, #2 // ARCH_TIMER_USR_VCT_ACCESS_EN + msr cntkctl_el1, x0 +alternative_else_nop_endif +#endif + .endif clear_gp_regs mrs x21, sp_el0 ldr_this_cpu tsk, __entry_task, x20 @@ -320,6 +331,14 @@ alternative_else_nop_endif tst x22, #PSR_MODE32_BIT // native task? b.eq 3f +#ifdef CONFIG_ARM64_ERRATUM_1418040 +alternative_if ARM64_WORKAROUND_1418040 + mrs x0, cntkctl_el1 + bic x0, x0, #2 // ARCH_TIMER_USR_VCT_ACCESS_EN + msr cntkctl_el1, x0 +alternative_else_nop_endif +#endif + #ifdef CONFIG_ARM64_ERRATUM_845719 alternative_if ARM64_WORKAROUND_845719 #ifdef CONFIG_PID_IN_CONTEXTIDR @@ -331,21 +350,6 @@ alternative_if ARM64_WORKAROUND_845719 alternative_else_nop_endif #endif 3: -#ifdef CONFIG_ARM64_ERRATUM_1418040 -alternative_if_not ARM64_WORKAROUND_1418040 - b 4f -alternative_else_nop_endif - /* - * if (x22.mode32 == cntkctl_el1.el0vcten) - * cntkctl_el1.el0vcten = ~cntkctl_el1.el0vcten - */ - mrs x1, cntkctl_el1 - eon x0, x1, x22, lsr #3 - tbz x0, #1, 4f - eor x1, x1, #2 // ARCH_TIMER_USR_VCT_ACCESS_EN - msr cntkctl_el1, x1 -4: -#endif scs_save tsk, x0 /* No kernel C function calls after this as user keys are set. */ -- cgit v1.2.3 From 8c3001b9252d8dbf72289d3590a723eea8cfe824 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 8 Jul 2020 22:10:01 +0100 Subject: arm64: entry: Tidy up block comments and label numbers Continually butchering our entry code with CPU errata workarounds has led to it looking a little scruffy. Consistently used /* */ comment style for multi-line block comments and ensure that small numeric labels use consecutive integers. No functional change, but the state of things was irritating. Signed-off-by: Will Deacon --- arch/arm64/kernel/entry.S | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 9757a8d5fd94..35de8ba60e3d 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -126,8 +126,10 @@ alternative_else_nop_endif add \dst, \dst, #(\sym - .entry.tramp.text) .endm - // This macro corrupts x0-x3. It is the caller's duty - // to save/restore them if required. + /* + * This macro corrupts x0-x3. It is the caller's duty to save/restore + * them if required. + */ .macro apply_ssbd, state, tmp1, tmp2 #ifdef CONFIG_ARM64_SSBD alternative_cb arm64_enable_wa2_handling @@ -168,8 +170,10 @@ alternative_cb_end .if \el == 0 .if \regsize == 32 - // If we're returning from a 32-bit task on a system affected by - // 1418040 then re-enable userspace access to the virtual counter. + /* + * If we're returning from a 32-bit task on a system affected by + * 1418040 then re-enable userspace access to the virtual counter. + */ #ifdef CONFIG_ARM64_ERRATUM_1418040 alternative_if ARM64_WORKAROUND_1418040 mrs x0, cntkctl_el1 @@ -183,8 +187,10 @@ alternative_else_nop_endif ldr_this_cpu tsk, __entry_task, x20 msr sp_el0, tsk - // Ensure MDSCR_EL1.SS is clear, since we can unmask debug exceptions - // when scheduling. + /* + * Ensure MDSCR_EL1.SS is clear, since we can unmask debug exceptions + * when scheduling. + */ ldr x19, [tsk, #TSK_TI_FLAGS] disable_step_tsk x19, x20 @@ -381,11 +387,11 @@ alternative_else_nop_endif .if \el == 0 alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 - bne 5f + bne 4f msr far_el1, x30 tramp_alias x30, tramp_exit_native br x30 -5: +4: tramp_alias x30, tramp_exit_compat br x30 #endif -- cgit v1.2.3 From 8523c006264df65aac7d77284cc69aac46a6f842 Mon Sep 17 00:00:00 2001 From: Wei Li Date: Sun, 10 May 2020 05:41:56 +0800 Subject: arm64: kgdb: Fix single-step exception handling oops After entering kdb due to breakpoint, when we execute 'ss' or 'go' (will delay installing breakpoints, do single-step first), it won't work correctly, and it will enter kdb due to oops. It's because the reason gotten in kdb_stub() is not as expected, and it seems that the ex_vector for single-step should be 0, like what arch powerpc/sh/parisc has implemented. Before the patch: Entering kdb (current=0xffff8000119e2dc0, pid 0) on processor 0 due to Keyboard Entry [0]kdb> bp printk Instruction(i) BP #0 at 0xffff8000101486cc (printk) is enabled addr at ffff8000101486cc, hardtype=0 installed=0 [0]kdb> g / # echo h > /proc/sysrq-trigger Entering kdb (current=0xffff0000fa878040, pid 266) on processor 3 due to Breakpoint @ 0xffff8000101486cc [3]kdb> ss Entering kdb (current=0xffff0000fa878040, pid 266) on processor 3 Oops: (null) due to oops @ 0xffff800010082ab8 CPU: 3 PID: 266 Comm: sh Not tainted 5.7.0-rc4-13839-gf0e5ad491718 #6 Hardware name: linux,dummy-virt (DT) pstate: 00000085 (nzcv daIf -PAN -UAO) pc : el1_irq+0x78/0x180 lr : __handle_sysrq+0x80/0x190 sp : ffff800015003bf0 x29: ffff800015003d20 x28: ffff0000fa878040 x27: 0000000000000000 x26: ffff80001126b1f0 x25: ffff800011b6a0d8 x24: 0000000000000000 x23: 0000000080200005 x22: ffff8000101486cc x21: ffff800015003d30 x20: 0000ffffffffffff x19: ffff8000119f2000 x18: 0000000000000000 x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000 x11: 0000000000000000 x10: 0000000000000000 x9 : 0000000000000000 x8 : ffff800015003e50 x7 : 0000000000000002 x6 : 00000000380b9990 x5 : ffff8000106e99e8 x4 : ffff0000fadd83c0 x3 : 0000ffffffffffff x2 : ffff800011b6a0d8 x1 : ffff800011b6a000 x0 : ffff80001130c9d8 Call trace: el1_irq+0x78/0x180 printk+0x0/0x84 write_sysrq_trigger+0xb0/0x118 proc_reg_write+0xb4/0xe0 __vfs_write+0x18/0x40 vfs_write+0xb0/0x1b8 ksys_write+0x64/0xf0 __arm64_sys_write+0x14/0x20 el0_svc_common.constprop.2+0xb0/0x168 do_el0_svc+0x20/0x98 el0_sync_handler+0xec/0x1a8 el0_sync+0x140/0x180 [3]kdb> After the patch: Entering kdb (current=0xffff8000119e2dc0, pid 0) on processor 0 due to Keyboard Entry [0]kdb> bp printk Instruction(i) BP #0 at 0xffff8000101486cc (printk) is enabled addr at ffff8000101486cc, hardtype=0 installed=0 [0]kdb> g / # echo h > /proc/sysrq-trigger Entering kdb (current=0xffff0000fa852bc0, pid 268) on processor 0 due to Breakpoint @ 0xffff8000101486cc [0]kdb> g Entering kdb (current=0xffff0000fa852bc0, pid 268) on processor 0 due to Breakpoint @ 0xffff8000101486cc [0]kdb> ss Entering kdb (current=0xffff0000fa852bc0, pid 268) on processor 0 due to SS trap @ 0xffff800010082ab8 [0]kdb> Fixes: 44679a4f142b ("arm64: KGDB: Add step debugging support") Signed-off-by: Wei Li Tested-by: Douglas Anderson Reviewed-by: Douglas Anderson Link: https://lore.kernel.org/r/20200509214159.19680-2-liwei391@huawei.com Signed-off-by: Will Deacon --- arch/arm64/kernel/kgdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c index 43119922341f..1a157ca33262 100644 --- a/arch/arm64/kernel/kgdb.c +++ b/arch/arm64/kernel/kgdb.c @@ -252,7 +252,7 @@ static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned int esr) if (!kgdb_single_step) return DBG_HOOK_ERROR; - kgdb_handle_exception(1, SIGTRAP, 0, regs); + kgdb_handle_exception(0, SIGTRAP, 0, regs); return DBG_HOOK_HANDLED; } NOKPROBE_SYMBOL(kgdb_step_brk_fn); -- cgit v1.2.3 From c1aac64ddc01112e137121a43645b96c3633c41b Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 4 Jun 2020 11:20:30 +0900 Subject: efi/libstub/arm64: link stub lib.a conditionally Since commit 799c43415442 ("kbuild: thin archives make default for all archs"), core-y is passed to the linker with --whole-archive. Hence, the whole of stub library is linked to vmlinux. Use libs-y so that lib.a is passed after --no-whole-archive for conditional linking. The unused drivers/firmware/efi/libstub/relocate.o will be dropped for ARCH=arm64. Signed-off-by: Masahiro Yamada Link: https://lore.kernel.org/r/20200604022031.164207-1-masahiroy@kernel.org Signed-off-by: Ard Biesheuvel --- arch/arm64/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 76359cfb328a..4621fb690d9c 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -143,7 +143,7 @@ export TEXT_OFFSET core-y += arch/arm64/ libs-y := arch/arm64/lib/ $(libs-y) -core-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a +libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a # Default target when executing plain make boot := arch/arm64/boot -- cgit v1.2.3 From 09c717c92b52df54918e12cbfe6a4658233fda69 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 8 Jul 2020 22:13:40 -0700 Subject: arm64: Add missing sentinel to erratum_1463225 When the erratum_1463225 array was introduced a sentinel at the end was missing thus causing a KASAN: global-out-of-bounds in is_affected_midr_range_list on arm64 error. Fixes: a9e821b89daa ("arm64: Add KRYO4XX gold CPU cores to erratum list 1463225 and 1418040") Signed-off-by: Florian Fainelli Reviewed-by: Sai Prakash Ranjan Link: https://lore.kernel.org/linux-arm-kernel/CA+G9fYs3EavpU89-rTQfqQ9GgxAMgMAk7jiiVrfP0yxj5s+Q6g@mail.gmail.com/ Link: https://lore.kernel.org/r/20200709051345.14544-1-f.fainelli@gmail.com Signed-off-by: Will Deacon --- arch/arm64/kernel/cpu_errata.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 8e302dc093d0..79728bfb5351 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -782,6 +782,7 @@ static const struct midr_range erratum_1463225[] = { MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 3, 1), /* Kryo4xx Gold (rcpe to rfpf) => (r0p0 to r3p1) */ MIDR_RANGE(MIDR_QCOM_KRYO_4XX_GOLD, 0xc, 0xe, 0xf, 0xf), + {}, }; #endif -- cgit v1.2.3 From bce9b042ec73e8662b8119d4ca47e7c78b20d0bf Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 8 Jul 2020 21:28:05 +0200 Subject: x86/traps: Disable interrupts in exc_aligment_check() exc_alignment_check() fails to disable interrupts before returning to the entry code. Fixes: ca4c6a9858c2 ("x86/traps: Make interrupt enable/disable symmetric in C code") Reported-by: syzbot+0889df9502bc0f112b31@syzkaller.appspotmail.com Signed-off-by: Thomas Gleixner Acked-by: Andy Lutomirski Link: https://lkml.kernel.org/r/20200708192934.076519438@linutronix.de --- arch/x86/kernel/traps.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 6ed8cc5fbe8f..4f3a509e5547 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -299,6 +299,8 @@ DEFINE_IDTENTRY_ERRORCODE(exc_alignment_check) do_trap(X86_TRAP_AC, SIGBUS, "alignment check", regs, error_code, BUS_ADRALN, NULL); + + local_irq_disable(); } #ifdef CONFIG_VMAP_STACK -- cgit v1.2.3 From 006e1ced516d2bfd9db63a32b5dba3c2abf43b04 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 8 Jul 2020 21:28:06 +0200 Subject: x86/entry: Mark check_user_regs() noinstr It's called from the non-instrumentable section. Fixes: c9c26150e61d ("x86/entry: Assert that syscalls are on the right stack") Signed-off-by: Thomas Gleixner Acked-by: Andy Lutomirski Link: https://lkml.kernel.org/r/20200708192934.191497962@linutronix.de --- arch/x86/entry/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index e83b3f14897c..ea7b515e3bc2 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -46,7 +46,7 @@ #include /* Check that the stack and regs on entry from user mode are sane. */ -static void check_user_regs(struct pt_regs *regs) +static noinstr void check_user_regs(struct pt_regs *regs) { if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) { /* -- cgit v1.2.3 From bd87e6f6610aa96fde01ee6653e162213f7ec836 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 8 Jul 2020 21:28:07 +0200 Subject: x86/entry/common: Make prepare_exit_to_usermode() static No users outside this file anymore. Signed-off-by: Thomas Gleixner Acked-by: Andy Lutomirski Link: https://lkml.kernel.org/r/20200708192934.301116609@linutronix.de --- arch/x86/entry/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index ea7b515e3bc2..f09288431f28 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -294,7 +294,7 @@ static void __prepare_exit_to_usermode(struct pt_regs *regs) #endif } -__visible noinstr void prepare_exit_to_usermode(struct pt_regs *regs) +static noinstr void prepare_exit_to_usermode(struct pt_regs *regs) { instrumentation_begin(); __prepare_exit_to_usermode(regs); -- cgit v1.2.3 From 528a9539348a0234375dfaa1ca5dbbb2f8f8e8d2 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Tue, 7 Jul 2020 15:38:54 +0200 Subject: s390/mm: fix huge pte soft dirty copying If the pmd is soft dirty we must mark the pte as soft dirty (and not dirty). This fixes some cases for guest migration with huge page backings. Cc: # 4.8 Fixes: bc29b7ac1d9f ("s390/mm: clean up pte/pmd encoding") Reviewed-by: Christian Borntraeger Reviewed-by: Gerald Schaefer Signed-off-by: Janosch Frank Signed-off-by: Heiko Carstens --- arch/s390/mm/hugetlbpage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 82df06d720e8..3b5a4d25ca9b 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -117,7 +117,7 @@ static inline pte_t __rste_to_pte(unsigned long rste) _PAGE_YOUNG); #ifdef CONFIG_MEM_SOFT_DIRTY pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_SOFT_DIRTY, - _PAGE_DIRTY); + _PAGE_SOFT_DIRTY); #endif pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_NOEXEC, _PAGE_NOEXEC); -- cgit v1.2.3 From 5679b28142193a62f6af93249c0477be9f0c669b Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 9 Jul 2020 15:59:53 +0300 Subject: arm64/alternatives: don't patch up internal branches Commit f7b93d42945c ("arm64/alternatives: use subsections for replacement sequences") moved the alternatives replacement sequences into subsections, in order to keep the as close as possible to the code that they replace. Unfortunately, this broke the logic in branch_insn_requires_update, which assumed that any branch into kernel executable code was a branch that required updating, which is no longer the case now that the code sequences that are patched in are in the same section as the patch site itself. So the only way to discriminate branches that require updating and ones that don't is to check whether the branch targets the replacement sequence itself, and so we can drop the call to kernel_text_address() entirely. Fixes: f7b93d42945c ("arm64/alternatives: use subsections for replacement sequences") Reported-by: Alexandru Elisei Signed-off-by: Ard Biesheuvel Tested-by: Alexandru Elisei Link: https://lore.kernel.org/r/20200709125953.30918-1-ardb@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/alternative.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index d1757ef1b1e7..73039949b5ce 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -43,20 +43,8 @@ bool alternative_is_applied(u16 cpufeature) */ static bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc) { - unsigned long replptr; - - if (kernel_text_address(pc)) - return true; - - replptr = (unsigned long)ALT_REPL_PTR(alt); - if (pc >= replptr && pc <= (replptr + alt->alt_len)) - return false; - - /* - * Branching into *another* alternate sequence is doomed, and - * we're not even trying to fix it up. - */ - BUG(); + unsigned long replptr = (unsigned long)ALT_REPL_PTR(alt); + return !(pc >= replptr && pc <= (replptr + alt->alt_len)); } #define align_down(x, a) ((unsigned long)(x) & ~(((unsigned long)(a)) - 1)) -- cgit v1.2.3 From d4e60453266b95b9dc19e0af2a819617e556bc4e Mon Sep 17 00:00:00 2001 From: Cesar Eduardo Barros Date: Thu, 9 Jul 2020 19:11:02 -0300 Subject: Restore gcc check in mips asm/unroll.h While raising the gcc version requirement to 4.9, the compile-time check in the unroll macro was accidentally changed from being used on gcc and clang to being used on clang only. Restore the gcc check, changing it from "gcc >= 4.7" to "all gcc". [ We should probably remove this all entirely: if we remove the check for CLANG, then the check for GCC can go away. Older versions of clang are not really appropriate or supported for kernel builds - Linus ] Fixes: 6ec4476ac825 ("Raise gcc version requirement to 4.9") Signed-off-by: Cesar Eduardo Barros Signed-off-by: Linus Torvalds --- arch/mips/include/asm/unroll.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/include/asm/unroll.h b/arch/mips/include/asm/unroll.h index 8ed660adc84f..49009319ac2c 100644 --- a/arch/mips/include/asm/unroll.h +++ b/arch/mips/include/asm/unroll.h @@ -25,7 +25,8 @@ * generate reasonable code for the switch statement, \ * so we skip the sanity check for those compilers. \ */ \ - BUILD_BUG_ON((CONFIG_CLANG_VERSION >= 80000) && \ + BUILD_BUG_ON((CONFIG_CC_IS_GCC || \ + CONFIG_CLANG_VERSION >= 80000) && \ !__builtin_constant_p(times)); \ \ switch (times) { \ -- cgit v1.2.3 From fc0c769ffd926312848912a7c2296e1c503898c3 Mon Sep 17 00:00:00 2001 From: Vincent Chen Date: Tue, 23 Jun 2020 13:36:59 +0800 Subject: riscv: enable the Kconfig prompt of STRICT_KERNEL_RWX Due to lack of hardware breakpoint support, the kernel option CONFIG_STRICT_KERNEL_RWX should be disabled when using KGDB. However, CONFIG_STRICT_KERNEL_RWX is always enabled now. Therefore, select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT to enable CONFIG_STRICT_KERNEL_RWX by default, and then select ARCH_OPTIONAL_KERNEL_RWX to enable the Kconfig prompt of CONFIG_STRICT_KERNEL_RWX so that users can turn it off. Signed-off-by: Vincent Chen Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 128192e14ff2..3230c1d48562 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -23,6 +23,8 @@ config RISCV select ARCH_HAS_SET_DIRECT_MAP select ARCH_HAS_SET_MEMORY select ARCH_HAS_STRICT_KERNEL_RWX if MMU + select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX + select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU select ARCH_WANT_FRAME_POINTERS select ARCH_WANT_HUGE_PMD_SHARE if 64BIT -- cgit v1.2.3 From f7fc752815f8e2337548497b3afb4aef791db4ef Mon Sep 17 00:00:00 2001 From: Vincent Chen Date: Tue, 23 Jun 2020 13:37:10 +0800 Subject: riscv: Fix "no previous prototype" compile warning in kgdb.c file Some functions are only used in the kgdb.c file. Add static properities to these functions to avoid "no previous prototype" compile warnings Signed-off-by: Vincent Chen Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/kgdb.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/riscv/kernel/kgdb.c b/arch/riscv/kernel/kgdb.c index c3275f42d1ac..963ed7edcff2 100644 --- a/arch/riscv/kernel/kgdb.c +++ b/arch/riscv/kernel/kgdb.c @@ -44,18 +44,18 @@ DECLARE_INSN(c_beqz, MATCH_C_BEQZ, MASK_C_BEQZ) DECLARE_INSN(c_bnez, MATCH_C_BNEZ, MASK_C_BNEZ) DECLARE_INSN(sret, MATCH_SRET, MASK_SRET) -int decode_register_index(unsigned long opcode, int offset) +static int decode_register_index(unsigned long opcode, int offset) { return (opcode >> offset) & 0x1F; } -int decode_register_index_short(unsigned long opcode, int offset) +static int decode_register_index_short(unsigned long opcode, int offset) { return ((opcode >> offset) & 0x7) + 8; } /* Calculate the new address for after a step */ -int get_step_address(struct pt_regs *regs, unsigned long *next_addr) +static int get_step_address(struct pt_regs *regs, unsigned long *next_addr) { unsigned long pc = regs->epc; unsigned long *regs_ptr = (unsigned long *)regs; @@ -136,7 +136,7 @@ int get_step_address(struct pt_regs *regs, unsigned long *next_addr) return 0; } -int do_single_step(struct pt_regs *regs) +static int do_single_step(struct pt_regs *regs) { /* Determine where the target instruction will send us to */ unsigned long addr = 0; @@ -320,7 +320,7 @@ int kgdb_arch_handle_exception(int vector, int signo, int err_code, return err; } -int kgdb_riscv_kgdbbreak(unsigned long addr) +static int kgdb_riscv_kgdbbreak(unsigned long addr) { if (stepped_address == addr) return KGDB_SW_SINGLE_STEP; -- cgit v1.2.3 From def0aa218e6d42231540329e6f5741fdec9e7da4 Mon Sep 17 00:00:00 2001 From: Vincent Chen Date: Tue, 23 Jun 2020 13:37:25 +0800 Subject: kgdb: Move the extern declaration kgdb_has_hit_break() to generic kgdb.h Currently, only riscv kgdb.c uses the kgdb_has_hit_break() to identify the kgdb breakpoint. It causes other architectures will encounter the "no previous prototype" warnings if the compile option has W=1. Moving the declaration of extern kgdb_has_hit_break() from risc-v kgdb.h to generic kgdb.h to avoid generating these warnings. Signed-off-by: Vincent Chen Acked-by: Daniel Thompson Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/kgdb.h | 1 - include/linux/kgdb.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/riscv/include/asm/kgdb.h b/arch/riscv/include/asm/kgdb.h index 8177a457caff..f45889bbb965 100644 --- a/arch/riscv/include/asm/kgdb.h +++ b/arch/riscv/include/asm/kgdb.h @@ -19,7 +19,6 @@ #ifndef __ASSEMBLY__ -extern int kgdb_has_hit_break(unsigned long addr); extern unsigned long kgdb_compiled_break; static inline void arch_kgdb_breakpoint(void) diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h index 0e4e3a80d58c..477b8b7c908f 100644 --- a/include/linux/kgdb.h +++ b/include/linux/kgdb.h @@ -325,6 +325,7 @@ extern int kgdb_hex2mem(char *buf, char *mem, int count); extern int kgdb_isremovedbreak(unsigned long addr); extern void kgdb_schedule_breakpoint(void); +extern int kgdb_has_hit_break(unsigned long addr); extern int kgdb_handle_exception(int ex_vector, int signo, int err_code, -- cgit v1.2.3 From 70ee5731a40b1f07f151e52c3c4ed27d70d4f9fe Mon Sep 17 00:00:00 2001 From: Vincent Chen Date: Tue, 23 Jun 2020 13:37:35 +0800 Subject: riscv: Avoid kgdb.h including gdb_xml.h to solve unused-const-variable warning The constant arrays in gdb_xml.h are only used in arch/riscv/kernel/kgdb.c, but other c files may include the gdb_xml.h indirectly via including the kgdb.h. Hence, It will cause many unused-const-variable warnings. This patch makes the kgdb.h not to include the gdb_xml.h to solve this problem. Signed-off-by: Vincent Chen Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/gdb_xml.h | 3 +-- arch/riscv/include/asm/kgdb.h | 4 +++- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/riscv/include/asm/gdb_xml.h b/arch/riscv/include/asm/gdb_xml.h index 041b45f5b997..09342111f227 100644 --- a/arch/riscv/include/asm/gdb_xml.h +++ b/arch/riscv/include/asm/gdb_xml.h @@ -3,8 +3,7 @@ #ifndef __ASM_GDB_XML_H_ #define __ASM_GDB_XML_H_ -#define kgdb_arch_gdb_stub_feature riscv_gdb_stub_feature -static const char riscv_gdb_stub_feature[64] = +const char riscv_gdb_stub_feature[64] = "PacketSize=800;qXfer:features:read+;"; static const char gdb_xfer_read_target[31] = "qXfer:features:read:target.xml:"; diff --git a/arch/riscv/include/asm/kgdb.h b/arch/riscv/include/asm/kgdb.h index f45889bbb965..46677daf708b 100644 --- a/arch/riscv/include/asm/kgdb.h +++ b/arch/riscv/include/asm/kgdb.h @@ -105,7 +105,9 @@ static inline void arch_kgdb_breakpoint(void) #define DBG_REG_BADADDR_OFF 34 #define DBG_REG_CAUSE_OFF 35 -#include +extern const char riscv_gdb_stub_feature[64]; + +#define kgdb_arch_gdb_stub_feature riscv_gdb_stub_feature #endif #endif -- cgit v1.2.3 From 83d31e5271ac74aad14b5a1a2ed26923e1446329 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 9 Jul 2020 13:12:09 -0400 Subject: KVM: nVMX: fixes for preemption timer migration Commit 850448f35aaf ("KVM: nVMX: Fix VMX preemption timer migration", 2020-06-01) accidentally broke nVMX live migration from older version by changing the userspace ABI. Restore it and, while at it, ensure that vmx->nested.has_preemption_timer_deadline is always initialized according to the KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE flag. Cc: Makarand Sonare Fixes: 850448f35aaf ("KVM: nVMX: Fix VMX preemption timer migration") Reviewed-by: Jim Mattson Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 5 +++-- arch/x86/include/uapi/asm/kvm.h | 5 +++-- arch/x86/kvm/vmx/nested.c | 1 + 3 files changed, 7 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 426f94582b7a..320788f81a05 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -4339,14 +4339,15 @@ Errors: #define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001 struct kvm_vmx_nested_state_hdr { - __u32 flags; __u64 vmxon_pa; __u64 vmcs12_pa; - __u64 preemption_timer_deadline; struct { __u16 flags; } smm; + + __u32 flags; + __u64 preemption_timer_deadline; }; struct kvm_vmx_nested_state_data { diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 17c5a038f42d..0780f97c1850 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -408,14 +408,15 @@ struct kvm_vmx_nested_state_data { }; struct kvm_vmx_nested_state_hdr { - __u32 flags; __u64 vmxon_pa; __u64 vmcs12_pa; - __u64 preemption_timer_deadline; struct { __u16 flags; } smm; + + __u32 flags; + __u64 preemption_timer_deadline; }; struct kvm_svm_nested_state_data { diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index b26655104d4a..d4a4cec034d0 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -6176,6 +6176,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, goto error_guest_mode; } + vmx->nested.has_preemption_timer_deadline = false; if (kvm_state->hdr.vmx.flags & KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE) { vmx->nested.has_preemption_timer_deadline = true; vmx->nested.preemption_timer_deadline = -- cgit v1.2.3 From 3d9fdc252b52023260de1d12399cb3157ed28c07 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Fri, 10 Jul 2020 15:23:17 +0800 Subject: KVM: MIPS: Fix build errors for 32bit kernel Commit dc6d95b153e78ed70b1b2c04a ("KVM: MIPS: Add more MMIO load/store instructions emulation") introduced some 64bit load/store instructions emulation which are unavailable on 32bit platform, and it causes build errors: arch/mips/kvm/emulate.c: In function 'kvm_mips_emulate_store': arch/mips/kvm/emulate.c:1734:6: error: right shift count >= width of type [-Werror] ((vcpu->arch.gprs[rt] >> 56) & 0xff); ^ arch/mips/kvm/emulate.c:1738:6: error: right shift count >= width of type [-Werror] ((vcpu->arch.gprs[rt] >> 48) & 0xffff); ^ arch/mips/kvm/emulate.c:1742:6: error: right shift count >= width of type [-Werror] ((vcpu->arch.gprs[rt] >> 40) & 0xffffff); ^ arch/mips/kvm/emulate.c:1746:6: error: right shift count >= width of type [-Werror] ((vcpu->arch.gprs[rt] >> 32) & 0xffffffff); ^ arch/mips/kvm/emulate.c:1796:6: error: left shift count >= width of type [-Werror] (vcpu->arch.gprs[rt] << 32); ^ arch/mips/kvm/emulate.c:1800:6: error: left shift count >= width of type [-Werror] (vcpu->arch.gprs[rt] << 40); ^ arch/mips/kvm/emulate.c:1804:6: error: left shift count >= width of type [-Werror] (vcpu->arch.gprs[rt] << 48); ^ arch/mips/kvm/emulate.c:1808:6: error: left shift count >= width of type [-Werror] (vcpu->arch.gprs[rt] << 56); ^ cc1: all warnings being treated as errors make[3]: *** [arch/mips/kvm/emulate.o] Error 1 So, use #if defined(CONFIG_64BIT) && defined(CONFIG_KVM_MIPS_VZ) to guard the 64bit load/store instructions emulation. Reported-by: kernel test robot Fixes: dc6d95b153e78ed70b1b2c04a ("KVM: MIPS: Add more MMIO load/store instructions emulation") Signed-off-by: Huacai Chen Message-Id: <1594365797-536-1-git-send-email-chenhc@lemote.com> Signed-off-by: Paolo Bonzini --- arch/mips/kvm/emulate.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c index 5ae82d925197..d242300cacc0 100644 --- a/arch/mips/kvm/emulate.c +++ b/arch/mips/kvm/emulate.c @@ -1722,6 +1722,7 @@ enum emulation_result kvm_mips_emulate_store(union mips_instruction inst, vcpu->arch.gprs[rt], *(u32 *)data); break; +#if defined(CONFIG_64BIT) && defined(CONFIG_KVM_MIPS_VZ) case sdl_op: run->mmio.phys_addr = kvm_mips_callbacks->gva_to_gpa( vcpu->arch.host_cp0_badvaddr) & (~0x7); @@ -1815,6 +1816,7 @@ enum emulation_result kvm_mips_emulate_store(union mips_instruction inst, vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr, vcpu->arch.gprs[rt], *(u64 *)data); break; +#endif #ifdef CONFIG_CPU_LOONGSON64 case sdc2_op: @@ -2002,6 +2004,7 @@ enum emulation_result kvm_mips_emulate_load(union mips_instruction inst, } break; +#if defined(CONFIG_64BIT) && defined(CONFIG_KVM_MIPS_VZ) case ldl_op: run->mmio.phys_addr = kvm_mips_callbacks->gva_to_gpa( vcpu->arch.host_cp0_badvaddr) & (~0x7); @@ -2073,6 +2076,7 @@ enum emulation_result kvm_mips_emulate_load(union mips_instruction inst, break; } break; +#endif #ifdef CONFIG_CPU_LOONGSON64 case ldc2_op: -- cgit v1.2.3 From 9321f1aaf63e74ec3884347490e4ebb039f01b6e Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 10 Jul 2020 15:34:41 -0700 Subject: mips: Remove compiler check in unroll macro CONFIG_CC_IS_GCC is undefined when Clang is used, which breaks the build (see our Travis link below). Clang 8 was chosen as a minimum version for this check because there were some improvements around __builtin_constant_p in that release. In reality, MIPS was not even buildable until clang 9 so that check was not technically necessary. Just remove all compiler checks and just assume that we have a working compiler. Fixes: d4e60453266b ("Restore gcc check in mips asm/unroll.h") Link: https://travis-ci.com/github/ClangBuiltLinux/continuous-integration/jobs/359642821 Signed-off-by: Nathan Chancellor Signed-off-by: Linus Torvalds --- arch/mips/include/asm/unroll.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/unroll.h b/arch/mips/include/asm/unroll.h index 49009319ac2c..7dd4a80e05d6 100644 --- a/arch/mips/include/asm/unroll.h +++ b/arch/mips/include/asm/unroll.h @@ -25,9 +25,7 @@ * generate reasonable code for the switch statement, \ * so we skip the sanity check for those compilers. \ */ \ - BUILD_BUG_ON((CONFIG_CC_IS_GCC || \ - CONFIG_CLANG_VERSION >= 80000) && \ - !__builtin_constant_p(times)); \ + BUILD_BUG_ON(!__builtin_constant_p(times)); \ \ switch (times) { \ case 32: fn(__VA_ARGS__); /* fall through */ \ -- cgit v1.2.3 From 4237c625304b212a3f30adf787901082082511ec Mon Sep 17 00:00:00 2001 From: Tim Harvey Date: Tue, 23 Jun 2020 12:06:54 -0700 Subject: ARM: dts: imx6qdl-gw551x: fix audio SSI The audio codec on the GW551x routes to ssi1. It fixes audio capture on the device. Cc: stable@vger.kernel.org Fixes: 3117e851cef1 ("ARM: dts: imx: Add TDA19971 HDMI Receiver to GW551x") Signed-off-by: Tim Harvey Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6qdl-gw551x.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/imx6qdl-gw551x.dtsi b/arch/arm/boot/dts/imx6qdl-gw551x.dtsi index c38e86eedcc0..8c33510c9519 100644 --- a/arch/arm/boot/dts/imx6qdl-gw551x.dtsi +++ b/arch/arm/boot/dts/imx6qdl-gw551x.dtsi @@ -110,7 +110,7 @@ simple-audio-card,frame-master = <&sound_codec>; sound_cpu: simple-audio-card,cpu { - sound-dai = <&ssi2>; + sound-dai = <&ssi1>; }; sound_codec: simple-audio-card,codec { -- cgit v1.2.3 From 2a83544007aba792167615c393e6154824f3a175 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 30 Jun 2020 20:26:51 -0700 Subject: ARM: imx: Provide correct number of resources when registering gpio devices Since commit a85a6c86c25be ("driver core: platform: Clarify that IRQ 0 is invalid"), the kernel is a bit touchy when it encounters interrupt 0. As a result, there are lots of warnings such as the following when booting systems such as 'kzm'. WARNING: CPU: 0 PID: 1 at drivers/base/platform.c:224 platform_get_irq_optional+0x118/0x128 0 is an invalid IRQ number Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.8.0-rc3 #1 Hardware name: Kyoto Microcomputer Co., Ltd. KZM-ARM11-01 [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0xe8/0x120) [] (dump_stack) from [] (__warn+0xe4/0x108) [] (__warn) from [] (warn_slowpath_fmt+0x74/0xbc) [] (warn_slowpath_fmt) from [] (platform_get_irq_optional+0x118/0x128) [] (platform_get_irq_optional) from [] (platform_irq_count+0x20/0x3c) [] (platform_irq_count) from [] (mxc_gpio_probe+0x8c/0x494) [] (mxc_gpio_probe) from [] (platform_drv_probe+0x48/0x98) [] (platform_drv_probe) from [] (really_probe+0x214/0x344) [] (really_probe) from [] (driver_probe_device+0x58/0xb4) [] (driver_probe_device) from [] (device_driver_attach+0x58/0x60) [] (device_driver_attach) from [] (__driver_attach+0x84/0xc0) [] (__driver_attach) from [] (bus_for_each_dev+0x78/0xb8) [] (bus_for_each_dev) from [] (bus_add_driver+0x154/0x1e0) [] (bus_add_driver) from [] (driver_register+0x74/0x108) [] (driver_register) from [] (do_one_initcall+0x80/0x3b4) [] (do_one_initcall) from [] (kernel_init_freeable+0x170/0x208) [] (kernel_init_freeable) from [] (kernel_init+0x8/0x11c) [] (kernel_init) from [] (ret_from_fork+0x14/0x20) As it turns out, mxc_register_gpio() is a bit lax when setting the number of resources: it registers a resource with interrupt 0 when in reality there is no such interrupt. Fix the problem by not declaring the second interrupt resource if there is no second interrupt. Fixes: a85a6c86c25be ("driver core: platform: Clarify that IRQ 0 is invalid") Cc: Bjorn Helgaas Signed-off-by: Guenter Roeck Signed-off-by: Shawn Guo --- arch/arm/mach-imx/devices/platform-gpio-mxc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-imx/devices/platform-gpio-mxc.c b/arch/arm/mach-imx/devices/platform-gpio-mxc.c index 78628ef12672..355de845224c 100644 --- a/arch/arm/mach-imx/devices/platform-gpio-mxc.c +++ b/arch/arm/mach-imx/devices/platform-gpio-mxc.c @@ -24,7 +24,8 @@ struct platform_device *__init mxc_register_gpio(char *name, int id, .flags = IORESOURCE_IRQ, }, }; + unsigned int nres; - return platform_device_register_resndata(&mxc_aips_bus, - name, id, res, ARRAY_SIZE(res), NULL, 0); + nres = irq_high ? ARRAY_SIZE(res) : ARRAY_SIZE(res) - 1; + return platform_device_register_resndata(&mxc_aips_bus, name, id, res, nres, NULL, 0); } -- cgit v1.2.3 From f8951dce10c092075e39ef12c29022548e4c63db Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 1 Jul 2020 17:10:39 -0500 Subject: ARM: imx: Remove imx_add_imx_dma() unused irq_err argument No callers of imx_add_imx_dma() need an error IRQ, so they supply 0 as "irq_err", which means we register a resource of IRQ 0, which is invalid and causes a warning if used. Remove the "irq_err" argument altogether so there's no chance of trying to use the invalid IRQ 0. Fixes: a85a6c86c25be ("driver core: platform: Clarify that IRQ 0 is invalid") Signed-off-by: Bjorn Helgaas Cc: Russell King Cc: Shawn Guo Cc: Sascha Hauer Cc: kernel@pengutronix.de Cc: Fabio Estevam Cc: linux-imx@nxp.com Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Shawn Guo --- arch/arm/mach-imx/devices/devices-common.h | 2 +- arch/arm/mach-imx/devices/platform-imx-dma.c | 6 +----- arch/arm/mach-imx/mm-imx21.c | 3 +-- arch/arm/mach-imx/mm-imx27.c | 3 +-- 4 files changed, 4 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-imx/devices/devices-common.h b/arch/arm/mach-imx/devices/devices-common.h index 2a685adec1df..ae84c08e11fa 100644 --- a/arch/arm/mach-imx/devices/devices-common.h +++ b/arch/arm/mach-imx/devices/devices-common.h @@ -289,6 +289,6 @@ struct platform_device *__init imx_add_spi_imx( const struct spi_imx_master *pdata); struct platform_device *imx_add_imx_dma(char *name, resource_size_t iobase, - int irq, int irq_err); + int irq); struct platform_device *imx_add_imx_sdma(char *name, resource_size_t iobase, int irq, struct sdma_platform_data *pdata); diff --git a/arch/arm/mach-imx/devices/platform-imx-dma.c b/arch/arm/mach-imx/devices/platform-imx-dma.c index 26b47b36257b..12656f24ad0d 100644 --- a/arch/arm/mach-imx/devices/platform-imx-dma.c +++ b/arch/arm/mach-imx/devices/platform-imx-dma.c @@ -6,7 +6,7 @@ #include "devices-common.h" struct platform_device __init __maybe_unused *imx_add_imx_dma(char *name, - resource_size_t iobase, int irq, int irq_err) + resource_size_t iobase, int irq) { struct resource res[] = { { @@ -17,10 +17,6 @@ struct platform_device __init __maybe_unused *imx_add_imx_dma(char *name, .start = irq, .end = irq, .flags = IORESOURCE_IRQ, - }, { - .start = irq_err, - .end = irq_err, - .flags = IORESOURCE_IRQ, }, }; diff --git a/arch/arm/mach-imx/mm-imx21.c b/arch/arm/mach-imx/mm-imx21.c index 50a2edac8513..b834026e4615 100644 --- a/arch/arm/mach-imx/mm-imx21.c +++ b/arch/arm/mach-imx/mm-imx21.c @@ -78,8 +78,7 @@ void __init imx21_soc_init(void) mxc_register_gpio("imx21-gpio", 5, MX21_GPIO6_BASE_ADDR, SZ_256, MX21_INT_GPIO, 0); pinctrl_provide_dummies(); - imx_add_imx_dma("imx21-dma", MX21_DMA_BASE_ADDR, - MX21_INT_DMACH0, 0); /* No ERR irq */ + imx_add_imx_dma("imx21-dma", MX21_DMA_BASE_ADDR, MX21_INT_DMACH0); platform_device_register_simple("imx21-audmux", 0, imx21_audmux_res, ARRAY_SIZE(imx21_audmux_res)); } diff --git a/arch/arm/mach-imx/mm-imx27.c b/arch/arm/mach-imx/mm-imx27.c index 4e4125140025..2717614f101d 100644 --- a/arch/arm/mach-imx/mm-imx27.c +++ b/arch/arm/mach-imx/mm-imx27.c @@ -79,8 +79,7 @@ void __init imx27_soc_init(void) mxc_register_gpio("imx21-gpio", 5, MX27_GPIO6_BASE_ADDR, SZ_256, MX27_INT_GPIO, 0); pinctrl_provide_dummies(); - imx_add_imx_dma("imx27-dma", MX27_DMA_BASE_ADDR, - MX27_INT_DMACH0, 0); /* No ERR irq */ + imx_add_imx_dma("imx27-dma", MX27_DMA_BASE_ADDR, MX27_INT_DMACH0); /* imx27 has the imx21 type audmux */ platform_device_register_simple("imx21-audmux", 0, imx27_audmux_res, ARRAY_SIZE(imx27_audmux_res)); -- cgit v1.2.3 From 192b6a780598976feb7321ff007754f8511a4129 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Sun, 12 Jul 2020 18:50:47 +0530 Subject: powerpc/book3s64/pkeys: Fix pkey_access_permitted() for execute disable pkey Even if the IAMR value denies execute access, the current code returns true from pkey_access_permitted() for an execute permission check, if the AMR read pkey bit is cleared. This results in repeated page fault loop with a test like below: #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #ifdef SYS_pkey_mprotect #undef SYS_pkey_mprotect #endif #ifdef SYS_pkey_alloc #undef SYS_pkey_alloc #endif #ifdef SYS_pkey_free #undef SYS_pkey_free #endif #undef PKEY_DISABLE_EXECUTE #define PKEY_DISABLE_EXECUTE 0x4 #define SYS_pkey_mprotect 386 #define SYS_pkey_alloc 384 #define SYS_pkey_free 385 #define PPC_INST_NOP 0x60000000 #define PPC_INST_BLR 0x4e800020 #define PROT_RWX (PROT_READ | PROT_WRITE | PROT_EXEC) static int sys_pkey_mprotect(void *addr, size_t len, int prot, int pkey) { return syscall(SYS_pkey_mprotect, addr, len, prot, pkey); } static int sys_pkey_alloc(unsigned long flags, unsigned long access_rights) { return syscall(SYS_pkey_alloc, flags, access_rights); } static int sys_pkey_free(int pkey) { return syscall(SYS_pkey_free, pkey); } static void do_execute(void *region) { /* jump to region */ asm volatile( "mtctr %0;" "bctrl" : : "r"(region) : "ctr", "lr"); } static void do_protect(void *region) { size_t pgsize; int i, pkey; pgsize = getpagesize(); pkey = sys_pkey_alloc(0, PKEY_DISABLE_EXECUTE); assert (pkey > 0); /* perform mprotect */ assert(!sys_pkey_mprotect(region, pgsize, PROT_RWX, pkey)); do_execute(region); /* free pkey */ assert(!sys_pkey_free(pkey)); } int main(int argc, char **argv) { size_t pgsize, numinsns; unsigned int *region; int i; /* allocate memory region to protect */ pgsize = getpagesize(); region = memalign(pgsize, pgsize); assert(region != NULL); assert(!mprotect(region, pgsize, PROT_RWX)); /* fill page with NOPs with a BLR at the end */ numinsns = pgsize / sizeof(region[0]); for (i = 0; i < numinsns - 1; i++) region[i] = PPC_INST_NOP; region[i] = PPC_INST_BLR; do_protect(region); return EXIT_SUCCESS; } The fix is to only check the IAMR for an execute check, the AMR value is not relevant. Fixes: f2407ef3ba22 ("powerpc: helper to validate key-access permissions of a pte") Cc: stable@vger.kernel.org # v4.16+ Reported-by: Sandipan Das Signed-off-by: Aneesh Kumar K.V [mpe: Add detail to change log, tweak wording & formatting] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200712132047.1038594-1-aneesh.kumar@linux.ibm.com --- arch/powerpc/mm/book3s64/pkeys.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c index ca5fcb4bff32..d174106bab67 100644 --- a/arch/powerpc/mm/book3s64/pkeys.c +++ b/arch/powerpc/mm/book3s64/pkeys.c @@ -354,12 +354,14 @@ static bool pkey_access_permitted(int pkey, bool write, bool execute) u64 amr; pkey_shift = pkeyshift(pkey); - if (execute && !(read_iamr() & (IAMR_EX_BIT << pkey_shift))) - return true; + if (execute) + return !(read_iamr() & (IAMR_EX_BIT << pkey_shift)); + + amr = read_amr(); + if (write) + return !(amr & (AMR_WR_BIT << pkey_shift)); - amr = read_amr(); /* Delay reading amr until absolutely needed */ - return ((!write && !(amr & (AMR_RD_BIT << pkey_shift))) || - (write && !(amr & (AMR_WR_BIT << pkey_shift)))); + return !(amr & (AMR_RD_BIT << pkey_shift)); } bool arch_pte_access_permitted(u64 pte, bool write, bool execute) -- cgit v1.2.3 From fecc5cfcd55f7aacc483726c405a951bd6aaaf2e Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Thu, 14 May 2020 23:10:38 +0100 Subject: arm64: defconfig: Enable CONFIG_PCIE_RCAR_HOST config option PCIE_RCAR internally selects PCIE_RCAR_HOST which builds the same driver. So this patch renames CONFIG_PCIE_RCAR to CONFIG_PCIE_RCAR_HOST so that PCIE_RCAR can be safely dropped from Kconfig file. Signed-off-by: Lad Prabhakar Reviewed-by: Yoshihiro Shimoda Link: https://lore.kernel.org/r/1589494238-2933-1-git-send-email-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Geert Uytterhoeven --- arch/arm64/configs/defconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 883e8bace3ed..2ca7ba69c318 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -194,7 +194,7 @@ CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_ACPI=y CONFIG_PCI_AARDVARK=y CONFIG_PCI_TEGRA=y -CONFIG_PCIE_RCAR=y +CONFIG_PCIE_RCAR_HOST=y CONFIG_PCI_HOST_GENERIC=y CONFIG_PCI_XGENE=y CONFIG_PCIE_ALTERA=y -- cgit v1.2.3 From b710d27bf72068b15b2f0305d825988183e2ff28 Mon Sep 17 00:00:00 2001 From: Satheesh Rajendran Date: Fri, 19 Jun 2020 12:31:13 +0530 Subject: powerpc/pseries/svm: Fix incorrect check for shared_lppaca_size Early secure guest boot hits the below crash while booting with vcpus numbers aligned with page boundary for PAGE size of 64k and LPPACA size of 1k i.e 64, 128 etc. Partition configured for 64 cpus. CPU maps initialized for 1 thread per core ------------[ cut here ]------------ kernel BUG at arch/powerpc/kernel/paca.c:89! Oops: Exception in kernel mode, sig: 5 [#1] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries This is due to the BUG_ON() for shared_lppaca_total_size equal to shared_lppaca_size. Instead the code should only BUG_ON() if we have exceeded the total_size, which indicates we've overflowed the array. Fixes: bd104e6db6f0 ("powerpc/pseries/svm: Use shared memory for LPPACA structures") Cc: stable@vger.kernel.org # v5.4+ Signed-off-by: Satheesh Rajendran Reviewed-by: Laurent Dufour Reviewed-by: Thiago Jung Bauermann [mpe: Reword change log to clarify we're fixing not removing the check] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200619070113.16696-1-sathnaga@linux.vnet.ibm.com --- arch/powerpc/kernel/paca.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 2168372b792d..74da65aacbc9 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -87,7 +87,7 @@ static void *__init alloc_shared_lppaca(unsigned long size, unsigned long align, * This is very early in boot, so no harm done if the kernel crashes at * this point. */ - BUG_ON(shared_lppaca_size >= shared_lppaca_total_size); + BUG_ON(shared_lppaca_size > shared_lppaca_total_size); return ptr; } -- cgit v1.2.3 From a18fb07623813aaee1a69873c785865695a32c9e Mon Sep 17 00:00:00 2001 From: Chen Tao Date: Fri, 19 Jun 2020 18:42:40 +0800 Subject: ARM: OMAP2+: Fix possible memory leak in omap_hwmod_allocate_module Fix memory leak in omap_hwmod_allocate_module not freeing in handling error path. Fixes: 8c87970543b17("ARM: OMAP2+: Add functions to allocate module data from device tree") Signed-off-by: Chen Tao Reviewed-by: Paul Walmsley [tony@atomide.com: fix call iounmap for missing regs] Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/omap_hwmod.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index c630457bb228..15b29a179c8a 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -3435,7 +3435,7 @@ static int omap_hwmod_allocate_module(struct device *dev, struct omap_hwmod *oh, regs = ioremap(data->module_pa, data->module_size); if (!regs) - return -ENOMEM; + goto out_free_sysc; } /* @@ -3445,13 +3445,13 @@ static int omap_hwmod_allocate_module(struct device *dev, struct omap_hwmod *oh, if (oh->class->name && strcmp(oh->class->name, data->name)) { class = kmemdup(oh->class, sizeof(*oh->class), GFP_KERNEL); if (!class) - return -ENOMEM; + goto out_unmap; } if (list_empty(&oh->slave_ports)) { oi = kcalloc(1, sizeof(*oi), GFP_KERNEL); if (!oi) - return -ENOMEM; + goto out_free_class; /* * Note that we assume interconnect interface clocks will be @@ -3478,6 +3478,14 @@ static int omap_hwmod_allocate_module(struct device *dev, struct omap_hwmod *oh, spin_unlock_irqrestore(&oh->_lock, flags); return 0; + +out_free_class: + kfree(class); +out_unmap: + iounmap(regs); +out_free_sysc: + kfree(sysc); + return -ENOMEM; } static const struct omap_hwmod_reset omap24xx_reset_quirks[] = { -- cgit v1.2.3 From 2a4117df9b436a0e4c79d211284ab2097bcd00dc Mon Sep 17 00:00:00 2001 From: dillon min Date: Tue, 14 Jul 2020 14:35:07 +0800 Subject: ARM: dts: Fix dcan driver probe failed on am437x platform Got following d_can probe errors with kernel 5.8-rc1 on am437x [ 10.730822] CAN device driver interface Starting Wait for Network to be Configured... [ OK ] Reached target Network. [ 10.787363] c_can_platform 481cc000.can: probe failed [ 10.792484] c_can_platform: probe of 481cc000.can failed with error -2 [ 10.799457] c_can_platform 481d0000.can: probe failed [ 10.804617] c_can_platform: probe of 481d0000.can failed with error -2 actually, Tony has fixed this issue on am335x with the patch [3] Since am437x has the same clock structure with am335x [1][2], so reuse the code from Tony Lindgren's patch [3] to fix it. [1]: https://www.ti.com/lit/pdf/spruh73 Chapter-23, Figure 23-1. DCAN Integration [2]: https://www.ti.com/lit/pdf/spruhl7 Chapter-25, Figure 25-1. DCAN Integration [3]: commit 516f1117d0fb ("ARM: dts: Configure osc clock for d_can on am335x") Fixes: 1a5cd7c23cc5 ("bus: ti-sysc: Enable all clocks directly during init to read revision") Signed-off-by: dillon min [tony@atomide.com: aligned commit message a bit for readability] Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am437x-l4.dtsi | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/am437x-l4.dtsi b/arch/arm/boot/dts/am437x-l4.dtsi index 0d0f9fe4a882..4129affde54e 100644 --- a/arch/arm/boot/dts/am437x-l4.dtsi +++ b/arch/arm/boot/dts/am437x-l4.dtsi @@ -1541,8 +1541,9 @@ reg = <0xcc020 0x4>; reg-names = "rev"; /* Domains (P, C): per_pwrdm, l4ls_clkdm */ - clocks = <&l4ls_clkctrl AM4_L4LS_D_CAN0_CLKCTRL 0>; - clock-names = "fck"; + clocks = <&l4ls_clkctrl AM4_L4LS_D_CAN0_CLKCTRL 0>, + <&dcan0_fck>; + clock-names = "fck", "osc"; #address-cells = <1>; #size-cells = <1>; ranges = <0x0 0xcc000 0x2000>; @@ -1550,6 +1551,8 @@ dcan0: can@0 { compatible = "ti,am4372-d_can", "ti,am3352-d_can"; reg = <0x0 0x2000>; + clocks = <&dcan0_fck>; + clock-names = "fck"; syscon-raminit = <&scm_conf 0x644 0>; interrupts = ; status = "disabled"; @@ -1561,8 +1564,9 @@ reg = <0xd0020 0x4>; reg-names = "rev"; /* Domains (P, C): per_pwrdm, l4ls_clkdm */ - clocks = <&l4ls_clkctrl AM4_L4LS_D_CAN1_CLKCTRL 0>; - clock-names = "fck"; + clocks = <&l4ls_clkctrl AM4_L4LS_D_CAN1_CLKCTRL 0>, + <&dcan1_fck>; + clock-names = "fck", "osc"; #address-cells = <1>; #size-cells = <1>; ranges = <0x0 0xd0000 0x2000>; @@ -1570,6 +1574,8 @@ dcan1: can@0 { compatible = "ti,am4372-d_can", "ti,am3352-d_can"; reg = <0x0 0x2000>; + clocks = <&dcan1_fck>; + clock-name = "fck"; syscon-raminit = <&scm_conf 0x644 1>; interrupts = ; status = "disabled"; -- cgit v1.2.3 From e3beca48a45b5e0e6e6a4e0124276b8248dcc9bb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 9 Jul 2020 11:53:06 +0200 Subject: irqdomain/treewide: Keep firmware node unconditionally allocated Quite some non OF/ACPI users of irqdomains allocate firmware nodes of type IRQCHIP_FWNODE_NAMED or IRQCHIP_FWNODE_NAMED_ID and free them right after creating the irqdomain. The only purpose of these FW nodes is to convey name information. When this was introduced the core code did not store the pointer to the node in the irqdomain. A recent change stored the firmware node pointer in irqdomain for other reasons and missed to notice that the usage sites which do the alloc_fwnode/create_domain/free_fwnode sequence are broken by this. Storing a dangling pointer is dangerous itself, but in case that the domain is destroyed later on this leads to a double free. Remove the freeing of the firmware node after creating the irqdomain from all affected call sites to cure this. Fixes: 711419e504eb ("irqdomain: Add the missing assignment of domain->fwnode for named fwnode") Reported-by: Andy Shevchenko Signed-off-by: Thomas Gleixner Acked-by: Bjorn Helgaas Acked-by: Marc Zyngier Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/873661qakd.fsf@nanos.tec.linutronix.de --- arch/mips/pci/pci-xtalk-bridge.c | 5 +++-- arch/x86/kernel/apic/io_apic.c | 10 +++++----- arch/x86/kernel/apic/msi.c | 18 ++++++++++++------ arch/x86/kernel/apic/vector.c | 1 - arch/x86/platform/uv/uv_irq.c | 3 ++- drivers/iommu/amd/iommu.c | 5 +++-- drivers/iommu/hyperv-iommu.c | 5 ++++- drivers/iommu/intel/irq_remapping.c | 2 +- drivers/mfd/ioc3.c | 5 +++-- drivers/pci/controller/vmd.c | 5 +++-- 10 files changed, 36 insertions(+), 23 deletions(-) (limited to 'arch') diff --git a/arch/mips/pci/pci-xtalk-bridge.c b/arch/mips/pci/pci-xtalk-bridge.c index 3b2552fb7735..5958217861b8 100644 --- a/arch/mips/pci/pci-xtalk-bridge.c +++ b/arch/mips/pci/pci-xtalk-bridge.c @@ -627,9 +627,10 @@ static int bridge_probe(struct platform_device *pdev) return -ENOMEM; domain = irq_domain_create_hierarchy(parent, 0, 8, fn, &bridge_domain_ops, NULL); - irq_domain_free_fwnode(fn); - if (!domain) + if (!domain) { + irq_domain_free_fwnode(fn); return -ENOMEM; + } pci_set_flags(PCI_PROBE_ONLY); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index ce61e3e7d399..81ffcfbfaef2 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2316,12 +2316,12 @@ static int mp_irqdomain_create(int ioapic) ip->irqdomain = irq_domain_create_linear(fn, hwirqs, cfg->ops, (void *)(long)ioapic); - /* Release fw handle if it was allocated above */ - if (!cfg->dev) - irq_domain_free_fwnode(fn); - - if (!ip->irqdomain) + if (!ip->irqdomain) { + /* Release fw handle if it was allocated above */ + if (!cfg->dev) + irq_domain_free_fwnode(fn); return -ENOMEM; + } ip->irqdomain->parent = parent; diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index 5cbaca58af95..c2b2911feeef 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -263,12 +263,13 @@ void __init arch_init_msi_domain(struct irq_domain *parent) msi_default_domain = pci_msi_create_irq_domain(fn, &pci_msi_domain_info, parent); - irq_domain_free_fwnode(fn); } - if (!msi_default_domain) + if (!msi_default_domain) { + irq_domain_free_fwnode(fn); pr_warn("failed to initialize irqdomain for MSI/MSI-x.\n"); - else + } else { msi_default_domain->flags |= IRQ_DOMAIN_MSI_NOMASK_QUIRK; + } } #ifdef CONFIG_IRQ_REMAP @@ -301,7 +302,8 @@ struct irq_domain *arch_create_remap_msi_irq_domain(struct irq_domain *parent, if (!fn) return NULL; d = pci_msi_create_irq_domain(fn, &pci_msi_ir_domain_info, parent); - irq_domain_free_fwnode(fn); + if (!d) + irq_domain_free_fwnode(fn); return d; } #endif @@ -364,7 +366,8 @@ static struct irq_domain *dmar_get_irq_domain(void) if (fn) { dmar_domain = msi_create_irq_domain(fn, &dmar_msi_domain_info, x86_vector_domain); - irq_domain_free_fwnode(fn); + if (!dmar_domain) + irq_domain_free_fwnode(fn); } out: mutex_unlock(&dmar_lock); @@ -489,7 +492,10 @@ struct irq_domain *hpet_create_irq_domain(int hpet_id) } d = msi_create_irq_domain(fn, domain_info, parent); - irq_domain_free_fwnode(fn); + if (!d) { + irq_domain_free_fwnode(fn); + kfree(domain_info); + } return d; } diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index c48be6e1f676..cc8b16f89dd4 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -709,7 +709,6 @@ int __init arch_early_irq_init(void) x86_vector_domain = irq_domain_create_tree(fn, &x86_vector_domain_ops, NULL); BUG_ON(x86_vector_domain == NULL); - irq_domain_free_fwnode(fn); irq_set_default_host(x86_vector_domain); arch_init_msi_domain(x86_vector_domain); diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c index fc13cbbb2dce..abb6075397f0 100644 --- a/arch/x86/platform/uv/uv_irq.c +++ b/arch/x86/platform/uv/uv_irq.c @@ -167,9 +167,10 @@ static struct irq_domain *uv_get_irq_domain(void) goto out; uv_domain = irq_domain_create_tree(fn, &uv_domain_ops, NULL); - irq_domain_free_fwnode(fn); if (uv_domain) uv_domain->parent = x86_vector_domain; + else + irq_domain_free_fwnode(fn); out: mutex_unlock(&uv_lock); diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 74cca1757172..2f22326ee4df 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -3985,9 +3985,10 @@ int amd_iommu_create_irq_domain(struct amd_iommu *iommu) if (!fn) return -ENOMEM; iommu->ir_domain = irq_domain_create_tree(fn, &amd_ir_domain_ops, iommu); - irq_domain_free_fwnode(fn); - if (!iommu->ir_domain) + if (!iommu->ir_domain) { + irq_domain_free_fwnode(fn); return -ENOMEM; + } iommu->ir_domain->parent = arch_get_ir_parent_domain(); iommu->msi_domain = arch_create_remap_msi_irq_domain(iommu->ir_domain, diff --git a/drivers/iommu/hyperv-iommu.c b/drivers/iommu/hyperv-iommu.c index 3c0c67a99c7b..8919c1c70b68 100644 --- a/drivers/iommu/hyperv-iommu.c +++ b/drivers/iommu/hyperv-iommu.c @@ -155,7 +155,10 @@ static int __init hyperv_prepare_irq_remapping(void) 0, IOAPIC_REMAPPING_ENTRY, fn, &hyperv_ir_domain_ops, NULL); - irq_domain_free_fwnode(fn); + if (!ioapic_ir_domain) { + irq_domain_free_fwnode(fn); + return -ENOMEM; + } /* * Hyper-V doesn't provide irq remapping function for diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c index 7f8769800815..9564d23d094f 100644 --- a/drivers/iommu/intel/irq_remapping.c +++ b/drivers/iommu/intel/irq_remapping.c @@ -563,8 +563,8 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu) 0, INTR_REMAP_TABLE_ENTRIES, fn, &intel_ir_domain_ops, iommu); - irq_domain_free_fwnode(fn); if (!iommu->ir_domain) { + irq_domain_free_fwnode(fn); pr_err("IR%d: failed to allocate irqdomain\n", iommu->seq_id); goto out_free_bitmap; } diff --git a/drivers/mfd/ioc3.c b/drivers/mfd/ioc3.c index 02998d4eb74b..74cee7cb0afc 100644 --- a/drivers/mfd/ioc3.c +++ b/drivers/mfd/ioc3.c @@ -142,10 +142,11 @@ static int ioc3_irq_domain_setup(struct ioc3_priv_data *ipd, int irq) goto err; domain = irq_domain_create_linear(fn, 24, &ioc3_irq_domain_ops, ipd); - if (!domain) + if (!domain) { + irq_domain_free_fwnode(fn); goto err; + } - irq_domain_free_fwnode(fn); ipd->domain = domain; irq_set_chained_handler_and_data(irq, ioc3_irq_handler, domain); diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c index e386d4eac407..9a64cf90c291 100644 --- a/drivers/pci/controller/vmd.c +++ b/drivers/pci/controller/vmd.c @@ -546,9 +546,10 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features) vmd->irq_domain = pci_msi_create_irq_domain(fn, &vmd_msi_domain_info, x86_vector_domain); - irq_domain_free_fwnode(fn); - if (!vmd->irq_domain) + if (!vmd->irq_domain) { + irq_domain_free_fwnode(fn); return -ENODEV; + } pci_add_resource(&resources, &vmd->resources[0]); pci_add_resource_offset(&resources, &vmd->resources[1], offset[0]); -- cgit v1.2.3 From 0cac21b02ba5f3095fd2dcc77c26a25a0b2432ed Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Mon, 6 Jul 2020 14:32:26 +0200 Subject: riscv: use 16KB kernel stack on 64-bit With the current 8KB stack size there are frequent overflows in a 64-bit configuration. We may split IRQ stacks off in the future, but this fixes a number of issues right now. Signed-off-by: Andreas Schwab Reviewed-by: Anup Patel [Palmer: mention irqstack in the commit text] Fixes: 7db91e57a0ac ("RISC-V: Task implementation") Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/thread_info.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h index 1dd12a0cbb2b..464a2bbc97ea 100644 --- a/arch/riscv/include/asm/thread_info.h +++ b/arch/riscv/include/asm/thread_info.h @@ -12,7 +12,11 @@ #include /* thread information allocation */ +#ifdef CONFIG_64BIT +#define THREAD_SIZE_ORDER (2) +#else #define THREAD_SIZE_ORDER (1) +#endif #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) #ifndef __ASSEMBLY__ -- cgit v1.2.3 From 6068e1a4427e88f5cc62f238d1baf94a8b824ef4 Mon Sep 17 00:00:00 2001 From: Haren Myneni Date: Fri, 10 Jul 2020 16:47:19 -0700 Subject: powerpc/vas: Report proper error code for address translation failure P9 DD2 NX workbook (Table 4-36) says DMA controller uses CC=5 internally for translation fault handling. NX reserves CC=250 for OS to notify user space when NX encounters address translation failure on the request buffer. Not an issue in earlier releases as NX does not get faults on kernel addresses. This patch defines CSB_CC_FAULT_ADDRESS(250) and updates CSB.CC with this proper error code for user space. Fixes: c96c4436aba4 ("powerpc/vas: Update CSB and notify process for fault CRBs") Signed-off-by: Haren Myneni [mpe: Added Fixes tag and fix typo in comment] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/019fd53e7538c6f8f332d175df74b1815ef5aa8c.camel@linux.ibm.com --- Documentation/powerpc/vas-api.rst | 2 +- arch/powerpc/include/asm/icswx.h | 2 ++ arch/powerpc/platforms/powernv/vas-fault.c | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/Documentation/powerpc/vas-api.rst b/Documentation/powerpc/vas-api.rst index 1217c2f1595e..788dc8375a0e 100644 --- a/Documentation/powerpc/vas-api.rst +++ b/Documentation/powerpc/vas-api.rst @@ -213,7 +213,7 @@ request buffers are not in memory. The operating system handles the fault by updating CSB with the following data: csb.flags = CSB_V; - csb.cc = CSB_CC_TRANSLATION; + csb.cc = CSB_CC_FAULT_ADDRESS; csb.ce = CSB_CE_TERMINATION; csb.address = fault_address; diff --git a/arch/powerpc/include/asm/icswx.h b/arch/powerpc/include/asm/icswx.h index 965b1f39b2a5..b0c70a35fd0e 100644 --- a/arch/powerpc/include/asm/icswx.h +++ b/arch/powerpc/include/asm/icswx.h @@ -77,6 +77,8 @@ struct coprocessor_completion_block { #define CSB_CC_CHAIN (37) #define CSB_CC_SEQUENCE (38) #define CSB_CC_HW (39) +/* P9 DD2 NX Workbook 3.2 (Table 4-36): Address translation fault */ +#define CSB_CC_FAULT_ADDRESS (250) #define CSB_SIZE (0x10) #define CSB_ALIGN CSB_SIZE diff --git a/arch/powerpc/platforms/powernv/vas-fault.c b/arch/powerpc/platforms/powernv/vas-fault.c index 266a6ca5e15e..3d21fce254b7 100644 --- a/arch/powerpc/platforms/powernv/vas-fault.c +++ b/arch/powerpc/platforms/powernv/vas-fault.c @@ -79,7 +79,7 @@ static void update_csb(struct vas_window *window, csb_addr = (void __user *)be64_to_cpu(crb->csb_addr); memset(&csb, 0, sizeof(csb)); - csb.cc = CSB_CC_TRANSLATION; + csb.cc = CSB_CC_FAULT_ADDRESS; csb.ce = CSB_CE_TERMINATION; csb.cs = 0; csb.count = 0; -- cgit v1.2.3 From 60176e6be0bb6f629b15aea7dcab5a95ecb158e9 Mon Sep 17 00:00:00 2001 From: Dinh Nguyen Date: Mon, 29 Jun 2020 11:19:21 -0500 Subject: arm64: dts: agilex: add status to qspi dts node Add status = "okay" to QSPI node. Fixes: c4c8757b2d895 ("arm64: dts: agilex: add QSPI support for Intel Agilex") Cc: linux-stable # >= v5.5 Signed-off-by: Dinh Nguyen --- arch/arm64/boot/dts/intel/socfpga_agilex_socdk.dts | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm64/boot/dts/intel/socfpga_agilex_socdk.dts b/arch/arm64/boot/dts/intel/socfpga_agilex_socdk.dts index 51d948323bfd..92f478def723 100644 --- a/arch/arm64/boot/dts/intel/socfpga_agilex_socdk.dts +++ b/arch/arm64/boot/dts/intel/socfpga_agilex_socdk.dts @@ -98,6 +98,7 @@ }; &qspi { + status = "okay"; flash@0 { #address-cells = <1>; #size-cells = <1>; -- cgit v1.2.3 From 263a0269a59c0b4145829462a107fe7f7327105f Mon Sep 17 00:00:00 2001 From: Dinh Nguyen Date: Mon, 29 Jun 2020 11:25:43 -0500 Subject: arm64: dts: stratix10: add status to qspi dts node Add status = "okay" to QSPI node. Fixes: 0cb140d07fc75 ("arm64: dts: stratix10: Add QSPI support for Stratix10") Cc: linux-stable # >= v5.6 Signed-off-by: Dinh Nguyen --- arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts | 1 + arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts | 1 + 2 files changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts index f6c4a15079d3..feadd21bc0dc 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts @@ -155,6 +155,7 @@ }; &qspi { + status = "okay"; flash@0 { #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts index 9946515b8afd..4000c393243d 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts @@ -188,6 +188,7 @@ }; &qspi { + status = "okay"; flash@0 { #address-cells = <1>; #size-cells = <1>; -- cgit v1.2.3 From 3bf9b8ffc8980c1090bdd3a5570cf42420620838 Mon Sep 17 00:00:00 2001 From: Dinh Nguyen Date: Mon, 29 Jun 2020 12:47:35 -0500 Subject: arm64: dts: stratix10: increase QSPI reg address in nand dts file Match the QSPI reg address in the socfpga_stratix10_socdk.dts file. Fixes: 80f132d73709 ("arm64: dts: increase the QSPI reg address for Stratix10 and Agilex") Cc: linux-stable # >= v5.6 Signed-off-by: Dinh Nguyen --- arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts index 4000c393243d..c07966740e14 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts @@ -212,12 +212,12 @@ qspi_boot: partition@0 { label = "Boot and fpga data"; - reg = <0x0 0x034B0000>; + reg = <0x0 0x03FE0000>; }; - qspi_rootfs: partition@4000000 { + qspi_rootfs: partition@3FE0000 { label = "Root Filesystem - JFFS2"; - reg = <0x034B0000 0x0EB50000>; + reg = <0x03FE0000 0x0C020000>; }; }; }; -- cgit v1.2.3 From d7adfe5ffed9faa05f8926223086b101e14f700d Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 26 Jun 2020 10:06:02 +0200 Subject: ARM: dts: socfpga: Align L2 cache-controller nodename with dtschema Fix dtschema validator warnings like: l2-cache@fffff000: $nodename:0: 'l2-cache@fffff000' does not match '^(cache-controller|cpu)(@[0-9a-f,]+)*$' Fixes: 475dc86d08de ("arm: dts: socfpga: Add a base DTSI for Altera's Arria10 SOC") Signed-off-by: Krzysztof Kozlowski Signed-off-by: Dinh Nguyen --- arch/arm/boot/dts/socfpga.dtsi | 2 +- arch/arm/boot/dts/socfpga_arria10.dtsi | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/socfpga.dtsi b/arch/arm/boot/dts/socfpga.dtsi index c2b54af417a2..78f3267d9cbf 100644 --- a/arch/arm/boot/dts/socfpga.dtsi +++ b/arch/arm/boot/dts/socfpga.dtsi @@ -726,7 +726,7 @@ }; }; - L2: l2-cache@fffef000 { + L2: cache-controller@fffef000 { compatible = "arm,pl310-cache"; reg = <0xfffef000 0x1000>; interrupts = <0 38 0x04>; diff --git a/arch/arm/boot/dts/socfpga_arria10.dtsi b/arch/arm/boot/dts/socfpga_arria10.dtsi index 3b8571b8b412..8f614c4b0e3e 100644 --- a/arch/arm/boot/dts/socfpga_arria10.dtsi +++ b/arch/arm/boot/dts/socfpga_arria10.dtsi @@ -636,7 +636,7 @@ reg = <0xffcfb100 0x80>; }; - L2: l2-cache@fffff000 { + L2: cache-controller@fffff000 { compatible = "arm,pl310-cache"; reg = <0xfffff000 0x1000>; interrupts = <0 18 IRQ_TYPE_LEVEL_HIGH>; -- cgit v1.2.3 From 681a5c71fb829fc2193e3bb524af41525477f5c3 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 29 Jun 2020 10:16:29 +0200 Subject: arm64: dts: spcfpga: Align GIC, NAND and UART nodenames with dtschema Fix dtschema validator warnings like: intc@fffc1000: $nodename:0: 'intc@fffc1000' does not match '^interrupt-controller(@[0-9a-f,]+)*$' Fixes: 78cd6a9d8e15 ("arm64: dts: Add base stratix 10 dtsi") Signed-off-by: Krzysztof Kozlowski Signed-off-by: Dinh Nguyen --- arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi index d1fc9c2055f4..9498d1de730c 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi @@ -77,7 +77,7 @@ method = "smc"; }; - intc: intc@fffc1000 { + intc: interrupt-controller@fffc1000 { compatible = "arm,gic-400", "arm,cortex-a15-gic"; #interrupt-cells = <3>; interrupt-controller; @@ -302,7 +302,7 @@ status = "disabled"; }; - nand: nand@ffb90000 { + nand: nand-controller@ffb90000 { #address-cells = <1>; #size-cells = <0>; compatible = "altr,socfpga-denali-nand"; @@ -445,7 +445,7 @@ clock-names = "timer"; }; - uart0: serial0@ffc02000 { + uart0: serial@ffc02000 { compatible = "snps,dw-apb-uart"; reg = <0xffc02000 0x100>; interrupts = <0 108 4>; @@ -456,7 +456,7 @@ status = "disabled"; }; - uart1: serial1@ffc02100 { + uart1: serial@ffc02100 { compatible = "snps,dw-apb-uart"; reg = <0xffc02100 0x100>; interrupts = <0 109 4>; -- cgit v1.2.3 From ac2081cdc4d99c57f219c1a6171526e0fa0a6fff Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 2 Jul 2020 21:16:20 +0100 Subject: arm64: ptrace: Consistently use pseudo-singlestep exceptions Although the arm64 single-step state machine can be fast-forwarded in cases where we wish to generate a SIGTRAP without actually executing an instruction, this has two major limitations outside of simply skipping an instruction due to emulation. 1. Stepping out of a ptrace signal stop into a signal handler where SIGTRAP is blocked. Fast-forwarding the stepping state machine in this case will result in a forced SIGTRAP, with the handler reset to SIG_DFL. 2. The hardware implicitly fast-forwards the state machine when executing an SVC instruction for issuing a system call. This can interact badly with subsequent ptrace stops signalled during the execution of the system call (e.g. SYSCALL_EXIT or seccomp traps), as they may corrupt the stepping state by updating the PSTATE for the tracee. Resolve both of these issues by injecting a pseudo-singlestep exception on entry to a signal handler and also on return to userspace following a system call. Cc: Cc: Mark Rutland Tested-by: Luis Machado Reported-by: Keno Fischer Signed-off-by: Will Deacon --- arch/arm64/include/asm/thread_info.h | 1 + arch/arm64/kernel/ptrace.c | 25 +++++++++++++++++++------ arch/arm64/kernel/signal.c | 11 ++--------- arch/arm64/kernel/syscall.c | 2 +- 4 files changed, 23 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 6ea8b6a26ae9..5e784e16ee89 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -93,6 +93,7 @@ void arch_release_task_struct(struct task_struct *tsk); #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) #define _TIF_UPROBE (1 << TIF_UPROBE) #define _TIF_FSCHECK (1 << TIF_FSCHECK) +#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) #define _TIF_32BIT (1 << TIF_32BIT) #define _TIF_SVE (1 << TIF_SVE) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 68b7f34a08f5..057d4aa1af4d 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1818,12 +1818,23 @@ static void tracehook_report_syscall(struct pt_regs *regs, saved_reg = regs->regs[regno]; regs->regs[regno] = dir; - if (dir == PTRACE_SYSCALL_EXIT) + if (dir == PTRACE_SYSCALL_ENTER) { + if (tracehook_report_syscall_entry(regs)) + forget_syscall(regs); + regs->regs[regno] = saved_reg; + } else if (!test_thread_flag(TIF_SINGLESTEP)) { tracehook_report_syscall_exit(regs, 0); - else if (tracehook_report_syscall_entry(regs)) - forget_syscall(regs); + regs->regs[regno] = saved_reg; + } else { + regs->regs[regno] = saved_reg; - regs->regs[regno] = saved_reg; + /* + * Signal a pseudo-step exception since we are stepping but + * tracer modifications to the registers may have rewound the + * state machine. + */ + tracehook_report_syscall_exit(regs, 1); + } } int syscall_trace_enter(struct pt_regs *regs) @@ -1851,12 +1862,14 @@ int syscall_trace_enter(struct pt_regs *regs) void syscall_trace_exit(struct pt_regs *regs) { + unsigned long flags = READ_ONCE(current_thread_info()->flags); + audit_syscall_exit(regs); - if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) + if (flags & _TIF_SYSCALL_TRACEPOINT) trace_sys_exit(regs, regs_return_value(regs)); - if (test_thread_flag(TIF_SYSCALL_TRACE)) + if (flags & (_TIF_SYSCALL_TRACE | _TIF_SINGLESTEP)) tracehook_report_syscall(regs, PTRACE_SYSCALL_EXIT); rseq_syscall(regs); diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 801d56cdf701..3b4f31f35e45 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -800,7 +800,6 @@ static void setup_restart_syscall(struct pt_regs *regs) */ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) { - struct task_struct *tsk = current; sigset_t *oldset = sigmask_to_save(); int usig = ksig->sig; int ret; @@ -824,14 +823,8 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) */ ret |= !valid_user_regs(®s->user_regs, current); - /* - * Fast forward the stepping logic so we step into the signal - * handler. - */ - if (!ret) - user_fastforward_single_step(tsk); - - signal_setup_done(ret, ksig, 0); + /* Step into the signal handler if we are stepping */ + signal_setup_done(ret, ksig, test_thread_flag(TIF_SINGLESTEP)); } /* diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index 5f5b868292f5..7c14466a12af 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -139,7 +139,7 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, if (!has_syscall_work(flags) && !IS_ENABLED(CONFIG_DEBUG_RSEQ)) { local_daif_mask(); flags = current_thread_info()->flags; - if (!has_syscall_work(flags)) { + if (!has_syscall_work(flags) && !(flags & _TIF_SINGLESTEP)) { /* * We're off to userspace, where interrupts are * always enabled after we restore the flags from -- cgit v1.2.3 From 3a5a4366cecc25daa300b9a9174f7fdd352b9068 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 13 Feb 2020 12:06:26 +0000 Subject: arm64: ptrace: Override SPSR.SS when single-stepping is enabled Luis reports that, when reverse debugging with GDB, single-step does not function as expected on arm64: | I've noticed, under very specific conditions, that a PTRACE_SINGLESTEP | request by GDB won't execute the underlying instruction. As a consequence, | the PC doesn't move, but we return a SIGTRAP just like we would for a | regular successful PTRACE_SINGLESTEP request. The underlying problem is that when the CPU register state is restored as part of a reverse step, the SPSR.SS bit is cleared and so the hardware single-step state can transition to the "active-pending" state, causing an unexpected step exception to be taken immediately if a step operation is attempted. In hindsight, we probably shouldn't have exposed SPSR.SS in the pstate accessible by the GPR regset, but it's a bit late for that now. Instead, simply prevent userspace from configuring the bit to a value which is inconsistent with the TIF_SINGLESTEP state for the task being traced. Cc: Cc: Mark Rutland Cc: Keno Fischer Link: https://lore.kernel.org/r/1eed6d69-d53d-9657-1fc9-c089be07f98c@linaro.org Reported-by: Luis Machado Tested-by: Luis Machado Signed-off-by: Will Deacon --- arch/arm64/include/asm/debug-monitors.h | 2 ++ arch/arm64/kernel/debug-monitors.c | 20 ++++++++++++++++---- arch/arm64/kernel/ptrace.c | 4 ++-- 3 files changed, 20 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index e5ceea213e39..0b298f48f5bf 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -109,6 +109,8 @@ void disable_debug_monitors(enum dbg_active_el el); void user_rewind_single_step(struct task_struct *task); void user_fastforward_single_step(struct task_struct *task); +void user_regs_reset_single_step(struct user_pt_regs *regs, + struct task_struct *task); void kernel_enable_single_step(struct pt_regs *regs); void kernel_disable_single_step(void); diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 5df49366e9ab..91146c0a3691 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -141,17 +141,20 @@ postcore_initcall(debug_monitors_init); /* * Single step API and exception handling. */ -static void set_regs_spsr_ss(struct pt_regs *regs) +static void set_user_regs_spsr_ss(struct user_pt_regs *regs) { regs->pstate |= DBG_SPSR_SS; } -NOKPROBE_SYMBOL(set_regs_spsr_ss); +NOKPROBE_SYMBOL(set_user_regs_spsr_ss); -static void clear_regs_spsr_ss(struct pt_regs *regs) +static void clear_user_regs_spsr_ss(struct user_pt_regs *regs) { regs->pstate &= ~DBG_SPSR_SS; } -NOKPROBE_SYMBOL(clear_regs_spsr_ss); +NOKPROBE_SYMBOL(clear_user_regs_spsr_ss); + +#define set_regs_spsr_ss(r) set_user_regs_spsr_ss(&(r)->user_regs) +#define clear_regs_spsr_ss(r) clear_user_regs_spsr_ss(&(r)->user_regs) static DEFINE_SPINLOCK(debug_hook_lock); static LIST_HEAD(user_step_hook); @@ -402,6 +405,15 @@ void user_fastforward_single_step(struct task_struct *task) clear_regs_spsr_ss(task_pt_regs(task)); } +void user_regs_reset_single_step(struct user_pt_regs *regs, + struct task_struct *task) +{ + if (test_tsk_thread_flag(task, TIF_SINGLESTEP)) + set_user_regs_spsr_ss(regs); + else + clear_user_regs_spsr_ss(regs); +} + /* Kernel API */ void kernel_enable_single_step(struct pt_regs *regs) { diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 057d4aa1af4d..22f9053b55b6 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1947,8 +1947,8 @@ static int valid_native_regs(struct user_pt_regs *regs) */ int valid_user_regs(struct user_pt_regs *regs, struct task_struct *task) { - if (!test_tsk_thread_flag(task, TIF_SINGLESTEP)) - regs->pstate &= ~DBG_SPSR_SS; + /* https://lore.kernel.org/lkml/20191118131525.GA4180@willie-the-truck */ + user_regs_reset_single_step(regs, task); if (is_compat_thread(task_thread_info(task))) return valid_compat_regs(regs); -- cgit v1.2.3 From 15956689a0e60aa0c795174f3c310b60d8794235 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 3 Jul 2020 12:08:42 +0100 Subject: arm64: compat: Ensure upper 32 bits of x0 are zero on syscall return Although we zero the upper bits of x0 on entry to the kernel from an AArch32 task, we do not clear them on the exception return path and can therefore expose 64-bit sign extended syscall return values to userspace via interfaces such as the 'perf_regs' ABI, which deal exclusively with 64-bit registers. Explicitly clear the upper 32 bits of x0 on return from a compat system call. Cc: Cc: Mark Rutland Cc: Keno Fischer Cc: Luis Machado Signed-off-by: Will Deacon --- arch/arm64/include/asm/syscall.h | 12 +++++++++++- arch/arm64/kernel/syscall.c | 3 +++ 2 files changed, 14 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h index 65299a2dcf9c..cfc0672013f6 100644 --- a/arch/arm64/include/asm/syscall.h +++ b/arch/arm64/include/asm/syscall.h @@ -34,6 +34,10 @@ static inline long syscall_get_error(struct task_struct *task, struct pt_regs *regs) { unsigned long error = regs->regs[0]; + + if (is_compat_thread(task_thread_info(task))) + error = sign_extend64(error, 31); + return IS_ERR_VALUE(error) ? error : 0; } @@ -47,7 +51,13 @@ static inline void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, int error, long val) { - regs->regs[0] = (long) error ? error : val; + if (error) + val = error; + + if (is_compat_thread(task_thread_info(task))) + val = lower_32_bits(val); + + regs->regs[0] = val; } #define SYSCALL_MAX_ARGS 6 diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index 7c14466a12af..98a26d4e7b0c 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -50,6 +50,9 @@ static void invoke_syscall(struct pt_regs *regs, unsigned int scno, ret = do_ni_syscall(regs, scno); } + if (is_compat_task()) + ret = lower_32_bits(ret); + regs->regs[0] = ret; } -- cgit v1.2.3 From 59ee987ea47caff8c1e7ba4b89932c6900a35d0c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 2 Jul 2020 21:14:20 +0100 Subject: arm64: ptrace: Add a comment describing our syscall entry/exit trap ABI Our tracehook logic for syscall entry/exit raises a SIGTRAP back to the tracer following a ptrace request such as PTRACE_SYSCALL. As part of this procedure, we clobber the reported value of one of the tracee's general purpose registers (x7 for native tasks, r12 for compat) to indicate whether the stop occurred on syscall entry or exit. This is a slightly unfortunate ABI, as it prevents the tracer from accessing the real register value and is at odds with other similar stops such as seccomp traps. Since we're stuck with this ABI, expand the comment in our tracehook logic to acknowledge the issue and describe the behaviour in more detail. Cc: Mark Rutland Cc: Luis Machado Reported-by: Keno Fischer Signed-off-by: Will Deacon --- arch/arm64/kernel/ptrace.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 22f9053b55b6..89fbee3991a2 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1811,8 +1811,20 @@ static void tracehook_report_syscall(struct pt_regs *regs, unsigned long saved_reg; /* - * A scratch register (ip(r12) on AArch32, x7 on AArch64) is - * used to denote syscall entry/exit: + * We have some ABI weirdness here in the way that we handle syscall + * exit stops because we indicate whether or not the stop has been + * signalled from syscall entry or syscall exit by clobbering a general + * purpose register (ip/r12 for AArch32, x7 for AArch64) in the tracee + * and restoring its old value after the stop. This means that: + * + * - Any writes by the tracer to this register during the stop are + * ignored/discarded. + * + * - The actual value of the register is not available during the stop, + * so the tracer cannot save it and restore it later. + * + * - Syscall stops behave differently to seccomp and pseudo-step traps + * (the latter do not nobble any registers). */ regno = (is_compat_task() ? 12 : 7); saved_reg = regs->regs[regno]; -- cgit v1.2.3 From 139dbe5d8ed383cbd1ada56c78dbbbd35bf6a9d3 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 3 Jul 2020 09:41:24 +0100 Subject: arm64: syscall: Expand the comment about ptrace and syscall(-1) If a task executes syscall(-1), we intercept this early and force x0 to be -ENOSYS so that we don't need to distinguish this scenario from one where the scno is -1 because a tracer wants to skip the system call using ptrace. With the return value set, the return path is the same as the skip case. Although there is a one-line comment noting this in el0_svc_common(), it misses out most of the detail. Expand the comment to describe a bit more about what is going on. Cc: Mark Rutland Cc: Keno Fischer Cc: Luis Machado Signed-off-by: Will Deacon --- arch/arm64/kernel/syscall.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index 98a26d4e7b0c..5f0c04863d2c 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -124,7 +124,21 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, user_exit(); if (has_syscall_work(flags)) { - /* set default errno for user-issued syscall(-1) */ + /* + * The de-facto standard way to skip a system call using ptrace + * is to set the system call to -1 (NO_SYSCALL) and set x0 to a + * suitable error code for consumption by userspace. However, + * this cannot be distinguished from a user-issued syscall(-1) + * and so we must set x0 to -ENOSYS here in case the tracer doesn't + * issue the skip and we fall into trace_exit with x0 preserved. + * + * This is slightly odd because it also means that if a tracer + * sets the system call number to -1 but does not initialise x0, + * then x0 will be preserved for all system calls apart from a + * user-issued syscall(-1). However, requesting a skip and not + * setting the return value is unlikely to do anything sensible + * anyway. + */ if (scno == NO_SYSCALL) regs->regs[0] = -ENOSYS; scno = syscall_trace_enter(regs); -- cgit v1.2.3 From d83ee6e3e75db6f518ef2b0858f163849f2ddeb7 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 10 Jul 2020 13:20:57 +0100 Subject: arm64: ptrace: Use NO_SYSCALL instead of -1 in syscall_trace_enter() Setting a system call number of -1 is special, as it indicates that the current system call should be skipped. Use NO_SYSCALL instead of -1 when checking for this scenario, which is different from the -1 returned due to a seccomp failure. Cc: Mark Rutland Cc: Keno Fischer Cc: Luis Machado Signed-off-by: Will Deacon --- arch/arm64/kernel/ptrace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 89fbee3991a2..1e02e98e68dd 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1856,12 +1856,12 @@ int syscall_trace_enter(struct pt_regs *regs) if (flags & (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE)) { tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER); if (!in_syscall(regs) || (flags & _TIF_SYSCALL_EMU)) - return -1; + return NO_SYSCALL; } /* Do the secure computing after ptrace; failures should be fast. */ if (secure_computing() == -1) - return -1; + return NO_SYSCALL; if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) trace_sys_enter(regs, regs->syscallno); -- cgit v1.2.3 From 5afc78551bf5d53279036e0bf63314e35631d79f Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 13 Feb 2020 12:12:26 +0000 Subject: arm64: Use test_tsk_thread_flag() for checking TIF_SINGLESTEP Rather than open-code test_tsk_thread_flag() at each callsite, simply replace the couple of offenders with calls to test_tsk_thread_flag() directly. Signed-off-by: Will Deacon --- arch/arm64/kernel/debug-monitors.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 91146c0a3691..7310a4f7f993 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -394,14 +394,14 @@ void user_rewind_single_step(struct task_struct *task) * If single step is active for this thread, then set SPSR.SS * to 1 to avoid returning to the active-pending state. */ - if (test_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP)) + if (test_tsk_thread_flag(task, TIF_SINGLESTEP)) set_regs_spsr_ss(task_pt_regs(task)); } NOKPROBE_SYMBOL(user_rewind_single_step); void user_fastforward_single_step(struct task_struct *task) { - if (test_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP)) + if (test_tsk_thread_flag(task, TIF_SINGLESTEP)) clear_regs_spsr_ss(task_pt_regs(task)); } -- cgit v1.2.3 From 6ee93f8df09c470da1a4af11e394c52d7b62418c Mon Sep 17 00:00:00 2001 From: Jian Cai Date: Tue, 14 Jul 2020 16:30:21 -0700 Subject: x86/entry: Add compatibility with IAS Clang's integrated assembler does not allow symbols with non-absolute values to be reassigned. Modify the interrupt entry loop macro to be compatible with IAS by using a label and an offset. Reported-by: Nick Desaulniers Reported-by: Sedat Dilek Suggested-by: Nick Desaulniers Suggested-by: Brian Gerst Suggested-by: Arvind Sankar Signed-off-by: Jian Cai Signed-off-by: Thomas Gleixner Tested-by: Sedat Dilek # Link: https://github.com/ClangBuiltLinux/linux/issues/1043 Link: https://lkml.kernel.org/r/20200714233024.1789985-1-caij2003@gmail.com --- arch/x86/include/asm/idtentry.h | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index f3d70830bf2a..5efaaed34eda 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -469,16 +469,15 @@ __visible noinstr void func(struct pt_regs *regs, \ .align 8 SYM_CODE_START(irq_entries_start) vector=FIRST_EXTERNAL_VECTOR - pos = . .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR) UNWIND_HINT_IRET_REGS +0 : .byte 0x6a, vector jmp asm_common_interrupt nop /* Ensure that the above is 8 bytes max */ - . = pos + 8 - pos=pos+8 - vector=vector+1 + . = 0b + 8 + vector = vector+1 .endr SYM_CODE_END(irq_entries_start) @@ -486,16 +485,15 @@ SYM_CODE_END(irq_entries_start) .align 8 SYM_CODE_START(spurious_entries_start) vector=FIRST_SYSTEM_VECTOR - pos = . .rept (NR_VECTORS - FIRST_SYSTEM_VECTOR) UNWIND_HINT_IRET_REGS +0 : .byte 0x6a, vector jmp asm_spurious_interrupt nop /* Ensure that the above is 8 bytes max */ - . = pos + 8 - pos=pos+8 - vector=vector+1 + . = 0b + 8 + vector = vector+1 .endr SYM_CODE_END(spurious_entries_start) #endif -- cgit v1.2.3 From 5769fe26f389b0002ed48fd16d642a1d86edaf79 Mon Sep 17 00:00:00 2001 From: Sedat Dilek Date: Tue, 14 Jul 2020 21:47:40 +0200 Subject: x86/entry: Fix vectors to IDTENTRY_SYSVEC for CONFIG_HYPERV When assembling with Clang via `make LLVM_IAS=1` and CONFIG_HYPERV enabled, we observe the following error: :9:6: error: expected absolute expression .if HYPERVISOR_REENLIGHTENMENT_VECTOR == 3 ^ :1:1: note: while in macro instantiation idtentry HYPERVISOR_REENLIGHTENMENT_VECTOR asm_sysvec_hyperv_reenlightenment sysvec_hyperv_reenlightenment has_error_code=0 ^ ./arch/x86/include/asm/idtentry.h:627:1: note: while in macro instantiation idtentry_sysvec HYPERVISOR_REENLIGHTENMENT_VECTOR sysvec_hyperv_reenlightenment; ^ :9:6: error: expected absolute expression .if HYPERVISOR_STIMER0_VECTOR == 3 ^ :1:1: note: while in macro instantiation idtentry HYPERVISOR_STIMER0_VECTOR asm_sysvec_hyperv_stimer0 sysvec_hyperv_stimer0 has_error_code=0 ^ ./arch/x86/include/asm/idtentry.h:628:1: note: while in macro instantiation idtentry_sysvec HYPERVISOR_STIMER0_VECTOR sysvec_hyperv_stimer0; This is caused by typos in arch/x86/include/asm/idtentry.h: HYPERVISOR_REENLIGHTENMENT_VECTOR -> HYPERV_REENLIGHTENMENT_VECTOR HYPERVISOR_STIMER0_VECTOR -> HYPERV_STIMER0_VECTOR For more details see ClangBuiltLinux issue #1088. Fixes: a16be368dd3f ("x86/entry: Convert various hypervisor vectors to IDTENTRY_SYSVEC") Suggested-by: Nick Desaulniers Signed-off-by: Sedat Dilek Signed-off-by: Thomas Gleixner Reviewed-by: Nathan Chancellor Reviewed-by: Wei Liu Reviewed-by: Nick Desaulniers Link: https://github.com/ClangBuiltLinux/linux/issues/1088 Link: https://github.com/ClangBuiltLinux/linux/issues/1043 Link: https://lore.kernel.org/patchwork/patch/1272115/ Link: https://lkml.kernel.org/r/20200714194740.4548-1-sedat.dilek@gmail.com --- arch/x86/include/asm/idtentry.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 5efaaed34eda..80d3b30d3ee3 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -624,8 +624,8 @@ DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_NESTED_VECTOR, sysvec_kvm_posted_intr_nested #if IS_ENABLED(CONFIG_HYPERV) DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_hyperv_callback); -DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_REENLIGHTENMENT_VECTOR, sysvec_hyperv_reenlightenment); -DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_STIMER0_VECTOR, sysvec_hyperv_stimer0); +DECLARE_IDTENTRY_SYSVEC(HYPERV_REENLIGHTENMENT_VECTOR, sysvec_hyperv_reenlightenment); +DECLARE_IDTENTRY_SYSVEC(HYPERV_STIMER0_VECTOR, sysvec_hyperv_stimer0); #endif #if IS_ENABLED(CONFIG_ACRN_GUEST) -- cgit v1.2.3 From 81e96851ea32deb2c921c870eecabf335f598aeb Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 27 May 2020 15:53:46 +0200 Subject: x86: math-emu: Fix up 'cmp' insn for clang ias The clang integrated assembler requires the 'cmp' instruction to have a length prefix here: arch/x86/math-emu/wm_sqrt.S:212:2: error: ambiguous instructions require an explicit suffix (could be 'cmpb', 'cmpw', or 'cmpl') cmp $0xffffffff,-24(%ebp) ^ Make this a 32-bit comparison, which it was clearly meant to be. Signed-off-by: Arnd Bergmann Signed-off-by: Thomas Gleixner Reviewed-by: Nick Desaulniers Link: https://lkml.kernel.org/r/20200527135352.1198078-1-arnd@arndb.de --- arch/x86/math-emu/wm_sqrt.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/math-emu/wm_sqrt.S b/arch/x86/math-emu/wm_sqrt.S index 3b2b58164ec1..40526dd85137 100644 --- a/arch/x86/math-emu/wm_sqrt.S +++ b/arch/x86/math-emu/wm_sqrt.S @@ -209,7 +209,7 @@ sqrt_stage_2_finish: #ifdef PARANOID /* It should be possible to get here only if the arg is ffff....ffff */ - cmp $0xffffffff,FPU_fsqrt_arg_1 + cmpl $0xffffffff,FPU_fsqrt_arg_1 jnz sqrt_stage_2_error #endif /* PARANOID */ -- cgit v1.2.3 From 38b7c2a3ffb1fce8358ddc6006cfe5c038ff9963 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Thu, 16 Jul 2020 11:57:26 -0700 Subject: RISC-V: Upgrade smp_mb__after_spinlock() to iorw,iorw While digging through the recent mmiowb preemption issue it came up that we aren't actually preventing IO from crossing a scheduling boundary. While it's a bit ugly to overload smp_mb__after_spinlock() with this behavior, it's what PowerPC is doing so there's some precedent. Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/barrier.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/riscv/include/asm/barrier.h b/arch/riscv/include/asm/barrier.h index 3f1737f301cc..d0e24aaa2aa0 100644 --- a/arch/riscv/include/asm/barrier.h +++ b/arch/riscv/include/asm/barrier.h @@ -58,8 +58,16 @@ do { \ * The AQ/RL pair provides a RCpc critical section, but there's not really any * way we can take advantage of that here because the ordering is only enforced * on that one lock. Thus, we're just doing a full fence. + * + * Since we allow writeX to be called from preemptive regions we need at least + * an "o" in the predecessor set to ensure device writes are visible before the + * task is marked as available for scheduling on a new hart. While I don't see + * any concrete reason we need a full IO fence, it seems safer to just upgrade + * this in order to avoid any IO crossing a scheduling boundary. In both + * instances the scheduler pairs this with an mb(), so nothing is necessary on + * the new hart. */ -#define smp_mb__after_spinlock() RISCV_FENCE(rw,rw) +#define smp_mb__after_spinlock() RISCV_FENCE(iorw,iorw) #include -- cgit v1.2.3 From baedb87d1b53532f81b4bd0387f83b05d4f7eb9a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 17 Jul 2020 18:00:02 +0200 Subject: genirq/affinity: Handle affinity setting on inactive interrupts correctly Setting interrupt affinity on inactive interrupts is inconsistent when hierarchical irq domains are enabled. The core code should just store the affinity and not call into the irq chip driver for inactive interrupts because the chip drivers may not be in a state to handle such requests. X86 has a hacky workaround for that but all other irq chips have not which causes problems e.g. on GIC V3 ITS. Instead of adding more ugly hacks all over the place, solve the problem in the core code. If the affinity is set on an inactive interrupt then: - Store it in the irq descriptors affinity mask - Update the effective affinity to reflect that so user space has a consistent view - Don't call into the irq chip driver This is the core equivalent of the X86 workaround and works correctly because the affinity setting is established in the irq chip when the interrupt is activated later on. Note, that this is only effective when hierarchical irq domains are enabled by the architecture. Doing it unconditionally would break legacy irq chip implementations. For hierarchial irq domains this works correctly as none of the drivers can have a dependency on affinity setting in inactive state by design. Remove the X86 workaround as it is not longer required. Fixes: 02edee152d6e ("x86/apic/vector: Ignore set_affinity call for inactive interrupts") Reported-by: Ali Saidi Signed-off-by: Thomas Gleixner Tested-by: Ali Saidi Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200529015501.15771-1-alisaidi@amazon.com Link: https://lkml.kernel.org/r/877dv2rv25.fsf@nanos.tec.linutronix.de --- arch/x86/kernel/apic/vector.c | 22 +++++----------------- kernel/irq/manage.c | 37 +++++++++++++++++++++++++++++++++++-- 2 files changed, 40 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index cc8b16f89dd4..7649da2478d8 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -446,12 +446,10 @@ static int x86_vector_activate(struct irq_domain *dom, struct irq_data *irqd, trace_vector_activate(irqd->irq, apicd->is_managed, apicd->can_reserve, reserve); - /* Nothing to do for fixed assigned vectors */ - if (!apicd->can_reserve && !apicd->is_managed) - return 0; - raw_spin_lock_irqsave(&vector_lock, flags); - if (reserve || irqd_is_managed_and_shutdown(irqd)) + if (!apicd->can_reserve && !apicd->is_managed) + assign_irq_vector_any_locked(irqd); + else if (reserve || irqd_is_managed_and_shutdown(irqd)) vector_assign_managed_shutdown(irqd); else if (apicd->is_managed) ret = activate_managed(irqd); @@ -774,20 +772,10 @@ void lapic_offline(void) static int apic_set_affinity(struct irq_data *irqd, const struct cpumask *dest, bool force) { - struct apic_chip_data *apicd = apic_chip_data(irqd); int err; - /* - * Core code can call here for inactive interrupts. For inactive - * interrupts which use managed or reservation mode there is no - * point in going through the vector assignment right now as the - * activation will assign a vector which fits the destination - * cpumask. Let the core code store the destination mask and be - * done with it. - */ - if (!irqd_is_activated(irqd) && - (apicd->is_managed || apicd->can_reserve)) - return IRQ_SET_MASK_OK; + if (WARN_ON_ONCE(!irqd_is_activated(irqd))) + return -EIO; raw_spin_lock(&vector_lock); cpumask_and(vector_searchmask, dest, cpu_online_mask); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 761911168438..2a9fec53e159 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -195,9 +195,9 @@ void irq_set_thread_affinity(struct irq_desc *desc) set_bit(IRQTF_AFFINITY, &action->thread_flags); } +#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK static void irq_validate_effective_affinity(struct irq_data *data) { -#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK const struct cpumask *m = irq_data_get_effective_affinity_mask(data); struct irq_chip *chip = irq_data_get_irq_chip(data); @@ -205,9 +205,19 @@ static void irq_validate_effective_affinity(struct irq_data *data) return; pr_warn_once("irq_chip %s did not update eff. affinity mask of irq %u\n", chip->name, data->irq); -#endif } +static inline void irq_init_effective_affinity(struct irq_data *data, + const struct cpumask *mask) +{ + cpumask_copy(irq_data_get_effective_affinity_mask(data), mask); +} +#else +static inline void irq_validate_effective_affinity(struct irq_data *data) { } +static inline void irq_init_effective_affinity(struct irq_data *data, + const struct cpumask *mask) { } +#endif + int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) { @@ -304,6 +314,26 @@ static int irq_try_set_affinity(struct irq_data *data, return ret; } +static bool irq_set_affinity_deactivated(struct irq_data *data, + const struct cpumask *mask, bool force) +{ + struct irq_desc *desc = irq_data_to_desc(data); + + /* + * If the interrupt is not yet activated, just store the affinity + * mask and do not call the chip driver at all. On activation the + * driver has to make sure anyway that the interrupt is in a + * useable state so startup works. + */ + if (!IS_ENABLED(CONFIG_IRQ_DOMAIN_HIERARCHY) || irqd_is_activated(data)) + return false; + + cpumask_copy(desc->irq_common_data.affinity, mask); + irq_init_effective_affinity(data, mask); + irqd_set(data, IRQD_AFFINITY_SET); + return true; +} + int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask, bool force) { @@ -314,6 +344,9 @@ int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask, if (!chip || !chip->irq_set_affinity) return -EINVAL; + if (irq_set_affinity_deactivated(data, mask, force)) + return 0; + if (irq_can_move_pcntxt(data) && !irqd_is_setaffinity_pending(data)) { ret = irq_try_set_affinity(data, mask, force); } else { -- cgit v1.2.3 From 5f55dd54994a596ce3bdb9e2a73164907ca46c03 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 26 Jun 2020 14:19:12 +0200 Subject: media: atomisp: move CCK endpoint address to generic header IOSF MBI header contains a lot of definitions, such as end point addresses of IPs. Move CCK address from AtomISP driver to generic header. While here, drop unused one. Signed-off-by: Andy Shevchenko Signed-off-by: Mauro Carvalho Chehab --- arch/x86/include/asm/iosf_mbi.h | 1 + drivers/staging/media/atomisp/pci/atomisp-regs.h | 3 --- drivers/staging/media/atomisp/pci/atomisp_v4l2.c | 2 +- 3 files changed, 2 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/iosf_mbi.h b/arch/x86/include/asm/iosf_mbi.h index 5270ff39b9af..a1911fea8739 100644 --- a/arch/x86/include/asm/iosf_mbi.h +++ b/arch/x86/include/asm/iosf_mbi.h @@ -39,6 +39,7 @@ #define BT_MBI_UNIT_PMC 0x04 #define BT_MBI_UNIT_GFX 0x06 #define BT_MBI_UNIT_SMI 0x0C +#define BT_MBI_UNIT_CCK 0x14 #define BT_MBI_UNIT_USB 0x43 #define BT_MBI_UNIT_SATA 0xA3 #define BT_MBI_UNIT_PCIE 0xA6 diff --git a/drivers/staging/media/atomisp/pci/atomisp-regs.h b/drivers/staging/media/atomisp/pci/atomisp-regs.h index de34ee28e390..022997f47121 100644 --- a/drivers/staging/media/atomisp/pci/atomisp-regs.h +++ b/drivers/staging/media/atomisp/pci/atomisp-regs.h @@ -20,9 +20,6 @@ #define ATOMISP_REGS_H /* common register definitions */ -#define PUNIT_PORT 0x04 -#define CCK_PORT 0x14 - #define PCICMDSTS 0x01 #define INTR 0x0f #define MSI_CAPID 0x24 diff --git a/drivers/staging/media/atomisp/pci/atomisp_v4l2.c b/drivers/staging/media/atomisp/pci/atomisp_v4l2.c index 29ea66f175c8..3bd78d870264 100644 --- a/drivers/staging/media/atomisp/pci/atomisp_v4l2.c +++ b/drivers/staging/media/atomisp/pci/atomisp_v4l2.c @@ -1687,7 +1687,7 @@ static int atomisp_pci_probe(struct pci_dev *dev, isp->dfs = &dfs_config_cht; isp->pdev->d3cold_delay = 0; - iosf_mbi_read(CCK_PORT, MBI_REG_READ, CCK_FUSE_REG_0, &val); + iosf_mbi_read(BT_MBI_UNIT_CCK, MBI_REG_READ, CCK_FUSE_REG_0, &val); switch (val & CCK_FUSE_HPLL_FREQ_MASK) { case 0x00: isp->hpll_freq = HPLL_FREQ_800MHZ; -- cgit v1.2.3 From cadfad870154e14f745ec845708bc17d166065f2 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 17 Jul 2020 16:53:55 -0700 Subject: x86/ioperm: Fix io bitmap invalidation on Xen PV tss_invalidate_io_bitmap() wasn't wired up properly through the pvop machinery, so the TSS and Xen's io bitmap would get out of sync whenever disabling a valid io bitmap. Add a new pvop for tss_invalidate_io_bitmap() to fix it. This is XSA-329. Fixes: 22fe5b0439dd ("x86/ioperm: Move TSS bitmap update to exit to user work") Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Reviewed-by: Juergen Gross Reviewed-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/d53075590e1f91c19f8af705059d3ff99424c020.1595030016.git.luto@kernel.org --- arch/x86/include/asm/io_bitmap.h | 16 ++++++++++++++++ arch/x86/include/asm/paravirt.h | 5 +++++ arch/x86/include/asm/paravirt_types.h | 1 + arch/x86/kernel/paravirt.c | 3 ++- arch/x86/kernel/process.c | 18 ++---------------- arch/x86/xen/enlighten_pv.c | 12 ++++++++++++ 6 files changed, 38 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/io_bitmap.h b/arch/x86/include/asm/io_bitmap.h index ac1a99ffbd8d..7f080f5c7def 100644 --- a/arch/x86/include/asm/io_bitmap.h +++ b/arch/x86/include/asm/io_bitmap.h @@ -19,12 +19,28 @@ struct task_struct; void io_bitmap_share(struct task_struct *tsk); void io_bitmap_exit(struct task_struct *tsk); +static inline void native_tss_invalidate_io_bitmap(void) +{ + /* + * Invalidate the I/O bitmap by moving io_bitmap_base outside the + * TSS limit so any subsequent I/O access from user space will + * trigger a #GP. + * + * This is correct even when VMEXIT rewrites the TSS limit + * to 0x67 as the only requirement is that the base points + * outside the limit. + */ + this_cpu_write(cpu_tss_rw.x86_tss.io_bitmap_base, + IO_BITMAP_OFFSET_INVALID); +} + void native_tss_update_io_bitmap(void); #ifdef CONFIG_PARAVIRT_XXL #include #else #define tss_update_io_bitmap native_tss_update_io_bitmap +#define tss_invalidate_io_bitmap native_tss_invalidate_io_bitmap #endif #else diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 5ca5d297df75..3d2afecde50c 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -302,6 +302,11 @@ static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g) } #ifdef CONFIG_X86_IOPL_IOPERM +static inline void tss_invalidate_io_bitmap(void) +{ + PVOP_VCALL0(cpu.invalidate_io_bitmap); +} + static inline void tss_update_io_bitmap(void) { PVOP_VCALL0(cpu.update_io_bitmap); diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 732f62e04ddb..8dfcb2508e6d 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -141,6 +141,7 @@ struct pv_cpu_ops { void (*load_sp0)(unsigned long sp0); #ifdef CONFIG_X86_IOPL_IOPERM + void (*invalidate_io_bitmap)(void); void (*update_io_bitmap)(void); #endif diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 674a7d66d960..de2138ba38e5 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -324,7 +324,8 @@ struct paravirt_patch_template pv_ops = { .cpu.swapgs = native_swapgs, #ifdef CONFIG_X86_IOPL_IOPERM - .cpu.update_io_bitmap = native_tss_update_io_bitmap, + .cpu.invalidate_io_bitmap = native_tss_invalidate_io_bitmap, + .cpu.update_io_bitmap = native_tss_update_io_bitmap, #endif .cpu.start_context_switch = paravirt_nop, diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index f362ce0d5ac0..fe67dbd76e51 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -322,20 +322,6 @@ void arch_setup_new_exec(void) } #ifdef CONFIG_X86_IOPL_IOPERM -static inline void tss_invalidate_io_bitmap(struct tss_struct *tss) -{ - /* - * Invalidate the I/O bitmap by moving io_bitmap_base outside the - * TSS limit so any subsequent I/O access from user space will - * trigger a #GP. - * - * This is correct even when VMEXIT rewrites the TSS limit - * to 0x67 as the only requirement is that the base points - * outside the limit. - */ - tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_INVALID; -} - static inline void switch_to_bitmap(unsigned long tifp) { /* @@ -346,7 +332,7 @@ static inline void switch_to_bitmap(unsigned long tifp) * user mode. */ if (tifp & _TIF_IO_BITMAP) - tss_invalidate_io_bitmap(this_cpu_ptr(&cpu_tss_rw)); + tss_invalidate_io_bitmap(); } static void tss_copy_io_bitmap(struct tss_struct *tss, struct io_bitmap *iobm) @@ -380,7 +366,7 @@ void native_tss_update_io_bitmap(void) u16 *base = &tss->x86_tss.io_bitmap_base; if (!test_thread_flag(TIF_IO_BITMAP)) { - tss_invalidate_io_bitmap(tss); + native_tss_invalidate_io_bitmap(); return; } diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 0d68948c82ad..c46b9f2e732f 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -870,6 +870,17 @@ static void xen_load_sp0(unsigned long sp0) } #ifdef CONFIG_X86_IOPL_IOPERM +static void xen_invalidate_io_bitmap(void) +{ + struct physdev_set_iobitmap iobitmap = { + .bitmap = 0, + .nr_ports = 0, + }; + + native_tss_invalidate_io_bitmap(); + HYPERVISOR_physdev_op(PHYSDEVOP_set_iobitmap, &iobitmap); +} + static void xen_update_io_bitmap(void) { struct physdev_set_iobitmap iobitmap; @@ -1099,6 +1110,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { .load_sp0 = xen_load_sp0, #ifdef CONFIG_X86_IOPL_IOPERM + .invalidate_io_bitmap = xen_invalidate_io_bitmap, .update_io_bitmap = xen_update_io_bitmap, #endif .io_delay = xen_io_delay, -- cgit v1.2.3 From 58ac3154b83938515129c20aa76d456a4c9202a8 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 26 Jun 2020 13:34:25 -0700 Subject: x86/entry: Actually disable stack protector Some builds of GCC enable stack protector by default. Simply removing the arguments is not sufficient to disable stack protector, as the stack protector for those GCC builds must be explicitly disabled. Remove the argument removals and add -fno-stack-protector. Additionally include missed x32 argument updates, and adjust whitespace for readability. Fixes: 20355e5f73a7 ("x86/entry: Exclude low level entry code from sanitizing") Signed-off-by: Kees Cook Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/202006261333.585319CA6B@keescook --- arch/x86/entry/Makefile | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile index b7a5790d8d63..08bf95dbc911 100644 --- a/arch/x86/entry/Makefile +++ b/arch/x86/entry/Makefile @@ -7,12 +7,20 @@ KASAN_SANITIZE := n UBSAN_SANITIZE := n KCOV_INSTRUMENT := n -CFLAGS_REMOVE_common.o = $(CC_FLAGS_FTRACE) -fstack-protector -fstack-protector-strong -CFLAGS_REMOVE_syscall_32.o = $(CC_FLAGS_FTRACE) -fstack-protector -fstack-protector-strong -CFLAGS_REMOVE_syscall_64.o = $(CC_FLAGS_FTRACE) -fstack-protector -fstack-protector-strong +CFLAGS_REMOVE_common.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_syscall_64.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_syscall_32.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_syscall_x32.o = $(CC_FLAGS_FTRACE) + +CFLAGS_common.o += -fno-stack-protector +CFLAGS_syscall_64.o += -fno-stack-protector +CFLAGS_syscall_32.o += -fno-stack-protector +CFLAGS_syscall_x32.o += -fno-stack-protector CFLAGS_syscall_64.o += $(call cc-option,-Wno-override-init,) CFLAGS_syscall_32.o += $(call cc-option,-Wno-override-init,) +CFLAGS_syscall_x32.o += $(call cc-option,-Wno-override-init,) + obj-y := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o obj-y += common.o -- cgit v1.2.3 From da05b143a308bd6a7a444401f9732678ae63fc70 Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Tue, 14 Jul 2020 23:26:31 -0400 Subject: x86/boot: Don't add the EFI stub to targets vmlinux-objs-y is added to targets, which currently means that the EFI stub gets added to the targets as well. It shouldn't be added since it is built elsewhere. This confuses Makefile.build which interprets the EFI stub as a target $(obj)/$(objtree)/drivers/firmware/efi/libstub/lib.a and will create drivers/firmware/efi/libstub/ underneath arch/x86/boot/compressed, to hold this supposed target, if building out-of-tree. [0] Fix this by pulling the stub out of vmlinux-objs-y into efi-obj-y. [0] See scripts/Makefile.build near the end: # Create directories for object files if they do not exist Signed-off-by: Arvind Sankar Signed-off-by: Thomas Gleixner Reviewed-by: Masahiro Yamada Acked-by: Ard Biesheuvel Link: https://lkml.kernel.org/r/20200715032631.1562882-1-nivedita@alum.mit.edu --- arch/x86/boot/compressed/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 7619742f91c9..5a828fde7a42 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -90,8 +90,8 @@ endif vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o -vmlinux-objs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o +efi-obj-$(CONFIG_EFI_STUB) = $(objtree)/drivers/firmware/efi/libstub/lib.a # The compressed kernel is built with -fPIC/-fPIE so that a boot loader # can place it anywhere in memory and it will still run. However, since @@ -115,7 +115,7 @@ endef quiet_cmd_check-and-link-vmlinux = LD $@ cmd_check-and-link-vmlinux = $(cmd_check_data_rel); $(cmd_ld) -$(obj)/vmlinux: $(vmlinux-objs-y) FORCE +$(obj)/vmlinux: $(vmlinux-objs-y) $(efi-obj-y) FORCE $(call if_changed,check-and-link-vmlinux) OBJCOPYFLAGS_vmlinux.bin := -R .comment -S -- cgit v1.2.3 From 5714ee50bb4375bd586858ad800b1d9772847452 Mon Sep 17 00:00:00 2001 From: Kevin Buettner Date: Sat, 18 Jul 2020 00:20:03 -0700 Subject: copy_xstate_to_kernel: Fix typo which caused GDB regression This fixes a regression encountered while running the gdb.base/corefile.exp test in GDB's test suite. In my testing, the typo prevented the sw_reserved field of struct fxregs_state from being output to the kernel XSAVES area. Thus the correct mask corresponding to XCR0 was not present in the core file for GDB to interrogate, resulting in the following behavior: [kev@f32-1 gdb]$ ./gdb -q testsuite/outputs/gdb.base/corefile/corefile testsuite/outputs/gdb.base/corefile/corefile.core Reading symbols from testsuite/outputs/gdb.base/corefile/corefile... [New LWP 232880] warning: Unexpected size of section `.reg-xstate/232880' in core file. With the typo fixed, the test works again as expected. Signed-off-by: Kevin Buettner Fixes: 9e4636545933 ("copy_xstate_to_kernel(): don't leave parts of destination uninitialized") Cc: Al Viro Cc: Dave Airlie Signed-off-by: Linus Torvalds --- arch/x86/kernel/fpu/xstate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index bda2e5eaca0e..ad3a2b37927d 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1074,7 +1074,7 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int of copy_part(offsetof(struct fxregs_state, st_space), 128, &xsave->i387.st_space, &kbuf, &offset_start, &count); if (header.xfeatures & XFEATURE_MASK_SSE) - copy_part(xstate_offsets[XFEATURE_MASK_SSE], 256, + copy_part(xstate_offsets[XFEATURE_SSE], 256, &xsave->i387.xmm_space, &kbuf, &offset_start, &count); /* * Fill xsave->i387.sw_reserved value for ptrace frame: -- cgit v1.2.3 From 7c6719a1aaca51ffd7cdf3905e70aa8313f6ef26 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 19 Jul 2020 12:00:40 +0100 Subject: arm64: dts: clearfog-gt-8k: fix switch link configuration The commit below caused a regression for clearfog-gt-8k, where the link between the switch and the host does not come up. Investigation revealed two issues: - MV88E6xxx DSA no longer allows an in-band link to come up as the link is programmed to be forced down. Commit "net: dsa: mv88e6xxx: fix in-band AN link establishment" addresses this. - The dts configured dissimilar link modes at each end of the host to switch link; the host was configured using a fixed link (so has no in-band status) and the switch was configured to expect in-band status. With both issues fixed, the regression is resolved. Fixes: 34b5e6a33c1a ("net: dsa: mv88e6xxx: Configure MAC when using fixed link") Reported-by: Martin Rowe Signed-off-by: Russell King Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- arch/arm64/boot/dts/marvell/armada-8040-clearfog-gt-8k.dts | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/marvell/armada-8040-clearfog-gt-8k.dts b/arch/arm64/boot/dts/marvell/armada-8040-clearfog-gt-8k.dts index c8243da71041..eb01cc96ba7a 100644 --- a/arch/arm64/boot/dts/marvell/armada-8040-clearfog-gt-8k.dts +++ b/arch/arm64/boot/dts/marvell/armada-8040-clearfog-gt-8k.dts @@ -454,10 +454,7 @@ status = "okay"; phy-mode = "2500base-x"; phys = <&cp1_comphy5 2>; - fixed-link { - speed = <2500>; - full-duplex; - }; + managed = "in-band-status"; }; &cp1_spi1 { -- cgit v1.2.3 From 3d3af181d370069861a3be94608464e2ff3682e2 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Fri, 17 Jul 2020 11:27:22 +0200 Subject: s390/cpum_cf,perf: change DFLT_CCERROR counter name Change the counter name DLFT_CCERROR to DLFT_CCFINISH on IBM z15. This counter counts completed DEFLATE instructions with exit code 0, 1 or 2. Since exit code 0 means success and exit code 1 or 2 indicate errors, change the counter name to avoid confusion. This counter is incremented each time the DEFLATE instruction completed regardless if an error was detected or not. Fixes: d68d5d51dc89 ("s390/cpum_cf: Add new extended counters for IBM z15") Fixes: e7950166e402 ("perf vendor events s390: Add new deflate counters for IBM z15") Cc: stable@vger.kernel.org # v5.7 Signed-off-by: Thomas Richter Reviewed-by: Sumanth Korikkar Signed-off-by: Heiko Carstens --- arch/s390/kernel/perf_cpum_cf_events.c | 4 ++-- tools/perf/pmu-events/arch/s390/cf_z15/extended.json | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c index 1e3df52b2b65..37265f551a11 100644 --- a/arch/s390/kernel/perf_cpum_cf_events.c +++ b/arch/s390/kernel/perf_cpum_cf_events.c @@ -292,7 +292,7 @@ CPUMF_EVENT_ATTR(cf_z15, TX_C_TABORT_SPECIAL, 0x00f5); CPUMF_EVENT_ATTR(cf_z15, DFLT_ACCESS, 0x00f7); CPUMF_EVENT_ATTR(cf_z15, DFLT_CYCLES, 0x00fc); CPUMF_EVENT_ATTR(cf_z15, DFLT_CC, 0x00108); -CPUMF_EVENT_ATTR(cf_z15, DFLT_CCERROR, 0x00109); +CPUMF_EVENT_ATTR(cf_z15, DFLT_CCFINISH, 0x00109); CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0); CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1); @@ -629,7 +629,7 @@ static struct attribute *cpumcf_z15_pmu_event_attr[] __initdata = { CPUMF_EVENT_PTR(cf_z15, DFLT_ACCESS), CPUMF_EVENT_PTR(cf_z15, DFLT_CYCLES), CPUMF_EVENT_PTR(cf_z15, DFLT_CC), - CPUMF_EVENT_PTR(cf_z15, DFLT_CCERROR), + CPUMF_EVENT_PTR(cf_z15, DFLT_CCFINISH), CPUMF_EVENT_PTR(cf_z15, MT_DIAG_CYCLES_ONE_THR_ACTIVE), CPUMF_EVENT_PTR(cf_z15, MT_DIAG_CYCLES_TWO_THR_ACTIVE), NULL, diff --git a/tools/perf/pmu-events/arch/s390/cf_z15/extended.json b/tools/perf/pmu-events/arch/s390/cf_z15/extended.json index 2df2e231e9ee..24c4ba2a9ae5 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z15/extended.json +++ b/tools/perf/pmu-events/arch/s390/cf_z15/extended.json @@ -380,7 +380,7 @@ { "Unit": "CPU-M-CF", "EventCode": "265", - "EventName": "DFLT_CCERROR", + "EventName": "DFLT_CCFINISH", "BriefDescription": "Increments by one for every DEFLATE CONVERSION CALL instruction executed that ended in Condition Codes 0, 1 or 2", "PublicDescription": "Increments by one for every DEFLATE CONVERSION CALL instruction executed that ended in Condition Codes 0, 1 or 2" }, -- cgit v1.2.3 From 5fff09bc141570cd15c118b097b17dec832b517f Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Tue, 21 Jul 2020 15:00:35 -0700 Subject: xtensa: fix access check in csum_and_copy_from_user Commit d341659f470b ("xtensa: switch to providing csum_and_copy_from_user()") introduced access check, but incorrectly tested dst instead of src. Fix access_ok argument in csum_and_copy_from_user. Cc: Al Viro Fixes: d341659f470b ("xtensa: switch to providing csum_and_copy_from_user()") Signed-off-by: Max Filippov Signed-off-by: Al Viro --- arch/xtensa/include/asm/checksum.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/xtensa/include/asm/checksum.h b/arch/xtensa/include/asm/checksum.h index d8292cc9ebdf..243a5fe79d3c 100644 --- a/arch/xtensa/include/asm/checksum.h +++ b/arch/xtensa/include/asm/checksum.h @@ -57,7 +57,7 @@ static inline __wsum csum_and_copy_from_user(const void __user *src, void *dst, int len, __wsum sum, int *err_ptr) { - if (access_ok(dst, len)) + if (access_ok(src, len)) return csum_partial_copy_generic((__force const void *)src, dst, len, sum, err_ptr, NULL); if (len) -- cgit v1.2.3 From de2b41be8fcccb2f5b6c480d35df590476344201 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 21 Jul 2020 11:34:48 +0200 Subject: x86, vmlinux.lds: Page-align end of ..page_aligned sections On x86-32 the idt_table with 256 entries needs only 2048 bytes. It is page-aligned, but the end of the .bss..page_aligned section is not guaranteed to be page-aligned. As a result, objects from other .bss sections may end up on the same 4k page as the idt_table, and will accidentially get mapped read-only during boot, causing unexpected page-faults when the kernel writes to them. This could be worked around by making the objects in the page aligned sections page sized, but that's wrong. Explicit sections which store only page aligned objects have an implicit guarantee that the object is alone in the page in which it is placed. That works for all objects except the last one. That's inconsistent. Enforcing page sized objects for these sections would wreckage memory sanitizers, because the object becomes artificially larger than it should be and out of bound access becomes legit. Align the end of the .bss..page_aligned and .data..page_aligned section on page-size so all objects places in these sections are guaranteed to have their own page. [ tglx: Amended changelog ] Signed-off-by: Joerg Roedel Signed-off-by: Thomas Gleixner Reviewed-by: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20200721093448.10417-1-joro@8bytes.org --- arch/x86/kernel/vmlinux.lds.S | 1 + include/asm-generic/vmlinux.lds.h | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 3bfc8dd8a43d..9a03e5b23135 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -358,6 +358,7 @@ SECTIONS .bss : AT(ADDR(.bss) - LOAD_OFFSET) { __bss_start = .; *(.bss..page_aligned) + . = ALIGN(PAGE_SIZE); *(BSS_MAIN) BSS_DECRYPTED . = ALIGN(PAGE_SIZE); diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index db600ef218d7..052e0f05a984 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -341,7 +341,8 @@ #define PAGE_ALIGNED_DATA(page_align) \ . = ALIGN(page_align); \ - *(.data..page_aligned) + *(.data..page_aligned) \ + . = ALIGN(page_align); #define READ_MOSTLY_DATA(align) \ . = ALIGN(align); \ @@ -737,7 +738,9 @@ . = ALIGN(bss_align); \ .bss : AT(ADDR(.bss) - LOAD_OFFSET) { \ BSS_FIRST_SECTIONS \ + . = ALIGN(PAGE_SIZE); \ *(.bss..page_aligned) \ + . = ALIGN(PAGE_SIZE); \ *(.dynbss) \ *(BSS_MAIN) \ *(COMMON) \ -- cgit v1.2.3 From 372a8eaa05998cd45b3417d0e0ffd3a70978211a Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 17 Jul 2020 09:04:25 -0500 Subject: x86/unwind/orc: Fix ORC for newly forked tasks The ORC unwinder fails to unwind newly forked tasks which haven't yet run on the CPU. It correctly reads the 'ret_from_fork' instruction pointer from the stack, but it incorrectly interprets that value as a call stack address rather than a "signal" one, so the address gets incorrectly decremented in the call to orc_find(), resulting in bad ORC data. Fix it by forcing 'ret_from_fork' frames to be signal frames. Reported-by: Wang ShaoBo Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Tested-by: Wang ShaoBo Link: https://lkml.kernel.org/r/f91a8778dde8aae7f71884b5df2b16d552040441.1594994374.git.jpoimboe@redhat.com --- arch/x86/kernel/unwind_orc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 7f969b2d240f..ec88bbe08a32 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -440,8 +440,11 @@ bool unwind_next_frame(struct unwind_state *state) /* * Find the orc_entry associated with the text address. * - * Decrement call return addresses by one so they work for sibling - * calls and calls to noreturn functions. + * For a call frame (as opposed to a signal frame), state->ip points to + * the instruction after the call. That instruction's stack layout + * could be different from the call instruction's layout, for example + * if the call was to a noreturn function. So get the ORC data for the + * call instruction itself. */ orc = orc_find(state->signal ? state->ip : state->ip - 1); if (!orc) { @@ -662,6 +665,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, state->sp = task->thread.sp; state->bp = READ_ONCE_NOCHECK(frame->bp); state->ip = READ_ONCE_NOCHECK(frame->ret_addr); + state->signal = (void *)state->ip == ret_from_fork; } if (get_stack_info((unsigned long *)state->sp, state->task, -- cgit v1.2.3 From 039a7a30ec102ec866d382a66f87f6f7654f8140 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 17 Jul 2020 09:04:26 -0500 Subject: x86/stacktrace: Fix reliable check for empty user task stacks If a user task's stack is empty, or if it only has user regs, ORC reports it as a reliable empty stack. But arch_stack_walk_reliable() incorrectly treats it as unreliable. That happens because the only success path for user tasks is inside the loop, which only iterates on non-empty stacks. Generally, a user task must end in a user regs frame, but an empty stack is an exception to that rule. Thanks to commit 71c95825289f ("x86/unwind/orc: Fix error handling in __unwind_start()"), unwind_start() now sets state->error appropriately. So now for both ORC and FP unwinders, unwind_done() and !unwind_error() always means the end of the stack was successfully reached. So the success path for kthreads is no longer needed -- it can also be used for empty user tasks. Reported-by: Wang ShaoBo Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Tested-by: Wang ShaoBo Link: https://lkml.kernel.org/r/f136a4e5f019219cbc4f4da33b30c2f44fa65b84.1594994374.git.jpoimboe@redhat.com --- arch/x86/kernel/stacktrace.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index 6ad43fc44556..2fd698e28e4d 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -58,7 +58,6 @@ int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, * or a page fault), which can make frame pointers * unreliable. */ - if (IS_ENABLED(CONFIG_FRAME_POINTER)) return -EINVAL; } @@ -81,10 +80,6 @@ int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, if (unwind_error(&state)) return -EINVAL; - /* Success path for non-user tasks, i.e. kthreads and idle tasks */ - if (!(task->flags & (PF_KTHREAD | PF_IDLE))) - return -EINVAL; - return 0; } -- cgit v1.2.3 From d181d2da0141371bbc360eaea78719203e165e1c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 22 Jul 2020 10:39:54 +0200 Subject: x86/dumpstack: Dump user space code correctly again H.J. reported that post 5.7 a segfault of a user space task does not longer dump the Code bytes when /proc/sys/debug/exception-trace is enabled. It prints 'Code: Bad RIP value.' instead. This was broken by a recent change which made probe_kernel_read() reject non-kernel addresses. Update show_opcodes() so it retrieves user space opcodes via copy_from_user_nmi(). Fixes: 98a23609b103 ("maccess: always use strict semantics for probe_kernel_read") Reported-by: H.J. Lu Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/87h7tz306w.fsf@nanos.tec.linutronix.de --- arch/x86/kernel/dumpstack.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index b037cfa7c0c5..7401cc12c3cc 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -71,6 +71,22 @@ static void printk_stack_address(unsigned long address, int reliable, printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address); } +static int copy_code(struct pt_regs *regs, u8 *buf, unsigned long src, + unsigned int nbytes) +{ + if (!user_mode(regs)) + return copy_from_kernel_nofault(buf, (u8 *)src, nbytes); + + /* + * Make sure userspace isn't trying to trick us into dumping kernel + * memory by pointing the userspace instruction pointer at it. + */ + if (__chk_range_not_ok(src, nbytes, TASK_SIZE_MAX)) + return -EINVAL; + + return copy_from_user_nmi(buf, (void __user *)src, nbytes); +} + /* * There are a couple of reasons for the 2/3rd prologue, courtesy of Linus: * @@ -97,17 +113,8 @@ void show_opcodes(struct pt_regs *regs, const char *loglvl) #define OPCODE_BUFSIZE (PROLOGUE_SIZE + 1 + EPILOGUE_SIZE) u8 opcodes[OPCODE_BUFSIZE]; unsigned long prologue = regs->ip - PROLOGUE_SIZE; - bool bad_ip; - - /* - * Make sure userspace isn't trying to trick us into dumping kernel - * memory by pointing the userspace instruction pointer at it. - */ - bad_ip = user_mode(regs) && - __chk_range_not_ok(prologue, OPCODE_BUFSIZE, TASK_SIZE_MAX); - if (bad_ip || copy_from_kernel_nofault(opcodes, (u8 *)prologue, - OPCODE_BUFSIZE)) { + if (copy_code(regs, opcodes, prologue, sizeof(opcodes))) { printk("%sCode: Bad RIP value.\n", loglvl); } else { printk("%sCode: %" __stringify(PROLOGUE_SIZE) "ph <%02x> %" -- cgit v1.2.3 From 7b7891c7bdfd61fc9ed6747a0a05efe2394dddc6 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 22 Jul 2020 21:15:10 -0700 Subject: arm64: vdso32: Fix '--prefix=' value for newer versions of clang Newer versions of clang only look for $(COMPAT_GCC_TOOLCHAIN_DIR)as [1], rather than $(COMPAT_GCC_TOOLCHAIN_DIR)$(CROSS_COMPILE_COMPAT)as, resulting in the following build error: $ make -skj"$(nproc)" ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- \ CROSS_COMPILE_COMPAT=arm-linux-gnueabi- LLVM=1 O=out/aarch64 distclean \ defconfig arch/arm64/kernel/vdso32/ ... /home/nathan/cbl/toolchains/llvm-binutils/bin/as: unrecognized option '-EL' clang-12: error: assembler command failed with exit code 1 (use -v to see invocation) make[3]: *** [arch/arm64/kernel/vdso32/Makefile:181: arch/arm64/kernel/vdso32/note.o] Error 1 ... Adding the value of CROSS_COMPILE_COMPAT (adding notdir to account for a full path for CROSS_COMPILE_COMPAT) fixes this issue, which matches the solution done for the main Makefile [2]. [1]: https://github.com/llvm/llvm-project/commit/3452a0d8c17f7166f479706b293caf6ac76ffd90 [2]: https://lore.kernel.org/lkml/20200721173125.1273884-1-maskray@google.com/ Signed-off-by: Nathan Chancellor Cc: stable@vger.kernel.org Link: https://github.com/ClangBuiltLinux/linux/issues/1099 Link: https://lore.kernel.org/r/20200723041509.400450-1-natechancellor@gmail.com Signed-off-by: Will Deacon --- arch/arm64/kernel/vdso32/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index d88148bef6b0..5139a5f19256 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -14,7 +14,7 @@ COMPAT_GCC_TOOLCHAIN_DIR := $(dir $(shell which $(CROSS_COMPILE_COMPAT)elfedit)) COMPAT_GCC_TOOLCHAIN := $(realpath $(COMPAT_GCC_TOOLCHAIN_DIR)/..) CC_COMPAT_CLANG_FLAGS := --target=$(notdir $(CROSS_COMPILE_COMPAT:%-=%)) -CC_COMPAT_CLANG_FLAGS += --prefix=$(COMPAT_GCC_TOOLCHAIN_DIR) +CC_COMPAT_CLANG_FLAGS += --prefix=$(COMPAT_GCC_TOOLCHAIN_DIR)$(notdir $(CROSS_COMPILE_COMPAT)) CC_COMPAT_CLANG_FLAGS += -no-integrated-as -Qunused-arguments ifneq ($(COMPAT_GCC_TOOLCHAIN),) CC_COMPAT_CLANG_FLAGS += --gcc-toolchain=$(COMPAT_GCC_TOOLCHAIN) -- cgit v1.2.3