diff options
181 files changed, 2738 insertions, 1278 deletions
diff --git a/Documentation/ABI/testing/sysfs-kernel-iommu_groups b/Documentation/ABI/testing/sysfs-kernel-iommu_groups index 35c64e00b35c..017f5bc3920c 100644 --- a/Documentation/ABI/testing/sysfs-kernel-iommu_groups +++ b/Documentation/ABI/testing/sysfs-kernel-iommu_groups @@ -24,3 +24,12 @@ Description: /sys/kernel/iommu_groups/reserved_regions list IOVA region is described on a single line: the 1st field is the base IOVA, the second is the end IOVA and the third field describes the type of the region. + +What: /sys/kernel/iommu_groups/reserved_regions +Date: June 2019 +KernelVersion: v5.3 +Contact: Eric Auger <eric.auger@redhat.com> +Description: In case an RMRR is used only by graphics or USB devices + it is now exposed as "direct-relaxable" instead of "direct". + In device assignment use case, for instance, those RMRR + are considered to be relaxable and safe. diff --git a/Documentation/devicetree/bindings/riscv/cpus.yaml b/Documentation/devicetree/bindings/riscv/cpus.yaml index 27f02ec4bb45..f97a4ecd7b91 100644 --- a/Documentation/devicetree/bindings/riscv/cpus.yaml +++ b/Documentation/devicetree/bindings/riscv/cpus.yaml @@ -152,17 +152,19 @@ examples: - | // Example 2: Spike ISA Simulator with 1 Hart cpus { - cpu@0 { - device_type = "cpu"; - reg = <0>; - compatible = "riscv"; - riscv,isa = "rv64imafdc"; - mmu-type = "riscv,sv48"; - interrupt-controller { - #interrupt-cells = <1>; - interrupt-controller; - compatible = "riscv,cpu-intc"; - }; - }; + #address-cells = <1>; + #size-cells = <0>; + cpu@0 { + device_type = "cpu"; + reg = <0>; + compatible = "riscv"; + riscv,isa = "rv64imafdc"; + mmu-type = "riscv,sv48"; + interrupt-controller { + #interrupt-cells = <1>; + interrupt-controller; + compatible = "riscv,cpu-intc"; + }; + }; }; ... diff --git a/MAINTAINERS b/MAINTAINERS index d0ed735994a5..01a52fc964da 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3122,6 +3122,7 @@ F: arch/arm/mach-bcm/ BROADCOM BCM2835 ARM ARCHITECTURE M: Eric Anholt <eric@anholt.net> M: Stefan Wahren <wahrenst@gmx.net> +L: bcm-kernel-feedback-list@broadcom.com L: linux-rpi-kernel@lists.infradead.org (moderated for non-subscribers) L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) T: git git://github.com/anholt/linux @@ -3151,6 +3152,7 @@ F: arch/arm/boot/dts/bcm953012* BROADCOM BCM53573 ARM ARCHITECTURE M: Rafał Miłecki <rafal@milecki.pl> +L: bcm-kernel-feedback-list@broadcom.com L: linux-arm-kernel@lists.infradead.org S: Maintained F: arch/arm/boot/dts/bcm53573* @@ -3940,6 +3942,14 @@ M: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com> S: Maintained F: .clang-format +CLANG/LLVM BUILD SUPPORT +L: clang-built-linux@googlegroups.com +W: https://clangbuiltlinux.github.io/ +B: https://github.com/ClangBuiltLinux/linux/issues +C: irc://chat.freenode.net/clangbuiltlinux +S: Supported +K: \b(?i:clang|llvm)\b + CLEANCACHE API M: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> L: linux-kernel@vger.kernel.org @@ -2,8 +2,8 @@ VERSION = 5 PATCHLEVEL = 2 SUBLEVEL = 0 -EXTRAVERSION = -rc6 -NAME = Golden Lions +EXTRAVERSION = -rc7 +NAME = Bobtail Squid # *DOCUMENTATION* # To see a list of typical targets execute "make help" diff --git a/arch/arc/Makefile b/arch/arc/Makefile index 480af1af9e63..03a0b19c92cd 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -5,6 +5,10 @@ KBUILD_DEFCONFIG := nsim_hs_defconfig +ifeq ($(CROSS_COMPILE),) +CROSS_COMPILE := $(call cc-cross-prefix, arc-linux- arceb-linux-) +endif + cflags-y += -fno-common -pipe -fno-builtin -mmedium-calls -D__linux__ cflags-$(CONFIG_ISA_ARCOMPACT) += -mA7 cflags-$(CONFIG_ISA_ARCV2) += -mcpu=hs38 diff --git a/arch/arc/plat-hsdk/platform.c b/arch/arc/plat-hsdk/platform.c index 6a91a742ab3d..7dd2dd335cf6 100644 --- a/arch/arc/plat-hsdk/platform.c +++ b/arch/arc/plat-hsdk/platform.c @@ -32,8 +32,6 @@ static void __init hsdk_init_per_cpu(unsigned int cpu) #define ARC_PERIPHERAL_BASE 0xf0000000 #define CREG_BASE (ARC_PERIPHERAL_BASE + 0x1000) -#define CREG_PAE (CREG_BASE + 0x180) -#define CREG_PAE_UPDATE (CREG_BASE + 0x194) #define SDIO_BASE (ARC_PERIPHERAL_BASE + 0xA000) #define SDIO_UHS_REG_EXT (SDIO_BASE + 0x108) @@ -99,20 +97,167 @@ static void __init hsdk_enable_gpio_intc_wire(void) iowrite32(GPIO_INT_CONNECTED_MASK, (void __iomem *) GPIO_INTEN); } -static void __init hsdk_init_early(void) +enum hsdk_axi_masters { + M_HS_CORE = 0, + M_HS_RTT, + M_AXI_TUN, + M_HDMI_VIDEO, + M_HDMI_AUDIO, + M_USB_HOST, + M_ETHERNET, + M_SDIO, + M_GPU, + M_DMAC_0, + M_DMAC_1, + M_DVFS +}; + +#define UPDATE_VAL 1 + +/* + * This is modified configuration of AXI bridge. Default settings + * are specified in "Table 111 CREG Address Decoder register reset values". + * + * AXI_M_m_SLV{0|1} - Slave Select register for master 'm'. + * Possible slaves are: + * - 0 => no slave selected + * - 1 => DDR controller port #1 + * - 2 => SRAM controller + * - 3 => AXI tunnel + * - 4 => EBI controller + * - 5 => ROM controller + * - 6 => AXI2APB bridge + * - 7 => DDR controller port #2 + * - 8 => DDR controller port #3 + * - 9 => HS38x4 IOC + * - 10 => HS38x4 DMI + * AXI_M_m_OFFSET{0|1} - Addr Offset register for master 'm' + * + * Please read ARC HS Development IC Specification, section 17.2 for more + * information about apertures configuration. + * + * m master AXI_M_m_SLV0 AXI_M_m_SLV1 AXI_M_m_OFFSET0 AXI_M_m_OFFSET1 + * 0 HS (CBU) 0x11111111 0x63111111 0xFEDCBA98 0x0E543210 + * 1 HS (RTT) 0x77777777 0x77777777 0xFEDCBA98 0x76543210 + * 2 AXI Tunnel 0x88888888 0x88888888 0xFEDCBA98 0x76543210 + * 3 HDMI-VIDEO 0x77777777 0x77777777 0xFEDCBA98 0x76543210 + * 4 HDMI-ADUIO 0x77777777 0x77777777 0xFEDCBA98 0x76543210 + * 5 USB-HOST 0x77777777 0x77999999 0xFEDCBA98 0x76DCBA98 + * 6 ETHERNET 0x77777777 0x77999999 0xFEDCBA98 0x76DCBA98 + * 7 SDIO 0x77777777 0x77999999 0xFEDCBA98 0x76DCBA98 + * 8 GPU 0x77777777 0x77777777 0xFEDCBA98 0x76543210 + * 9 DMAC (port #1) 0x77777777 0x77777777 0xFEDCBA98 0x76543210 + * 10 DMAC (port #2) 0x77777777 0x77777777 0xFEDCBA98 0x76543210 + * 11 DVFS 0x00000000 0x60000000 0x00000000 0x00000000 + */ + +#define CREG_AXI_M_SLV0(m) ((void __iomem *)(CREG_BASE + 0x20 * (m))) +#define CREG_AXI_M_SLV1(m) ((void __iomem *)(CREG_BASE + 0x20 * (m) + 0x04)) +#define CREG_AXI_M_OFT0(m) ((void __iomem *)(CREG_BASE + 0x20 * (m) + 0x08)) +#define CREG_AXI_M_OFT1(m) ((void __iomem *)(CREG_BASE + 0x20 * (m) + 0x0C)) +#define CREG_AXI_M_UPDT(m) ((void __iomem *)(CREG_BASE + 0x20 * (m) + 0x14)) + +#define CREG_AXI_M_HS_CORE_BOOT ((void __iomem *)(CREG_BASE + 0x010)) + +#define CREG_PAE ((void __iomem *)(CREG_BASE + 0x180)) +#define CREG_PAE_UPDT ((void __iomem *)(CREG_BASE + 0x194)) + +static void __init hsdk_init_memory_bridge(void) { + u32 reg; + + /* + * M_HS_CORE has one unique register - BOOT. + * We need to clean boot mirror (BOOT[1:0]) bits in them to avoid first + * aperture to be masked by 'boot mirror'. + */ + reg = readl(CREG_AXI_M_HS_CORE_BOOT) & (~0x3); + writel(reg, CREG_AXI_M_HS_CORE_BOOT); + writel(0x11111111, CREG_AXI_M_SLV0(M_HS_CORE)); + writel(0x63111111, CREG_AXI_M_SLV1(M_HS_CORE)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_HS_CORE)); + writel(0x0E543210, CREG_AXI_M_OFT1(M_HS_CORE)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_HS_CORE)); + + writel(0x77777777, CREG_AXI_M_SLV0(M_HS_RTT)); + writel(0x77777777, CREG_AXI_M_SLV1(M_HS_RTT)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_HS_RTT)); + writel(0x76543210, CREG_AXI_M_OFT1(M_HS_RTT)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_HS_RTT)); + + writel(0x88888888, CREG_AXI_M_SLV0(M_AXI_TUN)); + writel(0x88888888, CREG_AXI_M_SLV1(M_AXI_TUN)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_AXI_TUN)); + writel(0x76543210, CREG_AXI_M_OFT1(M_AXI_TUN)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_AXI_TUN)); + + writel(0x77777777, CREG_AXI_M_SLV0(M_HDMI_VIDEO)); + writel(0x77777777, CREG_AXI_M_SLV1(M_HDMI_VIDEO)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_HDMI_VIDEO)); + writel(0x76543210, CREG_AXI_M_OFT1(M_HDMI_VIDEO)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_HDMI_VIDEO)); + + writel(0x77777777, CREG_AXI_M_SLV0(M_HDMI_AUDIO)); + writel(0x77777777, CREG_AXI_M_SLV1(M_HDMI_AUDIO)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_HDMI_AUDIO)); + writel(0x76543210, CREG_AXI_M_OFT1(M_HDMI_AUDIO)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_HDMI_AUDIO)); + + writel(0x77777777, CREG_AXI_M_SLV0(M_USB_HOST)); + writel(0x77999999, CREG_AXI_M_SLV1(M_USB_HOST)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_USB_HOST)); + writel(0x76DCBA98, CREG_AXI_M_OFT1(M_USB_HOST)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_USB_HOST)); + + writel(0x77777777, CREG_AXI_M_SLV0(M_ETHERNET)); + writel(0x77999999, CREG_AXI_M_SLV1(M_ETHERNET)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_ETHERNET)); + writel(0x76DCBA98, CREG_AXI_M_OFT1(M_ETHERNET)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_ETHERNET)); + + writel(0x77777777, CREG_AXI_M_SLV0(M_SDIO)); + writel(0x77999999, CREG_AXI_M_SLV1(M_SDIO)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_SDIO)); + writel(0x76DCBA98, CREG_AXI_M_OFT1(M_SDIO)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_SDIO)); + + writel(0x77777777, CREG_AXI_M_SLV0(M_GPU)); + writel(0x77777777, CREG_AXI_M_SLV1(M_GPU)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_GPU)); + writel(0x76543210, CREG_AXI_M_OFT1(M_GPU)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_GPU)); + + writel(0x77777777, CREG_AXI_M_SLV0(M_DMAC_0)); + writel(0x77777777, CREG_AXI_M_SLV1(M_DMAC_0)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_DMAC_0)); + writel(0x76543210, CREG_AXI_M_OFT1(M_DMAC_0)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_DMAC_0)); + + writel(0x77777777, CREG_AXI_M_SLV0(M_DMAC_1)); + writel(0x77777777, CREG_AXI_M_SLV1(M_DMAC_1)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_DMAC_1)); + writel(0x76543210, CREG_AXI_M_OFT1(M_DMAC_1)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_DMAC_1)); + + writel(0x00000000, CREG_AXI_M_SLV0(M_DVFS)); + writel(0x60000000, CREG_AXI_M_SLV1(M_DVFS)); + writel(0x00000000, CREG_AXI_M_OFT0(M_DVFS)); + writel(0x00000000, CREG_AXI_M_OFT1(M_DVFS)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_DVFS)); + /* * PAE remapping for DMA clients does not work due to an RTL bug, so * CREG_PAE register must be programmed to all zeroes, otherwise it * will cause problems with DMA to/from peripherals even if PAE40 is * not used. */ + writel(0x00000000, CREG_PAE); + writel(UPDATE_VAL, CREG_PAE_UPDT); +} - /* Default is 1, which means "PAE offset = 4GByte" */ - writel_relaxed(0, (void __iomem *) CREG_PAE); - - /* Really apply settings made above */ - writel(1, (void __iomem *) CREG_PAE_UPDATE); +static void __init hsdk_init_early(void) +{ + hsdk_init_memory_bridge(); /* * Switch SDIO external ciu clock divider from default div-by-8 to diff --git a/arch/arm/boot/dts/gemini-dlink-dir-685.dts b/arch/arm/boot/dts/gemini-dlink-dir-685.dts index cfbfbc91a1e1..3613f05f8a80 100644 --- a/arch/arm/boot/dts/gemini-dlink-dir-685.dts +++ b/arch/arm/boot/dts/gemini-dlink-dir-685.dts @@ -20,7 +20,7 @@ }; chosen { - bootargs = "console=ttyS0,19200n8 root=/dev/sda1 rw rootwait"; + bootargs = "console=ttyS0,19200n8 root=/dev/sda1 rw rootwait consoleblank=300"; stdout-path = "uart0:19200n8"; }; diff --git a/arch/arm/boot/dts/gemini-dlink-dns-313.dts b/arch/arm/boot/dts/gemini-dlink-dns-313.dts index b12504e10f0b..360642a02a48 100644 --- a/arch/arm/boot/dts/gemini-dlink-dns-313.dts +++ b/arch/arm/boot/dts/gemini-dlink-dns-313.dts @@ -11,7 +11,7 @@ / { model = "D-Link DNS-313 1-Bay Network Storage Enclosure"; - compatible = "dlink,dir-313", "cortina,gemini"; + compatible = "dlink,dns-313", "cortina,gemini"; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm/boot/dts/imx6ul.dtsi b/arch/arm/boot/dts/imx6ul.dtsi index bbf010c73336..a7f6d1d58e20 100644 --- a/arch/arm/boot/dts/imx6ul.dtsi +++ b/arch/arm/boot/dts/imx6ul.dtsi @@ -358,7 +358,7 @@ pwm1: pwm@2080000 { compatible = "fsl,imx6ul-pwm", "fsl,imx27-pwm"; reg = <0x02080000 0x4000>; - interrupts = <GIC_SPI 115 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clks IMX6UL_CLK_PWM1>, <&clks IMX6UL_CLK_PWM1>; clock-names = "ipg", "per"; @@ -369,7 +369,7 @@ pwm2: pwm@2084000 { compatible = "fsl,imx6ul-pwm", "fsl,imx27-pwm"; reg = <0x02084000 0x4000>; - interrupts = <GIC_SPI 116 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 84 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clks IMX6UL_CLK_PWM2>, <&clks IMX6UL_CLK_PWM2>; clock-names = "ipg", "per"; @@ -380,7 +380,7 @@ pwm3: pwm@2088000 { compatible = "fsl,imx6ul-pwm", "fsl,imx27-pwm"; reg = <0x02088000 0x4000>; - interrupts = <GIC_SPI 117 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 85 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clks IMX6UL_CLK_PWM3>, <&clks IMX6UL_CLK_PWM3>; clock-names = "ipg", "per"; @@ -391,7 +391,7 @@ pwm4: pwm@208c000 { compatible = "fsl,imx6ul-pwm", "fsl,imx27-pwm"; reg = <0x0208c000 0x4000>; - interrupts = <GIC_SPI 118 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clks IMX6UL_CLK_PWM4>, <&clks IMX6UL_CLK_PWM4>; clock-names = "ipg", "per"; diff --git a/arch/arm/boot/dts/meson8.dtsi b/arch/arm/boot/dts/meson8.dtsi index 7ef442462ea4..40c11b6b217a 100644 --- a/arch/arm/boot/dts/meson8.dtsi +++ b/arch/arm/boot/dts/meson8.dtsi @@ -248,8 +248,8 @@ <GIC_SPI 167 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 168 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 169 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 172 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 173 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 170 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 171 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 172 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 173 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 174 IRQ_TYPE_LEVEL_HIGH>, @@ -264,7 +264,6 @@ clocks = <&clkc CLKID_CLK81>, <&clkc CLKID_MALI>; clock-names = "bus", "core"; operating-points-v2 = <&gpu_opp_table>; - switch-delay = <0xffff>; }; }; }; /* end of / */ diff --git a/arch/arm/boot/dts/meson8b.dtsi b/arch/arm/boot/dts/meson8b.dtsi index 800cd65fc50a..ec67f49116d9 100644 --- a/arch/arm/boot/dts/meson8b.dtsi +++ b/arch/arm/boot/dts/meson8b.dtsi @@ -163,23 +163,23 @@ opp-255000000 { opp-hz = /bits/ 64 <255000000>; - opp-microvolt = <1150000>; + opp-microvolt = <1100000>; }; opp-364300000 { opp-hz = /bits/ 64 <364300000>; - opp-microvolt = <1150000>; + opp-microvolt = <1100000>; }; opp-425000000 { opp-hz = /bits/ 64 <425000000>; - opp-microvolt = <1150000>; + opp-microvolt = <1100000>; }; opp-510000000 { opp-hz = /bits/ 64 <510000000>; - opp-microvolt = <1150000>; + opp-microvolt = <1100000>; }; opp-637500000 { opp-hz = /bits/ 64 <637500000>; - opp-microvolt = <1150000>; + opp-microvolt = <1100000>; turbo-mode; }; }; @@ -229,7 +229,6 @@ clocks = <&clkc CLKID_CLK81>, <&clkc CLKID_MALI>; clock-names = "bus", "core"; operating-points-v2 = <&gpu_opp_table>; - switch-delay = <0xffff>; }; }; }; /* end of / */ diff --git a/arch/arm/mach-omap2/prm3xxx.c b/arch/arm/mach-omap2/prm3xxx.c index fd4a3bf27993..1b442b128569 100644 --- a/arch/arm/mach-omap2/prm3xxx.c +++ b/arch/arm/mach-omap2/prm3xxx.c @@ -430,7 +430,7 @@ static void omap3_prm_reconfigure_io_chain(void) * registers, and omap3xxx_prm_reconfigure_io_chain() must be called. * No return value. */ -static void __init omap3xxx_prm_enable_io_wakeup(void) +static void omap3xxx_prm_enable_io_wakeup(void) { if (prm_features & PRM_HAS_IO_WAKEUP) omap2_prm_set_mod_reg_bits(OMAP3430_EN_IO_MASK, WKUP_MOD, diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi index b04581249f0b..bf7f845447ed 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi @@ -28,7 +28,7 @@ enable-method = "psci"; clocks = <&clockgen 1 0>; next-level-cache = <&l2>; - cpu-idle-states = <&CPU_PH20>; + cpu-idle-states = <&CPU_PW20>; }; cpu1: cpu@1 { @@ -38,7 +38,7 @@ enable-method = "psci"; clocks = <&clockgen 1 0>; next-level-cache = <&l2>; - cpu-idle-states = <&CPU_PH20>; + cpu-idle-states = <&CPU_PW20>; }; l2: l2-cache { @@ -53,13 +53,13 @@ */ entry-method = "arm,psci"; - CPU_PH20: cpu-ph20 { - compatible = "arm,idle-state"; - idle-state-name = "PH20"; - arm,psci-suspend-param = <0x00010000>; - entry-latency-us = <1000>; - exit-latency-us = <1000>; - min-residency-us = <3000>; + CPU_PW20: cpu-pw20 { + compatible = "arm,idle-state"; + idle-state-name = "PW20"; + arm,psci-suspend-param = <0x0>; + entry-latency-us = <2000>; + exit-latency-us = <2000>; + min-residency-us = <6000>; }; }; diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 4d583514258c..6bca5b082ea4 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -613,6 +613,7 @@ CONFIG_RTC_DRV_TEGRA=y CONFIG_RTC_DRV_IMX_SC=m CONFIG_RTC_DRV_XGENE=y CONFIG_DMADEVICES=y +CONFIG_FSL_EDMA=y CONFIG_DMA_BCM2835=m CONFIG_K3_DMA=y CONFIG_MV_XOR=y diff --git a/arch/csky/kernel/signal.c b/arch/csky/kernel/signal.c index 04a43cfd4e09..d47a3381aad8 100644 --- a/arch/csky/kernel/signal.c +++ b/arch/csky/kernel/signal.c @@ -39,6 +39,11 @@ static int save_fpu_state(struct sigcontext __user *sc) #endif struct rt_sigframe { + /* + * pad[3] is compatible with the same struct defined in + * gcc/libgcc/config/csky/linux-unwind.h + */ + int pad[3]; struct siginfo info; struct ucontext uc; }; diff --git a/arch/mips/include/asm/mips-gic.h b/arch/mips/include/asm/mips-gic.h index 75a1cdee1331..084cac1c5ea2 100644 --- a/arch/mips/include/asm/mips-gic.h +++ b/arch/mips/include/asm/mips-gic.h @@ -311,6 +311,36 @@ static inline bool mips_gic_present(void) } /** + * mips_gic_vx_map_reg() - Return GIC_Vx_<intr>_MAP register offset + * @intr: A GIC local interrupt + * + * Determine the index of the GIC_VL_<intr>_MAP or GIC_VO_<intr>_MAP register + * within the block of GIC map registers. This is almost the same as the order + * of interrupts in the pending & mask registers, as used by enum + * mips_gic_local_interrupt, but moves the FDC interrupt & thus offsets the + * interrupts after it... + * + * Return: The map register index corresponding to @intr. + * + * The return value is suitable for use with the (read|write)_gic_v[lo]_map + * accessor functions. + */ +static inline unsigned int +mips_gic_vx_map_reg(enum mips_gic_local_interrupt intr) +{ + /* WD, Compare & Timer are 1:1 */ + if (intr <= GIC_LOCAL_INT_TIMER) + return intr; + + /* FDC moves to after Timer... */ + if (intr == GIC_LOCAL_INT_FDC) + return GIC_LOCAL_INT_TIMER + 1; + + /* As a result everything else is offset by 1 */ + return intr + 1; +} + +/** * gic_get_c0_compare_int() - Return cp0 count/compare interrupt virq * * Determine the virq number to use for the coprocessor 0 count/compare diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c index f241ded9239b..1f0f29a289d3 100644 --- a/arch/parisc/kernel/module.c +++ b/arch/parisc/kernel/module.c @@ -786,6 +786,10 @@ int apply_relocate_add(Elf_Shdr *sechdrs, /* 32-bit PC relative address */ *loc = val - dot - 8 + addend; break; + case R_PARISC_PCREL64: + /* 64-bit PC relative address */ + *loc64 = val - dot - 8 + addend; + break; case R_PARISC_DIR64: /* 64-bit effective address */ *loc64 = val + addend; diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 6b86055e5251..73ba246ca11d 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -315,7 +315,7 @@ TRAMP_REAL_BEGIN(machine_check_common_early) mfspr r11,SPRN_DSISR /* Save DSISR */ std r11,_DSISR(r1) std r9,_CCR(r1) /* Save CR in stackframe */ - kuap_save_amr_and_lock r9, r10, cr1 + /* We don't touch AMR here, we never go to virtual mode */ /* Save r9 through r13 from EXMC save area to stack frame. */ EXCEPTION_PROLOG_COMMON_2(PACA_EXMC) mfmsr r11 /* get MSR value */ diff --git a/arch/powerpc/mm/book3s64/mmu_context.c b/arch/powerpc/mm/book3s64/mmu_context.c index bb70391401f7..794404d50a85 100644 --- a/arch/powerpc/mm/book3s64/mmu_context.c +++ b/arch/powerpc/mm/book3s64/mmu_context.c @@ -50,20 +50,52 @@ EXPORT_SYMBOL_GPL(hash__alloc_context_id); void slb_setup_new_exec(void); +static int realloc_context_ids(mm_context_t *ctx) +{ + int i, id; + + /* + * id 0 (aka. ctx->id) is special, we always allocate a new one, even if + * there wasn't one allocated previously (which happens in the exec + * case where ctx is newly allocated). + * + * We have to be a bit careful here. We must keep the existing ids in + * the array, so that we can test if they're non-zero to decide if we + * need to allocate a new one. However in case of error we must free the + * ids we've allocated but *not* any of the existing ones (or risk a + * UAF). That's why we decrement i at the start of the error handling + * loop, to skip the id that we just tested but couldn't reallocate. + */ + for (i = 0; i < ARRAY_SIZE(ctx->extended_id); i++) { + if (i == 0 || ctx->extended_id[i]) { + id = hash__alloc_context_id(); + if (id < 0) + goto error; + + ctx->extended_id[i] = id; + } + } + + /* The caller expects us to return id */ + return ctx->id; + +error: + for (i--; i >= 0; i--) { + if (ctx->extended_id[i]) + ida_free(&mmu_context_ida, ctx->extended_id[i]); + } + + return id; +} + static int hash__init_new_context(struct mm_struct *mm) { int index; - index = hash__alloc_context_id(); - if (index < 0) - return index; - mm->context.hash_context = kmalloc(sizeof(struct hash_mm_context), GFP_KERNEL); - if (!mm->context.hash_context) { - ida_free(&mmu_context_ida, index); + if (!mm->context.hash_context) return -ENOMEM; - } /* * The old code would re-promote on fork, we don't do that when using @@ -91,13 +123,20 @@ static int hash__init_new_context(struct mm_struct *mm) mm->context.hash_context->spt = kmalloc(sizeof(struct subpage_prot_table), GFP_KERNEL); if (!mm->context.hash_context->spt) { - ida_free(&mmu_context_ida, index); kfree(mm->context.hash_context); return -ENOMEM; } } #endif + } + index = realloc_context_ids(&mm->context); + if (index < 0) { +#ifdef CONFIG_PPC_SUBPAGE_PROT + kfree(mm->context.hash_context->spt); +#endif + kfree(mm->context.hash_context); + return index; } pkey_mm_init(mm); diff --git a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi index 3c06ee4b2b29..40983491b95f 100644 --- a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi +++ b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi @@ -163,6 +163,7 @@ interrupt-parent = <&plic0>; interrupts = <4>; clocks = <&prci PRCI_CLK_TLCLK>; + status = "disabled"; }; uart1: serial@10011000 { compatible = "sifive,fu540-c000-uart", "sifive,uart0"; @@ -170,6 +171,7 @@ interrupt-parent = <&plic0>; interrupts = <5>; clocks = <&prci PRCI_CLK_TLCLK>; + status = "disabled"; }; i2c0: i2c@10030000 { compatible = "sifive,fu540-c000-i2c", "sifive,i2c0"; @@ -181,6 +183,7 @@ reg-io-width = <1>; #address-cells = <1>; #size-cells = <0>; + status = "disabled"; }; qspi0: spi@10040000 { compatible = "sifive,fu540-c000-spi", "sifive,spi0"; @@ -191,6 +194,7 @@ clocks = <&prci PRCI_CLK_TLCLK>; #address-cells = <1>; #size-cells = <0>; + status = "disabled"; }; qspi1: spi@10041000 { compatible = "sifive,fu540-c000-spi", "sifive,spi0"; @@ -201,6 +205,7 @@ clocks = <&prci PRCI_CLK_TLCLK>; #address-cells = <1>; #size-cells = <0>; + status = "disabled"; }; qspi2: spi@10050000 { compatible = "sifive,fu540-c000-spi", "sifive,spi0"; @@ -210,6 +215,7 @@ clocks = <&prci PRCI_CLK_TLCLK>; #address-cells = <1>; #size-cells = <0>; + status = "disabled"; }; }; }; diff --git a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts index 4da88707e28f..0b55c53c08c7 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts @@ -42,7 +42,20 @@ }; }; +&uart0 { + status = "okay"; +}; + +&uart1 { + status = "okay"; +}; + +&i2c0 { + status = "okay"; +}; + &qspi0 { + status = "okay"; flash@0 { compatible = "issi,is25wp256", "jedec,spi-nor"; reg = <0>; diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index 4f02967e55de..04944fb4fa7a 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -69,6 +69,7 @@ CONFIG_VIRTIO_MMIO=y CONFIG_CLK_SIFIVE=y CONFIG_CLK_SIFIVE_FU540_PRCI=y CONFIG_SIFIVE_PLIC=y +CONFIG_SPI_SIFIVE=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_AUTOFS4_FS=y @@ -84,4 +85,8 @@ CONFIG_ROOT_NFS=y CONFIG_CRYPTO_USER_API_HASH=y CONFIG_CRYPTO_DEV_VIRTIO=y CONFIG_PRINTK_TIME=y +CONFIG_SPI=y +CONFIG_MMC_SPI=y +CONFIG_MMC=y +CONFIG_DEVTMPFS_MOUNT=y # CONFIG_RCU_TRACE is not set diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index 3e2708c626a8..f960c3f4ce47 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -272,9 +272,6 @@ vmalloc_fault: * entries, but in RISC-V, SFENCE.VMA specifies an * ordering constraint, not a cache flush; it is * necessary even after writing invalid entries. - * Relying on flush_tlb_fix_spurious_fault would - * suffice, but the extra traps reduce - * performance. So, eagerly SFENCE.VMA. */ local_flush_tlb_page(addr); diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index f315425d8468..3cd94a21bd53 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -561,14 +561,14 @@ int x86_pmu_hw_config(struct perf_event *event) } /* sample_regs_user never support XMM registers */ - if (unlikely(event->attr.sample_regs_user & PEBS_XMM_REGS)) + if (unlikely(event->attr.sample_regs_user & PERF_REG_EXTENDED_MASK)) return -EINVAL; /* * Besides the general purpose registers, XMM registers may * be collected in PEBS on some platforms, e.g. Icelake */ - if (unlikely(event->attr.sample_regs_intr & PEBS_XMM_REGS)) { - if (x86_pmu.pebs_no_xmm_regs) + if (unlikely(event->attr.sample_regs_intr & PERF_REG_EXTENDED_MASK)) { + if (!(event->pmu->capabilities & PERF_PMU_CAP_EXTENDED_REGS)) return -EINVAL; if (!event->attr.precise_ip) @@ -2402,13 +2402,13 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re return; } - if (perf_hw_regs(regs)) { - if (perf_callchain_store(entry, regs->ip)) - return; + if (perf_callchain_store(entry, regs->ip)) + return; + + if (perf_hw_regs(regs)) unwind_start(&state, current, regs, NULL); - } else { + else unwind_start(&state, current, NULL, (void *)regs->sp); - } for (; !unwind_done(&state); unwind_next_frame(&state)) { addr = unwind_get_return_address(&state); diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 7acc526b4ad2..505c73dc6a73 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -987,7 +987,7 @@ static u64 pebs_update_adaptive_cfg(struct perf_event *event) pebs_data_cfg |= PEBS_DATACFG_GP; if ((sample_type & PERF_SAMPLE_REGS_INTR) && - (attr->sample_regs_intr & PEBS_XMM_REGS)) + (attr->sample_regs_intr & PERF_REG_EXTENDED_MASK)) pebs_data_cfg |= PEBS_DATACFG_XMMS; if (sample_type & PERF_SAMPLE_BRANCH_STACK) { @@ -1964,10 +1964,9 @@ void __init intel_ds_init(void) x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS); x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS); x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE; - if (x86_pmu.version <= 4) { + if (x86_pmu.version <= 4) x86_pmu.pebs_no_isolation = 1; - x86_pmu.pebs_no_xmm_regs = 1; - } + if (x86_pmu.pebs) { char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-'; char *pebs_qual = ""; @@ -2020,9 +2019,9 @@ void __init intel_ds_init(void) PERF_SAMPLE_TIME; x86_pmu.flags |= PMU_FL_PEBS_ALL; pebs_qual = "-baseline"; + x86_get_pmu()->capabilities |= PERF_PMU_CAP_EXTENDED_REGS; } else { /* Only basic record supported */ - x86_pmu.pebs_no_xmm_regs = 1; x86_pmu.large_pebs_flags &= ~(PERF_SAMPLE_ADDR | PERF_SAMPLE_TIME | diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index a6ac2f4f76fc..4e346856ee19 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -121,24 +121,6 @@ struct amd_nb { (1ULL << PERF_REG_X86_R14) | \ (1ULL << PERF_REG_X86_R15)) -#define PEBS_XMM_REGS \ - ((1ULL << PERF_REG_X86_XMM0) | \ - (1ULL << PERF_REG_X86_XMM1) | \ - (1ULL << PERF_REG_X86_XMM2) | \ - (1ULL << PERF_REG_X86_XMM3) | \ - (1ULL << PERF_REG_X86_XMM4) | \ - (1ULL << PERF_REG_X86_XMM5) | \ - (1ULL << PERF_REG_X86_XMM6) | \ - (1ULL << PERF_REG_X86_XMM7) | \ - (1ULL << PERF_REG_X86_XMM8) | \ - (1ULL << PERF_REG_X86_XMM9) | \ - (1ULL << PERF_REG_X86_XMM10) | \ - (1ULL << PERF_REG_X86_XMM11) | \ - (1ULL << PERF_REG_X86_XMM12) | \ - (1ULL << PERF_REG_X86_XMM13) | \ - (1ULL << PERF_REG_X86_XMM14) | \ - (1ULL << PERF_REG_X86_XMM15)) - /* * Per register state. */ @@ -668,8 +650,7 @@ struct x86_pmu { pebs_broken :1, pebs_prec_dist :1, pebs_no_tlb :1, - pebs_no_isolation :1, - pebs_no_xmm_regs :1; + pebs_no_isolation :1; int pebs_record_size; int pebs_buffer_size; int max_pebs_events; diff --git a/arch/x86/include/uapi/asm/perf_regs.h b/arch/x86/include/uapi/asm/perf_regs.h index ac67bbea10ca..7c9d2bb3833b 100644 --- a/arch/x86/include/uapi/asm/perf_regs.h +++ b/arch/x86/include/uapi/asm/perf_regs.h @@ -52,4 +52,7 @@ enum perf_event_x86_regs { /* These include both GPRs and XMMX registers */ PERF_REG_X86_XMM_MAX = PERF_REG_X86_XMM15 + 2, }; + +#define PERF_REG_EXTENDED_MASK (~((1ULL << PERF_REG_X86_XMM0) - 1)) + #endif /* _ASM_X86_PERF_REGS_H */ diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 177aa8ef2afa..85be316665b4 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1464,7 +1464,8 @@ static void apic_pending_intr_clear(void) if (queued) { if (boot_cpu_has(X86_FEATURE_TSC) && cpu_khz) { ntsc = rdtsc(); - max_loops = (cpu_khz << 10) - (ntsc - tsc); + max_loops = (long long)cpu_khz << 10; + max_loops -= ntsc - tsc; } else { max_loops--; } diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 03b4cc0ec3a7..66ca906aa790 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -836,6 +836,16 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) } /* + * If SSBD is controlled by the SPEC_CTRL MSR, then set the proper + * bit in the mask to allow guests to use the mitigation even in the + * case where the host does not enable it. + */ + if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || + static_cpu_has(X86_FEATURE_AMD_SSBD)) { + x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; + } + + /* * We have three CPU feature flags that are in play here: * - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible. * - X86_FEATURE_SSBD - CPU is able to turn off speculative store bypass @@ -852,7 +862,6 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) x86_amd_ssb_disable(); } else { x86_spec_ctrl_base |= SPEC_CTRL_SSBD; - x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); } } diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index a813987b5552..cb0fdcaf1415 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -789,13 +789,16 @@ static struct syscore_ops mc_syscore_ops = { .resume = mc_bp_resume, }; -static int mc_cpu_online(unsigned int cpu) +static int mc_cpu_starting(unsigned int cpu) { - struct device *dev; - - dev = get_cpu_device(cpu); microcode_update_cpu(cpu); pr_debug("CPU%d added\n", cpu); + return 0; +} + +static int mc_cpu_online(unsigned int cpu) +{ + struct device *dev = get_cpu_device(cpu); if (sysfs_create_group(&dev->kobj, &mc_attr_group)) pr_err("Failed to create group for CPU%d\n", cpu); @@ -872,7 +875,9 @@ int __init microcode_init(void) goto out_ucode_group; register_syscore_ops(&mc_syscore_ops); - cpuhp_setup_state_nocalls(CPUHP_AP_MICROCODE_LOADER, "x86/microcode:online", + cpuhp_setup_state_nocalls(CPUHP_AP_MICROCODE_LOADER, "x86/microcode:starting", + mc_cpu_starting, NULL); + cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/microcode:online", mc_cpu_online, mc_cpu_down_prep); pr_info("Microcode Update Driver: v%s.", DRIVER_VERSION); diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 2131b8bbaad7..2f4824793798 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -796,8 +796,12 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { struct rdt_resource *r = of->kn->parent->priv; - u32 sw_shareable = 0, hw_shareable = 0; - u32 exclusive = 0, pseudo_locked = 0; + /* + * Use unsigned long even though only 32 bits are used to ensure + * test_bit() is used safely. + */ + unsigned long sw_shareable = 0, hw_shareable = 0; + unsigned long exclusive = 0, pseudo_locked = 0; struct rdt_domain *dom; int i, hwb, swb, excl, psl; enum rdtgrp_mode mode; @@ -842,10 +846,10 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of, } for (i = r->cache.cbm_len - 1; i >= 0; i--) { pseudo_locked = dom->plr ? dom->plr->cbm : 0; - hwb = test_bit(i, (unsigned long *)&hw_shareable); - swb = test_bit(i, (unsigned long *)&sw_shareable); - excl = test_bit(i, (unsigned long *)&exclusive); - psl = test_bit(i, (unsigned long *)&pseudo_locked); + hwb = test_bit(i, &hw_shareable); + swb = test_bit(i, &sw_shareable); + excl = test_bit(i, &exclusive); + psl = test_bit(i, &pseudo_locked); if (hwb && swb) seq_putc(seq, 'X'); else if (hwb && !swb) @@ -2486,26 +2490,19 @@ out_destroy: */ static void cbm_ensure_valid(u32 *_val, struct rdt_resource *r) { - /* - * Convert the u32 _val to an unsigned long required by all the bit - * operations within this function. No more than 32 bits of this - * converted value can be accessed because all bit operations are - * additionally provided with cbm_len that is initialized during - * hardware enumeration using five bits from the EAX register and - * thus never can exceed 32 bits. - */ - unsigned long *val = (unsigned long *)_val; + unsigned long val = *_val; unsigned int cbm_len = r->cache.cbm_len; unsigned long first_bit, zero_bit; - if (*val == 0) + if (val == 0) return; - first_bit = find_first_bit(val, cbm_len); - zero_bit = find_next_zero_bit(val, cbm_len, first_bit); + first_bit = find_first_bit(&val, cbm_len); + zero_bit = find_next_zero_bit(&val, cbm_len, first_bit); /* Clear any remaining bits to ensure contiguous region */ - bitmap_clear(val, zero_bit, cbm_len - zero_bit); + bitmap_clear(&val, zero_bit, cbm_len - zero_bit); + *_val = (u32)val; } /* diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 16b1cbd3a61e..29ffa495bd1c 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -184,24 +184,25 @@ unsigned long __head __startup_64(unsigned long physaddr, pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask(); if (la57) { - p4d = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr); + p4d = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++], + physaddr); i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD; pgd[i + 0] = (pgdval_t)p4d + pgtable_flags; pgd[i + 1] = (pgdval_t)p4d + pgtable_flags; - i = (physaddr >> P4D_SHIFT) % PTRS_PER_P4D; - p4d[i + 0] = (pgdval_t)pud + pgtable_flags; - p4d[i + 1] = (pgdval_t)pud + pgtable_flags; + i = physaddr >> P4D_SHIFT; + p4d[(i + 0) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags; + p4d[(i + 1) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags; } else { i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD; pgd[i + 0] = (pgdval_t)pud + pgtable_flags; pgd[i + 1] = (pgdval_t)pud + pgtable_flags; } - i = (physaddr >> PUD_SHIFT) % PTRS_PER_PUD; - pud[i + 0] = (pudval_t)pmd + pgtable_flags; - pud[i + 1] = (pudval_t)pmd + pgtable_flags; + i = physaddr >> PUD_SHIFT; + pud[(i + 0) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags; + pud[(i + 1) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags; pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL; /* Filter out unsupported __PAGE_KERNEL_* bits: */ @@ -211,8 +212,9 @@ unsigned long __head __startup_64(unsigned long physaddr, pmd_entry += physaddr; for (i = 0; i < DIV_ROUND_UP(_end - _text, PMD_SIZE); i++) { - int idx = i + (physaddr >> PMD_SHIFT) % PTRS_PER_PMD; - pmd[idx] = pmd_entry + i * PMD_SIZE; + int idx = i + (physaddr >> PMD_SHIFT); + + pmd[idx % PTRS_PER_PMD] = pmd_entry + i * PMD_SIZE; } /* diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c index 07c30ee17425..bb7e1132290b 100644 --- a/arch/x86/kernel/perf_regs.c +++ b/arch/x86/kernel/perf_regs.c @@ -74,6 +74,9 @@ u64 perf_reg_value(struct pt_regs *regs, int idx) return regs_get_register(regs, pt_regs_offset[idx]); } +#define PERF_REG_X86_RESERVED (((1ULL << PERF_REG_X86_XMM0) - 1) & \ + ~((1ULL << PERF_REG_X86_MAX) - 1)) + #ifdef CONFIG_X86_32 #define REG_NOSUPPORT ((1ULL << PERF_REG_X86_R8) | \ (1ULL << PERF_REG_X86_R9) | \ @@ -86,7 +89,7 @@ u64 perf_reg_value(struct pt_regs *regs, int idx) int perf_reg_validate(u64 mask) { - if (!mask || (mask & REG_NOSUPPORT)) + if (!mask || (mask & (REG_NOSUPPORT | PERF_REG_X86_RESERVED))) return -EINVAL; return 0; @@ -112,7 +115,7 @@ void perf_get_regs_user(struct perf_regs *regs_user, int perf_reg_validate(u64 mask) { - if (!mask || (mask & REG_NOSUPPORT)) + if (!mask || (mask & (REG_NOSUPPORT | PERF_REG_X86_RESERVED))) return -EINVAL; return 0; diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 33b66b5c5aec..72b997eaa1fc 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -82,9 +82,9 @@ static struct orc_entry *orc_find(unsigned long ip); * But they are copies of the ftrace entries that are static and * defined in ftrace_*.S, which do have orc entries. * - * If the undwinder comes across a ftrace trampoline, then find the + * If the unwinder comes across a ftrace trampoline, then find the * ftrace function that was used to create it, and use that ftrace - * function's orc entrie, as the placement of the return code in + * function's orc entry, as the placement of the return code in * the stack will be identical. */ static struct orc_entry *orc_ftrace_find(unsigned long ip) @@ -128,6 +128,16 @@ static struct orc_entry null_orc_entry = { .type = ORC_TYPE_CALL }; +/* Fake frame pointer entry -- used as a fallback for generated code */ +static struct orc_entry orc_fp_entry = { + .type = ORC_TYPE_CALL, + .sp_reg = ORC_REG_BP, + .sp_offset = 16, + .bp_reg = ORC_REG_PREV_SP, + .bp_offset = -16, + .end = 0, +}; + static struct orc_entry *orc_find(unsigned long ip) { static struct orc_entry *orc; @@ -392,8 +402,16 @@ bool unwind_next_frame(struct unwind_state *state) * calls and calls to noreturn functions. */ orc = orc_find(state->signal ? state->ip : state->ip - 1); - if (!orc) - goto err; + if (!orc) { + /* + * As a fallback, try to assume this code uses a frame pointer. + * This is useful for generated code, like BPF, which ORC + * doesn't know about. This is just a guess, so the rest of + * the unwind is no longer considered reliable. + */ + orc = &orc_fp_entry; + state->error = true; + } /* End-of-stack check for kernel threads: */ if (orc->sp_reg == ORC_REG_UNDEFINED) { diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 693aaf28d5fe..0f01c7b1d217 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -671,23 +671,25 @@ static unsigned long __meminit phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end, unsigned long page_size_mask, bool init) { - unsigned long paddr_next, paddr_last = paddr_end; - unsigned long vaddr = (unsigned long)__va(paddr); - int i = p4d_index(vaddr); + unsigned long vaddr, vaddr_end, vaddr_next, paddr_next, paddr_last; + + paddr_last = paddr_end; + vaddr = (unsigned long)__va(paddr); + vaddr_end = (unsigned long)__va(paddr_end); if (!pgtable_l5_enabled()) return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end, page_size_mask, init); - for (; i < PTRS_PER_P4D; i++, paddr = paddr_next) { - p4d_t *p4d; + for (; vaddr < vaddr_end; vaddr = vaddr_next) { + p4d_t *p4d = p4d_page + p4d_index(vaddr); pud_t *pud; - vaddr = (unsigned long)__va(paddr); - p4d = p4d_page + p4d_index(vaddr); - paddr_next = (paddr & P4D_MASK) + P4D_SIZE; + vaddr_next = (vaddr & P4D_MASK) + P4D_SIZE; + paddr = __pa(vaddr); if (paddr >= paddr_end) { + paddr_next = __pa(vaddr_next); if (!after_bootmem && !e820__mapped_any(paddr & P4D_MASK, paddr_next, E820_TYPE_RAM) && @@ -699,13 +701,13 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end, if (!p4d_none(*p4d)) { pud = pud_offset(p4d, 0); - paddr_last = phys_pud_init(pud, paddr, paddr_end, - page_size_mask, init); + paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end), + page_size_mask, init); continue; } pud = alloc_low_page(); - paddr_last = phys_pud_init(pud, paddr, paddr_end, + paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end), page_size_mask, init); spin_lock(&init_mm.page_table_lock); diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 632b83885867..3b9fd679cea9 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -728,7 +728,7 @@ void efi_recover_from_page_fault(unsigned long phys_addr) * Address range 0x0000 - 0x0fff is always mapped in the efi_pgd, so * page faulting on these addresses isn't expected. */ - if (phys_addr >= 0x0000 && phys_addr <= 0x0fff) + if (phys_addr <= 0x0fff) return; /* diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index f8d430f88d25..f9269ae6da9c 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -240,7 +240,7 @@ static struct kmem_cache *bfq_pool; * containing only random (seeky) I/O are prevented from being tagged * as soft real-time. */ -#define BFQQ_TOTALLY_SEEKY(bfqq) (bfqq->seek_history & -1) +#define BFQQ_TOTALLY_SEEKY(bfqq) (bfqq->seek_history == -1) /* Min number of samples required to perform peak-rate update */ #define BFQ_RATE_MIN_SAMPLES 32 diff --git a/drivers/auxdisplay/cfag12864bfb.c b/drivers/auxdisplay/cfag12864bfb.c index 40c8a552a478..4074886b7bc8 100644 --- a/drivers/auxdisplay/cfag12864bfb.c +++ b/drivers/auxdisplay/cfag12864bfb.c @@ -52,8 +52,9 @@ static const struct fb_var_screeninfo cfag12864bfb_var = { static int cfag12864bfb_mmap(struct fb_info *info, struct vm_area_struct *vma) { - return vm_insert_page(vma, vma->vm_start, - virt_to_page(cfag12864b_buffer)); + struct page *pages = virt_to_page(cfag12864b_buffer); + + return vm_map_pages_zero(vma, &pages, 1); } static struct fb_ops cfag12864bfb_ops = { diff --git a/drivers/auxdisplay/ht16k33.c b/drivers/auxdisplay/ht16k33.c index 21393ec3b9a4..9c0bb771751d 100644 --- a/drivers/auxdisplay/ht16k33.c +++ b/drivers/auxdisplay/ht16k33.c @@ -223,9 +223,9 @@ static const struct backlight_ops ht16k33_bl_ops = { static int ht16k33_mmap(struct fb_info *info, struct vm_area_struct *vma) { struct ht16k33_priv *priv = info->par; + struct page *pages = virt_to_page(priv->fbdev.buffer); - return vm_insert_page(vma, vma->vm_start, - virt_to_page(priv->fbdev.buffer)); + return vm_map_pages_zero(vma, &pages, 1); } static struct fb_ops ht16k33_fb_ops = { diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index aa51756fd4d6..87b410d6e51d 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -368,7 +368,7 @@ static struct clk_core *clk_core_get(struct clk_core *core, u8 p_index) const char *dev_id = dev ? dev_name(dev) : NULL; struct device_node *np = core->of_node; - if (np && index >= 0) + if (np && (name || index >= 0)) hw = of_clk_get_hw(np, index, name); /* diff --git a/drivers/clk/meson/g12a.c b/drivers/clk/meson/g12a.c index 739f64fdf1e3..206fafd299ea 100644 --- a/drivers/clk/meson/g12a.c +++ b/drivers/clk/meson/g12a.c @@ -2734,8 +2734,8 @@ static struct clk_hw_onecell_data g12a_hw_onecell_data = { [CLKID_MALI_1_DIV] = &g12a_mali_1_div.hw, [CLKID_MALI_1] = &g12a_mali_1.hw, [CLKID_MALI] = &g12a_mali.hw, - [CLKID_MPLL_5OM_DIV] = &g12a_mpll_50m_div.hw, - [CLKID_MPLL_5OM] = &g12a_mpll_50m.hw, + [CLKID_MPLL_50M_DIV] = &g12a_mpll_50m_div.hw, + [CLKID_MPLL_50M] = &g12a_mpll_50m.hw, [CLKID_SYS_PLL_DIV16_EN] = &g12a_sys_pll_div16_en.hw, [CLKID_SYS_PLL_DIV16] = &g12a_sys_pll_div16.hw, [CLKID_CPU_CLK_DYN0_SEL] = &g12a_cpu_clk_premux0.hw, diff --git a/drivers/clk/meson/g12a.h b/drivers/clk/meson/g12a.h index 39c41af70804..bcc05cd9882f 100644 --- a/drivers/clk/meson/g12a.h +++ b/drivers/clk/meson/g12a.h @@ -166,7 +166,7 @@ #define CLKID_HDMI_DIV 167 #define CLKID_MALI_0_DIV 170 #define CLKID_MALI_1_DIV 173 -#define CLKID_MPLL_5OM_DIV 176 +#define CLKID_MPLL_50M_DIV 176 #define CLKID_SYS_PLL_DIV16_EN 178 #define CLKID_SYS_PLL_DIV16 179 #define CLKID_CPU_CLK_DYN0_SEL 180 diff --git a/drivers/clk/meson/meson8b.c b/drivers/clk/meson/meson8b.c index 37cf0f01bb5d..62cd3a7f1f65 100644 --- a/drivers/clk/meson/meson8b.c +++ b/drivers/clk/meson/meson8b.c @@ -1761,7 +1761,7 @@ static struct clk_regmap meson8m2_gp_pll = { }, }; -static const char * const mmeson8b_vpu_0_1_parent_names[] = { +static const char * const meson8b_vpu_0_1_parent_names[] = { "fclk_div4", "fclk_div3", "fclk_div5", "fclk_div7" }; @@ -1778,8 +1778,8 @@ static struct clk_regmap meson8b_vpu_0_sel = { .hw.init = &(struct clk_init_data){ .name = "vpu_0_sel", .ops = &clk_regmap_mux_ops, - .parent_names = mmeson8b_vpu_0_1_parent_names, - .num_parents = ARRAY_SIZE(mmeson8b_vpu_0_1_parent_names), + .parent_names = meson8b_vpu_0_1_parent_names, + .num_parents = ARRAY_SIZE(meson8b_vpu_0_1_parent_names), .flags = CLK_SET_RATE_PARENT, }, }; @@ -1837,8 +1837,8 @@ static struct clk_regmap meson8b_vpu_1_sel = { .hw.init = &(struct clk_init_data){ .name = "vpu_1_sel", .ops = &clk_regmap_mux_ops, - .parent_names = mmeson8b_vpu_0_1_parent_names, - .num_parents = ARRAY_SIZE(mmeson8b_vpu_0_1_parent_names), + .parent_names = meson8b_vpu_0_1_parent_names, + .num_parents = ARRAY_SIZE(meson8b_vpu_0_1_parent_names), .flags = CLK_SET_RATE_PARENT, }, }; diff --git a/drivers/clk/socfpga/clk-s10.c b/drivers/clk/socfpga/clk-s10.c index 8281dfbf38c2..5bed36e12951 100644 --- a/drivers/clk/socfpga/clk-s10.c +++ b/drivers/clk/socfpga/clk-s10.c @@ -103,9 +103,9 @@ static const struct stratix10_perip_cnt_clock s10_main_perip_cnt_clks[] = { { STRATIX10_NOC_CLK, "noc_clk", NULL, noc_mux, ARRAY_SIZE(noc_mux), 0, 0, 0, 0x3C, 1}, { STRATIX10_EMAC_A_FREE_CLK, "emaca_free_clk", NULL, emaca_free_mux, ARRAY_SIZE(emaca_free_mux), - 0, 0, 4, 0xB0, 0}, + 0, 0, 2, 0xB0, 0}, { STRATIX10_EMAC_B_FREE_CLK, "emacb_free_clk", NULL, emacb_free_mux, ARRAY_SIZE(emacb_free_mux), - 0, 0, 4, 0xB0, 1}, + 0, 0, 2, 0xB0, 1}, { STRATIX10_EMAC_PTP_FREE_CLK, "emac_ptp_free_clk", NULL, emac_ptp_free_mux, ARRAY_SIZE(emac_ptp_free_mux), 0, 0, 4, 0xB0, 2}, { STRATIX10_GPIO_DB_FREE_CLK, "gpio_db_free_clk", NULL, gpio_db_free_mux, diff --git a/drivers/clk/tegra/clk-tegra210.c b/drivers/clk/tegra/clk-tegra210.c index e1ba62d2b1a0..ac1d27a8c650 100644 --- a/drivers/clk/tegra/clk-tegra210.c +++ b/drivers/clk/tegra/clk-tegra210.c @@ -3366,6 +3366,8 @@ static struct tegra_clk_init_table init_table[] __initdata = { { TEGRA210_CLK_I2S3_SYNC, TEGRA210_CLK_CLK_MAX, 24576000, 0 }, { TEGRA210_CLK_I2S4_SYNC, TEGRA210_CLK_CLK_MAX, 24576000, 0 }, { TEGRA210_CLK_VIMCLK_SYNC, TEGRA210_CLK_CLK_MAX, 24576000, 0 }, + { TEGRA210_CLK_HDA, TEGRA210_CLK_PLL_P, 51000000, 0 }, + { TEGRA210_CLK_HDA2CODEC_2X, TEGRA210_CLK_PLL_P, 48000000, 0 }, /* This MUST be the last entry. */ { TEGRA210_CLK_CLK_MAX, TEGRA210_CLK_CLK_MAX, 0, 0 }, }; diff --git a/drivers/clk/ti/clkctrl.c b/drivers/clk/ti/clkctrl.c index 8e834317c97d..975995eea15c 100644 --- a/drivers/clk/ti/clkctrl.c +++ b/drivers/clk/ti/clkctrl.c @@ -229,6 +229,7 @@ static struct clk_hw *_ti_omap4_clkctrl_xlate(struct of_phandle_args *clkspec, { struct omap_clkctrl_provider *provider = data; struct omap_clkctrl_clk *entry; + bool found = false; if (clkspec->args_count != 2) return ERR_PTR(-EINVAL); @@ -238,11 +239,13 @@ static struct clk_hw *_ti_omap4_clkctrl_xlate(struct of_phandle_args *clkspec, list_for_each_entry(entry, &provider->clocks, node) { if (entry->reg_offset == clkspec->args[0] && - entry->bit_offset == clkspec->args[1]) + entry->bit_offset == clkspec->args[1]) { + found = true; break; + } } - if (!entry) + if (!found) return ERR_PTR(-EINVAL); return entry->clk; diff --git a/drivers/firmware/efi/efi-bgrt.c b/drivers/firmware/efi/efi-bgrt.c index a2384184a7de..b07c17643210 100644 --- a/drivers/firmware/efi/efi-bgrt.c +++ b/drivers/firmware/efi/efi-bgrt.c @@ -47,11 +47,6 @@ void __init efi_bgrt_init(struct acpi_table_header *table) bgrt->version); goto out; } - if (bgrt->status & 0xfe) { - pr_notice("Ignoring BGRT: reserved status bits are non-zero %u\n", - bgrt->status); - goto out; - } if (bgrt->image_type != 0) { pr_notice("Ignoring BGRT: invalid image type %u (expected 0)\n", bgrt->image_type); diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 16b2137d117c..4b7cf7bc0ded 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -1009,14 +1009,16 @@ int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size) /* first try to find a slot in an existing linked list entry */ for (prsv = efi_memreserve_root->next; prsv; prsv = rsv->next) { - rsv = __va(prsv); + rsv = memremap(prsv, sizeof(*rsv), MEMREMAP_WB); index = atomic_fetch_add_unless(&rsv->count, 1, rsv->size); if (index < rsv->size) { rsv->entry[index].base = addr; rsv->entry[index].size = size; + memunmap(rsv); return 0; } + memunmap(rsv); } /* no slot found - allocate a new linked list entry */ @@ -1024,7 +1026,13 @@ int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size) if (!rsv) return -ENOMEM; - rsv->size = EFI_MEMRESERVE_COUNT(PAGE_SIZE); + /* + * The memremap() call above assumes that a linux_efi_memreserve entry + * never crosses a page boundary, so let's ensure that this remains true + * even when kexec'ing a 4k pages kernel from a >4k pages kernel, by + * using SZ_4K explicitly in the size calculation below. + */ + rsv->size = EFI_MEMRESERVE_COUNT(SZ_4K); atomic_set(&rsv->count, 1); rsv->entry[0].base = addr; rsv->entry[0].size = size; diff --git a/drivers/firmware/efi/efibc.c b/drivers/firmware/efi/efibc.c index 61e099826cbb..35dccc88ac0a 100644 --- a/drivers/firmware/efi/efibc.c +++ b/drivers/firmware/efi/efibc.c @@ -43,11 +43,13 @@ static int efibc_set_variable(const char *name, const char *value) efibc_str_to_str16(value, (efi_char16_t *)entry->var.Data); memcpy(&entry->var.VendorGuid, &guid, sizeof(guid)); - ret = efivar_entry_set(entry, - EFI_VARIABLE_NON_VOLATILE - | EFI_VARIABLE_BOOTSERVICE_ACCESS - | EFI_VARIABLE_RUNTIME_ACCESS, - size, entry->var.Data, NULL); + ret = efivar_entry_set_safe(entry->var.VariableName, + entry->var.VendorGuid, + EFI_VARIABLE_NON_VOLATILE + | EFI_VARIABLE_BOOTSERVICE_ACCESS + | EFI_VARIABLE_RUNTIME_ACCESS, + false, size, entry->var.Data); + if (ret) pr_err("failed to set %s EFI variable: 0x%x\n", name, ret); diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index eac0c54c5970..b032d3899fa3 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -80,6 +80,7 @@ #define HID_DEVICE_ID_ALPS_U1_DUAL_3BTN_PTP 0x1220 #define HID_DEVICE_ID_ALPS_U1 0x1215 #define HID_DEVICE_ID_ALPS_T4_BTNLESS 0x120C +#define HID_DEVICE_ID_ALPS_1222 0x1222 #define USB_VENDOR_ID_AMI 0x046b @@ -269,6 +270,7 @@ #define USB_DEVICE_ID_CHICONY_MULTI_TOUCH 0xb19d #define USB_DEVICE_ID_CHICONY_WIRELESS 0x0618 #define USB_DEVICE_ID_CHICONY_PIXART_USB_OPTICAL_MOUSE 0x1053 +#define USB_DEVICE_ID_CHICONY_PIXART_USB_OPTICAL_MOUSE2 0x0939 #define USB_DEVICE_ID_CHICONY_WIRELESS2 0x1123 #define USB_DEVICE_ID_ASUS_AK1D 0x1125 #define USB_DEVICE_ID_CHICONY_TOSHIBA_WT10A 0x1408 @@ -569,6 +571,7 @@ #define USB_VENDOR_ID_HUION 0x256c #define USB_DEVICE_ID_HUION_TABLET 0x006e +#define USB_DEVICE_ID_HUION_HS64 0x006d #define USB_VENDOR_ID_IBM 0x04b3 #define USB_DEVICE_ID_IBM_SCROLLPOINT_III 0x3100 diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c index e564bff86515..bfcf2ee58d14 100644 --- a/drivers/hid/hid-logitech-dj.c +++ b/drivers/hid/hid-logitech-dj.c @@ -30,6 +30,7 @@ #define REPORT_ID_HIDPP_SHORT 0x10 #define REPORT_ID_HIDPP_LONG 0x11 +#define REPORT_ID_HIDPP_VERY_LONG 0x12 #define HIDPP_REPORT_SHORT_LENGTH 7 #define HIDPP_REPORT_LONG_LENGTH 20 @@ -1242,7 +1243,8 @@ static int logi_dj_ll_raw_request(struct hid_device *hid, int ret; if ((buf[0] == REPORT_ID_HIDPP_SHORT) || - (buf[0] == REPORT_ID_HIDPP_LONG)) { + (buf[0] == REPORT_ID_HIDPP_LONG) || + (buf[0] == REPORT_ID_HIDPP_VERY_LONG)) { if (count < 2) return -EINVAL; diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 5df5dd56ecc8..b603c14d043b 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -1776,6 +1776,10 @@ static const struct hid_device_id mt_devices[] = { HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_ALPS_JP, HID_DEVICE_ID_ALPS_U1_DUAL_3BTN_PTP) }, + { .driver_data = MT_CLS_WIN_8_DUAL, + HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, + USB_VENDOR_ID_ALPS_JP, + HID_DEVICE_ID_ALPS_1222) }, /* Lenovo X1 TAB Gen 2 */ { .driver_data = MT_CLS_WIN_8_DUAL, diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index e5ca6fe2ca57..671a285724f9 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -42,6 +42,7 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_UC100KM), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_MULTI_TOUCH), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_PIXART_USB_OPTICAL_MOUSE), HID_QUIRK_ALWAYS_POLL }, + { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_PIXART_USB_OPTICAL_MOUSE2), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_CHIC, USB_DEVICE_ID_CHIC_GAMEPAD), HID_QUIRK_BADPAD }, { HID_USB_DEVICE(USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_3AXIS_5BUTTON_STICK), HID_QUIRK_NOGET }, diff --git a/drivers/hid/hid-uclogic-core.c b/drivers/hid/hid-uclogic-core.c index 8fe02d81265d..914fb527ae7a 100644 --- a/drivers/hid/hid-uclogic-core.c +++ b/drivers/hid/hid-uclogic-core.c @@ -369,6 +369,8 @@ static const struct hid_device_id uclogic_devices[] = { USB_DEVICE_ID_UCLOGIC_TABLET_TWHA60) }, { HID_USB_DEVICE(USB_VENDOR_ID_HUION, USB_DEVICE_ID_HUION_TABLET) }, + { HID_USB_DEVICE(USB_VENDOR_ID_HUION, + USB_DEVICE_ID_HUION_HS64) }, { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_HUION_TABLET) }, { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, diff --git a/drivers/hid/hid-uclogic-params.c b/drivers/hid/hid-uclogic-params.c index 0187c9f8fc22..273d784fff66 100644 --- a/drivers/hid/hid-uclogic-params.c +++ b/drivers/hid/hid-uclogic-params.c @@ -977,6 +977,8 @@ int uclogic_params_init(struct uclogic_params *params, /* FALL THROUGH */ case VID_PID(USB_VENDOR_ID_HUION, USB_DEVICE_ID_HUION_TABLET): + case VID_PID(USB_VENDOR_ID_HUION, + USB_DEVICE_ID_HUION_HS64): case VID_PID(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_HUION_TABLET): case VID_PID(USB_VENDOR_ID_UCLOGIC, diff --git a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c index 22ba21457035..aa2dbed30fc3 100644 --- a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c +++ b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c @@ -816,9 +816,9 @@ static int load_fw_from_host(struct ishtp_cl_data *client_data) goto end_err_fw_release; release_firmware(fw); - kfree(filename); dev_info(cl_data_to_dev(client_data), "ISH firmware %s loaded\n", filename); + kfree(filename); return 0; end_err_fw_release: diff --git a/drivers/hid/intel-ish-hid/ishtp-hid-client.c b/drivers/hid/intel-ish-hid/ishtp-hid-client.c index c0487b34d2cf..6ba944b40fdb 100644 --- a/drivers/hid/intel-ish-hid/ishtp-hid-client.c +++ b/drivers/hid/intel-ish-hid/ishtp-hid-client.c @@ -891,7 +891,7 @@ static int hid_ishtp_cl_reset(struct ishtp_cl_device *cl_device) */ static int hid_ishtp_cl_suspend(struct device *device) { - struct ishtp_cl_device *cl_device = dev_get_drvdata(device); + struct ishtp_cl_device *cl_device = ishtp_dev_to_cl_device(device); struct ishtp_cl *hid_ishtp_cl = ishtp_get_drvdata(cl_device); struct ishtp_cl_data *client_data = ishtp_get_client_data(hid_ishtp_cl); @@ -912,7 +912,7 @@ static int hid_ishtp_cl_suspend(struct device *device) */ static int hid_ishtp_cl_resume(struct device *device) { - struct ishtp_cl_device *cl_device = dev_get_drvdata(device); + struct ishtp_cl_device *cl_device = ishtp_dev_to_cl_device(device); struct ishtp_cl *hid_ishtp_cl = ishtp_get_drvdata(cl_device); struct ishtp_cl_data *client_data = ishtp_get_client_data(hid_ishtp_cl); diff --git a/drivers/hid/intel-ish-hid/ishtp/bus.c b/drivers/hid/intel-ish-hid/ishtp/bus.c index 794e700d65f7..c47c3328a0f4 100644 --- a/drivers/hid/intel-ish-hid/ishtp/bus.c +++ b/drivers/hid/intel-ish-hid/ishtp/bus.c @@ -471,7 +471,6 @@ static struct ishtp_cl_device *ishtp_bus_add_device(struct ishtp_device *dev, } ishtp_device_ready = true; - dev_set_drvdata(&device->dev, device); return device; } @@ -640,6 +639,20 @@ void *ishtp_get_drvdata(struct ishtp_cl_device *cl_device) EXPORT_SYMBOL(ishtp_get_drvdata); /** + * ishtp_dev_to_cl_device() - get ishtp_cl_device instance from device instance + * @device: device instance + * + * Get ish_cl_device instance which embeds device instance in it. + * + * Return: pointer to ishtp_cl_device instance + */ +struct ishtp_cl_device *ishtp_dev_to_cl_device(struct device *device) +{ + return to_ishtp_cl_device(device); +} +EXPORT_SYMBOL(ishtp_dev_to_cl_device); + +/** * ishtp_bus_new_client() - Create a new client * @dev: ISHTP device instance * diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index dce1d8d2e8a4..73740b969e62 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -619,9 +619,9 @@ retry: pasid = ((event[0] >> 16) & 0xFFFF) | ((event[1] << 6) & 0xF0000); tag = event[1] & 0x03FF; - dev_err(dev, "Event logged [INVALID_PPR_REQUEST device=%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x]\n", + dev_err(dev, "Event logged [INVALID_PPR_REQUEST device=%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x tag=0x%03x]\n", PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), - pasid, address, flags); + pasid, address, flags, tag); break; default: dev_err(dev, "Event logged [UNKNOWN event[0]=0x%08x event[1]=0x%08x event[2]=0x%08x event[3]=0x%08x\n", @@ -1295,6 +1295,16 @@ static void domain_flush_complete(struct protection_domain *domain) } } +/* Flush the not present cache if it exists */ +static void domain_flush_np_cache(struct protection_domain *domain, + dma_addr_t iova, size_t size) +{ + if (unlikely(amd_iommu_np_cache)) { + domain_flush_pages(domain, iova, size); + domain_flush_complete(domain); + } +} + /* * This function flushes the DTEs for all devices in domain @@ -2377,10 +2387,7 @@ static dma_addr_t __map_single(struct device *dev, } address += offset; - if (unlikely(amd_iommu_np_cache)) { - domain_flush_pages(&dma_dom->domain, address, size); - domain_flush_complete(&dma_dom->domain); - } + domain_flush_np_cache(&dma_dom->domain, address, size); out: return address; @@ -2559,6 +2566,9 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, s->dma_length = s->length; } + if (s) + domain_flush_np_cache(domain, s->dma_address, s->dma_length); + return nelems; out_unmap: @@ -2597,7 +2607,7 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, struct protection_domain *domain; struct dma_ops_domain *dma_dom; unsigned long startaddr; - int npages = 2; + int npages; domain = get_domain(dev); if (IS_ERR(domain)) @@ -3039,6 +3049,8 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova, ret = iommu_map_page(domain, iova, paddr, page_size, prot, GFP_KERNEL); mutex_unlock(&domain->api_lock); + domain_flush_np_cache(domain, iova, page_size); + return ret; } diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 07d84dbab564..eb104c719629 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -406,6 +406,9 @@ static void iommu_enable(struct amd_iommu *iommu) static void iommu_disable(struct amd_iommu *iommu) { + if (!iommu->mmio_base) + return; + /* Disable command buffer */ iommu_feature_disable(iommu, CONTROL_CMDBUF_EN); @@ -2325,15 +2328,6 @@ static void __init free_iommu_resources(void) amd_iommu_dev_table = NULL; free_iommu_all(); - -#ifdef CONFIG_GART_IOMMU - /* - * We failed to initialize the AMD IOMMU - try fallback to GART - * if possible. - */ - gart_iommu_init(); - -#endif } /* SB IOAPIC is always on this device in AMD systems */ @@ -2625,8 +2619,6 @@ static int __init state_next(void) init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED; if (init_state == IOMMU_ACPI_FINISHED && amd_iommu_disabled) { pr_info("AMD IOMMU disabled on kernel command-line\n"); - free_dma_resources(); - free_iommu_resources(); init_state = IOMMU_CMDLINE_DISABLED; ret = -EINVAL; } @@ -2667,6 +2659,19 @@ static int __init state_next(void) BUG(); } + if (ret) { + free_dma_resources(); + if (!irq_remapping_enabled) { + disable_iommus(); + free_iommu_resources(); + } else { + struct amd_iommu *iommu; + + uninit_device_table_dma(); + for_each_iommu(iommu) + iommu_flush_all_caches(iommu); + } + } return ret; } @@ -2740,17 +2745,15 @@ static int __init amd_iommu_init(void) int ret; ret = iommu_go_to_state(IOMMU_INITIALIZED); - if (ret) { - free_dma_resources(); - if (!irq_remapping_enabled) { - disable_iommus(); - free_iommu_resources(); - } else { - uninit_device_table_dma(); - for_each_iommu(iommu) - iommu_flush_all_caches(iommu); - } +#ifdef CONFIG_GART_IOMMU + if (ret && list_empty(&amd_iommu_list)) { + /* + * We failed to initialize the AMD IOMMU - try fallback + * to GART if possible. + */ + gart_iommu_init(); } +#endif for_each_iommu(iommu) amd_iommu_debugfs_setup(iommu); diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 4d5a694f02c2..2d96cf0023dd 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -192,6 +192,13 @@ #define Q_BASE_ADDR_MASK GENMASK_ULL(51, 5) #define Q_BASE_LOG2SIZE GENMASK(4, 0) +/* Ensure DMA allocations are naturally aligned */ +#ifdef CONFIG_CMA_ALIGNMENT +#define Q_MAX_SZ_SHIFT (PAGE_SHIFT + CONFIG_CMA_ALIGNMENT) +#else +#define Q_MAX_SZ_SHIFT (PAGE_SHIFT + MAX_ORDER - 1) +#endif + /* * Stream table. * @@ -289,8 +296,9 @@ FIELD_GET(ARM64_TCR_##fld, tcr)) /* Command queue */ -#define CMDQ_ENT_DWORDS 2 -#define CMDQ_MAX_SZ_SHIFT 8 +#define CMDQ_ENT_SZ_SHIFT 4 +#define CMDQ_ENT_DWORDS ((1 << CMDQ_ENT_SZ_SHIFT) >> 3) +#define CMDQ_MAX_SZ_SHIFT (Q_MAX_SZ_SHIFT - CMDQ_ENT_SZ_SHIFT) #define CMDQ_CONS_ERR GENMASK(30, 24) #define CMDQ_ERR_CERROR_NONE_IDX 0 @@ -336,14 +344,16 @@ #define CMDQ_SYNC_1_MSIADDR_MASK GENMASK_ULL(51, 2) /* Event queue */ -#define EVTQ_ENT_DWORDS 4 -#define EVTQ_MAX_SZ_SHIFT 7 +#define EVTQ_ENT_SZ_SHIFT 5 +#define EVTQ_ENT_DWORDS ((1 << EVTQ_ENT_SZ_SHIFT) >> 3) +#define EVTQ_MAX_SZ_SHIFT (Q_MAX_SZ_SHIFT - EVTQ_ENT_SZ_SHIFT) #define EVTQ_0_ID GENMASK_ULL(7, 0) /* PRI queue */ -#define PRIQ_ENT_DWORDS 2 -#define PRIQ_MAX_SZ_SHIFT 8 +#define PRIQ_ENT_SZ_SHIFT 4 +#define PRIQ_ENT_DWORDS ((1 << PRIQ_ENT_SZ_SHIFT) >> 3) +#define PRIQ_MAX_SZ_SHIFT (Q_MAX_SZ_SHIFT - PRIQ_ENT_SZ_SHIFT) #define PRIQ_0_SID GENMASK_ULL(31, 0) #define PRIQ_0_SSID GENMASK_ULL(51, 32) @@ -798,7 +808,7 @@ static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent) /* High-level queue accessors */ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent) { - memset(cmd, 0, CMDQ_ENT_DWORDS << 3); + memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT); cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode); switch (ent->opcode) { @@ -1785,13 +1795,11 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain) .pgsize_bitmap = smmu->pgsize_bitmap, .ias = ias, .oas = oas, + .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENCY, .tlb = &arm_smmu_gather_ops, .iommu_dev = smmu->dev, }; - if (smmu->features & ARM_SMMU_FEAT_COHERENCY) - pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA; - if (smmu_domain->non_strict) pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT; @@ -1884,9 +1892,13 @@ static int arm_smmu_enable_ats(struct arm_smmu_master *master) static void arm_smmu_disable_ats(struct arm_smmu_master *master) { + struct arm_smmu_cmdq_ent cmd; + if (!master->ats_enabled || !dev_is_pci(master->dev)) return; + arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd); + arm_smmu_atc_inv_master(master, &cmd); pci_disable_ats(to_pci_dev(master->dev)); master->ats_enabled = false; } @@ -1906,7 +1918,6 @@ static void arm_smmu_detach_dev(struct arm_smmu_master *master) master->domain = NULL; arm_smmu_install_ste_for_dev(master); - /* Disabling ATS invalidates all ATC entries */ arm_smmu_disable_ats(master); } @@ -2270,17 +2281,32 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu, struct arm_smmu_queue *q, unsigned long prod_off, unsigned long cons_off, - size_t dwords) + size_t dwords, const char *name) { - size_t qsz = ((1 << q->max_n_shift) * dwords) << 3; + size_t qsz; + + do { + qsz = ((1 << q->max_n_shift) * dwords) << 3; + q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma, + GFP_KERNEL); + if (q->base || qsz < PAGE_SIZE) + break; + + q->max_n_shift--; + } while (1); - q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma, GFP_KERNEL); if (!q->base) { - dev_err(smmu->dev, "failed to allocate queue (0x%zx bytes)\n", - qsz); + dev_err(smmu->dev, + "failed to allocate queue (0x%zx bytes) for %s\n", + qsz, name); return -ENOMEM; } + if (!WARN_ON(q->base_dma & (qsz - 1))) { + dev_info(smmu->dev, "allocated %u entries for %s\n", + 1 << q->max_n_shift, name); + } + q->prod_reg = arm_smmu_page1_fixup(prod_off, smmu); q->cons_reg = arm_smmu_page1_fixup(cons_off, smmu); q->ent_dwords = dwords; @@ -2300,13 +2326,15 @@ static int arm_smmu_init_queues(struct arm_smmu_device *smmu) /* cmdq */ spin_lock_init(&smmu->cmdq.lock); ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD, - ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS); + ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS, + "cmdq"); if (ret) return ret; /* evtq */ ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD, - ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS); + ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS, + "evtq"); if (ret) return ret; @@ -2315,7 +2343,8 @@ static int arm_smmu_init_queues(struct arm_smmu_device *smmu) return 0; return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD, - ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS); + ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS, + "priq"); } static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu) @@ -2879,7 +2908,7 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) return -ENXIO; } - /* Queue sizes, capped at 4k */ + /* Queue sizes, capped to ensure natural alignment */ smmu->cmdq.q.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT, FIELD_GET(IDR1_CMDQS, reg)); if (!smmu->cmdq.q.max_n_shift) { diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 586dd5a46d9f..653b6b3dcafb 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -892,13 +892,11 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, .pgsize_bitmap = smmu->pgsize_bitmap, .ias = ias, .oas = oas, + .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK, .tlb = smmu_domain->tlb_ops, .iommu_dev = smmu->dev, }; - if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) - pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA; - if (smmu_domain->non_strict) pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT; diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 0ba108edc519..f802255219d3 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -226,8 +226,8 @@ resv_iova: start = window->res->end - window->offset + 1; /* If window is last entry */ if (window->node.next == &bridge->dma_ranges && - end != ~(dma_addr_t)0) { - end = ~(dma_addr_t)0; + end != ~(phys_addr_t)0) { + end = ~(phys_addr_t)0; goto resv_iova; } } diff --git a/drivers/iommu/intel-iommu-debugfs.c b/drivers/iommu/intel-iommu-debugfs.c index 7fabf9b1c2dc..73a552914455 100644 --- a/drivers/iommu/intel-iommu-debugfs.c +++ b/drivers/iommu/intel-iommu-debugfs.c @@ -14,6 +14,17 @@ #include <asm/irq_remapping.h> +#include "intel-pasid.h" + +struct tbl_walk { + u16 bus; + u16 devfn; + u32 pasid; + struct root_entry *rt_entry; + struct context_entry *ctx_entry; + struct pasid_entry *pasid_tbl_entry; +}; + struct iommu_regset { int offset; const char *regs; @@ -131,16 +142,86 @@ out: } DEFINE_SHOW_ATTRIBUTE(iommu_regset); -static void ctx_tbl_entry_show(struct seq_file *m, struct intel_iommu *iommu, - int bus) +static inline void print_tbl_walk(struct seq_file *m) { - struct context_entry *context; - int devfn; + struct tbl_walk *tbl_wlk = m->private; + + seq_printf(m, "%02x:%02x.%x\t0x%016llx:0x%016llx\t0x%016llx:0x%016llx\t", + tbl_wlk->bus, PCI_SLOT(tbl_wlk->devfn), + PCI_FUNC(tbl_wlk->devfn), tbl_wlk->rt_entry->hi, + tbl_wlk->rt_entry->lo, tbl_wlk->ctx_entry->hi, + tbl_wlk->ctx_entry->lo); + + /* + * A legacy mode DMAR doesn't support PASID, hence default it to -1 + * indicating that it's invalid. Also, default all PASID related fields + * to 0. + */ + if (!tbl_wlk->pasid_tbl_entry) + seq_printf(m, "%-6d\t0x%016llx:0x%016llx:0x%016llx\n", -1, + (u64)0, (u64)0, (u64)0); + else + seq_printf(m, "%-6d\t0x%016llx:0x%016llx:0x%016llx\n", + tbl_wlk->pasid, tbl_wlk->pasid_tbl_entry->val[0], + tbl_wlk->pasid_tbl_entry->val[1], + tbl_wlk->pasid_tbl_entry->val[2]); +} - seq_printf(m, " Context Table Entries for Bus: %d\n", bus); - seq_puts(m, " Entry\tB:D.F\tHigh\tLow\n"); +static void pasid_tbl_walk(struct seq_file *m, struct pasid_entry *tbl_entry, + u16 dir_idx) +{ + struct tbl_walk *tbl_wlk = m->private; + u8 tbl_idx; + + for (tbl_idx = 0; tbl_idx < PASID_TBL_ENTRIES; tbl_idx++) { + if (pasid_pte_is_present(tbl_entry)) { + tbl_wlk->pasid_tbl_entry = tbl_entry; + tbl_wlk->pasid = (dir_idx << PASID_PDE_SHIFT) + tbl_idx; + print_tbl_walk(m); + } + + tbl_entry++; + } +} + +static void pasid_dir_walk(struct seq_file *m, u64 pasid_dir_ptr, + u16 pasid_dir_size) +{ + struct pasid_dir_entry *dir_entry = phys_to_virt(pasid_dir_ptr); + struct pasid_entry *pasid_tbl; + u16 dir_idx; + + for (dir_idx = 0; dir_idx < pasid_dir_size; dir_idx++) { + pasid_tbl = get_pasid_table_from_pde(dir_entry); + if (pasid_tbl) + pasid_tbl_walk(m, pasid_tbl, dir_idx); + + dir_entry++; + } +} + +static void ctx_tbl_walk(struct seq_file *m, struct intel_iommu *iommu, u16 bus) +{ + struct context_entry *context; + u16 devfn, pasid_dir_size; + u64 pasid_dir_ptr; for (devfn = 0; devfn < 256; devfn++) { + struct tbl_walk tbl_wlk = {0}; + + /* + * Scalable mode root entry points to upper scalable mode + * context table and lower scalable mode context table. Each + * scalable mode context table has 128 context entries where as + * legacy mode context table has 256 context entries. So in + * scalable mode, the context entries for former 128 devices are + * in the lower scalable mode context table, while the latter + * 128 devices are in the upper scalable mode context table. + * In scalable mode, when devfn > 127, iommu_context_addr() + * automatically refers to upper scalable mode context table and + * hence the caller doesn't have to worry about differences + * between scalable mode and non scalable mode. + */ context = iommu_context_addr(iommu, bus, devfn, 0); if (!context) return; @@ -148,33 +229,41 @@ static void ctx_tbl_entry_show(struct seq_file *m, struct intel_iommu *iommu, if (!context_present(context)) continue; - seq_printf(m, " %-5d\t%02x:%02x.%x\t%-6llx\t%llx\n", devfn, - bus, PCI_SLOT(devfn), PCI_FUNC(devfn), - context[0].hi, context[0].lo); + tbl_wlk.bus = bus; + tbl_wlk.devfn = devfn; + tbl_wlk.rt_entry = &iommu->root_entry[bus]; + tbl_wlk.ctx_entry = context; + m->private = &tbl_wlk; + + if (pasid_supported(iommu) && is_pasid_enabled(context)) { + pasid_dir_ptr = context->lo & VTD_PAGE_MASK; + pasid_dir_size = get_pasid_dir_size(context); + pasid_dir_walk(m, pasid_dir_ptr, pasid_dir_size); + continue; + } + + print_tbl_walk(m); } } -static void root_tbl_entry_show(struct seq_file *m, struct intel_iommu *iommu) +static void root_tbl_walk(struct seq_file *m, struct intel_iommu *iommu) { unsigned long flags; - int bus; + u16 bus; spin_lock_irqsave(&iommu->lock, flags); - seq_printf(m, "IOMMU %s: Root Table Address:%llx\n", iommu->name, + seq_printf(m, "IOMMU %s: Root Table Address: 0x%llx\n", iommu->name, (u64)virt_to_phys(iommu->root_entry)); - seq_puts(m, "Root Table Entries:\n"); + seq_puts(m, "B.D.F\tRoot_entry\t\t\t\tContext_entry\t\t\t\tPASID\tPASID_table_entry\n"); - for (bus = 0; bus < 256; bus++) { - if (!(iommu->root_entry[bus].lo & 1)) - continue; + /* + * No need to check if the root entry is present or not because + * iommu_context_addr() performs the same check before returning + * context entry. + */ + for (bus = 0; bus < 256; bus++) + ctx_tbl_walk(m, iommu, bus); - seq_printf(m, " Bus: %d H: %llx L: %llx\n", bus, - iommu->root_entry[bus].hi, - iommu->root_entry[bus].lo); - - ctx_tbl_entry_show(m, iommu, bus); - seq_putc(m, '\n'); - } spin_unlock_irqrestore(&iommu->lock, flags); } @@ -185,7 +274,7 @@ static int dmar_translation_struct_show(struct seq_file *m, void *unused) rcu_read_lock(); for_each_active_iommu(iommu, drhd) { - root_tbl_entry_show(m, iommu); + root_tbl_walk(m, iommu); seq_putc(m, '\n'); } rcu_read_unlock(); diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 162b3236e72c..ac4172c02244 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -294,14 +294,16 @@ static inline void context_clear_entry(struct context_entry *context) static struct dmar_domain *si_domain; static int hw_pass_through = 1; +/* si_domain contains mulitple devices */ +#define DOMAIN_FLAG_STATIC_IDENTITY BIT(0) + /* - * Domain represents a virtual machine, more than one devices - * across iommus may be owned in one domain, e.g. kvm guest. + * This is a DMA domain allocated through the iommu domain allocation + * interface. But one or more devices belonging to this domain have + * been chosen to use a private domain. We should avoid to use the + * map/unmap/iova_to_phys APIs on it. */ -#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 0) - -/* si_domain contains mulitple devices */ -#define DOMAIN_FLAG_STATIC_IDENTITY (1 << 1) +#define DOMAIN_FLAG_LOSE_CHILDREN BIT(1) #define for_each_domain_iommu(idx, domain) \ for (idx = 0; idx < g_num_of_iommus; idx++) \ @@ -314,7 +316,6 @@ struct dmar_rmrr_unit { u64 end_address; /* reserved end address */ struct dmar_dev_scope *devices; /* target devices */ int devices_cnt; /* target device count */ - struct iommu_resv_region *resv; /* reserved region handle */ }; struct dmar_atsr_unit { @@ -342,6 +343,9 @@ static void domain_context_clear(struct intel_iommu *iommu, struct device *dev); static int domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu); +static bool device_is_rmrr_locked(struct device *dev); +static int intel_iommu_attach_device(struct iommu_domain *domain, + struct device *dev); #ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON int dmar_disabled = 0; @@ -349,6 +353,7 @@ int dmar_disabled = 0; int dmar_disabled = 1; #endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/ +int intel_iommu_sm; int intel_iommu_enabled = 0; EXPORT_SYMBOL_GPL(intel_iommu_enabled); @@ -356,21 +361,17 @@ static int dmar_map_gfx = 1; static int dmar_forcedac; static int intel_iommu_strict; static int intel_iommu_superpage = 1; -static int intel_iommu_sm; static int iommu_identity_mapping; #define IDENTMAP_ALL 1 #define IDENTMAP_GFX 2 #define IDENTMAP_AZALIA 4 -#define sm_supported(iommu) (intel_iommu_sm && ecap_smts((iommu)->ecap)) -#define pasid_supported(iommu) (sm_supported(iommu) && \ - ecap_pasid((iommu)->ecap)) - int intel_iommu_gfx_mapped; EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped); #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1)) +#define DEFER_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-2)) static DEFINE_SPINLOCK(device_domain_lock); static LIST_HEAD(device_domain_list); @@ -535,22 +536,11 @@ static inline void free_devinfo_mem(void *vaddr) kmem_cache_free(iommu_devinfo_cache, vaddr); } -static inline int domain_type_is_vm(struct dmar_domain *domain) -{ - return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE; -} - static inline int domain_type_is_si(struct dmar_domain *domain) { return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY; } -static inline int domain_type_is_vm_or_si(struct dmar_domain *domain) -{ - return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE | - DOMAIN_FLAG_STATIC_IDENTITY); -} - static inline int domain_pfn_supported(struct dmar_domain *domain, unsigned long pfn) { @@ -598,7 +588,9 @@ struct intel_iommu *domain_get_iommu(struct dmar_domain *domain) int iommu_id; /* si_domain and vm domain should not get here. */ - BUG_ON(domain_type_is_vm_or_si(domain)); + if (WARN_ON(domain->domain.type != IOMMU_DOMAIN_DMA)) + return NULL; + for_each_domain_iommu(iommu_id, domain) break; @@ -729,12 +721,39 @@ static int iommu_dummy(struct device *dev) return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO; } +/** + * is_downstream_to_pci_bridge - test if a device belongs to the PCI + * sub-hierarchy of a candidate PCI-PCI bridge + * @dev: candidate PCI device belonging to @bridge PCI sub-hierarchy + * @bridge: the candidate PCI-PCI bridge + * + * Return: true if @dev belongs to @bridge PCI sub-hierarchy, else false. + */ +static bool +is_downstream_to_pci_bridge(struct device *dev, struct device *bridge) +{ + struct pci_dev *pdev, *pbridge; + + if (!dev_is_pci(dev) || !dev_is_pci(bridge)) + return false; + + pdev = to_pci_dev(dev); + pbridge = to_pci_dev(bridge); + + if (pbridge->subordinate && + pbridge->subordinate->number <= pdev->bus->number && + pbridge->subordinate->busn_res.end >= pdev->bus->number) + return true; + + return false; +} + static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn) { struct dmar_drhd_unit *drhd = NULL; struct intel_iommu *iommu; struct device *tmp; - struct pci_dev *ptmp, *pdev = NULL; + struct pci_dev *pdev = NULL; u16 segment = 0; int i; @@ -780,13 +799,7 @@ static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devf goto out; } - if (!pdev || !dev_is_pci(tmp)) - continue; - - ptmp = to_pci_dev(tmp); - if (ptmp->subordinate && - ptmp->subordinate->number <= pdev->bus->number && - ptmp->subordinate->busn_res.end >= pdev->bus->number) + if (is_downstream_to_pci_bridge(dev, tmp)) goto got_pdev; } @@ -908,7 +921,6 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, return pte; } - /* return address's pte at specific level */ static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain, unsigned long pfn, @@ -1577,7 +1589,6 @@ static void iommu_disable_translation(struct intel_iommu *iommu) raw_spin_unlock_irqrestore(&iommu->register_lock, flag); } - static int iommu_init_domains(struct intel_iommu *iommu) { u32 ndomains, nlongs; @@ -1615,8 +1626,6 @@ static int iommu_init_domains(struct intel_iommu *iommu) return -ENOMEM; } - - /* * If Caching mode is set, then invalid translations are tagged * with domain-id 0, hence we need to pre-allocate it. We also @@ -1646,32 +1655,15 @@ static void disable_dmar_iommu(struct intel_iommu *iommu) if (!iommu->domains || !iommu->domain_ids) return; -again: spin_lock_irqsave(&device_domain_lock, flags); list_for_each_entry_safe(info, tmp, &device_domain_list, global) { - struct dmar_domain *domain; - if (info->iommu != iommu) continue; if (!info->dev || !info->domain) continue; - domain = info->domain; - __dmar_remove_one_dev_info(info); - - if (!domain_type_is_vm_or_si(domain)) { - /* - * The domain_exit() function can't be called under - * device_domain_lock, as it takes this lock itself. - * So release the lock here and re-run the loop - * afterwards. - */ - spin_unlock_irqrestore(&device_domain_lock, flags); - domain_exit(domain); - goto again; - } } spin_unlock_irqrestore(&device_domain_lock, flags); @@ -1841,71 +1833,12 @@ static inline int guestwidth_to_adjustwidth(int gaw) return agaw; } -static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu, - int guest_width) -{ - int adjust_width, agaw; - unsigned long sagaw; - int err; - - init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN); - - err = init_iova_flush_queue(&domain->iovad, - iommu_flush_iova, iova_entry_free); - if (err) - return err; - - domain_reserve_special_ranges(domain); - - /* calculate AGAW */ - if (guest_width > cap_mgaw(iommu->cap)) - guest_width = cap_mgaw(iommu->cap); - domain->gaw = guest_width; - adjust_width = guestwidth_to_adjustwidth(guest_width); - agaw = width_to_agaw(adjust_width); - sagaw = cap_sagaw(iommu->cap); - if (!test_bit(agaw, &sagaw)) { - /* hardware doesn't support it, choose a bigger one */ - pr_debug("Hardware doesn't support agaw %d\n", agaw); - agaw = find_next_bit(&sagaw, 5, agaw); - if (agaw >= 5) - return -ENODEV; - } - domain->agaw = agaw; - - if (ecap_coherent(iommu->ecap)) - domain->iommu_coherency = 1; - else - domain->iommu_coherency = 0; - - if (ecap_sc_support(iommu->ecap)) - domain->iommu_snooping = 1; - else - domain->iommu_snooping = 0; - - if (intel_iommu_superpage) - domain->iommu_superpage = fls(cap_super_page_val(iommu->cap)); - else - domain->iommu_superpage = 0; - - domain->nid = iommu->node; - - /* always allocate the top pgd */ - domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid); - if (!domain->pgd) - return -ENOMEM; - __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE); - return 0; -} - static void domain_exit(struct dmar_domain *domain) { struct page *freelist; /* Remove associated devices and clear attached or cached domains */ - rcu_read_lock(); domain_remove_dev_info(domain); - rcu_read_unlock(); /* destroy iovas */ put_iova_domain(&domain->iovad); @@ -2336,7 +2269,7 @@ static int domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, struct scatterlist *sg, unsigned long phys_pfn, unsigned long nr_pages, int prot) { - int ret; + int iommu_id, ret; struct intel_iommu *iommu; /* Do the real mapping first */ @@ -2344,18 +2277,8 @@ static int domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, if (ret) return ret; - /* Notify about the new mapping */ - if (domain_type_is_vm(domain)) { - /* VM typed domains can have more than one IOMMUs */ - int iommu_id; - - for_each_domain_iommu(iommu_id, domain) { - iommu = g_iommus[iommu_id]; - __mapping_notify_one(iommu, domain, iov_pfn, nr_pages); - } - } else { - /* General domains only have one IOMMU */ - iommu = domain_get_iommu(domain); + for_each_domain_iommu(iommu_id, domain) { + iommu = g_iommus[iommu_id]; __mapping_notify_one(iommu, domain, iov_pfn, nr_pages); } @@ -2435,8 +2358,18 @@ static struct dmar_domain *find_domain(struct device *dev) { struct device_domain_info *info; + if (unlikely(dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO)) { + struct iommu_domain *domain; + + dev->archdata.iommu = NULL; + domain = iommu_get_domain_for_dev(dev); + if (domain) + intel_iommu_attach_device(domain, dev); + } + /* No lock here, assumes no domain exit in normal case */ info = dev->archdata.iommu; + if (likely(info)) return info->domain; return NULL; @@ -2580,6 +2513,31 @@ static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque) return 0; } +static int domain_init(struct dmar_domain *domain, int guest_width) +{ + int adjust_width; + + init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN); + domain_reserve_special_ranges(domain); + + /* calculate AGAW */ + domain->gaw = guest_width; + adjust_width = guestwidth_to_adjustwidth(guest_width); + domain->agaw = width_to_agaw(adjust_width); + + domain->iommu_coherency = 0; + domain->iommu_snooping = 0; + domain->iommu_superpage = 0; + domain->max_addr = 0; + + /* always allocate the top pgd */ + domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid); + if (!domain->pgd) + return -ENOMEM; + domain_flush_cache(domain, domain->pgd, PAGE_SIZE); + return 0; +} + static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw) { struct device_domain_info *info; @@ -2617,13 +2575,20 @@ static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw) domain = alloc_domain(0); if (!domain) return NULL; - if (domain_init(domain, iommu, gaw)) { + + if (domain_init(domain, gaw)) { domain_exit(domain); return NULL; } -out: + if (init_iova_flush_queue(&domain->iovad, + iommu_flush_iova, + iova_entry_free)) { + pr_warn("iova flush queue initialization failed\n"); + intel_iommu_strict = 1; + } +out: return domain; } @@ -2663,29 +2628,6 @@ static struct dmar_domain *set_domain_for_dev(struct device *dev, return domain; } -static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw) -{ - struct dmar_domain *domain, *tmp; - - domain = find_domain(dev); - if (domain) - goto out; - - domain = find_or_alloc_domain(dev, gaw); - if (!domain) - goto out; - - tmp = set_domain_for_dev(dev, domain); - if (!tmp || domain != tmp) { - domain_exit(domain); - domain = tmp; - } - -out: - - return domain; -} - static int iommu_domain_identity_map(struct dmar_domain *domain, unsigned long long start, unsigned long long end) @@ -2750,75 +2692,21 @@ static int domain_prepare_identity_map(struct device *dev, return iommu_domain_identity_map(domain, start, end); } -static int iommu_prepare_identity_map(struct device *dev, - unsigned long long start, - unsigned long long end) -{ - struct dmar_domain *domain; - int ret; - - domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH); - if (!domain) - return -ENOMEM; - - ret = domain_prepare_identity_map(dev, domain, start, end); - if (ret) - domain_exit(domain); - - return ret; -} - -static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr, - struct device *dev) -{ - if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) - return 0; - return iommu_prepare_identity_map(dev, rmrr->base_address, - rmrr->end_address); -} - -#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA -static inline void iommu_prepare_isa(void) -{ - struct pci_dev *pdev; - int ret; - - pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL); - if (!pdev) - return; - - pr_info("Prepare 0-16MiB unity mapping for LPC\n"); - ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1); - - if (ret) - pr_err("Failed to create 0-16MiB identity map - floppy might not work\n"); - - pci_dev_put(pdev); -} -#else -static inline void iommu_prepare_isa(void) -{ - return; -} -#endif /* !CONFIG_INTEL_IOMMU_FLPY_WA */ - -static int md_domain_init(struct dmar_domain *domain, int guest_width); - static int __init si_domain_init(int hw) { - int nid, ret; + struct dmar_rmrr_unit *rmrr; + struct device *dev; + int i, nid, ret; si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY); if (!si_domain) return -EFAULT; - if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { + if (domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { domain_exit(si_domain); return -EFAULT; } - pr_debug("Identity mapping domain allocated\n"); - if (hw) return 0; @@ -2834,6 +2722,31 @@ static int __init si_domain_init(int hw) } } + /* + * Normally we use DMA domains for devices which have RMRRs. But we + * loose this requirement for graphic and usb devices. Identity map + * the RMRRs for graphic and USB devices so that they could use the + * si_domain. + */ + for_each_rmrr_units(rmrr) { + for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, + i, dev) { + unsigned long long start = rmrr->base_address; + unsigned long long end = rmrr->end_address; + + if (device_is_rmrr_locked(dev)) + continue; + + if (WARN_ON(end < start || + end >> agaw_to_width(si_domain->agaw))) + continue; + + ret = iommu_domain_identity_map(si_domain, start, end); + if (ret) + return ret; + } + } + return 0; } @@ -2841,9 +2754,6 @@ static int identity_mapping(struct device *dev) { struct device_domain_info *info; - if (likely(!iommu_identity_mapping)) - return 0; - info = dev->archdata.iommu; if (info && info != DUMMY_DEVICE_DOMAIN_INFO) return (info->domain == si_domain); @@ -2882,7 +2792,8 @@ static bool device_has_rmrr(struct device *dev) */ for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, i, tmp) - if (tmp == dev) { + if (tmp == dev || + is_downstream_to_pci_bridge(dev, tmp)) { rcu_read_unlock(); return true; } @@ -2891,6 +2802,35 @@ static bool device_has_rmrr(struct device *dev) return false; } +/** + * device_rmrr_is_relaxable - Test whether the RMRR of this device + * is relaxable (ie. is allowed to be not enforced under some conditions) + * @dev: device handle + * + * We assume that PCI USB devices with RMRRs have them largely + * for historical reasons and that the RMRR space is not actively used post + * boot. This exclusion may change if vendors begin to abuse it. + * + * The same exception is made for graphics devices, with the requirement that + * any use of the RMRR regions will be torn down before assigning the device + * to a guest. + * + * Return: true if the RMRR is relaxable, false otherwise + */ +static bool device_rmrr_is_relaxable(struct device *dev) +{ + struct pci_dev *pdev; + + if (!dev_is_pci(dev)) + return false; + + pdev = to_pci_dev(dev); + if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev)) + return true; + else + return false; +} + /* * There are a couple cases where we need to restrict the functionality of * devices associated with RMRRs. The first is when evaluating a device for @@ -2905,52 +2845,51 @@ static bool device_has_rmrr(struct device *dev) * We therefore prevent devices associated with an RMRR from participating in * the IOMMU API, which eliminates them from device assignment. * - * In both cases we assume that PCI USB devices with RMRRs have them largely - * for historical reasons and that the RMRR space is not actively used post - * boot. This exclusion may change if vendors begin to abuse it. - * - * The same exception is made for graphics devices, with the requirement that - * any use of the RMRR regions will be torn down before assigning the device - * to a guest. + * In both cases, devices which have relaxable RMRRs are not concerned by this + * restriction. See device_rmrr_is_relaxable comment. */ static bool device_is_rmrr_locked(struct device *dev) { if (!device_has_rmrr(dev)) return false; - if (dev_is_pci(dev)) { - struct pci_dev *pdev = to_pci_dev(dev); - - if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev)) - return false; - } + if (device_rmrr_is_relaxable(dev)) + return false; return true; } -static int iommu_should_identity_map(struct device *dev, int startup) +/* + * Return the required default domain type for a specific device. + * + * @dev: the device in query + * @startup: true if this is during early boot + * + * Returns: + * - IOMMU_DOMAIN_DMA: device requires a dynamic mapping domain + * - IOMMU_DOMAIN_IDENTITY: device requires an identical mapping domain + * - 0: both identity and dynamic domains work for this device + */ +static int device_def_domain_type(struct device *dev) { if (dev_is_pci(dev)) { struct pci_dev *pdev = to_pci_dev(dev); if (device_is_rmrr_locked(dev)) - return 0; + return IOMMU_DOMAIN_DMA; /* * Prevent any device marked as untrusted from getting * placed into the statically identity mapping domain. */ if (pdev->untrusted) - return 0; + return IOMMU_DOMAIN_DMA; if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev)) - return 1; + return IOMMU_DOMAIN_IDENTITY; if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev)) - return 1; - - if (!(iommu_identity_mapping & IDENTMAP_ALL)) - return 0; + return IOMMU_DOMAIN_IDENTITY; /* * We want to start off with all devices in the 1:1 domain, and @@ -2971,94 +2910,18 @@ static int iommu_should_identity_map(struct device *dev, int startup) */ if (!pci_is_pcie(pdev)) { if (!pci_is_root_bus(pdev->bus)) - return 0; + return IOMMU_DOMAIN_DMA; if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI) - return 0; + return IOMMU_DOMAIN_DMA; } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE) - return 0; + return IOMMU_DOMAIN_DMA; } else { if (device_has_rmrr(dev)) - return 0; + return IOMMU_DOMAIN_DMA; } - /* - * At boot time, we don't yet know if devices will be 64-bit capable. - * Assume that they will — if they turn out not to be, then we can - * take them out of the 1:1 domain later. - */ - if (!startup) { - /* - * If the device's dma_mask is less than the system's memory - * size then this is not a candidate for identity mapping. - */ - u64 dma_mask = *dev->dma_mask; - - if (dev->coherent_dma_mask && - dev->coherent_dma_mask < dma_mask) - dma_mask = dev->coherent_dma_mask; - - return dma_mask >= dma_get_required_mask(dev); - } - - return 1; -} - -static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw) -{ - int ret; - - if (!iommu_should_identity_map(dev, 1)) - return 0; - - ret = domain_add_dev_info(si_domain, dev); - if (!ret) - dev_info(dev, "%s identity mapping\n", - hw ? "Hardware" : "Software"); - else if (ret == -ENODEV) - /* device not associated with an iommu */ - ret = 0; - - return ret; -} - - -static int __init iommu_prepare_static_identity_mapping(int hw) -{ - struct pci_dev *pdev = NULL; - struct dmar_drhd_unit *drhd; - /* To avoid a -Wunused-but-set-variable warning. */ - struct intel_iommu *iommu __maybe_unused; - struct device *dev; - int i; - int ret = 0; - - for_each_pci_dev(pdev) { - ret = dev_prepare_static_identity_mapping(&pdev->dev, hw); - if (ret) - return ret; - } - - for_each_active_iommu(iommu, drhd) - for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) { - struct acpi_device_physical_node *pn; - struct acpi_device *adev; - - if (dev->bus != &acpi_bus_type) - continue; - - adev= to_acpi_device(dev); - mutex_lock(&adev->physical_node_lock); - list_for_each_entry(pn, &adev->physical_node_list, node) { - ret = dev_prepare_static_identity_mapping(pn->dev, hw); - if (ret) - break; - } - mutex_unlock(&adev->physical_node_lock); - if (ret) - return ret; - } - - return 0; + return (iommu_identity_mapping & IDENTMAP_ALL) ? + IOMMU_DOMAIN_IDENTITY : 0; } static void intel_iommu_init_qi(struct intel_iommu *iommu) @@ -3283,11 +3146,8 @@ out_unmap: static int __init init_dmars(void) { struct dmar_drhd_unit *drhd; - struct dmar_rmrr_unit *rmrr; - bool copied_tables = false; - struct device *dev; struct intel_iommu *iommu; - int i, ret; + int ret; /* * for each drhd @@ -3320,7 +3180,12 @@ static int __init init_dmars(void) goto error; } - for_each_active_iommu(iommu, drhd) { + for_each_iommu(iommu, drhd) { + if (drhd->ignored) { + iommu_disable_translation(iommu); + continue; + } + /* * Find the max pasid size of all IOMMU's in the system. * We need to ensure the system pasid table is no bigger @@ -3380,7 +3245,6 @@ static int __init init_dmars(void) } else { pr_info("Copied translation tables from previous kernel for %s\n", iommu->name); - copied_tables = true; } } @@ -3416,62 +3280,9 @@ static int __init init_dmars(void) check_tylersburg_isoch(); - if (iommu_identity_mapping) { - ret = si_domain_init(hw_pass_through); - if (ret) - goto free_iommu; - } - - - /* - * If we copied translations from a previous kernel in the kdump - * case, we can not assign the devices to domains now, as that - * would eliminate the old mappings. So skip this part and defer - * the assignment to device driver initialization time. - */ - if (copied_tables) - goto domains_done; - - /* - * If pass through is not set or not enabled, setup context entries for - * identity mappings for rmrr, gfx, and isa and may fall back to static - * identity mapping if iommu_identity_mapping is set. - */ - if (iommu_identity_mapping) { - ret = iommu_prepare_static_identity_mapping(hw_pass_through); - if (ret) { - pr_crit("Failed to setup IOMMU pass-through\n"); - goto free_iommu; - } - } - /* - * For each rmrr - * for each dev attached to rmrr - * do - * locate drhd for dev, alloc domain for dev - * allocate free domain - * allocate page table entries for rmrr - * if context not allocated for bus - * allocate and init context - * set present in root table for this bus - * init context with domain, translation etc - * endfor - * endfor - */ - pr_info("Setting RMRR:\n"); - for_each_rmrr_units(rmrr) { - /* some BIOS lists non-exist devices in DMAR table. */ - for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, - i, dev) { - ret = iommu_prepare_rmrr_dev(rmrr, dev); - if (ret) - pr_err("Mapping reserved region failed\n"); - } - } - - iommu_prepare_isa(); - -domains_done: + ret = si_domain_init(hw_pass_through); + if (ret) + goto free_iommu; /* * for each drhd @@ -3509,11 +3320,6 @@ domains_done: ret = dmar_set_interrupt(iommu); if (ret) goto free_iommu; - - if (!translation_pre_enabled(iommu)) - iommu_enable_translation(iommu); - - iommu_disable_protect_mem_regions(iommu); } return 0; @@ -3563,16 +3369,17 @@ static unsigned long intel_alloc_iova(struct device *dev, return iova_pfn; } -struct dmar_domain *get_valid_domain_for_dev(struct device *dev) +static struct dmar_domain *get_private_domain_for_dev(struct device *dev) { struct dmar_domain *domain, *tmp; struct dmar_rmrr_unit *rmrr; struct device *i_dev; int i, ret; + /* Device shouldn't be attached by any domains. */ domain = find_domain(dev); if (domain) - goto out; + return NULL; domain = find_or_alloc_domain(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH); if (!domain) @@ -3602,10 +3409,10 @@ struct dmar_domain *get_valid_domain_for_dev(struct device *dev) } out: - if (!domain) dev_err(dev, "Allocating domain failed\n"); - + else + domain->domain.type = IOMMU_DOMAIN_DMA; return domain; } @@ -3613,17 +3420,19 @@ out: /* Check if the dev needs to go through non-identity map and unmap process.*/ static bool iommu_need_mapping(struct device *dev) { - int found; + int ret; if (iommu_dummy(dev)) return false; - if (!iommu_identity_mapping) - return true; + ret = identity_mapping(dev); + if (ret) { + u64 dma_mask = *dev->dma_mask; - found = identity_mapping(dev); - if (found) { - if (iommu_should_identity_map(dev, 0)) + if (dev->coherent_dma_mask && dev->coherent_dma_mask < dma_mask) + dma_mask = dev->coherent_dma_mask; + + if (dma_mask >= dma_get_required_mask(dev)) return false; /* @@ -3631,17 +3440,20 @@ static bool iommu_need_mapping(struct device *dev) * non-identity mapping. */ dmar_remove_one_dev_info(dev); - dev_info(dev, "32bit DMA uses non-identity mapping\n"); - } else { - /* - * In case of a detached 64 bit DMA device from vm, the device - * is put into si_domain for identity mapping. - */ - if (iommu_should_identity_map(dev, 0) && - !domain_add_dev_info(si_domain, dev)) { - dev_info(dev, "64bit DMA uses identity mapping\n"); - return false; + ret = iommu_request_dma_domain_for_dev(dev); + if (ret) { + struct iommu_domain *domain; + struct dmar_domain *dmar_domain; + + domain = iommu_get_domain_for_dev(dev); + if (domain) { + dmar_domain = to_dmar_domain(domain); + dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN; + } + get_private_domain_for_dev(dev); } + + dev_info(dev, "32bit DMA uses non-identity mapping\n"); } return true; @@ -3660,7 +3472,7 @@ static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr, BUG_ON(dir == DMA_NONE); - domain = get_valid_domain_for_dev(dev); + domain = find_domain(dev); if (!domain) return DMA_MAPPING_ERROR; @@ -3875,7 +3687,7 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele if (!iommu_need_mapping(dev)) return dma_direct_map_sg(dev, sglist, nelems, dir, attrs); - domain = get_valid_domain_for_dev(dev); + domain = find_domain(dev); if (!domain) return 0; @@ -4194,13 +4006,10 @@ static void __init init_iommu_pm_ops(void) static inline void init_iommu_pm_ops(void) {} #endif /* CONFIG_PM */ - int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg) { struct acpi_dmar_reserved_memory *rmrr; - int prot = DMA_PTE_READ|DMA_PTE_WRITE; struct dmar_rmrr_unit *rmrru; - size_t length; rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL); if (!rmrru) @@ -4211,23 +4020,15 @@ int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg) rmrru->base_address = rmrr->base_address; rmrru->end_address = rmrr->end_address; - length = rmrr->end_address - rmrr->base_address + 1; - rmrru->resv = iommu_alloc_resv_region(rmrr->base_address, length, prot, - IOMMU_RESV_DIRECT); - if (!rmrru->resv) - goto free_rmrru; - rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1), ((void *)rmrr) + rmrr->header.length, &rmrru->devices_cnt); if (rmrru->devices_cnt && rmrru->devices == NULL) - goto free_all; + goto free_rmrru; list_add(&rmrru->list, &dmar_rmrr_units); return 0; -free_all: - kfree(rmrru->resv); free_rmrru: kfree(rmrru); out: @@ -4445,7 +4246,6 @@ static void intel_iommu_free_dmars(void) list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) { list_del(&rmrru->list); dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt); - kfree(rmrru->resv); kfree(rmrru); } @@ -4550,42 +4350,6 @@ int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info) return 0; } -/* - * Here we only respond to action of unbound device from driver. - * - * Added device is not attached to its DMAR domain here yet. That will happen - * when mapping the device to iova. - */ -static int device_notifier(struct notifier_block *nb, - unsigned long action, void *data) -{ - struct device *dev = data; - struct dmar_domain *domain; - - if (iommu_dummy(dev)) - return 0; - - if (action == BUS_NOTIFY_REMOVED_DEVICE) { - domain = find_domain(dev); - if (!domain) - return 0; - - dmar_remove_one_dev_info(dev); - if (!domain_type_is_vm_or_si(domain) && - list_empty(&domain->devices)) - domain_exit(domain); - } else if (action == BUS_NOTIFY_ADD_DEVICE) { - if (iommu_should_identity_map(dev, 1)) - domain_add_dev_info(si_domain, dev); - } - - return 0; -} - -static struct notifier_block device_nb = { - .notifier_call = device_notifier, -}; - static int intel_iommu_memory_notifier(struct notifier_block *nb, unsigned long val, void *v) { @@ -4812,6 +4576,49 @@ static int __init platform_optin_force_iommu(void) return 1; } +static int __init probe_acpi_namespace_devices(void) +{ + struct dmar_drhd_unit *drhd; + /* To avoid a -Wunused-but-set-variable warning. */ + struct intel_iommu *iommu __maybe_unused; + struct device *dev; + int i, ret = 0; + + for_each_active_iommu(iommu, drhd) { + for_each_active_dev_scope(drhd->devices, + drhd->devices_cnt, i, dev) { + struct acpi_device_physical_node *pn; + struct iommu_group *group; + struct acpi_device *adev; + + if (dev->bus != &acpi_bus_type) + continue; + + adev = to_acpi_device(dev); + mutex_lock(&adev->physical_node_lock); + list_for_each_entry(pn, + &adev->physical_node_list, node) { + group = iommu_group_get(pn->dev); + if (group) { + iommu_group_put(group); + continue; + } + + pn->dev->bus->iommu_ops = &intel_iommu_ops; + ret = iommu_probe_device(pn->dev); + if (ret) + break; + } + mutex_unlock(&adev->physical_node_lock); + + if (ret) + return ret; + } + } + + return 0; +} + int __init intel_iommu_init(void) { int ret = -ENODEV; @@ -4901,7 +4708,6 @@ int __init intel_iommu_init(void) goto out_free_reserved_range; } up_write(&dmar_global_lock); - pr_info("Intel(R) Virtualization Technology for Directed I/O\n"); #if defined(CONFIG_X86) && defined(CONFIG_SWIOTLB) swiotlb = 0; @@ -4919,11 +4725,25 @@ int __init intel_iommu_init(void) } bus_set_iommu(&pci_bus_type, &intel_iommu_ops); - bus_register_notifier(&pci_bus_type, &device_nb); if (si_domain && !hw_pass_through) register_memory_notifier(&intel_iommu_memory_nb); cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL, intel_iommu_cpu_dead); + + down_read(&dmar_global_lock); + if (probe_acpi_namespace_devices()) + pr_warn("ACPI name space devices didn't probe correctly\n"); + up_read(&dmar_global_lock); + + /* Finally, we enable the DMA remapping hardware. */ + for_each_iommu(iommu, drhd) { + if (!drhd->ignored && !translation_pre_enabled(iommu)) + iommu_enable_translation(iommu); + + iommu_disable_protect_mem_regions(iommu); + } + pr_info("Intel(R) Virtualization Technology for Directed I/O\n"); + intel_iommu_enabled = 1; intel_iommu_debugfs_init(); @@ -4962,6 +4782,7 @@ static void domain_context_clear(struct intel_iommu *iommu, struct device *dev) static void __dmar_remove_one_dev_info(struct device_domain_info *info) { + struct dmar_domain *domain; struct intel_iommu *iommu; unsigned long flags; @@ -4971,6 +4792,7 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info) return; iommu = info->iommu; + domain = info->domain; if (info->dev) { if (dev_is_pci(info->dev) && sm_supported(iommu)) @@ -4985,9 +4807,14 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info) unlink_domain_info(info); spin_lock_irqsave(&iommu->lock, flags); - domain_detach_iommu(info->domain, iommu); + domain_detach_iommu(domain, iommu); spin_unlock_irqrestore(&iommu->lock, flags); + /* free the private domain */ + if (domain->flags & DOMAIN_FLAG_LOSE_CHILDREN && + !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) + domain_exit(info->domain); + free_devinfo_mem(info); } @@ -5002,62 +4829,55 @@ static void dmar_remove_one_dev_info(struct device *dev) spin_unlock_irqrestore(&device_domain_lock, flags); } -static int md_domain_init(struct dmar_domain *domain, int guest_width) -{ - int adjust_width; - - init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN); - domain_reserve_special_ranges(domain); - - /* calculate AGAW */ - domain->gaw = guest_width; - adjust_width = guestwidth_to_adjustwidth(guest_width); - domain->agaw = width_to_agaw(adjust_width); - - domain->iommu_coherency = 0; - domain->iommu_snooping = 0; - domain->iommu_superpage = 0; - domain->max_addr = 0; - - /* always allocate the top pgd */ - domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid); - if (!domain->pgd) - return -ENOMEM; - domain_flush_cache(domain, domain->pgd, PAGE_SIZE); - return 0; -} - static struct iommu_domain *intel_iommu_domain_alloc(unsigned type) { struct dmar_domain *dmar_domain; struct iommu_domain *domain; - if (type != IOMMU_DOMAIN_UNMANAGED) - return NULL; + switch (type) { + case IOMMU_DOMAIN_DMA: + /* fallthrough */ + case IOMMU_DOMAIN_UNMANAGED: + dmar_domain = alloc_domain(0); + if (!dmar_domain) { + pr_err("Can't allocate dmar_domain\n"); + return NULL; + } + if (domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { + pr_err("Domain initialization failed\n"); + domain_exit(dmar_domain); + return NULL; + } - dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE); - if (!dmar_domain) { - pr_err("Can't allocate dmar_domain\n"); - return NULL; - } - if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { - pr_err("Domain initialization failed\n"); - domain_exit(dmar_domain); + if (type == IOMMU_DOMAIN_DMA && + init_iova_flush_queue(&dmar_domain->iovad, + iommu_flush_iova, iova_entry_free)) { + pr_warn("iova flush queue initialization failed\n"); + intel_iommu_strict = 1; + } + + domain_update_iommu_cap(dmar_domain); + + domain = &dmar_domain->domain; + domain->geometry.aperture_start = 0; + domain->geometry.aperture_end = + __DOMAIN_MAX_ADDR(dmar_domain->gaw); + domain->geometry.force_aperture = true; + + return domain; + case IOMMU_DOMAIN_IDENTITY: + return &si_domain->domain; + default: return NULL; } - domain_update_iommu_cap(dmar_domain); - - domain = &dmar_domain->domain; - domain->geometry.aperture_start = 0; - domain->geometry.aperture_end = __DOMAIN_MAX_ADDR(dmar_domain->gaw); - domain->geometry.force_aperture = true; - return domain; + return NULL; } static void intel_iommu_domain_free(struct iommu_domain *domain) { - domain_exit(to_dmar_domain(domain)); + if (domain != &si_domain->domain) + domain_exit(to_dmar_domain(domain)); } /* @@ -5233,7 +5053,8 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, { int ret; - if (device_is_rmrr_locked(dev)) { + if (domain->type == IOMMU_DOMAIN_UNMANAGED && + device_is_rmrr_locked(dev)) { dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n"); return -EPERM; } @@ -5246,15 +5067,8 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, struct dmar_domain *old_domain; old_domain = find_domain(dev); - if (old_domain) { - rcu_read_lock(); + if (old_domain) dmar_remove_one_dev_info(dev); - rcu_read_unlock(); - - if (!domain_type_is_vm_or_si(old_domain) && - list_empty(&old_domain->devices)) - domain_exit(old_domain); - } } ret = prepare_domain_attach_device(domain, dev); @@ -5300,6 +5114,9 @@ static int intel_iommu_map(struct iommu_domain *domain, int prot = 0; int ret; + if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN) + return -EINVAL; + if (iommu_prot & IOMMU_READ) prot |= DMA_PTE_READ; if (iommu_prot & IOMMU_WRITE) @@ -5341,6 +5158,8 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain, /* Cope with horrid API which requires us to unmap more than the size argument if it happens to be a large-page mapping. */ BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level)); + if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN) + return 0; if (size < VTD_PAGE_SIZE << level_to_offset_bits(level)) size = VTD_PAGE_SIZE << level_to_offset_bits(level); @@ -5372,6 +5191,9 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, int level = 0; u64 phys = 0; + if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN) + return 0; + pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level); if (pte) phys = dma_pte_addr(pte); @@ -5427,9 +5249,12 @@ static bool intel_iommu_capable(enum iommu_cap cap) static int intel_iommu_add_device(struct device *dev) { + struct dmar_domain *dmar_domain; + struct iommu_domain *domain; struct intel_iommu *iommu; struct iommu_group *group; u8 bus, devfn; + int ret; iommu = device_to_iommu(dev, &bus, &devfn); if (!iommu) @@ -5437,12 +5262,45 @@ static int intel_iommu_add_device(struct device *dev) iommu_device_link(&iommu->iommu, dev); + if (translation_pre_enabled(iommu)) + dev->archdata.iommu = DEFER_DEVICE_DOMAIN_INFO; + group = iommu_group_get_for_dev(dev); if (IS_ERR(group)) return PTR_ERR(group); iommu_group_put(group); + + domain = iommu_get_domain_for_dev(dev); + dmar_domain = to_dmar_domain(domain); + if (domain->type == IOMMU_DOMAIN_DMA) { + if (device_def_domain_type(dev) == IOMMU_DOMAIN_IDENTITY) { + ret = iommu_request_dm_for_dev(dev); + if (ret) { + dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN; + domain_add_dev_info(si_domain, dev); + dev_info(dev, + "Device uses a private identity domain.\n"); + } + } + } else { + if (device_def_domain_type(dev) == IOMMU_DOMAIN_DMA) { + ret = iommu_request_dma_domain_for_dev(dev); + if (ret) { + dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN; + if (!get_private_domain_for_dev(dev)) { + dev_warn(dev, + "Failed to get a private domain.\n"); + return -ENOMEM; + } + + dev_info(dev, + "Device uses a private dma domain.\n"); + } + } + } + return 0; } @@ -5463,22 +5321,51 @@ static void intel_iommu_remove_device(struct device *dev) static void intel_iommu_get_resv_regions(struct device *device, struct list_head *head) { + int prot = DMA_PTE_READ | DMA_PTE_WRITE; struct iommu_resv_region *reg; struct dmar_rmrr_unit *rmrr; struct device *i_dev; int i; - rcu_read_lock(); + down_read(&dmar_global_lock); for_each_rmrr_units(rmrr) { for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, i, i_dev) { - if (i_dev != device) + struct iommu_resv_region *resv; + enum iommu_resv_type type; + size_t length; + + if (i_dev != device && + !is_downstream_to_pci_bridge(device, i_dev)) continue; - list_add_tail(&rmrr->resv->list, head); + length = rmrr->end_address - rmrr->base_address + 1; + + type = device_rmrr_is_relaxable(device) ? + IOMMU_RESV_DIRECT_RELAXABLE : IOMMU_RESV_DIRECT; + + resv = iommu_alloc_resv_region(rmrr->base_address, + length, prot, type); + if (!resv) + break; + + list_add_tail(&resv->list, head); } } - rcu_read_unlock(); + up_read(&dmar_global_lock); + +#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA + if (dev_is_pci(device)) { + struct pci_dev *pdev = to_pci_dev(device); + + if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) { + reg = iommu_alloc_resv_region(0, 1UL << 24, 0, + IOMMU_RESV_DIRECT); + if (reg) + list_add_tail(®->list, head); + } + } +#endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */ reg = iommu_alloc_resv_region(IOAPIC_RANGE_START, IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1, @@ -5493,10 +5380,8 @@ static void intel_iommu_put_resv_regions(struct device *dev, { struct iommu_resv_region *entry, *next; - list_for_each_entry_safe(entry, next, head, list) { - if (entry->type == IOMMU_RESV_MSI) - kfree(entry); - } + list_for_each_entry_safe(entry, next, head, list) + kfree(entry); } int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev) @@ -5508,7 +5393,7 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev) u64 ctx_lo; int ret; - domain = get_valid_domain_for_dev(dev); + domain = find_domain(dev); if (!domain) return -EINVAL; @@ -5550,6 +5435,19 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev) return ret; } +static void intel_iommu_apply_resv_region(struct device *dev, + struct iommu_domain *domain, + struct iommu_resv_region *region) +{ + struct dmar_domain *dmar_domain = to_dmar_domain(domain); + unsigned long start, end; + + start = IOVA_PFN(region->start); + end = IOVA_PFN(region->start + region->length - 1); + + WARN_ON_ONCE(!reserve_iova(&dmar_domain->iovad, start, end)); +} + #ifdef CONFIG_INTEL_IOMMU_SVM struct intel_iommu *intel_svm_device_to_iommu(struct device *dev) { @@ -5699,6 +5597,12 @@ intel_iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev) dmar_domain->default_pasid : -EINVAL; } +static bool intel_iommu_is_attach_deferred(struct iommu_domain *domain, + struct device *dev) +{ + return dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO; +} + const struct iommu_ops intel_iommu_ops = { .capable = intel_iommu_capable, .domain_alloc = intel_iommu_domain_alloc, @@ -5715,11 +5619,13 @@ const struct iommu_ops intel_iommu_ops = { .remove_device = intel_iommu_remove_device, .get_resv_regions = intel_iommu_get_resv_regions, .put_resv_regions = intel_iommu_put_resv_regions, + .apply_resv_region = intel_iommu_apply_resv_region, .device_group = pci_device_group, .dev_has_feat = intel_iommu_dev_has_feat, .dev_feat_enabled = intel_iommu_dev_feat_enabled, .dev_enable_feat = intel_iommu_dev_enable_feat, .dev_disable_feat = intel_iommu_dev_disable_feat, + .is_attach_deferred = intel_iommu_is_attach_deferred, .pgsize_bitmap = INTEL_IOMMU_PGSIZES, }; diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c index fe51d8af457f..040a445be300 100644 --- a/drivers/iommu/intel-pasid.c +++ b/drivers/iommu/intel-pasid.c @@ -169,23 +169,6 @@ attach_out: return 0; } -/* Get PRESENT bit of a PASID directory entry. */ -static inline bool -pasid_pde_is_present(struct pasid_dir_entry *pde) -{ - return READ_ONCE(pde->val) & PASID_PTE_PRESENT; -} - -/* Get PASID table from a PASID directory entry. */ -static inline struct pasid_entry * -get_pasid_table_from_pde(struct pasid_dir_entry *pde) -{ - if (!pasid_pde_is_present(pde)) - return NULL; - - return phys_to_virt(READ_ONCE(pde->val) & PDE_PFN_MASK); -} - void intel_pasid_free_table(struct device *dev) { struct device_domain_info *info; diff --git a/drivers/iommu/intel-pasid.h b/drivers/iommu/intel-pasid.h index 23537b3f34e3..fc8cd8f17de1 100644 --- a/drivers/iommu/intel-pasid.h +++ b/drivers/iommu/intel-pasid.h @@ -18,6 +18,10 @@ #define PDE_PFN_MASK PAGE_MASK #define PASID_PDE_SHIFT 6 #define MAX_NR_PASID_BITS 20 +#define PASID_TBL_ENTRIES BIT(PASID_PDE_SHIFT) + +#define is_pasid_enabled(entry) (((entry)->lo >> 3) & 0x1) +#define get_pasid_dir_size(entry) (1 << ((((entry)->lo >> 9) & 0x7) + 7)) /* * Domain ID reserved for pasid entries programmed for first-level @@ -49,6 +53,28 @@ struct pasid_table { struct list_head dev; /* device list */ }; +/* Get PRESENT bit of a PASID directory entry. */ +static inline bool pasid_pde_is_present(struct pasid_dir_entry *pde) +{ + return READ_ONCE(pde->val) & PASID_PTE_PRESENT; +} + +/* Get PASID table from a PASID directory entry. */ +static inline struct pasid_entry * +get_pasid_table_from_pde(struct pasid_dir_entry *pde) +{ + if (!pasid_pde_is_present(pde)) + return NULL; + + return phys_to_virt(READ_ONCE(pde->val) & PDE_PFN_MASK); +} + +/* Get PRESENT bit of a PASID table entry. */ +static inline bool pasid_pte_is_present(struct pasid_entry *pte) +{ + return READ_ONCE(pte->val[0]) & PASID_PTE_PRESENT; +} + extern u32 intel_pasid_max_id; int intel_pasid_alloc_id(void *ptr, int start, int end, gfp_t gfp); void intel_pasid_free_id(int pasid); diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index eceaa7e968ae..780de0caafe8 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -366,6 +366,21 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ } list_add_tail(&svm->list, &global_svm_list); + } else { + /* + * Binding a new device with existing PASID, need to setup + * the PASID entry. + */ + spin_lock(&iommu->lock); + ret = intel_pasid_setup_first_level(iommu, dev, + mm ? mm->pgd : init_mm.pgd, + svm->pasid, FLPT_DEFAULT_DID, + mm ? 0 : PASID_FLAG_SUPERVISOR_MODE); + spin_unlock(&iommu->lock); + if (ret) { + kfree(sdev); + goto out; + } } list_add_rcu(&sdev->list, &svm->devs); diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index 4160aa9f3f80..4786ca061e31 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -101,7 +101,7 @@ static void init_ir_status(struct intel_iommu *iommu) iommu->flags |= VTD_FLAG_IRQ_REMAP_PRE_ENABLED; } -static int alloc_irte(struct intel_iommu *iommu, int irq, +static int alloc_irte(struct intel_iommu *iommu, struct irq_2_iommu *irq_iommu, u16 count) { struct ir_table *table = iommu->ir_table; @@ -1374,7 +1374,7 @@ static int intel_irq_remapping_alloc(struct irq_domain *domain, goto out_free_parent; down_read(&dmar_global_lock); - index = alloc_irte(iommu, virq, &data->irq_2_iommu, nr_irqs); + index = alloc_irte(iommu, &data->irq_2_iommu, nr_irqs); up_read(&dmar_global_lock); if (index < 0) { pr_warn("Failed to allocate IRTE\n"); diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index aa7a3fa6dd09..0fc8dfab2abf 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -204,7 +204,7 @@ static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp, dev_err(dev, "Page table does not fit in PTE: %pa", &phys); goto out_free; } - if (table && !(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) { + if (table && !cfg->coherent_walk) { dma = dma_map_single(dev, table, size, DMA_TO_DEVICE); if (dma_mapping_error(dev, dma)) goto out_free; @@ -238,7 +238,7 @@ static void __arm_v7s_free_table(void *table, int lvl, struct device *dev = cfg->iommu_dev; size_t size = ARM_V7S_TABLE_SIZE(lvl); - if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) + if (!cfg->coherent_walk) dma_unmap_single(dev, __arm_v7s_dma_addr(table), size, DMA_TO_DEVICE); if (lvl == 1) @@ -250,7 +250,7 @@ static void __arm_v7s_free_table(void *table, int lvl, static void __arm_v7s_pte_sync(arm_v7s_iopte *ptep, int num_entries, struct io_pgtable_cfg *cfg) { - if (cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA) + if (cfg->coherent_walk) return; dma_sync_single_for_device(cfg->iommu_dev, __arm_v7s_dma_addr(ptep), @@ -716,7 +716,6 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg, IO_PGTABLE_QUIRK_NO_PERMS | IO_PGTABLE_QUIRK_TLBI_ON_MAP | IO_PGTABLE_QUIRK_ARM_MTK_4GB | - IO_PGTABLE_QUIRK_NO_DMA | IO_PGTABLE_QUIRK_NON_STRICT)) return NULL; @@ -779,8 +778,11 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg, /* TTBRs */ cfg->arm_v7s_cfg.ttbr[0] = virt_to_phys(data->pgd) | ARM_V7S_TTBR_S | ARM_V7S_TTBR_NOS | - ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_WBWA) | - ARM_V7S_TTBR_ORGN_ATTR(ARM_V7S_RGN_WBWA); + (cfg->coherent_walk ? + (ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_WBWA) | + ARM_V7S_TTBR_ORGN_ATTR(ARM_V7S_RGN_WBWA)) : + (ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_NC) | + ARM_V7S_TTBR_ORGN_ATTR(ARM_V7S_RGN_NC))); cfg->arm_v7s_cfg.ttbr[1] = 0; return &data->iop; @@ -835,7 +837,8 @@ static int __init arm_v7s_do_selftests(void) .tlb = &dummy_tlb_ops, .oas = 32, .ias = 32, - .quirks = IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_DMA, + .coherent_walk = true, + .quirks = IO_PGTABLE_QUIRK_ARM_NS, .pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M, }; unsigned int iova, size, iova_start; diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 4b6b2f3150a9..161a7d56264d 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -156,10 +156,12 @@ #define ARM_LPAE_MAIR_ATTR_MASK 0xff #define ARM_LPAE_MAIR_ATTR_DEVICE 0x04 #define ARM_LPAE_MAIR_ATTR_NC 0x44 +#define ARM_LPAE_MAIR_ATTR_INC_OWBRWA 0xf4 #define ARM_LPAE_MAIR_ATTR_WBRWA 0xff #define ARM_LPAE_MAIR_ATTR_IDX_NC 0 #define ARM_LPAE_MAIR_ATTR_IDX_CACHE 1 #define ARM_LPAE_MAIR_ATTR_IDX_DEV 2 +#define ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE 3 #define ARM_MALI_LPAE_TTBR_ADRMODE_TABLE (3u << 0) #define ARM_MALI_LPAE_TTBR_READ_INNER BIT(2) @@ -239,7 +241,7 @@ static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp, return NULL; pages = page_address(p); - if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) { + if (!cfg->coherent_walk) { dma = dma_map_single(dev, pages, size, DMA_TO_DEVICE); if (dma_mapping_error(dev, dma)) goto out_free; @@ -265,7 +267,7 @@ out_free: static void __arm_lpae_free_pages(void *pages, size_t size, struct io_pgtable_cfg *cfg) { - if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) + if (!cfg->coherent_walk) dma_unmap_single(cfg->iommu_dev, __arm_lpae_dma_addr(pages), size, DMA_TO_DEVICE); free_pages((unsigned long)pages, get_order(size)); @@ -283,7 +285,7 @@ static void __arm_lpae_set_pte(arm_lpae_iopte *ptep, arm_lpae_iopte pte, { *ptep = pte; - if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) + if (!cfg->coherent_walk) __arm_lpae_sync_pte(ptep, cfg); } @@ -361,8 +363,7 @@ static arm_lpae_iopte arm_lpae_install_table(arm_lpae_iopte *table, old = cmpxchg64_relaxed(ptep, curr, new); - if ((cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA) || - (old & ARM_LPAE_PTE_SW_SYNC)) + if (cfg->coherent_walk || (old & ARM_LPAE_PTE_SW_SYNC)) return old; /* Even if it's not ours, there's no point waiting; just kick it */ @@ -403,8 +404,7 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova, pte = arm_lpae_install_table(cptep, ptep, 0, cfg); if (pte) __arm_lpae_free_pages(cptep, tblsz, cfg); - } else if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA) && - !(pte & ARM_LPAE_PTE_SW_SYNC)) { + } else if (!cfg->coherent_walk && !(pte & ARM_LPAE_PTE_SW_SYNC)) { __arm_lpae_sync_pte(ptep, cfg); } @@ -459,6 +459,9 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data, else if (prot & IOMMU_CACHE) pte |= (ARM_LPAE_MAIR_ATTR_IDX_CACHE << ARM_LPAE_PTE_ATTRINDX_SHIFT); + else if (prot & IOMMU_QCOM_SYS_CACHE) + pte |= (ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE + << ARM_LPAE_PTE_ATTRINDX_SHIFT); } if (prot & IOMMU_NOEXEC) @@ -783,7 +786,7 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) u64 reg; struct arm_lpae_io_pgtable *data; - if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_DMA | + if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NON_STRICT)) return NULL; @@ -792,9 +795,15 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) return NULL; /* TCR */ - reg = (ARM_LPAE_TCR_SH_IS << ARM_LPAE_TCR_SH0_SHIFT) | - (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_IRGN0_SHIFT) | - (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_ORGN0_SHIFT); + if (cfg->coherent_walk) { + reg = (ARM_LPAE_TCR_SH_IS << ARM_LPAE_TCR_SH0_SHIFT) | + (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_IRGN0_SHIFT) | + (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_ORGN0_SHIFT); + } else { + reg = (ARM_LPAE_TCR_SH_OS << ARM_LPAE_TCR_SH0_SHIFT) | + (ARM_LPAE_TCR_RGN_NC << ARM_LPAE_TCR_IRGN0_SHIFT) | + (ARM_LPAE_TCR_RGN_NC << ARM_LPAE_TCR_ORGN0_SHIFT); + } switch (ARM_LPAE_GRANULE(data)) { case SZ_4K: @@ -846,7 +855,9 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) (ARM_LPAE_MAIR_ATTR_WBRWA << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_CACHE)) | (ARM_LPAE_MAIR_ATTR_DEVICE - << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_DEV)); + << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_DEV)) | + (ARM_LPAE_MAIR_ATTR_INC_OWBRWA + << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE)); cfg->arm_lpae_s1_cfg.mair[0] = reg; cfg->arm_lpae_s1_cfg.mair[1] = 0; @@ -876,8 +887,7 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie) struct arm_lpae_io_pgtable *data; /* The NS quirk doesn't apply at stage 2 */ - if (cfg->quirks & ~(IO_PGTABLE_QUIRK_NO_DMA | - IO_PGTABLE_QUIRK_NON_STRICT)) + if (cfg->quirks & ~(IO_PGTABLE_QUIRK_NON_STRICT)) return NULL; data = arm_lpae_alloc_pgtable(cfg); @@ -1212,7 +1222,7 @@ static int __init arm_lpae_do_selftests(void) struct io_pgtable_cfg cfg = { .tlb = &dummy_tlb_ops, .oas = 48, - .quirks = IO_PGTABLE_QUIRK_NO_DMA, + .coherent_walk = true, }; for (i = 0; i < ARRAY_SIZE(pgsize); ++i) { diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 9f0a2844371c..0c674d80c37f 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -61,10 +61,11 @@ struct iommu_group_attribute { }; static const char * const iommu_group_resv_type_string[] = { - [IOMMU_RESV_DIRECT] = "direct", - [IOMMU_RESV_RESERVED] = "reserved", - [IOMMU_RESV_MSI] = "msi", - [IOMMU_RESV_SW_MSI] = "msi", + [IOMMU_RESV_DIRECT] = "direct", + [IOMMU_RESV_DIRECT_RELAXABLE] = "direct-relaxable", + [IOMMU_RESV_RESERVED] = "reserved", + [IOMMU_RESV_MSI] = "msi", + [IOMMU_RESV_SW_MSI] = "msi", }; #define IOMMU_GROUP_ATTR(_name, _mode, _show, _store) \ @@ -95,15 +96,43 @@ void iommu_device_unregister(struct iommu_device *iommu) spin_unlock(&iommu_device_lock); } +static struct iommu_param *iommu_get_dev_param(struct device *dev) +{ + struct iommu_param *param = dev->iommu_param; + + if (param) + return param; + + param = kzalloc(sizeof(*param), GFP_KERNEL); + if (!param) + return NULL; + + mutex_init(¶m->lock); + dev->iommu_param = param; + return param; +} + +static void iommu_free_dev_param(struct device *dev) +{ + kfree(dev->iommu_param); + dev->iommu_param = NULL; +} + int iommu_probe_device(struct device *dev) { const struct iommu_ops *ops = dev->bus->iommu_ops; - int ret = -EINVAL; + int ret; WARN_ON(dev->iommu_group); + if (!ops) + return -EINVAL; - if (ops) - ret = ops->add_device(dev); + if (!iommu_get_dev_param(dev)) + return -ENOMEM; + + ret = ops->add_device(dev); + if (ret) + iommu_free_dev_param(dev); return ret; } @@ -114,6 +143,8 @@ void iommu_release_device(struct device *dev) if (dev->iommu_group) ops->remove_device(dev); + + iommu_free_dev_param(dev); } static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus, @@ -225,18 +256,21 @@ static int iommu_insert_resv_region(struct iommu_resv_region *new, pos = pos->next; } else if ((start >= a) && (end <= b)) { if (new->type == type) - goto done; + return 0; else pos = pos->next; } else { if (new->type == type) { phys_addr_t new_start = min(a, start); phys_addr_t new_end = max(b, end); + int ret; list_del(&entry->list); entry->start = new_start; entry->length = new_end - new_start + 1; - iommu_insert_resv_region(entry, regions); + ret = iommu_insert_resv_region(entry, regions); + kfree(entry); + return ret; } else { pos = pos->next; } @@ -249,7 +283,6 @@ insert: return -ENOMEM; list_add_tail(®ion->list, pos); -done: return 0; } @@ -561,7 +594,8 @@ static int iommu_group_create_direct_mappings(struct iommu_group *group, start = ALIGN(entry->start, pg_size); end = ALIGN(entry->start + entry->length, pg_size); - if (entry->type != IOMMU_RESV_DIRECT) + if (entry->type != IOMMU_RESV_DIRECT && + entry->type != IOMMU_RESV_DIRECT_RELAXABLE) continue; for (addr = start; addr < end; addr += pg_size) { @@ -843,6 +877,206 @@ int iommu_group_unregister_notifier(struct iommu_group *group, EXPORT_SYMBOL_GPL(iommu_group_unregister_notifier); /** + * iommu_register_device_fault_handler() - Register a device fault handler + * @dev: the device + * @handler: the fault handler + * @data: private data passed as argument to the handler + * + * When an IOMMU fault event is received, this handler gets called with the + * fault event and data as argument. The handler should return 0 on success. If + * the fault is recoverable (IOMMU_FAULT_PAGE_REQ), the consumer should also + * complete the fault by calling iommu_page_response() with one of the following + * response code: + * - IOMMU_PAGE_RESP_SUCCESS: retry the translation + * - IOMMU_PAGE_RESP_INVALID: terminate the fault + * - IOMMU_PAGE_RESP_FAILURE: terminate the fault and stop reporting + * page faults if possible. + * + * Return 0 if the fault handler was installed successfully, or an error. + */ +int iommu_register_device_fault_handler(struct device *dev, + iommu_dev_fault_handler_t handler, + void *data) +{ + struct iommu_param *param = dev->iommu_param; + int ret = 0; + + if (!param) + return -EINVAL; + + mutex_lock(¶m->lock); + /* Only allow one fault handler registered for each device */ + if (param->fault_param) { + ret = -EBUSY; + goto done_unlock; + } + + get_device(dev); + param->fault_param = kzalloc(sizeof(*param->fault_param), GFP_KERNEL); + if (!param->fault_param) { + put_device(dev); + ret = -ENOMEM; + goto done_unlock; + } + param->fault_param->handler = handler; + param->fault_param->data = data; + mutex_init(¶m->fault_param->lock); + INIT_LIST_HEAD(¶m->fault_param->faults); + +done_unlock: + mutex_unlock(¶m->lock); + + return ret; +} +EXPORT_SYMBOL_GPL(iommu_register_device_fault_handler); + +/** + * iommu_unregister_device_fault_handler() - Unregister the device fault handler + * @dev: the device + * + * Remove the device fault handler installed with + * iommu_register_device_fault_handler(). + * + * Return 0 on success, or an error. + */ +int iommu_unregister_device_fault_handler(struct device *dev) +{ + struct iommu_param *param = dev->iommu_param; + int ret = 0; + + if (!param) + return -EINVAL; + + mutex_lock(¶m->lock); + + if (!param->fault_param) + goto unlock; + + /* we cannot unregister handler if there are pending faults */ + if (!list_empty(¶m->fault_param->faults)) { + ret = -EBUSY; + goto unlock; + } + + kfree(param->fault_param); + param->fault_param = NULL; + put_device(dev); +unlock: + mutex_unlock(¶m->lock); + + return ret; +} +EXPORT_SYMBOL_GPL(iommu_unregister_device_fault_handler); + +/** + * iommu_report_device_fault() - Report fault event to device driver + * @dev: the device + * @evt: fault event data + * + * Called by IOMMU drivers when a fault is detected, typically in a threaded IRQ + * handler. When this function fails and the fault is recoverable, it is the + * caller's responsibility to complete the fault. + * + * Return 0 on success, or an error. + */ +int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt) +{ + struct iommu_param *param = dev->iommu_param; + struct iommu_fault_event *evt_pending = NULL; + struct iommu_fault_param *fparam; + int ret = 0; + + if (!param || !evt) + return -EINVAL; + + /* we only report device fault if there is a handler registered */ + mutex_lock(¶m->lock); + fparam = param->fault_param; + if (!fparam || !fparam->handler) { + ret = -EINVAL; + goto done_unlock; + } + + if (evt->fault.type == IOMMU_FAULT_PAGE_REQ && + (evt->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) { + evt_pending = kmemdup(evt, sizeof(struct iommu_fault_event), + GFP_KERNEL); + if (!evt_pending) { + ret = -ENOMEM; + goto done_unlock; + } + mutex_lock(&fparam->lock); + list_add_tail(&evt_pending->list, &fparam->faults); + mutex_unlock(&fparam->lock); + } + + ret = fparam->handler(&evt->fault, fparam->data); + if (ret && evt_pending) { + mutex_lock(&fparam->lock); + list_del(&evt_pending->list); + mutex_unlock(&fparam->lock); + kfree(evt_pending); + } +done_unlock: + mutex_unlock(¶m->lock); + return ret; +} +EXPORT_SYMBOL_GPL(iommu_report_device_fault); + +int iommu_page_response(struct device *dev, + struct iommu_page_response *msg) +{ + bool pasid_valid; + int ret = -EINVAL; + struct iommu_fault_event *evt; + struct iommu_fault_page_request *prm; + struct iommu_param *param = dev->iommu_param; + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); + + if (!domain || !domain->ops->page_response) + return -ENODEV; + + if (!param || !param->fault_param) + return -EINVAL; + + if (msg->version != IOMMU_PAGE_RESP_VERSION_1 || + msg->flags & ~IOMMU_PAGE_RESP_PASID_VALID) + return -EINVAL; + + /* Only send response if there is a fault report pending */ + mutex_lock(¶m->fault_param->lock); + if (list_empty(¶m->fault_param->faults)) { + dev_warn_ratelimited(dev, "no pending PRQ, drop response\n"); + goto done_unlock; + } + /* + * Check if we have a matching page request pending to respond, + * otherwise return -EINVAL + */ + list_for_each_entry(evt, ¶m->fault_param->faults, list) { + prm = &evt->fault.prm; + pasid_valid = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID; + + if ((pasid_valid && prm->pasid != msg->pasid) || + prm->grpid != msg->grpid) + continue; + + /* Sanitize the reply */ + msg->flags = pasid_valid ? IOMMU_PAGE_RESP_PASID_VALID : 0; + + ret = domain->ops->page_response(dev, evt, msg); + list_del(&evt->list); + kfree(evt); + break; + } + +done_unlock: + mutex_unlock(¶m->fault_param->lock); + return ret; +} +EXPORT_SYMBOL_GPL(iommu_page_response); + +/** * iommu_group_id - Return ID for a group * @group: the group to ID * @@ -1895,24 +2129,23 @@ struct iommu_resv_region *iommu_alloc_resv_region(phys_addr_t start, return region; } -/* Request that a device is direct mapped by the IOMMU */ -int iommu_request_dm_for_dev(struct device *dev) +static int +request_default_domain_for_dev(struct device *dev, unsigned long type) { - struct iommu_domain *dm_domain; + struct iommu_domain *domain; struct iommu_group *group; int ret; /* Device must already be in a group before calling this function */ - group = iommu_group_get_for_dev(dev); - if (IS_ERR(group)) - return PTR_ERR(group); + group = iommu_group_get(dev); + if (!group) + return -EINVAL; mutex_lock(&group->mutex); /* Check if the default domain is already direct mapped */ ret = 0; - if (group->default_domain && - group->default_domain->type == IOMMU_DOMAIN_IDENTITY) + if (group->default_domain && group->default_domain->type == type) goto out; /* Don't change mappings of existing devices */ @@ -1922,23 +2155,26 @@ int iommu_request_dm_for_dev(struct device *dev) /* Allocate a direct mapped domain */ ret = -ENOMEM; - dm_domain = __iommu_domain_alloc(dev->bus, IOMMU_DOMAIN_IDENTITY); - if (!dm_domain) + domain = __iommu_domain_alloc(dev->bus, type); + if (!domain) goto out; /* Attach the device to the domain */ - ret = __iommu_attach_group(dm_domain, group); + ret = __iommu_attach_group(domain, group); if (ret) { - iommu_domain_free(dm_domain); + iommu_domain_free(domain); goto out; } + iommu_group_create_direct_mappings(group, dev); + /* Make the direct mapped domain the default for this group */ if (group->default_domain) iommu_domain_free(group->default_domain); - group->default_domain = dm_domain; + group->default_domain = domain; - dev_info(dev, "Using iommu direct mapping\n"); + dev_info(dev, "Using iommu %s mapping\n", + type == IOMMU_DOMAIN_DMA ? "dma" : "direct"); ret = 0; out: @@ -1948,6 +2184,18 @@ out: return ret; } +/* Request that a device is direct mapped by the IOMMU */ +int iommu_request_dm_for_dev(struct device *dev) +{ + return request_default_domain_for_dev(dev, IOMMU_DOMAIN_IDENTITY); +} + +/* Request that a device can't be direct mapped by the IOMMU */ +int iommu_request_dma_domain_for_dev(struct device *dev) +{ + return request_default_domain_for_dev(dev, IOMMU_DOMAIN_DMA); +} + const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode) { const struct iommu_ops *ops = NULL; diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 9a380c10655e..ad0098c0c87c 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -36,12 +36,16 @@ #define arm_iommu_detach_device(...) do {} while (0) #endif -#define IPMMU_CTX_MAX 8 +#define IPMMU_CTX_MAX 8U +#define IPMMU_CTX_INVALID -1 + +#define IPMMU_UTLB_MAX 48U struct ipmmu_features { bool use_ns_alias_offset; bool has_cache_leaf_nodes; unsigned int number_of_contexts; + unsigned int num_utlbs; bool setup_imbuscr; bool twobit_imttbcr_sl0; bool reserved_context; @@ -53,11 +57,11 @@ struct ipmmu_vmsa_device { struct iommu_device iommu; struct ipmmu_vmsa_device *root; const struct ipmmu_features *features; - unsigned int num_utlbs; unsigned int num_ctx; spinlock_t lock; /* Protects ctx and domains[] */ DECLARE_BITMAP(ctx, IPMMU_CTX_MAX); struct ipmmu_vmsa_domain *domains[IPMMU_CTX_MAX]; + s8 utlb_ctx[IPMMU_UTLB_MAX]; struct iommu_group *group; struct dma_iommu_mapping *mapping; @@ -186,7 +190,8 @@ static struct ipmmu_vmsa_device *to_ipmmu(struct device *dev) #define IMMAIR_ATTR_IDX_WBRWA 1 #define IMMAIR_ATTR_IDX_DEV 2 -#define IMEAR 0x0030 +#define IMELAR 0x0030 /* IMEAR on R-Car Gen2 */ +#define IMEUAR 0x0034 /* R-Car Gen3 only */ #define IMPCTR 0x0200 #define IMPSTR 0x0208 @@ -334,6 +339,7 @@ static void ipmmu_utlb_enable(struct ipmmu_vmsa_domain *domain, ipmmu_write(mmu, IMUCTR(utlb), IMUCTR_TTSEL_MMU(domain->context_id) | IMUCTR_FLUSH | IMUCTR_MMUEN); + mmu->utlb_ctx[utlb] = domain->context_id; } /* @@ -345,6 +351,7 @@ static void ipmmu_utlb_disable(struct ipmmu_vmsa_domain *domain, struct ipmmu_vmsa_device *mmu = domain->mmu; ipmmu_write(mmu, IMUCTR(utlb), 0); + mmu->utlb_ctx[utlb] = IPMMU_CTX_INVALID; } static void ipmmu_tlb_flush_all(void *cookie) @@ -403,52 +410,10 @@ static void ipmmu_domain_free_context(struct ipmmu_vmsa_device *mmu, spin_unlock_irqrestore(&mmu->lock, flags); } -static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) +static void ipmmu_domain_setup_context(struct ipmmu_vmsa_domain *domain) { u64 ttbr; u32 tmp; - int ret; - - /* - * Allocate the page table operations. - * - * VMSA states in section B3.6.3 "Control of Secure or Non-secure memory - * access, Long-descriptor format" that the NStable bit being set in a - * table descriptor will result in the NStable and NS bits of all child - * entries being ignored and considered as being set. The IPMMU seems - * not to comply with this, as it generates a secure access page fault - * if any of the NStable and NS bits isn't set when running in - * non-secure mode. - */ - domain->cfg.quirks = IO_PGTABLE_QUIRK_ARM_NS; - domain->cfg.pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K; - domain->cfg.ias = 32; - domain->cfg.oas = 40; - domain->cfg.tlb = &ipmmu_gather_ops; - domain->io_domain.geometry.aperture_end = DMA_BIT_MASK(32); - domain->io_domain.geometry.force_aperture = true; - /* - * TODO: Add support for coherent walk through CCI with DVM and remove - * cache handling. For now, delegate it to the io-pgtable code. - */ - domain->cfg.iommu_dev = domain->mmu->root->dev; - - /* - * Find an unused context. - */ - ret = ipmmu_domain_allocate_context(domain->mmu->root, domain); - if (ret < 0) - return ret; - - domain->context_id = ret; - - domain->iop = alloc_io_pgtable_ops(ARM_32_LPAE_S1, &domain->cfg, - domain); - if (!domain->iop) { - ipmmu_domain_free_context(domain->mmu->root, - domain->context_id); - return -EINVAL; - } /* TTBR0 */ ttbr = domain->cfg.arm_lpae_s1_cfg.ttbr[0]; @@ -494,7 +459,55 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) */ ipmmu_ctx_write_all(domain, IMCTR, IMCTR_INTEN | IMCTR_FLUSH | IMCTR_MMUEN); +} + +static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) +{ + int ret; + + /* + * Allocate the page table operations. + * + * VMSA states in section B3.6.3 "Control of Secure or Non-secure memory + * access, Long-descriptor format" that the NStable bit being set in a + * table descriptor will result in the NStable and NS bits of all child + * entries being ignored and considered as being set. The IPMMU seems + * not to comply with this, as it generates a secure access page fault + * if any of the NStable and NS bits isn't set when running in + * non-secure mode. + */ + domain->cfg.quirks = IO_PGTABLE_QUIRK_ARM_NS; + domain->cfg.pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K; + domain->cfg.ias = 32; + domain->cfg.oas = 40; + domain->cfg.tlb = &ipmmu_gather_ops; + domain->io_domain.geometry.aperture_end = DMA_BIT_MASK(32); + domain->io_domain.geometry.force_aperture = true; + /* + * TODO: Add support for coherent walk through CCI with DVM and remove + * cache handling. For now, delegate it to the io-pgtable code. + */ + domain->cfg.coherent_walk = false; + domain->cfg.iommu_dev = domain->mmu->root->dev; + + /* + * Find an unused context. + */ + ret = ipmmu_domain_allocate_context(domain->mmu->root, domain); + if (ret < 0) + return ret; + + domain->context_id = ret; + + domain->iop = alloc_io_pgtable_ops(ARM_32_LPAE_S1, &domain->cfg, + domain); + if (!domain->iop) { + ipmmu_domain_free_context(domain->mmu->root, + domain->context_id); + return -EINVAL; + } + ipmmu_domain_setup_context(domain); return 0; } @@ -522,14 +535,16 @@ static irqreturn_t ipmmu_domain_irq(struct ipmmu_vmsa_domain *domain) { const u32 err_mask = IMSTR_MHIT | IMSTR_ABORT | IMSTR_PF | IMSTR_TF; struct ipmmu_vmsa_device *mmu = domain->mmu; + unsigned long iova; u32 status; - u32 iova; status = ipmmu_ctx_read_root(domain, IMSTR); if (!(status & err_mask)) return IRQ_NONE; - iova = ipmmu_ctx_read_root(domain, IMEAR); + iova = ipmmu_ctx_read_root(domain, IMELAR); + if (IS_ENABLED(CONFIG_64BIT)) + iova |= (u64)ipmmu_ctx_read_root(domain, IMEUAR) << 32; /* * Clear the error status flags. Unlike traditional interrupt flag @@ -541,10 +556,10 @@ static irqreturn_t ipmmu_domain_irq(struct ipmmu_vmsa_domain *domain) /* Log fatal errors. */ if (status & IMSTR_MHIT) - dev_err_ratelimited(mmu->dev, "Multiple TLB hits @0x%08x\n", + dev_err_ratelimited(mmu->dev, "Multiple TLB hits @0x%lx\n", iova); if (status & IMSTR_ABORT) - dev_err_ratelimited(mmu->dev, "Page Table Walk Abort @0x%08x\n", + dev_err_ratelimited(mmu->dev, "Page Table Walk Abort @0x%lx\n", iova); if (!(status & (IMSTR_PF | IMSTR_TF))) @@ -560,7 +575,7 @@ static irqreturn_t ipmmu_domain_irq(struct ipmmu_vmsa_domain *domain) return IRQ_HANDLED; dev_err_ratelimited(mmu->dev, - "Unhandled fault: status 0x%08x iova 0x%08x\n", + "Unhandled fault: status 0x%08x iova 0x%lx\n", status, iova); return IRQ_HANDLED; @@ -885,27 +900,37 @@ error: static int ipmmu_add_device(struct device *dev) { + struct ipmmu_vmsa_device *mmu = to_ipmmu(dev); struct iommu_group *group; + int ret; /* * Only let through devices that have been verified in xlate() */ - if (!to_ipmmu(dev)) + if (!mmu) return -ENODEV; - if (IS_ENABLED(CONFIG_ARM) && !IS_ENABLED(CONFIG_IOMMU_DMA)) - return ipmmu_init_arm_mapping(dev); + if (IS_ENABLED(CONFIG_ARM) && !IS_ENABLED(CONFIG_IOMMU_DMA)) { + ret = ipmmu_init_arm_mapping(dev); + if (ret) + return ret; + } else { + group = iommu_group_get_for_dev(dev); + if (IS_ERR(group)) + return PTR_ERR(group); - group = iommu_group_get_for_dev(dev); - if (IS_ERR(group)) - return PTR_ERR(group); + iommu_group_put(group); + } - iommu_group_put(group); + iommu_device_link(&mmu->iommu, dev); return 0; } static void ipmmu_remove_device(struct device *dev) { + struct ipmmu_vmsa_device *mmu = to_ipmmu(dev); + + iommu_device_unlink(&mmu->iommu, dev); arm_iommu_detach_device(dev); iommu_group_remove_device(dev); } @@ -959,6 +984,7 @@ static const struct ipmmu_features ipmmu_features_default = { .use_ns_alias_offset = true, .has_cache_leaf_nodes = false, .number_of_contexts = 1, /* software only tested with one context */ + .num_utlbs = 32, .setup_imbuscr = true, .twobit_imttbcr_sl0 = false, .reserved_context = false, @@ -968,6 +994,7 @@ static const struct ipmmu_features ipmmu_features_rcar_gen3 = { .use_ns_alias_offset = false, .has_cache_leaf_nodes = true, .number_of_contexts = 8, + .num_utlbs = 48, .setup_imbuscr = false, .twobit_imttbcr_sl0 = true, .reserved_context = true, @@ -1020,10 +1047,10 @@ static int ipmmu_probe(struct platform_device *pdev) } mmu->dev = &pdev->dev; - mmu->num_utlbs = 48; spin_lock_init(&mmu->lock); bitmap_zero(mmu->ctx, IPMMU_CTX_MAX); mmu->features = of_device_get_match_data(&pdev->dev); + memset(mmu->utlb_ctx, IPMMU_CTX_INVALID, mmu->features->num_utlbs); dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40)); /* Map I/O memory and request IRQ. */ @@ -1047,8 +1074,7 @@ static int ipmmu_probe(struct platform_device *pdev) if (mmu->features->use_ns_alias_offset) mmu->base += IM_NS_ALIAS_OFFSET; - mmu->num_ctx = min_t(unsigned int, IPMMU_CTX_MAX, - mmu->features->number_of_contexts); + mmu->num_ctx = min(IPMMU_CTX_MAX, mmu->features->number_of_contexts); irq = platform_get_irq(pdev, 0); @@ -1140,10 +1166,48 @@ static int ipmmu_remove(struct platform_device *pdev) return 0; } +#ifdef CONFIG_PM_SLEEP +static int ipmmu_resume_noirq(struct device *dev) +{ + struct ipmmu_vmsa_device *mmu = dev_get_drvdata(dev); + unsigned int i; + + /* Reset root MMU and restore contexts */ + if (ipmmu_is_root(mmu)) { + ipmmu_device_reset(mmu); + + for (i = 0; i < mmu->num_ctx; i++) { + if (!mmu->domains[i]) + continue; + + ipmmu_domain_setup_context(mmu->domains[i]); + } + } + + /* Re-enable active micro-TLBs */ + for (i = 0; i < mmu->features->num_utlbs; i++) { + if (mmu->utlb_ctx[i] == IPMMU_CTX_INVALID) + continue; + + ipmmu_utlb_enable(mmu->root->domains[mmu->utlb_ctx[i]], i); + } + + return 0; +} + +static const struct dev_pm_ops ipmmu_pm = { + SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(NULL, ipmmu_resume_noirq) +}; +#define DEV_PM_OPS &ipmmu_pm +#else +#define DEV_PM_OPS NULL +#endif /* CONFIG_PM_SLEEP */ + static struct platform_driver ipmmu_driver = { .driver = { .name = "ipmmu-vmsa", .of_match_table = of_match_ptr(ipmmu_of_ids), + .pm = DEV_PM_OPS, }, .probe = ipmmu_probe, .remove = ipmmu_remove, diff --git a/drivers/iommu/omap-iommu-debug.c b/drivers/iommu/omap-iommu-debug.c index ff31bddba60a..8e19bfa94121 100644 --- a/drivers/iommu/omap-iommu-debug.c +++ b/drivers/iommu/omap-iommu-debug.c @@ -236,17 +236,6 @@ DEBUG_FOPS_RO(regs); DEFINE_SHOW_ATTRIBUTE(tlb); DEFINE_SHOW_ATTRIBUTE(pagetable); -#define __DEBUG_ADD_FILE(attr, mode) \ - { \ - struct dentry *dent; \ - dent = debugfs_create_file(#attr, mode, obj->debug_dir, \ - obj, &attr##_fops); \ - if (!dent) \ - goto err; \ - } - -#define DEBUG_ADD_FILE_RO(name) __DEBUG_ADD_FILE(name, 0400) - void omap_iommu_debugfs_add(struct omap_iommu *obj) { struct dentry *d; @@ -254,23 +243,13 @@ void omap_iommu_debugfs_add(struct omap_iommu *obj) if (!iommu_debug_root) return; - obj->debug_dir = debugfs_create_dir(obj->name, iommu_debug_root); - if (!obj->debug_dir) - return; + d = debugfs_create_dir(obj->name, iommu_debug_root); + obj->debug_dir = d; - d = debugfs_create_u32("nr_tlb_entries", 0400, obj->debug_dir, - &obj->nr_tlb_entries); - if (!d) - return; - - DEBUG_ADD_FILE_RO(regs); - DEBUG_ADD_FILE_RO(tlb); - DEBUG_ADD_FILE_RO(pagetable); - - return; - -err: - debugfs_remove_recursive(obj->debug_dir); + debugfs_create_u32("nr_tlb_entries", 0400, d, &obj->nr_tlb_entries); + debugfs_create_file("regs", 0400, d, obj, ®s_fops); + debugfs_create_file("tlb", 0400, d, obj, &tlb_fops); + debugfs_create_file("pagetable", 0400, d, obj, &pagetable_fops); } void omap_iommu_debugfs_remove(struct omap_iommu *obj) @@ -284,8 +263,6 @@ void omap_iommu_debugfs_remove(struct omap_iommu *obj) void __init omap_iommu_debugfs_init(void) { iommu_debug_root = debugfs_create_dir("omap_iommu", NULL); - if (!iommu_debug_root) - pr_err("can't create debugfs dir\n"); } void __exit omap_iommu_debugfs_exit(void) diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 62f9c61338a5..dfb961d8c21b 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -35,8 +35,7 @@ static const struct iommu_ops omap_iommu_ops; -#define to_iommu(dev) \ - ((struct omap_iommu *)platform_get_drvdata(to_platform_device(dev))) +#define to_iommu(dev) ((struct omap_iommu *)dev_get_drvdata(dev)) /* bitmap of the page sizes currently supported */ #define OMAP_IOMMU_PGSIZES (SZ_4K | SZ_64K | SZ_1M | SZ_16M) diff --git a/drivers/irqchip/irq-csky-mpintc.c b/drivers/irqchip/irq-csky-mpintc.c index c67c961ab6cc..a4c1aacba1ff 100644 --- a/drivers/irqchip/irq-csky-mpintc.c +++ b/drivers/irqchip/irq-csky-mpintc.c @@ -89,8 +89,19 @@ static int csky_irq_set_affinity(struct irq_data *d, if (cpu >= nr_cpu_ids) return -EINVAL; - /* Enable interrupt destination */ - cpu |= BIT(31); + /* + * The csky,mpintc could support auto irq deliver, but it only + * could deliver external irq to one cpu or all cpus. So it + * doesn't support deliver external irq to a group of cpus + * with cpu_mask. + * SO we only use auto deliver mode when affinity mask_val is + * equal to cpu_present_mask. + * + */ + if (cpumask_equal(mask_val, cpu_present_mask)) + cpu = 0; + else + cpu |= BIT(31); writel_relaxed(cpu, INTCG_base + INTCG_CIDSTR + offset); diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index d29b44b677e4..35500801dc2b 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -733,32 +733,43 @@ static void its_flush_cmd(struct its_node *its, struct its_cmd_block *cmd) } static int its_wait_for_range_completion(struct its_node *its, - struct its_cmd_block *from, + u64 prev_idx, struct its_cmd_block *to) { - u64 rd_idx, from_idx, to_idx; + u64 rd_idx, to_idx, linear_idx; u32 count = 1000000; /* 1s! */ - from_idx = its_cmd_ptr_to_offset(its, from); + /* Linearize to_idx if the command set has wrapped around */ to_idx = its_cmd_ptr_to_offset(its, to); + if (to_idx < prev_idx) + to_idx += ITS_CMD_QUEUE_SZ; + + linear_idx = prev_idx; while (1) { + s64 delta; + rd_idx = readl_relaxed(its->base + GITS_CREADR); - /* Direct case */ - if (from_idx < to_idx && rd_idx >= to_idx) - break; + /* + * Compute the read pointer progress, taking the + * potential wrap-around into account. + */ + delta = rd_idx - prev_idx; + if (rd_idx < prev_idx) + delta += ITS_CMD_QUEUE_SZ; - /* Wrapped case */ - if (from_idx >= to_idx && rd_idx >= to_idx && rd_idx < from_idx) + linear_idx += delta; + if (linear_idx >= to_idx) break; count--; if (!count) { - pr_err_ratelimited("ITS queue timeout (%llu %llu %llu)\n", - from_idx, to_idx, rd_idx); + pr_err_ratelimited("ITS queue timeout (%llu %llu)\n", + to_idx, linear_idx); return -1; } + prev_idx = rd_idx; cpu_relax(); udelay(1); } @@ -775,6 +786,7 @@ void name(struct its_node *its, \ struct its_cmd_block *cmd, *sync_cmd, *next_cmd; \ synctype *sync_obj; \ unsigned long flags; \ + u64 rd_idx; \ \ raw_spin_lock_irqsave(&its->lock, flags); \ \ @@ -796,10 +808,11 @@ void name(struct its_node *its, \ } \ \ post: \ + rd_idx = readl_relaxed(its->base + GITS_CREADR); \ next_cmd = its_post_commands(its); \ raw_spin_unlock_irqrestore(&its->lock, flags); \ \ - if (its_wait_for_range_completion(its, cmd, next_cmd)) \ + if (its_wait_for_range_completion(its, rd_idx, next_cmd)) \ pr_err_ratelimited("ITS cmd %ps failed\n", builder); \ } diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c index d32268cc1174..f3985469c221 100644 --- a/drivers/irqchip/irq-mips-gic.c +++ b/drivers/irqchip/irq-mips-gic.c @@ -388,7 +388,7 @@ static void gic_all_vpes_irq_cpu_online(struct irq_data *d) intr = GIC_HWIRQ_TO_LOCAL(d->hwirq); cd = irq_data_get_irq_chip_data(d); - write_gic_vl_map(intr, cd->map); + write_gic_vl_map(mips_gic_vx_map_reg(intr), cd->map); if (cd->mask) write_gic_vl_smask(BIT(intr)); } @@ -517,7 +517,7 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int virq, spin_lock_irqsave(&gic_lock, flags); for_each_online_cpu(cpu) { write_gic_vl_other(mips_cm_vp_id(cpu)); - write_gic_vo_map(intr, map); + write_gic_vo_map(mips_gic_vx_map_reg(intr), map); } spin_unlock_irqrestore(&gic_lock, flags); diff --git a/drivers/irqchip/irq-ti-sci-inta.c b/drivers/irqchip/irq-ti-sci-inta.c index 011b60a49e3f..ef4d625d2d80 100644 --- a/drivers/irqchip/irq-ti-sci-inta.c +++ b/drivers/irqchip/irq-ti-sci-inta.c @@ -159,9 +159,9 @@ static struct ti_sci_inta_vint_desc *ti_sci_inta_alloc_parent_irq(struct irq_dom parent_fwspec.param[1] = vint_desc->vint_id; parent_virq = irq_create_fwspec_mapping(&parent_fwspec); - if (parent_virq <= 0) { + if (parent_virq == 0) { kfree(vint_desc); - return ERR_PTR(parent_virq); + return ERR_PTR(-EINVAL); } vint_desc->parent_virq = parent_virq; diff --git a/drivers/md/dm-init.c b/drivers/md/dm-init.c index 352e803f566e..728733a514c7 100644 --- a/drivers/md/dm-init.c +++ b/drivers/md/dm-init.c @@ -140,8 +140,8 @@ static char __init *dm_parse_table_entry(struct dm_device *dev, char *str) return ERR_PTR(-EINVAL); } /* target_args */ - dev->target_args_array[n] = kstrndup(field[3], GFP_KERNEL, - DM_MAX_STR_SIZE); + dev->target_args_array[n] = kstrndup(field[3], DM_MAX_STR_SIZE, + GFP_KERNEL); if (!dev->target_args_array[n]) return ERR_PTR(-ENOMEM); @@ -272,10 +272,10 @@ static int __init dm_init_init(void) return 0; if (strlen(create) >= DM_MAX_STR_SIZE) { - DMERR("Argument is too big. Limit is %d\n", DM_MAX_STR_SIZE); + DMERR("Argument is too big. Limit is %d", DM_MAX_STR_SIZE); return -EINVAL; } - str = kstrndup(create, GFP_KERNEL, DM_MAX_STR_SIZE); + str = kstrndup(create, DM_MAX_STR_SIZE, GFP_KERNEL); if (!str) return -ENOMEM; @@ -283,7 +283,7 @@ static int __init dm_init_init(void) if (r) goto out; - DMINFO("waiting for all devices to be available before creating mapped devices\n"); + DMINFO("waiting for all devices to be available before creating mapped devices"); wait_for_device_probe(); list_for_each_entry(dev, &devices, list) { diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index 9ea2b0291f20..e549392e0ea5 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -60,6 +60,7 @@ #define WRITE_LOG_VERSION 1ULL #define WRITE_LOG_MAGIC 0x6a736677736872ULL +#define WRITE_LOG_SUPER_SECTOR 0 /* * The disk format for this is braindead simple. @@ -115,6 +116,7 @@ struct log_writes_c { struct list_head logging_blocks; wait_queue_head_t wait; struct task_struct *log_kthread; + struct completion super_done; }; struct pending_block { @@ -180,6 +182,14 @@ static void log_end_io(struct bio *bio) bio_put(bio); } +static void log_end_super(struct bio *bio) +{ + struct log_writes_c *lc = bio->bi_private; + + complete(&lc->super_done); + log_end_io(bio); +} + /* * Meant to be called if there is an error, it will free all the pages * associated with the block. @@ -215,7 +225,8 @@ static int write_metadata(struct log_writes_c *lc, void *entry, bio->bi_iter.bi_size = 0; bio->bi_iter.bi_sector = sector; bio_set_dev(bio, lc->logdev->bdev); - bio->bi_end_io = log_end_io; + bio->bi_end_io = (sector == WRITE_LOG_SUPER_SECTOR) ? + log_end_super : log_end_io; bio->bi_private = lc; bio_set_op_attrs(bio, REQ_OP_WRITE, 0); @@ -418,11 +429,18 @@ static int log_super(struct log_writes_c *lc) super.nr_entries = cpu_to_le64(lc->logged_entries); super.sectorsize = cpu_to_le32(lc->sectorsize); - if (write_metadata(lc, &super, sizeof(super), NULL, 0, 0)) { + if (write_metadata(lc, &super, sizeof(super), NULL, 0, + WRITE_LOG_SUPER_SECTOR)) { DMERR("Couldn't write super"); return -1; } + /* + * Super sector should be writen in-order, otherwise the + * nr_entries could be rewritten incorrectly by an old bio. + */ + wait_for_completion_io(&lc->super_done); + return 0; } @@ -531,6 +549,7 @@ static int log_writes_ctr(struct dm_target *ti, unsigned int argc, char **argv) INIT_LIST_HEAD(&lc->unflushed_blocks); INIT_LIST_HEAD(&lc->logging_blocks); init_waitqueue_head(&lc->wait); + init_completion(&lc->super_done); atomic_set(&lc->io_blocks, 0); atomic_set(&lc->pending_blocks, 0); diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 350cf0451456..ec8b27e20de3 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -561,7 +561,7 @@ static char **realloc_argv(unsigned *size, char **old_argv) gfp = GFP_NOIO; } argv = kmalloc_array(new_size, sizeof(*argv), gfp); - if (argv) { + if (argv && old_argv) { memcpy(argv, old_argv, *size * sizeof(*argv)); *size = new_size; } diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 720d06531aa3..ea24ff0612e3 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -235,8 +235,8 @@ static int verity_handle_err(struct dm_verity *v, enum verity_block_type type, BUG(); } - DMERR("%s: %s block %llu is corrupted", v->data_dev->name, type_str, - block); + DMERR_LIMIT("%s: %s block %llu is corrupted", v->data_dev->name, + type_str, block); if (v->corrupted_errs == DM_VERITY_MAX_CORRUPTED_ERRS) DMERR("%s: reached maximum errors", v->data_dev->name); diff --git a/drivers/mfd/stmfx.c b/drivers/mfd/stmfx.c index fe8efba2d45f..857991cb3cbb 100644 --- a/drivers/mfd/stmfx.c +++ b/drivers/mfd/stmfx.c @@ -204,12 +204,11 @@ static struct irq_chip stmfx_irq_chip = { static irqreturn_t stmfx_irq_handler(int irq, void *data) { struct stmfx *stmfx = data; - unsigned long n, pending; - u32 ack; - int ret; + unsigned long bits; + u32 pending, ack; + int n, ret; - ret = regmap_read(stmfx->map, STMFX_REG_IRQ_PENDING, - (u32 *)&pending); + ret = regmap_read(stmfx->map, STMFX_REG_IRQ_PENDING, &pending); if (ret) return IRQ_NONE; @@ -224,7 +223,8 @@ static irqreturn_t stmfx_irq_handler(int irq, void *data) return IRQ_NONE; } - for_each_set_bit(n, &pending, STMFX_REG_IRQ_SRC_MAX) + bits = pending; + for_each_set_bit(n, &bits, STMFX_REG_IRQ_SRC_MAX) handle_nested_irq(irq_find_mapping(stmfx->irq_domain, n)); return IRQ_HANDLED; diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index b5b68aa16eb3..6eb131292eb2 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -4662,7 +4662,6 @@ static int nand_detect(struct nand_chip *chip, struct nand_flash_dev *type) memorg = nanddev_get_memorg(&chip->base); memorg->planes_per_lun = 1; memorg->luns_per_target = 1; - memorg->ntargets = 1; /* * Reset the chip, required by some chips (e.g. Micron MT29FxGxxxxx) @@ -5027,6 +5026,8 @@ static int nand_scan_ident(struct nand_chip *chip, unsigned int maxchips, if (ret) return ret; + memorg->ntargets = maxchips; + /* Read the flash type */ ret = nand_detect(chip, table); if (ret) { diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index 73172d7f512b..0c2ec1c21434 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -1636,6 +1636,95 @@ static int sr2_bit7_quad_enable(struct spi_nor *nor) return 0; } +/** + * spi_nor_clear_sr_bp() - clear the Status Register Block Protection bits. + * @nor: pointer to a 'struct spi_nor' + * + * Read-modify-write function that clears the Block Protection bits from the + * Status Register without affecting other bits. + * + * Return: 0 on success, -errno otherwise. + */ +static int spi_nor_clear_sr_bp(struct spi_nor *nor) +{ + int ret; + u8 mask = SR_BP2 | SR_BP1 | SR_BP0; + + ret = read_sr(nor); + if (ret < 0) { + dev_err(nor->dev, "error while reading status register\n"); + return ret; + } + + write_enable(nor); + + ret = write_sr(nor, ret & ~mask); + if (ret) { + dev_err(nor->dev, "write to status register failed\n"); + return ret; + } + + ret = spi_nor_wait_till_ready(nor); + if (ret) + dev_err(nor->dev, "timeout while writing status register\n"); + return ret; +} + +/** + * spi_nor_spansion_clear_sr_bp() - clear the Status Register Block Protection + * bits on spansion flashes. + * @nor: pointer to a 'struct spi_nor' + * + * Read-modify-write function that clears the Block Protection bits from the + * Status Register without affecting other bits. The function is tightly + * coupled with the spansion_quad_enable() function. Both assume that the Write + * Register with 16 bits, together with the Read Configuration Register (35h) + * instructions are supported. + * + * Return: 0 on success, -errno otherwise. + */ +static int spi_nor_spansion_clear_sr_bp(struct spi_nor *nor) +{ + int ret; + u8 mask = SR_BP2 | SR_BP1 | SR_BP0; + u8 sr_cr[2] = {0}; + + /* Check current Quad Enable bit value. */ + ret = read_cr(nor); + if (ret < 0) { + dev_err(nor->dev, + "error while reading configuration register\n"); + return ret; + } + + /* + * When the configuration register Quad Enable bit is one, only the + * Write Status (01h) command with two data bytes may be used. + */ + if (ret & CR_QUAD_EN_SPAN) { + sr_cr[1] = ret; + + ret = read_sr(nor); + if (ret < 0) { + dev_err(nor->dev, + "error while reading status register\n"); + return ret; + } + sr_cr[0] = ret & ~mask; + + ret = write_sr_cr(nor, sr_cr); + if (ret) + dev_err(nor->dev, "16-bit write register failed\n"); + return ret; + } + + /* + * If the Quad Enable bit is zero, use the Write Status (01h) command + * with one data byte. + */ + return spi_nor_clear_sr_bp(nor); +} + /* Used when the "_ext_id" is two bytes at most */ #define INFO(_jedec_id, _ext_id, _sector_size, _n_sectors, _flags) \ .id = { \ @@ -3660,6 +3749,8 @@ static int spi_nor_init_params(struct spi_nor *nor, default: /* Kept only for backward compatibility purpose. */ params->quad_enable = spansion_quad_enable; + if (nor->clear_sr_bp) + nor->clear_sr_bp = spi_nor_spansion_clear_sr_bp; break; } @@ -3912,17 +4003,13 @@ static int spi_nor_init(struct spi_nor *nor) { int err; - /* - * Atmel, SST, Intel/Numonyx, and others serial NOR tend to power up - * with the software protection bits set - */ - if (JEDEC_MFR(nor->info) == SNOR_MFR_ATMEL || - JEDEC_MFR(nor->info) == SNOR_MFR_INTEL || - JEDEC_MFR(nor->info) == SNOR_MFR_SST || - nor->info->flags & SPI_NOR_HAS_LOCK) { - write_enable(nor); - write_sr(nor, 0); - spi_nor_wait_till_ready(nor); + if (nor->clear_sr_bp) { + err = nor->clear_sr_bp(nor); + if (err) { + dev_err(nor->dev, + "fail to clear block protection bits\n"); + return err; + } } if (nor->quad_enable) { @@ -4047,6 +4134,16 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, if (info->flags & SPI_S3AN) nor->flags |= SNOR_F_READY_XSR_RDY; + /* + * Atmel, SST, Intel/Numonyx, and others serial NOR tend to power up + * with the software protection bits set. + */ + if (JEDEC_MFR(nor->info) == SNOR_MFR_ATMEL || + JEDEC_MFR(nor->info) == SNOR_MFR_INTEL || + JEDEC_MFR(nor->info) == SNOR_MFR_SST || + nor->info->flags & SPI_NOR_HAS_LOCK) + nor->clear_sr_bp = spi_nor_clear_sr_bp; + /* Parse the Serial Flash Discoverable Parameters table. */ ret = spi_nor_init_params(nor, ¶ms); if (ret) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 407f4095a37a..799fc38c5c34 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4320,12 +4320,12 @@ void bond_setup(struct net_device *bond_dev) bond_dev->features |= NETIF_F_NETNS_LOCAL; bond_dev->hw_features = BOND_VLAN_FEATURES | - NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER; bond_dev->hw_features |= NETIF_F_GSO_ENCAP_ALL | NETIF_F_GSO_UDP_L4; bond_dev->features |= bond_dev->hw_features; + bond_dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; } /* Destroy a bonding device. diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index f46086fa9064..db91b213eae1 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -436,9 +436,9 @@ int ksz_switch_register(struct ksz_device *dev, return PTR_ERR(dev->reset_gpio); if (dev->reset_gpio) { - gpiod_set_value(dev->reset_gpio, 1); + gpiod_set_value_cansleep(dev->reset_gpio, 1); mdelay(10); - gpiod_set_value(dev->reset_gpio, 0); + gpiod_set_value_cansleep(dev->reset_gpio, 0); } mutex_init(&dev->dev_mutex); @@ -487,7 +487,7 @@ void ksz_switch_remove(struct ksz_device *dev) dsa_unregister_switch(dev->ds); if (dev->reset_gpio) - gpiod_set_value(dev->reset_gpio, 1); + gpiod_set_value_cansleep(dev->reset_gpio, 1); } EXPORT_SYMBOL(ksz_switch_remove); diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_filters.c b/drivers/net/ethernet/aquantia/atlantic/aq_filters.c index 18bc035da850..1fff462a4175 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_filters.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_filters.c @@ -843,9 +843,14 @@ int aq_filters_vlans_update(struct aq_nic_s *aq_nic) return err; if (aq_nic->ndev->features & NETIF_F_HW_VLAN_CTAG_FILTER) { - if (hweight < AQ_VLAN_MAX_FILTERS) - err = aq_hw_ops->hw_filter_vlan_ctrl(aq_hw, true); + if (hweight < AQ_VLAN_MAX_FILTERS && hweight > 0) { + err = aq_hw_ops->hw_filter_vlan_ctrl(aq_hw, + !(aq_nic->packet_filter & IFF_PROMISC)); + aq_nic->aq_nic_cfg.is_vlan_force_promisc = false; + } else { /* otherwise left in promiscue mode */ + aq_nic->aq_nic_cfg.is_vlan_force_promisc = true; + } } return err; @@ -866,6 +871,7 @@ int aq_filters_vlan_offload_off(struct aq_nic_s *aq_nic) if (unlikely(!aq_hw_ops->hw_filter_vlan_ctrl)) return -EOPNOTSUPP; + aq_nic->aq_nic_cfg.is_vlan_force_promisc = true; err = aq_hw_ops->hw_filter_vlan_ctrl(aq_hw, false); if (err) return err; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index 0da5e161ec5d..41172fbebddd 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -126,6 +126,7 @@ void aq_nic_cfg_start(struct aq_nic_s *self) cfg->link_speed_msk &= cfg->aq_hw_caps->link_speed_msk; cfg->features = cfg->aq_hw_caps->hw_features; + cfg->is_vlan_force_promisc = true; } static int aq_nic_update_link_status(struct aq_nic_s *self) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h index eb2e3c7c36f9..0f22f5d5691b 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h @@ -35,6 +35,7 @@ struct aq_nic_cfg_s { u32 flow_control; u32 link_speed_msk; u32 wol; + bool is_vlan_force_promisc; u16 is_mc_list_enabled; u16 mc_list_count; bool is_autoneg; diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index 1c7593d54035..13ac2661a473 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -778,8 +778,15 @@ static int hw_atl_b0_hw_packet_filter_set(struct aq_hw_s *self, unsigned int packet_filter) { unsigned int i = 0U; + struct aq_nic_cfg_s *cfg = self->aq_nic_cfg; + + hw_atl_rpfl2promiscuous_mode_en_set(self, + IS_FILTER_ENABLED(IFF_PROMISC)); + + hw_atl_rpf_vlan_prom_mode_en_set(self, + IS_FILTER_ENABLED(IFF_PROMISC) || + cfg->is_vlan_force_promisc); - hw_atl_rpfl2promiscuous_mode_en_set(self, IS_FILTER_ENABLED(IFF_PROMISC)); hw_atl_rpfl2multicast_flr_en_set(self, IS_FILTER_ENABLED(IFF_ALLMULTI), 0); @@ -788,13 +795,13 @@ static int hw_atl_b0_hw_packet_filter_set(struct aq_hw_s *self, hw_atl_rpfl2broadcast_en_set(self, IS_FILTER_ENABLED(IFF_BROADCAST)); - self->aq_nic_cfg->is_mc_list_enabled = IS_FILTER_ENABLED(IFF_MULTICAST); + cfg->is_mc_list_enabled = IS_FILTER_ENABLED(IFF_MULTICAST); for (i = HW_ATL_B0_MAC_MIN; i < HW_ATL_B0_MAC_MAX; ++i) hw_atl_rpfl2_uc_flr_en_set(self, - (self->aq_nic_cfg->is_mc_list_enabled && - (i <= self->aq_nic_cfg->mc_list_count)) ? - 1U : 0U, i); + (cfg->is_mc_list_enabled && + (i <= cfg->mc_list_count)) ? + 1U : 0U, i); return aq_hw_err_from_flags(self); } @@ -1086,7 +1093,7 @@ static int hw_atl_b0_hw_vlan_set(struct aq_hw_s *self, static int hw_atl_b0_hw_vlan_ctrl(struct aq_hw_s *self, bool enable) { /* set promisc in case of disabing the vland filter */ - hw_atl_rpf_vlan_prom_mode_en_set(self, !!!enable); + hw_atl_rpf_vlan_prom_mode_en_set(self, !enable); return aq_hw_err_from_flags(self); } diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 2375a13bb446..262a28ff81fc 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -4180,7 +4180,7 @@ static int macb_probe(struct platform_device *pdev) if (PTR_ERR(mac) == -EPROBE_DEFER) { err = -EPROBE_DEFER; goto err_out_free_netdev; - } else if (!IS_ERR(mac)) { + } else if (!IS_ERR_OR_NULL(mac)) { ether_addr_copy(bp->dev->dev_addr, mac); } else { macb_get_hwaddr(bp); diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c index 8a6785173228..492f8769ac12 100644 --- a/drivers/net/ethernet/emulex/benet/be_ethtool.c +++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c @@ -891,7 +891,7 @@ static void be_self_test(struct net_device *netdev, struct ethtool_test *test, u64 *data) { struct be_adapter *adapter = netdev_priv(netdev); - int status; + int status, cnt; u8 link_status = 0; if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) { @@ -902,6 +902,9 @@ static void be_self_test(struct net_device *netdev, struct ethtool_test *test, memset(data, 0, sizeof(u64) * ETHTOOL_TESTS_NUM); + /* check link status before offline tests */ + link_status = netif_carrier_ok(netdev); + if (test->flags & ETH_TEST_FL_OFFLINE) { if (be_loopback_test(adapter, BE_MAC_LOOPBACK, &data[0]) != 0) test->flags |= ETH_TEST_FL_FAILED; @@ -922,13 +925,26 @@ static void be_self_test(struct net_device *netdev, struct ethtool_test *test, test->flags |= ETH_TEST_FL_FAILED; } - status = be_cmd_link_status_query(adapter, NULL, &link_status, 0); - if (status) { - test->flags |= ETH_TEST_FL_FAILED; - data[4] = -1; - } else if (!link_status) { + /* link status was down prior to test */ + if (!link_status) { test->flags |= ETH_TEST_FL_FAILED; data[4] = 1; + return; + } + + for (cnt = 10; cnt; cnt--) { + status = be_cmd_link_status_query(adapter, NULL, &link_status, + 0); + if (status) { + test->flags |= ETH_TEST_FL_FAILED; + data[4] = -1; + break; + } + + if (link_status) + break; + + msleep_interruptible(500); } } diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c index 67f9bb6e941b..9b036c857b1d 100644 --- a/drivers/net/ethernet/sis/sis900.c +++ b/drivers/net/ethernet/sis/sis900.c @@ -1057,7 +1057,7 @@ sis900_open(struct net_device *net_dev) sis900_set_mode(sis_priv, HW_SPEED_10_MBPS, FDX_CAPABLE_HALF_SELECTED); /* Enable all known interrupts by setting the interrupt mask. */ - sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxIDLE); + sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxIDLE | TxDESC); sw32(cr, RxENA | sr32(cr)); sw32(ier, IE); @@ -1578,7 +1578,7 @@ static void sis900_tx_timeout(struct net_device *net_dev) sw32(txdp, sis_priv->tx_ring_dma); /* Enable all known interrupts by setting the interrupt mask. */ - sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxIDLE); + sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxIDLE | TxDESC); } /** @@ -1618,7 +1618,7 @@ sis900_start_xmit(struct sk_buff *skb, struct net_device *net_dev) spin_unlock_irqrestore(&sis_priv->lock, flags); return NETDEV_TX_OK; } - sis_priv->tx_ring[entry].cmdsts = (OWN | skb->len); + sis_priv->tx_ring[entry].cmdsts = (OWN | INTR | skb->len); sw32(cr, TxENA | sr32(cr)); sis_priv->cur_tx ++; @@ -1674,7 +1674,7 @@ static irqreturn_t sis900_interrupt(int irq, void *dev_instance) do { status = sr32(isr); - if ((status & (HIBERR|TxURN|TxERR|TxIDLE|RxORN|RxERR|RxOK)) == 0) + if ((status & (HIBERR|TxURN|TxERR|TxIDLE|TxDESC|RxORN|RxERR|RxOK)) == 0) /* nothing intresting happened */ break; handled = 1; @@ -1684,7 +1684,7 @@ static irqreturn_t sis900_interrupt(int irq, void *dev_instance) /* Rx interrupt */ sis900_rx(net_dev); - if (status & (TxURN | TxERR | TxIDLE)) + if (status & (TxURN | TxERR | TxIDLE | TxDESC)) /* Tx interrupt */ sis900_finish_xmit(net_dev); @@ -1896,8 +1896,8 @@ static void sis900_finish_xmit (struct net_device *net_dev) if (tx_status & OWN) { /* The packet is not transmitted yet (owned by hardware) ! - * Note: the interrupt is generated only when Tx Machine - * is idle, so this is an almost impossible case */ + * Note: this is an almost impossible condition + * in case of TxDESC ('descriptor interrupt') */ break; } @@ -2473,7 +2473,7 @@ static int sis900_resume(struct pci_dev *pci_dev) sis900_set_mode(sis_priv, HW_SPEED_10_MBPS, FDX_CAPABLE_HALF_SELECTED); /* Enable all known interrupts by setting the interrupt mask. */ - sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxIDLE); + sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxIDLE | TxDESC); sw32(cr, RxENA | sr32(cr)); sw32(ier, IE); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c index 2dcdf761d525..020159622559 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c @@ -112,7 +112,7 @@ static int adjust_systime(void __iomem *ioaddr, u32 sec, u32 nsec, * programmed with (2^32 – <new_sec_value>) */ if (gmac4) - sec = (100000000ULL - sec); + sec = -sec; value = readl(ioaddr + PTP_TCR); if (value & PTP_TCR_TSCTRLSSR) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 06dd51f47cfd..06358fe5b245 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2947,12 +2947,15 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) /* Manage tx mitigation */ tx_q->tx_count_frames += nfrags + 1; - if (priv->tx_coal_frames <= tx_q->tx_count_frames) { + if (likely(priv->tx_coal_frames > tx_q->tx_count_frames) && + !(priv->synopsys_id >= DWMAC_CORE_4_00 && + (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && + priv->hwts_tx_en)) { + stmmac_tx_timer_arm(priv, queue); + } else { + tx_q->tx_count_frames = 0; stmmac_set_tx_ic(priv, desc); priv->xstats.tx_set_ic_bit++; - tx_q->tx_count_frames = 0; - } else { - stmmac_tx_timer_arm(priv, queue); } skb_tx_timestamp(skb); @@ -3166,12 +3169,15 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) * element in case of no SG. */ tx_q->tx_count_frames += nfrags + 1; - if (priv->tx_coal_frames <= tx_q->tx_count_frames) { + if (likely(priv->tx_coal_frames > tx_q->tx_count_frames) && + !(priv->synopsys_id >= DWMAC_CORE_4_00 && + (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && + priv->hwts_tx_en)) { + stmmac_tx_timer_arm(priv, queue); + } else { + tx_q->tx_count_frames = 0; stmmac_set_tx_ic(priv, desc); priv->xstats.tx_set_ic_bit++; - tx_q->tx_count_frames = 0; - } else { - stmmac_tx_timer_arm(priv, queue); } skb_tx_timestamp(skb); diff --git a/drivers/net/ppp/ppp_mppe.c b/drivers/net/ppp/ppp_mppe.c index ff61dd8748de..66c8e65f6872 100644 --- a/drivers/net/ppp/ppp_mppe.c +++ b/drivers/net/ppp/ppp_mppe.c @@ -63,6 +63,7 @@ MODULE_AUTHOR("Frank Cusack <fcusack@fcusack.com>"); MODULE_DESCRIPTION("Point-to-Point Protocol Microsoft Point-to-Point Encryption support"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_ALIAS("ppp-compress-" __stringify(CI_MPPE)); +MODULE_SOFTDEP("pre: arc4"); MODULE_VERSION("1.0.2"); static unsigned int diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index b48006e7fa2f..36916bf51ee6 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -2128,12 +2128,12 @@ static void team_setup(struct net_device *dev) dev->features |= NETIF_F_NETNS_LOCAL; dev->hw_features = TEAM_VLAN_FEATURES | - NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER; dev->hw_features |= NETIF_F_GSO_ENCAP_ALL | NETIF_F_GSO_UDP_L4; dev->features |= dev->hw_features; + dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; } static int team_newlink(struct net *src_net, struct net_device *dev, diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index d080f8048e52..8b4ad10cf940 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1482,7 +1482,7 @@ static int qmi_wwan_probe(struct usb_interface *intf, * different. Ignore the current interface if the number of endpoints * equals the number for the diag interface (two). */ - info = (void *)&id->driver_info; + info = (void *)id->driver_info; if (info->data & QMI_WWAN_QUIRK_QUECTEL_DYNCFG) { if (desc->bNumEndpoints == 2) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 11b9525dff27..311b0cc6eb98 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -350,8 +350,8 @@ static int vrf_finish_output6(struct net *net, struct sock *sk, { struct dst_entry *dst = skb_dst(skb); struct net_device *dev = dst->dev; + const struct in6_addr *nexthop; struct neighbour *neigh; - struct in6_addr *nexthop; int ret; nf_reset(skb); diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 98af9ecd4a90..ca3793002e2f 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -859,7 +859,7 @@ static int pci_pm_suspend_noirq(struct device *dev) pci_dev->bus->self->skip_bus_pm = true; } - if (pci_dev->skip_bus_pm && !pm_suspend_via_firmware()) { + if (pci_dev->skip_bus_pm && pm_suspend_no_platform()) { dev_dbg(dev, "PCI PM: Skipped\n"); goto Fixup; } @@ -914,10 +914,10 @@ static int pci_pm_resume_noirq(struct device *dev) /* * In the suspend-to-idle case, devices left in D0 during suspend will * stay in D0, so it is not necessary to restore or update their - * configuration here and attempting to put them into D0 again may - * confuse some firmware, so avoid doing that. + * configuration here and attempting to put them into D0 again is + * pointless, so avoid doing that. */ - if (!pci_dev->skip_bus_pm || pm_suspend_via_firmware()) + if (!(pci_dev->skip_bus_pm && pm_suspend_no_platform())) pci_pm_default_resume_early(pci_dev); pci_fixup_device(pci_fixup_resume_early, pci_dev); diff --git a/drivers/pinctrl/mediatek/mtk-eint.c b/drivers/pinctrl/mediatek/mtk-eint.c index f464f8cd274b..7e526bcf5e0b 100644 --- a/drivers/pinctrl/mediatek/mtk-eint.c +++ b/drivers/pinctrl/mediatek/mtk-eint.c @@ -113,6 +113,8 @@ static void mtk_eint_mask(struct irq_data *d) void __iomem *reg = mtk_eint_get_offset(eint, d->hwirq, eint->regs->mask_set); + eint->cur_mask[d->hwirq >> 5] &= ~mask; + writel(mask, reg); } @@ -123,6 +125,8 @@ static void mtk_eint_unmask(struct irq_data *d) void __iomem *reg = mtk_eint_get_offset(eint, d->hwirq, eint->regs->mask_clr); + eint->cur_mask[d->hwirq >> 5] |= mask; + writel(mask, reg); if (eint->dual_edge[d->hwirq]) @@ -217,19 +221,6 @@ static void mtk_eint_chip_write_mask(const struct mtk_eint *eint, } } -static void mtk_eint_chip_read_mask(const struct mtk_eint *eint, - void __iomem *base, u32 *buf) -{ - int port; - void __iomem *reg; - - for (port = 0; port < eint->hw->ports; port++) { - reg = base + eint->regs->mask + (port << 2); - buf[port] = ~readl_relaxed(reg); - /* Mask is 0 when irq is enabled, and 1 when disabled. */ - } -} - static int mtk_eint_irq_request_resources(struct irq_data *d) { struct mtk_eint *eint = irq_data_get_irq_chip_data(d); @@ -318,7 +309,7 @@ static void mtk_eint_irq_handler(struct irq_desc *desc) struct irq_chip *chip = irq_desc_get_chip(desc); struct mtk_eint *eint = irq_desc_get_handler_data(desc); unsigned int status, eint_num; - int offset, index, virq; + int offset, mask_offset, index, virq; void __iomem *reg = mtk_eint_get_offset(eint, 0, eint->regs->stat); int dual_edge, start_level, curr_level; @@ -328,10 +319,24 @@ static void mtk_eint_irq_handler(struct irq_desc *desc) status = readl(reg); while (status) { offset = __ffs(status); + mask_offset = eint_num >> 5; index = eint_num + offset; virq = irq_find_mapping(eint->domain, index); status &= ~BIT(offset); + /* + * If we get an interrupt on pin that was only required + * for wake (but no real interrupt requested), mask the + * interrupt (as would mtk_eint_resume do anyway later + * in the resume sequence). + */ + if (eint->wake_mask[mask_offset] & BIT(offset) && + !(eint->cur_mask[mask_offset] & BIT(offset))) { + writel_relaxed(BIT(offset), reg - + eint->regs->stat + + eint->regs->mask_set); + } + dual_edge = eint->dual_edge[index]; if (dual_edge) { /* @@ -370,7 +375,6 @@ static void mtk_eint_irq_handler(struct irq_desc *desc) int mtk_eint_do_suspend(struct mtk_eint *eint) { - mtk_eint_chip_read_mask(eint, eint->base, eint->cur_mask); mtk_eint_chip_write_mask(eint, eint->base, eint->wake_mask); return 0; diff --git a/drivers/pinctrl/pinctrl-mcp23s08.c b/drivers/pinctrl/pinctrl-mcp23s08.c index 568ca96cdb6d..3a235487e38d 100644 --- a/drivers/pinctrl/pinctrl-mcp23s08.c +++ b/drivers/pinctrl/pinctrl-mcp23s08.c @@ -771,6 +771,10 @@ static int mcp23s08_probe_one(struct mcp23s08 *mcp, struct device *dev, if (ret < 0) goto fail; + ret = devm_gpiochip_add_data(dev, &mcp->chip, mcp); + if (ret < 0) + goto fail; + mcp->irq_controller = device_property_read_bool(dev, "interrupt-controller"); if (mcp->irq && mcp->irq_controller) { @@ -812,10 +816,6 @@ static int mcp23s08_probe_one(struct mcp23s08 *mcp, struct device *dev, goto fail; } - ret = devm_gpiochip_add_data(dev, &mcp->chip, mcp); - if (ret < 0) - goto fail; - if (one_regmap_config) { mcp->pinctrl_desc.name = devm_kasprintf(dev, GFP_KERNEL, "mcp23xxx-pinctrl.%d", raw_chip_address); diff --git a/drivers/pinctrl/pinctrl-ocelot.c b/drivers/pinctrl/pinctrl-ocelot.c index 3b4ca52d2456..fb76fb2e9ea5 100644 --- a/drivers/pinctrl/pinctrl-ocelot.c +++ b/drivers/pinctrl/pinctrl-ocelot.c @@ -396,7 +396,7 @@ static int ocelot_pin_function_idx(struct ocelot_pinctrl *info, return -1; } -#define REG(r, info, p) ((r) * (info)->stride + (4 * ((p) / 32))) +#define REG_ALT(msb, info, p) (OCELOT_GPIO_ALT0 * (info)->stride + 4 * ((msb) + ((info)->stride * ((p) / 32)))) static int ocelot_pinmux_set_mux(struct pinctrl_dev *pctldev, unsigned int selector, unsigned int group) @@ -412,19 +412,21 @@ static int ocelot_pinmux_set_mux(struct pinctrl_dev *pctldev, /* * f is encoded on two bits. - * bit 0 of f goes in BIT(pin) of ALT0, bit 1 of f goes in BIT(pin) of - * ALT1 + * bit 0 of f goes in BIT(pin) of ALT[0], bit 1 of f goes in BIT(pin) of + * ALT[1] * This is racy because both registers can't be updated at the same time * but it doesn't matter much for now. */ - regmap_update_bits(info->map, REG(OCELOT_GPIO_ALT0, info, pin->pin), + regmap_update_bits(info->map, REG_ALT(0, info, pin->pin), BIT(p), f << p); - regmap_update_bits(info->map, REG(OCELOT_GPIO_ALT1, info, pin->pin), + regmap_update_bits(info->map, REG_ALT(1, info, pin->pin), BIT(p), f << (p - 1)); return 0; } +#define REG(r, info, p) ((r) * (info)->stride + (4 * ((p) / 32))) + static int ocelot_gpio_set_direction(struct pinctrl_dev *pctldev, struct pinctrl_gpio_range *range, unsigned int pin, bool input) @@ -432,7 +434,7 @@ static int ocelot_gpio_set_direction(struct pinctrl_dev *pctldev, struct ocelot_pinctrl *info = pinctrl_dev_get_drvdata(pctldev); unsigned int p = pin % 32; - regmap_update_bits(info->map, REG(OCELOT_GPIO_OE, info, p), BIT(p), + regmap_update_bits(info->map, REG(OCELOT_GPIO_OE, info, pin), BIT(p), input ? 0 : BIT(p)); return 0; @@ -445,9 +447,9 @@ static int ocelot_gpio_request_enable(struct pinctrl_dev *pctldev, struct ocelot_pinctrl *info = pinctrl_dev_get_drvdata(pctldev); unsigned int p = offset % 32; - regmap_update_bits(info->map, REG(OCELOT_GPIO_ALT0, info, offset), + regmap_update_bits(info->map, REG_ALT(0, info, offset), BIT(p), 0); - regmap_update_bits(info->map, REG(OCELOT_GPIO_ALT1, info, offset), + regmap_update_bits(info->map, REG_ALT(1, info, offset), BIT(p), 0); return 0; diff --git a/drivers/scsi/vmw_pvscsi.c b/drivers/scsi/vmw_pvscsi.c index ecee4b3ff073..377b07b2feeb 100644 --- a/drivers/scsi/vmw_pvscsi.c +++ b/drivers/scsi/vmw_pvscsi.c @@ -763,6 +763,7 @@ static int pvscsi_queue_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd struct pvscsi_adapter *adapter = shost_priv(host); struct pvscsi_ctx *ctx; unsigned long flags; + unsigned char op; spin_lock_irqsave(&adapter->hw_lock, flags); @@ -775,13 +776,14 @@ static int pvscsi_queue_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd } cmd->scsi_done = done; + op = cmd->cmnd[0]; dev_dbg(&cmd->device->sdev_gendev, - "queued cmd %p, ctx %p, op=%x\n", cmd, ctx, cmd->cmnd[0]); + "queued cmd %p, ctx %p, op=%x\n", cmd, ctx, op); spin_unlock_irqrestore(&adapter->hw_lock, flags); - pvscsi_kick_io(adapter, cmd->cmnd[0]); + pvscsi_kick_io(adapter, op); return 0; } diff --git a/fs/afs/callback.c b/fs/afs/callback.c index d441bef72163..915010464572 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -275,9 +275,9 @@ static void afs_break_one_callback(struct afs_server *server, struct afs_super_info *as = AFS_FS_S(cbi->sb); struct afs_volume *volume = as->volume; - write_lock(&volume->cb_break_lock); + write_lock(&volume->cb_v_break_lock); volume->cb_v_break++; - write_unlock(&volume->cb_break_lock); + write_unlock(&volume->cb_v_break_lock); } else { data.volume = NULL; data.fid = *fid; diff --git a/fs/afs/inode.c b/fs/afs/inode.c index b42d9d09669c..18a50d4febcf 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -56,6 +56,16 @@ static noinline void dump_vnode(struct afs_vnode *vnode, struct afs_vnode *paren } /* + * Set the file size and block count. Estimate the number of 512 bytes blocks + * used, rounded up to nearest 1K for consistency with other AFS clients. + */ +static void afs_set_i_size(struct afs_vnode *vnode, u64 size) +{ + i_size_write(&vnode->vfs_inode, size); + vnode->vfs_inode.i_blocks = ((size + 1023) >> 10) << 1; +} + +/* * Initialise an inode from the vnode status. */ static int afs_inode_init_from_status(struct afs_vnode *vnode, struct key *key, @@ -124,12 +134,7 @@ static int afs_inode_init_from_status(struct afs_vnode *vnode, struct key *key, return afs_protocol_error(NULL, -EBADMSG, afs_eproto_file_type); } - /* - * Estimate 512 bytes blocks used, rounded up to nearest 1K - * for consistency with other AFS clients. - */ - inode->i_blocks = ((i_size_read(inode) + 1023) >> 10) << 1; - i_size_write(&vnode->vfs_inode, status->size); + afs_set_i_size(vnode, status->size); vnode->invalid_before = status->data_version; inode_set_iversion_raw(&vnode->vfs_inode, status->data_version); @@ -207,11 +212,13 @@ static void afs_apply_status(struct afs_fs_cursor *fc, if (expected_version && *expected_version != status->data_version) { - kdebug("vnode modified %llx on {%llx:%llu} [exp %llx] %s", - (unsigned long long) status->data_version, - vnode->fid.vid, vnode->fid.vnode, - (unsigned long long) *expected_version, - fc->type ? fc->type->name : "???"); + if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) + pr_warn("kAFS: vnode modified {%llx:%llu} %llx->%llx %s\n", + vnode->fid.vid, vnode->fid.vnode, + (unsigned long long)*expected_version, + (unsigned long long)status->data_version, + fc->type ? fc->type->name : "???"); + vnode->invalid_before = status->data_version; if (vnode->status.type == AFS_FTYPE_DIR) { if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags)) @@ -230,7 +237,7 @@ static void afs_apply_status(struct afs_fs_cursor *fc, if (data_changed) { inode_set_iversion_raw(&vnode->vfs_inode, status->data_version); - i_size_write(&vnode->vfs_inode, status->size); + afs_set_i_size(vnode, status->size); } } diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 8a67bf741880..7ee63526c6a2 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -109,10 +109,8 @@ struct afs_call { struct rxrpc_call *rxcall; /* RxRPC call handle */ struct key *key; /* security for this call */ struct afs_net *net; /* The network namespace */ - union { - struct afs_server *server; - struct afs_vlserver *vlserver; - }; + struct afs_server *server; /* The fileserver record if fs op (pins ref) */ + struct afs_vlserver *vlserver; /* The vlserver record if vl op */ struct afs_cb_interest *cbi; /* Callback interest for server used */ struct afs_vnode *lvnode; /* vnode being locked */ void *request; /* request data (first part) */ @@ -616,7 +614,7 @@ struct afs_volume { unsigned int servers_seq; /* Incremented each time ->servers changes */ unsigned cb_v_break; /* Break-everything counter. */ - rwlock_t cb_break_lock; + rwlock_t cb_v_break_lock; afs_voltype_t type; /* type of volume */ short error; diff --git a/fs/afs/volume.c b/fs/afs/volume.c index 08fdb3951c49..1a414300b654 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -43,6 +43,7 @@ static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params, atomic_set(&volume->usage, 1); INIT_LIST_HEAD(&volume->proc_link); rwlock_init(&volume->servers_lock); + rwlock_init(&volume->cb_v_break_lock); memcpy(volume->name, vldb->name, vldb->name_len + 1); slist = afs_alloc_server_list(params->cell, params->key, vldb, type_mask); @@ -2095,6 +2095,7 @@ SYSCALL_DEFINE6(io_pgetevents, struct __aio_sigset ksig = { NULL, }; sigset_t ksigmask, sigsaved; struct timespec64 ts; + bool interrupted; int ret; if (timeout && unlikely(get_timespec64(&ts, timeout))) @@ -2108,8 +2109,10 @@ SYSCALL_DEFINE6(io_pgetevents, return ret; ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL); - restore_user_sigmask(ksig.sigmask, &sigsaved); - if (signal_pending(current) && !ret) + + interrupted = signal_pending(current); + restore_user_sigmask(ksig.sigmask, &sigsaved, interrupted); + if (interrupted && !ret) ret = -ERESTARTNOHAND; return ret; @@ -2128,6 +2131,7 @@ SYSCALL_DEFINE6(io_pgetevents_time32, struct __aio_sigset ksig = { NULL, }; sigset_t ksigmask, sigsaved; struct timespec64 ts; + bool interrupted; int ret; if (timeout && unlikely(get_old_timespec32(&ts, timeout))) @@ -2142,8 +2146,10 @@ SYSCALL_DEFINE6(io_pgetevents_time32, return ret; ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL); - restore_user_sigmask(ksig.sigmask, &sigsaved); - if (signal_pending(current) && !ret) + + interrupted = signal_pending(current); + restore_user_sigmask(ksig.sigmask, &sigsaved, interrupted); + if (interrupted && !ret) ret = -ERESTARTNOHAND; return ret; @@ -2193,6 +2199,7 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents, struct __compat_aio_sigset ksig = { NULL, }; sigset_t ksigmask, sigsaved; struct timespec64 t; + bool interrupted; int ret; if (timeout && get_old_timespec32(&t, timeout)) @@ -2206,8 +2213,10 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents, return ret; ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL); - restore_user_sigmask(ksig.sigmask, &sigsaved); - if (signal_pending(current) && !ret) + + interrupted = signal_pending(current); + restore_user_sigmask(ksig.sigmask, &sigsaved, interrupted); + if (interrupted && !ret) ret = -ERESTARTNOHAND; return ret; @@ -2226,6 +2235,7 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents_time64, struct __compat_aio_sigset ksig = { NULL, }; sigset_t ksigmask, sigsaved; struct timespec64 t; + bool interrupted; int ret; if (timeout && get_timespec64(&t, timeout)) @@ -2239,8 +2249,10 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents_time64, return ret; ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL); - restore_user_sigmask(ksig.sigmask, &sigsaved); - if (signal_pending(current) && !ret) + + interrupted = signal_pending(current); + restore_user_sigmask(ksig.sigmask, &sigsaved, interrupted); + if (interrupted && !ret) ret = -ERESTARTNOHAND; return ret; diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 82a48e830018..e4b59e76afb0 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -856,9 +856,14 @@ err: static int load_flat_shared_library(int id, struct lib_info *libs) { + /* + * This is a fake bprm struct; only the members "buf", "file" and + * "filename" are actually used. + */ struct linux_binprm bprm; int res; char buf[16]; + loff_t pos = 0; memset(&bprm, 0, sizeof(bprm)); @@ -872,25 +877,11 @@ static int load_flat_shared_library(int id, struct lib_info *libs) if (IS_ERR(bprm.file)) return res; - bprm.cred = prepare_exec_creds(); - res = -ENOMEM; - if (!bprm.cred) - goto out; - - /* We don't really care about recalculating credentials at this point - * as we're past the point of no return and are dealing with shared - * libraries. - */ - bprm.called_set_creds = 1; + res = kernel_read(bprm.file, bprm.buf, BINPRM_BUF_SIZE, &pos); - res = prepare_binprm(&bprm); - - if (!res) + if (res >= 0) res = load_flat_file(&bprm, libs, id, NULL); - abort_creds(bprm.cred); - -out: allow_write_access(bprm.file); fput(bprm.file); diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 6af2d0d4a87a..c8a9b89b922d 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2121,9 +2121,10 @@ retry: if (inode && ceph_snap(inode) == CEPH_SNAPDIR) { dout("build_path path+%d: %p SNAPDIR\n", pos, temp); - } else if (stop_on_nosnap && inode && + } else if (stop_on_nosnap && inode && dentry != temp && ceph_snap(inode) == CEPH_NOSNAP) { spin_unlock(&temp->d_lock); + pos++; /* get rid of any prepended '/' */ break; } else { pos -= temp->d_name.len; diff --git a/fs/eventpoll.c b/fs/eventpoll.c index c6f513100cc9..4c74c768ae43 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -2325,7 +2325,7 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events, error = do_epoll_wait(epfd, events, maxevents, timeout); - restore_user_sigmask(sigmask, &sigsaved); + restore_user_sigmask(sigmask, &sigsaved, error == -EINTR); return error; } @@ -2350,7 +2350,7 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd, err = do_epoll_wait(epfd, events, maxevents, timeout); - restore_user_sigmask(sigmask, &sigsaved); + restore_user_sigmask(sigmask, &sigsaved, err == -EINTR); return err; } diff --git a/fs/inode.c b/fs/inode.c index df6542ec3b88..2bf21e2c90fc 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -362,7 +362,7 @@ EXPORT_SYMBOL(inc_nlink); static void __address_space_init_once(struct address_space *mapping) { - xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ); + xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ | XA_FLAGS_ACCOUNT); init_rwsem(&mapping->i_mmap_rwsem); INIT_LIST_HEAD(&mapping->private_list); spin_lock_init(&mapping->private_lock); diff --git a/fs/io_uring.c b/fs/io_uring.c index 86a2bd721900..4ef62a45045d 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -579,6 +579,7 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx, state->cur_req++; } + req->file = NULL; req->ctx = ctx; req->flags = 0; /* one is dropped after submission, the other at completion */ @@ -1801,10 +1802,8 @@ static int io_req_set_file(struct io_ring_ctx *ctx, const struct sqe_submit *s, req->sequence = ctx->cached_sq_head - 1; } - if (!io_op_needs_file(s->sqe)) { - req->file = NULL; + if (!io_op_needs_file(s->sqe)) return 0; - } if (flags & IOSQE_FIXED_FILE) { if (unlikely(!ctx->user_files || @@ -2201,11 +2200,12 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, } ret = wait_event_interruptible(ctx->wait, io_cqring_events(ring) >= min_events); - if (ret == -ERESTARTSYS) - ret = -EINTR; if (sig) - restore_user_sigmask(sig, &sigsaved); + restore_user_sigmask(sig, &sigsaved, ret == -ERESTARTSYS); + + if (ret == -ERESTARTSYS) + ret = -EINTR; return READ_ONCE(ring->r.head) == READ_ONCE(ring->r.tail) ? ret : 0; } diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index a809989807d6..19f856f45689 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -18,7 +18,7 @@ #define NFSDBG_FACILITY NFSDBG_PNFS_LD -static unsigned int dataserver_timeo = NFS_DEF_TCP_RETRANS; +static unsigned int dataserver_timeo = NFS_DEF_TCP_TIMEO; static unsigned int dataserver_retrans; static bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg); diff --git a/fs/proc/array.c b/fs/proc/array.c index 2edbb657f859..55180501b915 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -462,7 +462,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, * a program is not able to use ptrace(2) in that case. It is * safe because the task has stopped executing permanently. */ - if (permitted && (task->flags & PF_DUMPCORE)) { + if (permitted && (task->flags & (PF_EXITING|PF_DUMPCORE))) { if (try_get_task_stack(task)) { eip = KSTK_EIP(task); esp = KSTK_ESP(task); diff --git a/fs/proc/base.c b/fs/proc/base.c index 9c8ca6cd3ce4..255f6754c70d 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -3077,8 +3077,7 @@ static const struct file_operations proc_tgid_base_operations = { struct pid *tgid_pidfd_to_pid(const struct file *file) { - if (!d_is_dir(file->f_path.dentry) || - (file->f_op != &proc_tgid_base_operations)) + if (file->f_op != &proc_tgid_base_operations) return ERR_PTR(-EBADF); return proc_pid(file_inode(file)); diff --git a/fs/select.c b/fs/select.c index 6cbc9ff56ba0..a4d8f6e8b63c 100644 --- a/fs/select.c +++ b/fs/select.c @@ -758,10 +758,9 @@ static long do_pselect(int n, fd_set __user *inp, fd_set __user *outp, return ret; ret = core_sys_select(n, inp, outp, exp, to); + restore_user_sigmask(sigmask, &sigsaved, ret == -ERESTARTNOHAND); ret = poll_select_copy_remaining(&end_time, tsp, type, ret); - restore_user_sigmask(sigmask, &sigsaved); - return ret; } @@ -1106,8 +1105,7 @@ SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds, ret = do_sys_poll(ufds, nfds, to); - restore_user_sigmask(sigmask, &sigsaved); - + restore_user_sigmask(sigmask, &sigsaved, ret == -EINTR); /* We can restart this syscall, usually */ if (ret == -EINTR) ret = -ERESTARTNOHAND; @@ -1142,8 +1140,7 @@ SYSCALL_DEFINE5(ppoll_time32, struct pollfd __user *, ufds, unsigned int, nfds, ret = do_sys_poll(ufds, nfds, to); - restore_user_sigmask(sigmask, &sigsaved); - + restore_user_sigmask(sigmask, &sigsaved, ret == -EINTR); /* We can restart this syscall, usually */ if (ret == -EINTR) ret = -ERESTARTNOHAND; @@ -1350,10 +1347,9 @@ static long do_compat_pselect(int n, compat_ulong_t __user *inp, return ret; ret = compat_core_sys_select(n, inp, outp, exp, to); + restore_user_sigmask(sigmask, &sigsaved, ret == -ERESTARTNOHAND); ret = poll_select_copy_remaining(&end_time, tsp, type, ret); - restore_user_sigmask(sigmask, &sigsaved); - return ret; } @@ -1425,8 +1421,7 @@ COMPAT_SYSCALL_DEFINE5(ppoll_time32, struct pollfd __user *, ufds, ret = do_sys_poll(ufds, nfds, to); - restore_user_sigmask(sigmask, &sigsaved); - + restore_user_sigmask(sigmask, &sigsaved, ret == -EINTR); /* We can restart this syscall, usually */ if (ret == -EINTR) ret = -ERESTARTNOHAND; @@ -1461,8 +1456,7 @@ COMPAT_SYSCALL_DEFINE5(ppoll_time64, struct pollfd __user *, ufds, ret = do_sys_poll(ufds, nfds, to); - restore_user_sigmask(sigmask, &sigsaved); - + restore_user_sigmask(sigmask, &sigsaved, ret == -EINTR); /* We can restart this syscall, usually */ if (ret == -EINTR) ret = -ERESTARTNOHAND; diff --git a/include/dt-bindings/clock/g12a-clkc.h b/include/dt-bindings/clock/g12a-clkc.h index 82c9e0c020b2..e10470ed7c4f 100644 --- a/include/dt-bindings/clock/g12a-clkc.h +++ b/include/dt-bindings/clock/g12a-clkc.h @@ -130,7 +130,7 @@ #define CLKID_MALI_1_SEL 172 #define CLKID_MALI_1 174 #define CLKID_MALI 175 -#define CLKID_MPLL_5OM 177 +#define CLKID_MPLL_50M 177 #define CLKID_CPU_CLK 187 #define CLKID_PCIE_PLL 201 #define CLKID_VDEC_1 204 diff --git a/include/dt-bindings/clock/sifive-fu540-prci.h b/include/dt-bindings/clock/sifive-fu540-prci.h index 6a0b70a37d78..3b21d0522c91 100644 --- a/include/dt-bindings/clock/sifive-fu540-prci.h +++ b/include/dt-bindings/clock/sifive-fu540-prci.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ /* * Copyright (C) 2018-2019 SiFive, Inc. * Wesley Terpstra diff --git a/include/linux/device.h b/include/linux/device.h index 848fc71c6ba6..3b06b6b73cbb 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -42,6 +42,7 @@ struct iommu_ops; struct iommu_group; struct iommu_fwspec; struct dev_pin_info; +struct iommu_param; struct bus_attribute { struct attribute attr; @@ -960,6 +961,7 @@ struct dev_links_info { * device (i.e. the bus driver that discovered the device). * @iommu_group: IOMMU group the device belongs to. * @iommu_fwspec: IOMMU-specific properties supplied by firmware. + * @iommu_param: Per device generic IOMMU runtime data * * @offline_disabled: If set, the device is permanently online. * @offline: Set after successful invocation of bus type's .offline(). @@ -1053,6 +1055,7 @@ struct device { void (*release)(struct device *dev); struct iommu_group *iommu_group; struct iommu_fwspec *iommu_fwspec; + struct iommu_param *iommu_param; bool offline_disabled:1; bool offline:1; diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 6a8dd4af0147..f2ae8a006ff8 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -435,6 +435,12 @@ enum { #define VTD_FLAG_TRANS_PRE_ENABLED (1 << 0) #define VTD_FLAG_IRQ_REMAP_PRE_ENABLED (1 << 1) +extern int intel_iommu_sm; + +#define sm_supported(iommu) (intel_iommu_sm && ecap_smts((iommu)->ecap)) +#define pasid_supported(iommu) (sm_supported(iommu) && \ + ecap_pasid((iommu)->ecap)) + struct pasid_entry; struct pasid_state_entry; struct page_req_dsc; @@ -642,7 +648,6 @@ extern int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); extern int dmar_ir_support(void); -struct dmar_domain *get_valid_domain_for_dev(struct device *dev); void *alloc_pgtable_page(int node); void free_pgtable_page(void *vaddr); struct intel_iommu *domain_get_iommu(struct dmar_domain *domain); diff --git a/include/linux/intel-ish-client-if.h b/include/linux/intel-ish-client-if.h index 16255c2ca2f4..0d6b4bc191c5 100644 --- a/include/linux/intel-ish-client-if.h +++ b/include/linux/intel-ish-client-if.h @@ -103,6 +103,7 @@ void ishtp_put_device(struct ishtp_cl_device *cl_dev); void ishtp_get_device(struct ishtp_cl_device *cl_dev); void ishtp_set_drvdata(struct ishtp_cl_device *cl_device, void *data); void *ishtp_get_drvdata(struct ishtp_cl_device *cl_device); +struct ishtp_cl_device *ishtp_dev_to_cl_device(struct device *dev); int ishtp_register_event_cb(struct ishtp_cl_device *device, void (*read_cb)(struct ishtp_cl_device *)); struct ishtp_fw_client *ishtp_fw_cl_get_client(struct ishtp_device *dev, diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h index 54ffcc6a322e..94f047a8a845 100644 --- a/include/linux/intel-svm.h +++ b/include/linux/intel-svm.h @@ -49,7 +49,7 @@ struct svm_dev_ops { /** * intel_svm_bind_mm() - Bind the current process to a PASID - * @dev: Device to be granted acccess + * @dev: Device to be granted access * @pasid: Address for allocated PASID * @flags: Flags. Later for requesting supervisor mode, etc. * @ops: Callbacks to device driver diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 76969a564831..b5a450a3bb47 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -44,6 +44,8 @@ struct iommu_gather_ops { * tables. * @ias: Input address (iova) size, in bits. * @oas: Output address (paddr) size, in bits. + * @coherent_walk A flag to indicate whether or not page table walks made + * by the IOMMU are coherent with the CPU caches. * @tlb: TLB management callbacks for this set of tables. * @iommu_dev: The device representing the DMA configuration for the * page table walker. @@ -68,11 +70,6 @@ struct io_pgtable_cfg { * when the SoC is in "4GB mode" and they can only access the high * remap of DRAM (0x1_00000000 to 0x1_ffffffff). * - * IO_PGTABLE_QUIRK_NO_DMA: Guarantees that the tables will only ever - * be accessed by a fully cache-coherent IOMMU or CPU (e.g. for a - * software-emulated IOMMU), such that pagetable updates need not - * be treated as explicit DMA data. - * * IO_PGTABLE_QUIRK_NON_STRICT: Skip issuing synchronous leaf TLBIs * on unmap, for DMA domains using the flush queue mechanism for * delayed invalidation. @@ -81,12 +78,12 @@ struct io_pgtable_cfg { #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1) #define IO_PGTABLE_QUIRK_TLBI_ON_MAP BIT(2) #define IO_PGTABLE_QUIRK_ARM_MTK_4GB BIT(3) - #define IO_PGTABLE_QUIRK_NO_DMA BIT(4) - #define IO_PGTABLE_QUIRK_NON_STRICT BIT(5) + #define IO_PGTABLE_QUIRK_NON_STRICT BIT(4) unsigned long quirks; unsigned long pgsize_bitmap; unsigned int ias; unsigned int oas; + bool coherent_walk; const struct iommu_gather_ops *tlb; struct device *iommu_dev; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index e552c3b63f6f..fdc355ccc570 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -13,6 +13,7 @@ #include <linux/errno.h> #include <linux/err.h> #include <linux/of.h> +#include <uapi/linux/iommu.h> #define IOMMU_READ (1 << 0) #define IOMMU_WRITE (1 << 1) @@ -29,6 +30,12 @@ * if the IOMMU page table format is equivalent. */ #define IOMMU_PRIV (1 << 5) +/* + * Non-coherent masters on few Qualcomm SoCs can use this page protection flag + * to set correct cacheability attributes to use an outer level of cache - + * last level cache, aka system cache. + */ +#define IOMMU_QCOM_SYS_CACHE (1 << 6) struct iommu_ops; struct iommu_group; @@ -37,6 +44,7 @@ struct device; struct iommu_domain; struct notifier_block; struct iommu_sva; +struct iommu_fault_event; /* iommu fault flags */ #define IOMMU_FAULT_READ 0x0 @@ -46,6 +54,7 @@ typedef int (*iommu_fault_handler_t)(struct iommu_domain *, struct device *, unsigned long, int, void *); typedef int (*iommu_mm_exit_handler_t)(struct device *dev, struct iommu_sva *, void *); +typedef int (*iommu_dev_fault_handler_t)(struct iommu_fault *, void *); struct iommu_domain_geometry { dma_addr_t aperture_start; /* First address that can be mapped */ @@ -123,6 +132,12 @@ enum iommu_attr { enum iommu_resv_type { /* Memory regions which must be mapped 1:1 at all times */ IOMMU_RESV_DIRECT, + /* + * Memory regions which are advertised to be 1:1 but are + * commonly considered relaxable in some conditions, + * for instance in device assignment use case (USB, Graphics) + */ + IOMMU_RESV_DIRECT_RELAXABLE, /* Arbitrary "never map this or give it to a device" address ranges */ IOMMU_RESV_RESERVED, /* Hardware MSI region (untranslated) */ @@ -212,6 +227,7 @@ struct iommu_sva_ops { * @sva_bind: Bind process address space to device * @sva_unbind: Unbind process address space from device * @sva_get_pasid: Get PASID associated to a SVA handle + * @page_response: handle page request response * @pgsize_bitmap: bitmap of all possible supported page sizes */ struct iommu_ops { @@ -272,6 +288,10 @@ struct iommu_ops { void (*sva_unbind)(struct iommu_sva *handle); int (*sva_get_pasid)(struct iommu_sva *handle); + int (*page_response)(struct device *dev, + struct iommu_fault_event *evt, + struct iommu_page_response *msg); + unsigned long pgsize_bitmap; }; @@ -289,6 +309,48 @@ struct iommu_device { struct device *dev; }; +/** + * struct iommu_fault_event - Generic fault event + * + * Can represent recoverable faults such as a page requests or + * unrecoverable faults such as DMA or IRQ remapping faults. + * + * @fault: fault descriptor + * @list: pending fault event list, used for tracking responses + */ +struct iommu_fault_event { + struct iommu_fault fault; + struct list_head list; +}; + +/** + * struct iommu_fault_param - per-device IOMMU fault data + * @handler: Callback function to handle IOMMU faults at device level + * @data: handler private data + * @faults: holds the pending faults which needs response + * @lock: protect pending faults list + */ +struct iommu_fault_param { + iommu_dev_fault_handler_t handler; + void *data; + struct list_head faults; + struct mutex lock; +}; + +/** + * struct iommu_param - collection of per-device IOMMU data + * + * @fault_param: IOMMU detected device fault reporting data + * + * TODO: migrate other per device data pointers under iommu_dev_data, e.g. + * struct iommu_group *iommu_group; + * struct iommu_fwspec *iommu_fwspec; + */ +struct iommu_param { + struct mutex lock; + struct iommu_fault_param *fault_param; +}; + int iommu_device_register(struct iommu_device *iommu); void iommu_device_unregister(struct iommu_device *iommu); int iommu_device_sysfs_add(struct iommu_device *iommu, @@ -350,6 +412,7 @@ extern void iommu_set_fault_handler(struct iommu_domain *domain, extern void iommu_get_resv_regions(struct device *dev, struct list_head *list); extern void iommu_put_resv_regions(struct device *dev, struct list_head *list); extern int iommu_request_dm_for_dev(struct device *dev); +extern int iommu_request_dma_domain_for_dev(struct device *dev); extern struct iommu_resv_region * iommu_alloc_resv_region(phys_addr_t start, size_t length, int prot, enum iommu_resv_type type); @@ -378,6 +441,17 @@ extern int iommu_group_register_notifier(struct iommu_group *group, struct notifier_block *nb); extern int iommu_group_unregister_notifier(struct iommu_group *group, struct notifier_block *nb); +extern int iommu_register_device_fault_handler(struct device *dev, + iommu_dev_fault_handler_t handler, + void *data); + +extern int iommu_unregister_device_fault_handler(struct device *dev); + +extern int iommu_report_device_fault(struct device *dev, + struct iommu_fault_event *evt); +extern int iommu_page_response(struct device *dev, + struct iommu_page_response *msg); + extern int iommu_group_id(struct iommu_group *group); extern struct iommu_group *iommu_group_get_for_dev(struct device *dev); extern struct iommu_domain *iommu_group_default_domain(struct iommu_group *); @@ -492,6 +566,7 @@ struct iommu_ops {}; struct iommu_group {}; struct iommu_fwspec {}; struct iommu_device {}; +struct iommu_fault_param {}; static inline bool iommu_present(struct bus_type *bus) { @@ -614,6 +689,11 @@ static inline int iommu_request_dm_for_dev(struct device *dev) return -ENODEV; } +static inline int iommu_request_dma_domain_for_dev(struct device *dev) +{ + return -ENODEV; +} + static inline int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group) { @@ -685,6 +765,31 @@ static inline int iommu_group_unregister_notifier(struct iommu_group *group, return 0; } +static inline +int iommu_register_device_fault_handler(struct device *dev, + iommu_dev_fault_handler_t handler, + void *data) +{ + return -ENODEV; +} + +static inline int iommu_unregister_device_fault_handler(struct device *dev) +{ + return 0; +} + +static inline +int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt) +{ + return -ENODEV; +} + +static inline int iommu_page_response(struct device *dev, + struct iommu_page_response *msg) +{ + return -ENODEV; +} + static inline int iommu_group_id(struct iommu_group *group) { return -ENODEV; diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 74b1ee9027f5..0c9bc231107f 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -93,7 +93,8 @@ #define DIV_ROUND_DOWN_ULL(ll, d) \ ({ unsigned long long _tmp = (ll); do_div(_tmp, d); _tmp; }) -#define DIV_ROUND_UP_ULL(ll, d) DIV_ROUND_DOWN_ULL((ll) + (d) - 1, (d)) +#define DIV_ROUND_UP_ULL(ll, d) \ + DIV_ROUND_DOWN_ULL((unsigned long long)(ll) + (d) - 1, (d)) #if BITS_PER_LONG == 32 # define DIV_ROUND_UP_SECTOR_T(ll,d) DIV_ROUND_UP_ULL(ll, d) diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index b3d360b0ee3d..9f57cdfcc93d 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -373,6 +373,8 @@ struct flash_info; * @flash_unlock: [FLASH-SPECIFIC] unlock a region of the SPI NOR * @flash_is_locked: [FLASH-SPECIFIC] check if a region of the SPI NOR is * @quad_enable: [FLASH-SPECIFIC] enables SPI NOR quad mode + * @clear_sr_bp: [FLASH-SPECIFIC] clears the Block Protection Bits from + * the SPI NOR Status Register. * completely locked * @priv: the private data */ @@ -410,6 +412,7 @@ struct spi_nor { int (*flash_unlock)(struct spi_nor *nor, loff_t ofs, uint64_t len); int (*flash_is_locked)(struct spi_nor *nor, loff_t ofs, uint64_t len); int (*quad_enable)(struct spi_nor *nor); + int (*clear_sr_bp)(struct spi_nor *nor); void *priv; }; diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 0ab99c7b652d..2bca72f3028b 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -241,6 +241,7 @@ struct perf_event; #define PERF_PMU_CAP_NO_INTERRUPT 0x01 #define PERF_PMU_CAP_NO_NMI 0x02 #define PERF_PMU_CAP_AUX_NO_SG 0x04 +#define PERF_PMU_CAP_EXTENDED_REGS 0x08 #define PERF_PMU_CAP_EXCLUSIVE 0x10 #define PERF_PMU_CAP_ITRACE 0x20 #define PERF_PMU_CAP_HETEROGENEOUS_CPUS 0x40 diff --git a/include/linux/perf_regs.h b/include/linux/perf_regs.h index 476747456bca..2d12e97d5e7b 100644 --- a/include/linux/perf_regs.h +++ b/include/linux/perf_regs.h @@ -11,6 +11,11 @@ struct perf_regs { #ifdef CONFIG_HAVE_PERF_REGS #include <asm/perf_regs.h> + +#ifndef PERF_REG_EXTENDED_MASK +#define PERF_REG_EXTENDED_MASK 0 +#endif + u64 perf_reg_value(struct pt_regs *regs, int idx); int perf_reg_validate(u64 mask); u64 perf_reg_abi(struct task_struct *task); @@ -18,6 +23,9 @@ void perf_get_regs_user(struct perf_regs *regs_user, struct pt_regs *regs, struct pt_regs *regs_user_copy); #else + +#define PERF_REG_EXTENDED_MASK 0 + static inline u64 perf_reg_value(struct pt_regs *regs, int idx) { return 0; diff --git a/include/linux/pfn_t.h b/include/linux/pfn_t.h index 7bb77850c65a..3c202a11a79e 100644 --- a/include/linux/pfn_t.h +++ b/include/linux/pfn_t.h @@ -68,7 +68,7 @@ static inline phys_addr_t pfn_t_to_phys(pfn_t pfn) static inline void *pfn_t_to_virt(pfn_t pfn) { - if (pfn_t_has_page(pfn)) + if (pfn_t_has_page(pfn) && !is_device_private_page(pfn_t_to_page(pfn))) return __va(pfn_t_to_phys(pfn)); return NULL; } diff --git a/include/linux/signal.h b/include/linux/signal.h index 9702016734b1..78c2bb376954 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -276,7 +276,7 @@ extern int sigprocmask(int, sigset_t *, sigset_t *); extern int set_user_sigmask(const sigset_t __user *usigmask, sigset_t *set, sigset_t *oldset, size_t sigsetsize); extern void restore_user_sigmask(const void __user *usigmask, - sigset_t *sigsaved); + sigset_t *sigsaved, bool interrupted); extern void set_current_blocked(sigset_t *); extern void __set_current_blocked(const sigset_t *); extern int show_unhandled_signals; diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 8594001e8be8..f0d262ad7b78 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -209,8 +209,9 @@ extern int suspend_valid_only_mem(suspend_state_t state); extern unsigned int pm_suspend_global_flags; -#define PM_SUSPEND_FLAG_FW_SUSPEND (1 << 0) -#define PM_SUSPEND_FLAG_FW_RESUME (1 << 1) +#define PM_SUSPEND_FLAG_FW_SUSPEND BIT(0) +#define PM_SUSPEND_FLAG_FW_RESUME BIT(1) +#define PM_SUSPEND_FLAG_NO_PLATFORM BIT(2) static inline void pm_suspend_clear_flags(void) { @@ -227,6 +228,11 @@ static inline void pm_set_resume_via_firmware(void) pm_suspend_global_flags |= PM_SUSPEND_FLAG_FW_RESUME; } +static inline void pm_set_suspend_no_platform(void) +{ + pm_suspend_global_flags |= PM_SUSPEND_FLAG_NO_PLATFORM; +} + /** * pm_suspend_via_firmware - Check if platform firmware will suspend the system. * @@ -268,6 +274,22 @@ static inline bool pm_resume_via_firmware(void) return !!(pm_suspend_global_flags & PM_SUSPEND_FLAG_FW_RESUME); } +/** + * pm_suspend_no_platform - Check if platform may change device power states. + * + * To be called during system-wide power management transitions to sleep states + * or during the subsequent system-wide transitions back to the working state. + * + * Return 'true' if the power states of devices remain under full control of the + * kernel throughout the system-wide suspend and resume cycle in progress (that + * is, if a device is put into a certain power state during suspend, it can be + * expected to remain in that state during resume). + */ +static inline bool pm_suspend_no_platform(void) +{ + return !!(pm_suspend_global_flags & PM_SUSPEND_FLAG_NO_PLATFORM); +} + /* Suspend-to-idle state machnine. */ enum s2idle_states { S2IDLE_STATE_NONE, /* Not suspended/suspending. */ diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 0e01e6129145..5921599b6dc4 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -265,6 +265,7 @@ enum xa_lock_type { #define XA_FLAGS_TRACK_FREE ((__force gfp_t)4U) #define XA_FLAGS_ZERO_BUSY ((__force gfp_t)8U) #define XA_FLAGS_ALLOC_WRAPPED ((__force gfp_t)16U) +#define XA_FLAGS_ACCOUNT ((__force gfp_t)32U) #define XA_FLAGS_MARK(mark) ((__force gfp_t)((1U << __GFP_BITS_SHIFT) << \ (__force unsigned)(mark))) diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 4790beaa86e0..ee7405e759ba 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -262,8 +262,8 @@ static inline bool ip6_sk_ignore_df(const struct sock *sk) inet6_sk(sk)->pmtudisc == IPV6_PMTUDISC_OMIT; } -static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt, - struct in6_addr *daddr) +static inline const struct in6_addr *rt6_nexthop(const struct rt6_info *rt, + const struct in6_addr *daddr) { if (rt->rt6i_flags & RTF_GATEWAY) return &rt->rt6i_gateway; diff --git a/include/net/route.h b/include/net/route.h index 065b47754f05..55ff71ffb796 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -221,6 +221,7 @@ void ip_rt_get_source(u8 *src, struct sk_buff *skb, struct rtable *rt); struct rtable *rt_dst_alloc(struct net_device *dev, unsigned int flags, u16 type, bool nopolicy, bool noxfrm, bool will_cache); +struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt); struct in_ifaddr; void fib_add_ifaddr(struct in_ifaddr *); diff --git a/include/net/tls.h b/include/net/tls.h index 4a55ce6a303f..53d96bca220d 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -373,21 +373,6 @@ static inline bool tls_is_partially_sent_record(struct tls_context *ctx) return !!ctx->partially_sent_record; } -static inline int tls_complete_pending_work(struct sock *sk, - struct tls_context *ctx, - int flags, long *timeo) -{ - int rc = 0; - - if (unlikely(sk->sk_write_pending)) - rc = wait_on_pending_writer(sk, timeo); - - if (!rc && tls_is_partially_sent_record(ctx)) - rc = tls_push_partial_record(sk, ctx, flags); - - return rc; -} - static inline bool tls_is_pending_open_record(struct tls_context *tls_ctx) { return tls_ctx->pending_open_record_frags; diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h new file mode 100644 index 000000000000..fc00c5d4741b --- /dev/null +++ b/include/uapi/linux/iommu.h @@ -0,0 +1,155 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * IOMMU user API definitions + */ + +#ifndef _UAPI_IOMMU_H +#define _UAPI_IOMMU_H + +#include <linux/types.h> + +#define IOMMU_FAULT_PERM_READ (1 << 0) /* read */ +#define IOMMU_FAULT_PERM_WRITE (1 << 1) /* write */ +#define IOMMU_FAULT_PERM_EXEC (1 << 2) /* exec */ +#define IOMMU_FAULT_PERM_PRIV (1 << 3) /* privileged */ + +/* Generic fault types, can be expanded IRQ remapping fault */ +enum iommu_fault_type { + IOMMU_FAULT_DMA_UNRECOV = 1, /* unrecoverable fault */ + IOMMU_FAULT_PAGE_REQ, /* page request fault */ +}; + +enum iommu_fault_reason { + IOMMU_FAULT_REASON_UNKNOWN = 0, + + /* Could not access the PASID table (fetch caused external abort) */ + IOMMU_FAULT_REASON_PASID_FETCH, + + /* PASID entry is invalid or has configuration errors */ + IOMMU_FAULT_REASON_BAD_PASID_ENTRY, + + /* + * PASID is out of range (e.g. exceeds the maximum PASID + * supported by the IOMMU) or disabled. + */ + IOMMU_FAULT_REASON_PASID_INVALID, + + /* + * An external abort occurred fetching (or updating) a translation + * table descriptor + */ + IOMMU_FAULT_REASON_WALK_EABT, + + /* + * Could not access the page table entry (Bad address), + * actual translation fault + */ + IOMMU_FAULT_REASON_PTE_FETCH, + + /* Protection flag check failed */ + IOMMU_FAULT_REASON_PERMISSION, + + /* access flag check failed */ + IOMMU_FAULT_REASON_ACCESS, + + /* Output address of a translation stage caused Address Size fault */ + IOMMU_FAULT_REASON_OOR_ADDRESS, +}; + +/** + * struct iommu_fault_unrecoverable - Unrecoverable fault data + * @reason: reason of the fault, from &enum iommu_fault_reason + * @flags: parameters of this fault (IOMMU_FAULT_UNRECOV_* values) + * @pasid: Process Address Space ID + * @perm: requested permission access using by the incoming transaction + * (IOMMU_FAULT_PERM_* values) + * @addr: offending page address + * @fetch_addr: address that caused a fetch abort, if any + */ +struct iommu_fault_unrecoverable { + __u32 reason; +#define IOMMU_FAULT_UNRECOV_PASID_VALID (1 << 0) +#define IOMMU_FAULT_UNRECOV_ADDR_VALID (1 << 1) +#define IOMMU_FAULT_UNRECOV_FETCH_ADDR_VALID (1 << 2) + __u32 flags; + __u32 pasid; + __u32 perm; + __u64 addr; + __u64 fetch_addr; +}; + +/** + * struct iommu_fault_page_request - Page Request data + * @flags: encodes whether the corresponding fields are valid and whether this + * is the last page in group (IOMMU_FAULT_PAGE_REQUEST_* values) + * @pasid: Process Address Space ID + * @grpid: Page Request Group Index + * @perm: requested page permissions (IOMMU_FAULT_PERM_* values) + * @addr: page address + * @private_data: device-specific private information + */ +struct iommu_fault_page_request { +#define IOMMU_FAULT_PAGE_REQUEST_PASID_VALID (1 << 0) +#define IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE (1 << 1) +#define IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA (1 << 2) + __u32 flags; + __u32 pasid; + __u32 grpid; + __u32 perm; + __u64 addr; + __u64 private_data[2]; +}; + +/** + * struct iommu_fault - Generic fault data + * @type: fault type from &enum iommu_fault_type + * @padding: reserved for future use (should be zero) + * @event: fault event, when @type is %IOMMU_FAULT_DMA_UNRECOV + * @prm: Page Request message, when @type is %IOMMU_FAULT_PAGE_REQ + * @padding2: sets the fault size to allow for future extensions + */ +struct iommu_fault { + __u32 type; + __u32 padding; + union { + struct iommu_fault_unrecoverable event; + struct iommu_fault_page_request prm; + __u8 padding2[56]; + }; +}; + +/** + * enum iommu_page_response_code - Return status of fault handlers + * @IOMMU_PAGE_RESP_SUCCESS: Fault has been handled and the page tables + * populated, retry the access. This is "Success" in PCI PRI. + * @IOMMU_PAGE_RESP_FAILURE: General error. Drop all subsequent faults from + * this device if possible. This is "Response Failure" in PCI PRI. + * @IOMMU_PAGE_RESP_INVALID: Could not handle this fault, don't retry the + * access. This is "Invalid Request" in PCI PRI. + */ +enum iommu_page_response_code { + IOMMU_PAGE_RESP_SUCCESS = 0, + IOMMU_PAGE_RESP_INVALID, + IOMMU_PAGE_RESP_FAILURE, +}; + +/** + * struct iommu_page_response - Generic page response information + * @version: API version of this structure + * @flags: encodes whether the corresponding fields are valid + * (IOMMU_FAULT_PAGE_RESPONSE_* values) + * @pasid: Process Address Space ID + * @grpid: Page Request Group Index + * @code: response code from &enum iommu_page_response_code + */ +struct iommu_page_response { +#define IOMMU_PAGE_RESP_VERSION_1 1 + __u32 version; +#define IOMMU_PAGE_RESP_PASID_VALID (1 << 0) + __u32 flags; + __u32 pasid; + __u32 grpid; + __u32 code; +}; + +#endif /* _UAPI_IOMMU_H */ diff --git a/init/initramfs.c b/init/initramfs.c index 178130fd61c2..c47dad0884f7 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -617,7 +617,7 @@ static inline void clean_rootfs(void) #endif /* CONFIG_BLK_DEV_RAM */ #ifdef CONFIG_BLK_DEV_RAM -static void populate_initrd_image(char *err) +static void __init populate_initrd_image(char *err) { ssize_t written; int fd; @@ -637,7 +637,7 @@ static void populate_initrd_image(char *err) ksys_close(fd); } #else -static void populate_initrd_image(char *err) +static void __init populate_initrd_image(char *err) { printk(KERN_EMERG "Initramfs unpacking failed: %s\n", err); } diff --git a/kernel/cpu.c b/kernel/cpu.c index 077fde6fb953..ef1c565edc5d 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1964,6 +1964,9 @@ static ssize_t write_cpuhp_fail(struct device *dev, if (ret) return ret; + if (fail < CPUHP_OFFLINE || fail > CPUHP_ONLINE) + return -EINVAL; + /* * Cannot fail STARTING/DYING callbacks. */ @@ -2339,6 +2342,9 @@ static int __init mitigations_parse_cmdline(char *arg) cpu_mitigations = CPU_MITIGATIONS_AUTO; else if (!strcmp(arg, "auto,nosmt")) cpu_mitigations = CPU_MITIGATIONS_AUTO_NOSMT; + else + pr_crit("Unsupported mitigations=%s, system may still be vulnerable\n", + arg); return 0; } diff --git a/kernel/events/core.c b/kernel/events/core.c index abbd4b3b96c2..f85929ce13be 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5005,6 +5005,9 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg) if (perf_event_check_period(event, value)) return -EINVAL; + if (!event->attr.freq && (value & (1ULL << 63))) + return -EINVAL; + event_function_call(event, __perf_event_period, &value); return 0; @@ -5923,7 +5926,7 @@ static void perf_sample_regs_user(struct perf_regs *regs_user, if (user_mode(regs)) { regs_user->abi = perf_reg_abi(current); regs_user->regs = regs; - } else if (current->mm) { + } else if (!(current->flags & PF_KTHREAD)) { perf_get_regs_user(regs_user, regs, regs_user_copy); } else { regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE; @@ -10033,6 +10036,12 @@ void perf_pmu_unregister(struct pmu *pmu) } EXPORT_SYMBOL_GPL(perf_pmu_unregister); +static inline bool has_extended_regs(struct perf_event *event) +{ + return (event->attr.sample_regs_user & PERF_REG_EXTENDED_MASK) || + (event->attr.sample_regs_intr & PERF_REG_EXTENDED_MASK); +} + static int perf_try_init_event(struct pmu *pmu, struct perf_event *event) { struct perf_event_context *ctx = NULL; @@ -10064,12 +10073,16 @@ static int perf_try_init_event(struct pmu *pmu, struct perf_event *event) perf_event_ctx_unlock(event->group_leader, ctx); if (!ret) { + if (!(pmu->capabilities & PERF_PMU_CAP_EXTENDED_REGS) && + has_extended_regs(event)) + ret = -EOPNOTSUPP; + if (pmu->capabilities & PERF_PMU_CAP_NO_EXCLUDE && - event_has_any_exclude_flag(event)) { - if (event->destroy) - event->destroy(event); + event_has_any_exclude_flag(event)) ret = -EINVAL; - } + + if (ret && event->destroy) + event->destroy(event); } if (ret) diff --git a/kernel/fork.c b/kernel/fork.c index 75675b9bf6df..61667909ce83 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -248,7 +248,11 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) struct page *page = alloc_pages_node(node, THREADINFO_GFP, THREAD_SIZE_ORDER); - return page ? page_address(page) : NULL; + if (likely(page)) { + tsk->stack = page_address(page); + return tsk->stack; + } + return NULL; #endif } @@ -1712,31 +1716,6 @@ const struct file_operations pidfd_fops = { #endif }; -/** - * pidfd_create() - Create a new pid file descriptor. - * - * @pid: struct pid that the pidfd will reference - * - * This creates a new pid file descriptor with the O_CLOEXEC flag set. - * - * Note, that this function can only be called after the fd table has - * been unshared to avoid leaking the pidfd to the new process. - * - * Return: On success, a cloexec pidfd is returned. - * On error, a negative errno number will be returned. - */ -static int pidfd_create(struct pid *pid) -{ - int fd; - - fd = anon_inode_getfd("[pidfd]", &pidfd_fops, get_pid(pid), - O_RDWR | O_CLOEXEC); - if (fd < 0) - put_pid(pid); - - return fd; -} - static void __delayed_free_task(struct rcu_head *rhp) { struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); @@ -1774,6 +1753,7 @@ static __latent_entropy struct task_struct *copy_process( int pidfd = -1, retval; struct task_struct *p; struct multiprocess_signals delayed; + struct file *pidfile = NULL; /* * Don't allow sharing the root directory with processes in a different @@ -1822,8 +1802,6 @@ static __latent_entropy struct task_struct *copy_process( } if (clone_flags & CLONE_PIDFD) { - int reserved; - /* * - CLONE_PARENT_SETTID is useless for pidfds and also * parent_tidptr is used to return pidfds. @@ -1834,16 +1812,6 @@ static __latent_entropy struct task_struct *copy_process( if (clone_flags & (CLONE_DETACHED | CLONE_PARENT_SETTID | CLONE_THREAD)) return ERR_PTR(-EINVAL); - - /* - * Verify that parent_tidptr is sane so we can potentially - * reuse it later. - */ - if (get_user(reserved, parent_tidptr)) - return ERR_PTR(-EFAULT); - - if (reserved != 0) - return ERR_PTR(-EINVAL); } /* @@ -2058,11 +2026,20 @@ static __latent_entropy struct task_struct *copy_process( * if the fd table isn't shared). */ if (clone_flags & CLONE_PIDFD) { - retval = pidfd_create(pid); + retval = get_unused_fd_flags(O_RDWR | O_CLOEXEC); if (retval < 0) goto bad_fork_free_pid; pidfd = retval; + + pidfile = anon_inode_getfile("[pidfd]", &pidfd_fops, pid, + O_RDWR | O_CLOEXEC); + if (IS_ERR(pidfile)) { + put_unused_fd(pidfd); + goto bad_fork_free_pid; + } + get_pid(pid); /* held by pidfile now */ + retval = put_user(pidfd, parent_tidptr); if (retval) goto bad_fork_put_pidfd; @@ -2180,6 +2157,9 @@ static __latent_entropy struct task_struct *copy_process( goto bad_fork_cancel_cgroup; } + /* past the last point of failure */ + if (pidfile) + fd_install(pidfd, pidfile); init_task_pid_links(p); if (likely(p->pid)) { @@ -2246,8 +2226,10 @@ bad_fork_cancel_cgroup: bad_fork_cgroup_threadgroup_change_end: cgroup_threadgroup_change_end(current); bad_fork_put_pidfd: - if (clone_flags & CLONE_PIDFD) - ksys_close(pidfd); + if (clone_flags & CLONE_PIDFD) { + fput(pidfile); + put_unused_fd(pidfd); + } bad_fork_free_pid: if (pid != &init_struct_pid) free_pid(pid); diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 9505101ed2bc..096211299c07 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -493,6 +493,9 @@ int suspend_devices_and_enter(suspend_state_t state) pm_suspend_target_state = state; + if (state == PM_SUSPEND_TO_IDLE) + pm_set_suspend_no_platform(); + error = platform_suspend_begin(state); if (error) goto Close; diff --git a/kernel/signal.c b/kernel/signal.c index d622eac9d169..edf8915ddd54 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2912,7 +2912,8 @@ EXPORT_SYMBOL(set_compat_user_sigmask); * This is useful for syscalls such as ppoll, pselect, io_pgetevents and * epoll_pwait where a new sigmask is passed in from userland for the syscalls. */ -void restore_user_sigmask(const void __user *usigmask, sigset_t *sigsaved) +void restore_user_sigmask(const void __user *usigmask, sigset_t *sigsaved, + bool interrupted) { if (!usigmask) @@ -2922,7 +2923,7 @@ void restore_user_sigmask(const void __user *usigmask, sigset_t *sigsaved) * Restoring sigmask here can lead to delivering signals that the above * syscalls are intended to block because of the sigmask passed in. */ - if (signal_pending(current)) { + if (interrupted) { current->saved_sigmask = *sigsaved; set_restore_sigmask(); return; diff --git a/lib/idr.c b/lib/idr.c index c34e256d2f01..66a374892482 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -228,11 +228,21 @@ void *idr_get_next(struct idr *idr, int *nextid) { struct radix_tree_iter iter; void __rcu **slot; + void *entry = NULL; unsigned long base = idr->idr_base; unsigned long id = *nextid; id = (id < base) ? 0 : id - base; - slot = radix_tree_iter_find(&idr->idr_rt, &iter, id); + radix_tree_for_each_slot(slot, &idr->idr_rt, &iter, id) { + entry = rcu_dereference_raw(*slot); + if (!entry) + continue; + if (!xa_is_internal(entry)) + break; + if (slot != &idr->idr_rt.xa_head && !xa_is_retry(entry)) + break; + slot = radix_tree_iter_retry(&iter); + } if (!slot) return NULL; id = iter.index + base; @@ -241,7 +251,7 @@ void *idr_get_next(struct idr *idr, int *nextid) return NULL; *nextid = id; - return rcu_dereference_raw(*slot); + return entry; } EXPORT_SYMBOL(idr_get_next); diff --git a/lib/test_xarray.c b/lib/test_xarray.c index 5d4bad8bd96a..9d631a7b6a70 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -38,6 +38,12 @@ static void *xa_store_index(struct xarray *xa, unsigned long index, gfp_t gfp) return xa_store(xa, index, xa_mk_index(index), gfp); } +static void xa_insert_index(struct xarray *xa, unsigned long index) +{ + XA_BUG_ON(xa, xa_insert(xa, index, xa_mk_index(index), + GFP_KERNEL) != 0); +} + static void xa_alloc_index(struct xarray *xa, unsigned long index, gfp_t gfp) { u32 id; @@ -338,6 +344,37 @@ static noinline void check_xa_shrink(struct xarray *xa) } } +static noinline void check_insert(struct xarray *xa) +{ + unsigned long i; + + for (i = 0; i < 1024; i++) { + xa_insert_index(xa, i); + XA_BUG_ON(xa, xa_load(xa, i - 1) != NULL); + XA_BUG_ON(xa, xa_load(xa, i + 1) != NULL); + xa_erase_index(xa, i); + } + + for (i = 10; i < BITS_PER_LONG; i++) { + xa_insert_index(xa, 1UL << i); + XA_BUG_ON(xa, xa_load(xa, (1UL << i) - 1) != NULL); + XA_BUG_ON(xa, xa_load(xa, (1UL << i) + 1) != NULL); + xa_erase_index(xa, 1UL << i); + + xa_insert_index(xa, (1UL << i) - 1); + XA_BUG_ON(xa, xa_load(xa, (1UL << i) - 2) != NULL); + XA_BUG_ON(xa, xa_load(xa, 1UL << i) != NULL); + xa_erase_index(xa, (1UL << i) - 1); + } + + xa_insert_index(xa, ~0UL); + XA_BUG_ON(xa, xa_load(xa, 0UL) != NULL); + XA_BUG_ON(xa, xa_load(xa, ~1UL) != NULL); + xa_erase_index(xa, ~0UL); + + XA_BUG_ON(xa, !xa_empty(xa)); +} + static noinline void check_cmpxchg(struct xarray *xa) { void *FIVE = xa_mk_value(5); @@ -1527,6 +1564,7 @@ static int xarray_checks(void) check_xa_mark(&array); check_xa_shrink(&array); check_xas_erase(&array); + check_insert(&array); check_cmpxchg(&array); check_reserve(&array); check_reserve(&xa0); diff --git a/lib/xarray.c b/lib/xarray.c index 6be3acbb861f..446b956c9188 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -298,6 +298,8 @@ bool xas_nomem(struct xa_state *xas, gfp_t gfp) xas_destroy(xas); return false; } + if (xas->xa->xa_flags & XA_FLAGS_ACCOUNT) + gfp |= __GFP_ACCOUNT; xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp); if (!xas->xa_alloc) return false; @@ -325,6 +327,8 @@ static bool __xas_nomem(struct xa_state *xas, gfp_t gfp) xas_destroy(xas); return false; } + if (xas->xa->xa_flags & XA_FLAGS_ACCOUNT) + gfp |= __GFP_ACCOUNT; if (gfpflags_allow_blocking(gfp)) { xas_unlock_type(xas, lock_type); xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp); @@ -358,8 +362,12 @@ static void *xas_alloc(struct xa_state *xas, unsigned int shift) if (node) { xas->xa_alloc = NULL; } else { - node = kmem_cache_alloc(radix_tree_node_cachep, - GFP_NOWAIT | __GFP_NOWARN); + gfp_t gfp = GFP_NOWAIT | __GFP_NOWARN; + + if (xas->xa->xa_flags & XA_FLAGS_ACCOUNT) + gfp |= __GFP_ACCOUNT; + + node = kmem_cache_alloc(radix_tree_node_cachep, gfp); if (!node) { xas_set_err(xas, -ENOMEM); return NULL; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index ac843d32b019..ede7e7f5d1ab 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1510,16 +1510,29 @@ static int free_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed, /* * Dissolve a given free hugepage into free buddy pages. This function does - * nothing for in-use (including surplus) hugepages. Returns -EBUSY if the - * dissolution fails because a give page is not a free hugepage, or because - * free hugepages are fully reserved. + * nothing for in-use hugepages and non-hugepages. + * This function returns values like below: + * + * -EBUSY: failed to dissolved free hugepages or the hugepage is in-use + * (allocated or reserved.) + * 0: successfully dissolved free hugepages or the page is not a + * hugepage (considered as already dissolved) */ int dissolve_free_huge_page(struct page *page) { int rc = -EBUSY; + /* Not to disrupt normal path by vainly holding hugetlb_lock */ + if (!PageHuge(page)) + return 0; + spin_lock(&hugetlb_lock); - if (PageHuge(page) && !page_count(page)) { + if (!PageHuge(page)) { + rc = 0; + goto out; + } + + if (!page_count(page)) { struct page *head = compound_head(page); struct hstate *h = page_hstate(head); int nid = page_to_nid(head); @@ -1564,11 +1577,9 @@ int dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn) for (pfn = start_pfn; pfn < end_pfn; pfn += 1 << minimum_order) { page = pfn_to_page(pfn); - if (PageHuge(page) && !page_count(page)) { - rc = dissolve_free_huge_page(page); - if (rc) - break; - } + rc = dissolve_free_huge_page(page); + if (rc) + break; } return rc; diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 8da0334b9ca0..d9cc6606f409 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1730,6 +1730,8 @@ static int soft_offline_huge_page(struct page *page, int flags) if (!ret) { if (set_hwpoison_free_buddy_page(page)) num_poisoned_pages_inc(); + else + ret = -EBUSY; } } return ret; @@ -1854,11 +1856,8 @@ static int soft_offline_in_use_page(struct page *page, int flags) static int soft_offline_free_page(struct page *page) { - int rc = 0; - struct page *head = compound_head(page); + int rc = dissolve_free_huge_page(page); - if (PageHuge(head)) - rc = dissolve_free_huge_page(page); if (!rc) { if (set_hwpoison_free_buddy_page(page)) num_poisoned_pages_inc(); diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 01600d80ae01..fdcb73536319 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -306,7 +306,7 @@ static void mpol_rebind_nodemask(struct mempolicy *pol, const nodemask_t *nodes) else { nodes_remap(tmp, pol->v.nodes,pol->w.cpuset_mems_allowed, *nodes); - pol->w.cpuset_mems_allowed = tmp; + pol->w.cpuset_mems_allowed = *nodes; } if (nodes_empty(tmp)) diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 5a58778c91d4..f719b64741d6 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -987,8 +987,7 @@ static void oom_kill_process(struct oom_control *oc, const char *message) /* * Determines whether the kernel must panic because of the panic_on_oom sysctl. */ -static void check_panic_on_oom(struct oom_control *oc, - enum oom_constraint constraint) +static void check_panic_on_oom(struct oom_control *oc) { if (likely(!sysctl_panic_on_oom)) return; @@ -998,7 +997,7 @@ static void check_panic_on_oom(struct oom_control *oc, * does not panic for cpuset, mempolicy, or memcg allocation * failures. */ - if (constraint != CONSTRAINT_NONE) + if (oc->constraint != CONSTRAINT_NONE) return; } /* Do not panic for oom kills triggered by sysrq */ @@ -1035,7 +1034,6 @@ EXPORT_SYMBOL_GPL(unregister_oom_notifier); bool out_of_memory(struct oom_control *oc) { unsigned long freed = 0; - enum oom_constraint constraint = CONSTRAINT_NONE; if (oom_killer_disabled) return false; @@ -1071,10 +1069,10 @@ bool out_of_memory(struct oom_control *oc) * Check if there were limitations on the allocation (only relevant for * NUMA and memcg) that may require different handling. */ - constraint = constrained_alloc(oc); - if (constraint != CONSTRAINT_MEMORY_POLICY) + oc->constraint = constrained_alloc(oc); + if (oc->constraint != CONSTRAINT_MEMORY_POLICY) oc->nodemask = NULL; - check_panic_on_oom(oc, constraint); + check_panic_on_oom(oc); if (!is_memcg_oom(oc) && sysctl_oom_kill_allocating_task && current->mm && !oom_unkillable_task(current, NULL, oc->nodemask) && diff --git a/mm/page_idle.c b/mm/page_idle.c index 0b39ec0c945c..295512465065 100644 --- a/mm/page_idle.c +++ b/mm/page_idle.c @@ -136,7 +136,7 @@ static ssize_t page_idle_bitmap_read(struct file *file, struct kobject *kobj, end_pfn = pfn + count * BITS_PER_BYTE; if (end_pfn > max_pfn) - end_pfn = ALIGN(max_pfn, BITMAP_CHUNK_BITS); + end_pfn = max_pfn; for (; pfn < end_pfn; pfn++) { bit = pfn % BITMAP_CHUNK_BITS; @@ -181,7 +181,7 @@ static ssize_t page_idle_bitmap_write(struct file *file, struct kobject *kobj, end_pfn = pfn + count * BITS_PER_BYTE; if (end_pfn > max_pfn) - end_pfn = ALIGN(max_pfn, BITMAP_CHUNK_BITS); + end_pfn = max_pfn; for (; pfn < end_pfn; pfn++) { bit = pfn % BITMAP_CHUNK_BITS; diff --git a/mm/page_io.c b/mm/page_io.c index 2e8019d0e048..189415852077 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -29,10 +29,9 @@ static struct bio *get_swap_bio(gfp_t gfp_flags, struct page *page, bio_end_io_t end_io) { - int i, nr = hpage_nr_pages(page); struct bio *bio; - bio = bio_alloc(gfp_flags, nr); + bio = bio_alloc(gfp_flags, 1); if (bio) { struct block_device *bdev; @@ -41,9 +40,7 @@ static struct bio *get_swap_bio(gfp_t gfp_flags, bio->bi_iter.bi_sector <<= PAGE_SHIFT - 9; bio->bi_end_io = end_io; - for (i = 0; i < nr; i++) - bio_add_page(bio, page + i, PAGE_SIZE, 0); - VM_BUG_ON(bio->bi_iter.bi_size != PAGE_SIZE * nr); + bio_add_page(bio, page, PAGE_SIZE * hpage_nr_pages(page), 0); } return bio; } diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 4c9e150e5ad3..0f76cca32a1c 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -913,7 +913,7 @@ adjust_va_to_fit_type(struct vmap_area *va, unsigned long nva_start_addr, unsigned long size, enum fit_type type) { - struct vmap_area *lva; + struct vmap_area *lva = NULL; if (type == FL_FIT_TYPE) { /* @@ -972,7 +972,7 @@ adjust_va_to_fit_type(struct vmap_area *va, if (type != FL_FIT_TYPE) { augment_tree_propagate_from(va); - if (type == NE_FIT_TYPE) + if (lva) /* type == NE_FIT_TYPE */ insert_vmap_area_augment(lva, &va->rb_node, &free_vmap_area_root, &free_vmap_area_list); } diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index 19d27bee285e..1555b0c6f7ec 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -160,10 +160,10 @@ static inline struct lowpan_peer *peer_lookup_dst(struct lowpan_btle_dev *dev, struct in6_addr *daddr, struct sk_buff *skb) { - struct lowpan_peer *peer; - struct in6_addr *nexthop; struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); int count = atomic_read(&dev->peer_count); + const struct in6_addr *nexthop; + struct lowpan_peer *peer; BT_DBG("peers %d addr %pI6c rt %p", count, daddr, rt); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 16f9159234a2..8c2ec35b6512 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -318,6 +318,7 @@ static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *sk static int ip_mc_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) { + struct rtable *new_rt; int ret; ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb); @@ -326,6 +327,17 @@ static int ip_mc_finish_output(struct net *net, struct sock *sk, return ret; } + /* Reset rt_iif so that inet_iif() will return skb->skb_iif. Setting + * this to non-zero causes ipi_ifindex in in_pktinfo to be overwritten, + * see ipv4_pktinfo_prepare(). + */ + new_rt = rt_dst_clone(net->loopback_dev, skb_rtable(skb)); + if (new_rt) { + new_rt->rt_iif = 0; + skb_dst_drop(skb); + skb_dst_set(skb, &new_rt->dst); + } + return dev_loopback_xmit(net, sk, skb); } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 0b8e06ca75d6..40a6abbc9cf6 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -197,7 +197,7 @@ static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash) } sk = __raw_v4_lookup(net, sk_next(sk), iph->protocol, iph->saddr, iph->daddr, - skb->dev->ifindex, sdif); + dif, sdif); } out: read_unlock(&raw_v4_hashinfo.lock); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6cb7cff22db9..8ea0735a6754 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1647,6 +1647,39 @@ struct rtable *rt_dst_alloc(struct net_device *dev, } EXPORT_SYMBOL(rt_dst_alloc); +struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt) +{ + struct rtable *new_rt; + + new_rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK, + rt->dst.flags); + + if (new_rt) { + new_rt->rt_genid = rt_genid_ipv4(dev_net(dev)); + new_rt->rt_flags = rt->rt_flags; + new_rt->rt_type = rt->rt_type; + new_rt->rt_is_input = rt->rt_is_input; + new_rt->rt_iif = rt->rt_iif; + new_rt->rt_pmtu = rt->rt_pmtu; + new_rt->rt_mtu_locked = rt->rt_mtu_locked; + new_rt->rt_gw_family = rt->rt_gw_family; + if (rt->rt_gw_family == AF_INET) + new_rt->rt_gw4 = rt->rt_gw4; + else if (rt->rt_gw_family == AF_INET6) + new_rt->rt_gw6 = rt->rt_gw6; + INIT_LIST_HEAD(&new_rt->rt_uncached); + + new_rt->dst.flags |= DST_HOST; + new_rt->dst.input = rt->dst.input; + new_rt->dst.output = rt->dst.output; + new_rt->dst.error = rt->dst.error; + new_rt->dst.lastuse = jiffies; + new_rt->dst.lwtstate = lwtstate_get(rt->dst.lwtstate); + } + return new_rt; +} +EXPORT_SYMBOL(rt_dst_clone); + /* called in rcu_read_lock() section */ int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr, u8 tos, struct net_device *dev, diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 834475717110..21efcd02f337 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -59,8 +59,8 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * { struct dst_entry *dst = skb_dst(skb); struct net_device *dev = dst->dev; + const struct in6_addr *nexthop; struct neighbour *neigh; - struct in6_addr *nexthop; int ret; if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 11ad62effd56..97a843cf164c 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -218,7 +218,8 @@ static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst, { const struct rt6_info *rt = container_of(dst, struct rt6_info, dst); - return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr); + return ip6_neigh_lookup(rt6_nexthop(rt, &in6addr_any), + dst->dev, skb, daddr); } static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr) @@ -5281,7 +5282,7 @@ static struct ctl_table ipv6_route_table_template[] = { .data = &init_net.ipv6.sysctl.skip_notify_on_dev_down, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, .extra1 = &zero, .extra2 = &one, }, diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index 241317473114..cdfc33517e85 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -439,9 +439,9 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, struct nf_flowtable *flow_table = priv; struct flow_offload_tuple tuple = {}; enum flow_offload_tuple_dir dir; + const struct in6_addr *nexthop; struct flow_offload *flow; struct net_device *outdev; - struct in6_addr *nexthop; struct ipv6hdr *ip6h; struct rt6_info *rt; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index a29d66da7394..5f78df080573 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2401,6 +2401,9 @@ static void tpacket_destruct_skb(struct sk_buff *skb) ts = __packet_set_timestamp(po, ph, skb); __packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts); + + if (!packet_read_pending(&po->tx_ring)) + complete(&po->skb_completion); } sock_wfree(skb); @@ -2585,7 +2588,7 @@ static int tpacket_parse_header(struct packet_sock *po, void *frame, static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) { - struct sk_buff *skb; + struct sk_buff *skb = NULL; struct net_device *dev; struct virtio_net_hdr *vnet_hdr = NULL; struct sockcm_cookie sockc; @@ -2600,6 +2603,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) int len_sum = 0; int status = TP_STATUS_AVAILABLE; int hlen, tlen, copylen = 0; + long timeo = 0; mutex_lock(&po->pg_vec_lock); @@ -2646,12 +2650,21 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !po->has_vnet_hdr) size_max = dev->mtu + reserve + VLAN_HLEN; + reinit_completion(&po->skb_completion); + do { ph = packet_current_frame(po, &po->tx_ring, TP_STATUS_SEND_REQUEST); if (unlikely(ph == NULL)) { - if (need_wait && need_resched()) - schedule(); + if (need_wait && skb) { + timeo = sock_sndtimeo(&po->sk, msg->msg_flags & MSG_DONTWAIT); + timeo = wait_for_completion_interruptible_timeout(&po->skb_completion, timeo); + if (timeo <= 0) { + err = !timeo ? -ETIMEDOUT : -ERESTARTSYS; + goto out_put; + } + } + /* check for additional frames */ continue; } @@ -3207,6 +3220,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, sock_init_data(sock, sk); po = pkt_sk(sk); + init_completion(&po->skb_completion); sk->sk_family = PF_PACKET; po->num = proto; po->xmit = dev_queue_xmit; @@ -4314,7 +4328,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, req3->tp_sizeof_priv || req3->tp_feature_req_word) { err = -EINVAL; - goto out; + goto out_free_pg_vec; } } break; @@ -4378,6 +4392,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, prb_shutdown_retire_blk_timer(po, rb_queue); } +out_free_pg_vec: if (pg_vec) free_pg_vec(pg_vec, order, req->tp_block_nr); out: diff --git a/net/packet/internal.h b/net/packet/internal.h index 3bb7c5fb3bff..c70a2794456f 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -128,6 +128,7 @@ struct packet_sock { unsigned int tp_hdrlen; unsigned int tp_reserve; unsigned int tp_tstamp; + struct completion skb_completion; struct net_device __rcu *cached_dev; int (*xmit)(struct sk_buff *skb); struct packet_type prot_hook ____cacheline_aligned_in_smp; diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c index e16a3d37d2bc..732e109c3055 100644 --- a/net/sched/sch_cbs.c +++ b/net/sched/sch_cbs.c @@ -549,12 +549,17 @@ static struct notifier_block cbs_device_notifier = { static int __init cbs_module_init(void) { - int err = register_netdevice_notifier(&cbs_device_notifier); + int err; + err = register_netdevice_notifier(&cbs_device_notifier); if (err) return err; - return register_qdisc(&cbs_qdisc_ops); + err = register_qdisc(&cbs_qdisc_ops); + if (err) + unregister_netdevice_notifier(&cbs_device_notifier); + + return err; } static void __exit cbs_module_exit(void) diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index e358437ba29b..69cebb2c998b 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -118,10 +118,6 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, /* Initialize the bind addr area */ sctp_bind_addr_init(&ep->base.bind_addr, 0); - /* Remember who we are attached to. */ - ep->base.sk = sk; - sock_hold(ep->base.sk); - /* Create the lists of associations. */ INIT_LIST_HEAD(&ep->asocs); @@ -154,6 +150,10 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, ep->prsctp_enable = net->sctp.prsctp_enable; ep->reconf_enable = net->sctp.reconf_enable; + /* Remember who we are attached to. */ + ep->base.sk = sk; + sock_hold(ep->base.sk); + return ep; nomem_shkey: diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 0c874e996f85..7621ec2f539c 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -2029,7 +2029,7 @@ static int __init smc_init(void) rc = smc_pnet_init(); if (rc) - return rc; + goto out_pernet_subsys; rc = smc_llc_init(); if (rc) { @@ -2080,6 +2080,9 @@ out_proto: proto_unregister(&smc_proto); out_pnet: smc_pnet_exit(); +out_pernet_subsys: + unregister_pernet_subsys(&smc_net_ops); + return rc; } diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 2d2850adc2a3..4ca50ddf8d16 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -652,7 +652,10 @@ create: rc = smc_lgr_create(smc, ini); if (rc) goto out; + lgr = conn->lgr; + write_lock_bh(&lgr->conns_lock); smc_lgr_register_conn(conn); /* add smc conn to lgr */ + write_unlock_bh(&lgr->conns_lock); } conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE; conn->local_tx_ctrl.len = SMC_WR_TX_SIZE; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index c69951ed2ebc..36652352a38c 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -950,6 +950,8 @@ static int xs_local_send_request(struct rpc_rqst *req) struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); struct xdr_buf *xdr = &req->rq_snd_buf; + rpc_fraghdr rm = xs_stream_record_marker(xdr); + unsigned int msglen = rm ? req->rq_slen + sizeof(rm) : req->rq_slen; int status; int sent = 0; @@ -964,9 +966,7 @@ static int xs_local_send_request(struct rpc_rqst *req) req->rq_xtime = ktime_get(); status = xs_sendpages(transport->sock, NULL, 0, xdr, - transport->xmit.offset, - xs_stream_record_marker(xdr), - &sent); + transport->xmit.offset, rm, &sent); dprintk("RPC: %s(%u) = %d\n", __func__, xdr->len - transport->xmit.offset, status); @@ -976,7 +976,7 @@ static int xs_local_send_request(struct rpc_rqst *req) if (likely(sent > 0) || status == 0) { transport->xmit.offset += sent; req->rq_bytes_sent = transport->xmit.offset; - if (likely(req->rq_bytes_sent >= req->rq_slen)) { + if (likely(req->rq_bytes_sent >= msglen)) { req->rq_xmit_bytes_sent += transport->xmit.offset; transport->xmit.offset = 0; return 0; @@ -1097,6 +1097,8 @@ static int xs_tcp_send_request(struct rpc_rqst *req) struct rpc_xprt *xprt = req->rq_xprt; struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); struct xdr_buf *xdr = &req->rq_snd_buf; + rpc_fraghdr rm = xs_stream_record_marker(xdr); + unsigned int msglen = rm ? req->rq_slen + sizeof(rm) : req->rq_slen; bool vm_wait = false; int status; int sent; @@ -1122,9 +1124,7 @@ static int xs_tcp_send_request(struct rpc_rqst *req) while (1) { sent = 0; status = xs_sendpages(transport->sock, NULL, 0, xdr, - transport->xmit.offset, - xs_stream_record_marker(xdr), - &sent); + transport->xmit.offset, rm, &sent); dprintk("RPC: xs_tcp_send_request(%u) = %d\n", xdr->len - transport->xmit.offset, status); @@ -1133,7 +1133,7 @@ static int xs_tcp_send_request(struct rpc_rqst *req) * reset the count of bytes sent. */ transport->xmit.offset += sent; req->rq_bytes_sent = transport->xmit.offset; - if (likely(req->rq_bytes_sent >= req->rq_slen)) { + if (likely(req->rq_bytes_sent >= msglen)) { req->rq_xmit_bytes_sent += transport->xmit.offset; transport->xmit.offset = 0; return 0; diff --git a/net/tipc/core.c b/net/tipc/core.c index ed536c05252a..c8370722f0bb 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -134,7 +134,7 @@ static int __init tipc_init(void) if (err) goto out_sysctl; - err = register_pernet_subsys(&tipc_net_ops); + err = register_pernet_device(&tipc_net_ops); if (err) goto out_pernet; @@ -142,7 +142,7 @@ static int __init tipc_init(void) if (err) goto out_socket; - err = register_pernet_subsys(&tipc_topsrv_net_ops); + err = register_pernet_device(&tipc_topsrv_net_ops); if (err) goto out_pernet_topsrv; @@ -153,11 +153,11 @@ static int __init tipc_init(void) pr_info("Started in single node mode\n"); return 0; out_bearer: - unregister_pernet_subsys(&tipc_topsrv_net_ops); + unregister_pernet_device(&tipc_topsrv_net_ops); out_pernet_topsrv: tipc_socket_stop(); out_socket: - unregister_pernet_subsys(&tipc_net_ops); + unregister_pernet_device(&tipc_net_ops); out_pernet: tipc_unregister_sysctl(); out_sysctl: @@ -172,9 +172,9 @@ out_netlink: static void __exit tipc_exit(void) { tipc_bearer_cleanup(); - unregister_pernet_subsys(&tipc_topsrv_net_ops); + unregister_pernet_device(&tipc_topsrv_net_ops); tipc_socket_stop(); - unregister_pernet_subsys(&tipc_net_ops); + unregister_pernet_device(&tipc_net_ops); tipc_netlink_stop(); tipc_netlink_compat_stop(); tipc_unregister_sysctl(); diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index c6a04c09d075..cf155061c472 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -445,7 +445,11 @@ static int tipc_nl_compat_bearer_disable(struct tipc_nl_compat_cmd_doit *cmd, if (!bearer) return -EMSGSIZE; - len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_BEARER_NAME); + len = TLV_GET_DATA_LEN(msg->req); + if (len <= 0) + return -EINVAL; + + len = min_t(int, len, TIPC_MAX_BEARER_NAME); if (!string_is_valid(name, len)) return -EINVAL; @@ -539,7 +543,11 @@ static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg, name = (char *)TLV_DATA(msg->req); - len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME); + len = TLV_GET_DATA_LEN(msg->req); + if (len <= 0) + return -EINVAL; + + len = min_t(int, len, TIPC_MAX_BEARER_NAME); if (!string_is_valid(name, len)) return -EINVAL; @@ -817,7 +825,11 @@ static int tipc_nl_compat_link_reset_stats(struct tipc_nl_compat_cmd_doit *cmd, if (!link) return -EMSGSIZE; - len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME); + len = TLV_GET_DATA_LEN(msg->req); + if (len <= 0) + return -EINVAL; + + len = min_t(int, len, TIPC_MAX_BEARER_NAME); if (!string_is_valid(name, len)) return -EINVAL; diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index fc81ae18cc44..e2b69e805d46 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -279,7 +279,8 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) goto skip_tx_cleanup; } - if (!tls_complete_pending_work(sk, ctx, 0, &timeo)) + if (unlikely(sk->sk_write_pending) && + !wait_on_pending_writer(sk, &timeo)) tls_handle_open_record(sk, 0); /* We need these for tls_sw_fallback handling of other packets */ diff --git a/samples/pidfd/pidfd-metadata.c b/samples/pidfd/pidfd-metadata.c index 14b454448429..c459155daf9a 100644 --- a/samples/pidfd/pidfd-metadata.c +++ b/samples/pidfd/pidfd-metadata.c @@ -83,7 +83,7 @@ static int pidfd_metadata_fd(pid_t pid, int pidfd) int main(int argc, char *argv[]) { - int pidfd = 0, ret = EXIT_FAILURE; + int pidfd = -1, ret = EXIT_FAILURE; char buf[4096] = { 0 }; pid_t pid; int procfd, statusfd; @@ -91,7 +91,11 @@ int main(int argc, char *argv[]) pid = pidfd_clone(CLONE_PIDFD, &pidfd); if (pid < 0) - exit(ret); + err(ret, "CLONE_PIDFD"); + if (pidfd == -1) { + warnx("CLONE_PIDFD is not supported by the kernel"); + goto out; + } procfd = pidfd_metadata_fd(pid, pidfd); close(pidfd); diff --git a/tools/arch/x86/include/uapi/asm/perf_regs.h b/tools/arch/x86/include/uapi/asm/perf_regs.h index ac67bbea10ca..7c9d2bb3833b 100644 --- a/tools/arch/x86/include/uapi/asm/perf_regs.h +++ b/tools/arch/x86/include/uapi/asm/perf_regs.h @@ -52,4 +52,7 @@ enum perf_event_x86_regs { /* These include both GPRs and XMMX registers */ PERF_REG_X86_XMM_MAX = PERF_REG_X86_XMM15 + 2, }; + +#define PERF_REG_EXTENDED_MASK (~((1ULL << PERF_REG_X86_XMM0) - 1)) + #endif /* _ASM_X86_PERF_REGS_H */ diff --git a/tools/perf/arch/x86/include/perf_regs.h b/tools/perf/arch/x86/include/perf_regs.h index b7cd91a9014f..b7321337d100 100644 --- a/tools/perf/arch/x86/include/perf_regs.h +++ b/tools/perf/arch/x86/include/perf_regs.h @@ -9,7 +9,6 @@ void perf_regs_load(u64 *regs); #define PERF_REGS_MAX PERF_REG_X86_XMM_MAX -#define PERF_XMM_REGS_MASK (~((1ULL << PERF_REG_X86_XMM0) - 1)) #ifndef HAVE_ARCH_X86_64_SUPPORT #define PERF_REGS_MASK ((1ULL << PERF_REG_X86_32_MAX) - 1) #define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_32 diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c index 7886ca5263e3..3666c0076df9 100644 --- a/tools/perf/arch/x86/util/perf_regs.c +++ b/tools/perf/arch/x86/util/perf_regs.c @@ -277,7 +277,7 @@ uint64_t arch__intr_reg_mask(void) .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES, .sample_type = PERF_SAMPLE_REGS_INTR, - .sample_regs_intr = PERF_XMM_REGS_MASK, + .sample_regs_intr = PERF_REG_EXTENDED_MASK, .precise_ip = 1, .disabled = 1, .exclude_kernel = 1, @@ -293,7 +293,7 @@ uint64_t arch__intr_reg_mask(void) fd = sys_perf_event_open(&attr, 0, -1, -1, 0); if (fd != -1) { close(fd); - return (PERF_XMM_REGS_MASK | PERF_REGS_MASK); + return (PERF_REG_EXTENDED_MASK | PERF_REGS_MASK); } return PERF_REGS_MASK; diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c index 698c08f851b8..8995092d541e 100644 --- a/tools/testing/radix-tree/idr-test.c +++ b/tools/testing/radix-tree/idr-test.c @@ -279,6 +279,51 @@ static void idr_align_test(struct idr *idr) } } +DEFINE_IDR(find_idr); + +static void *idr_throbber(void *arg) +{ + time_t start = time(NULL); + int id = *(int *)arg; + + rcu_register_thread(); + do { + idr_alloc(&find_idr, xa_mk_value(id), id, id + 1, GFP_KERNEL); + idr_remove(&find_idr, id); + } while (time(NULL) < start + 10); + rcu_unregister_thread(); + + return NULL; +} + +void idr_find_test_1(int anchor_id, int throbber_id) +{ + pthread_t throbber; + time_t start = time(NULL); + + pthread_create(&throbber, NULL, idr_throbber, &throbber_id); + + BUG_ON(idr_alloc(&find_idr, xa_mk_value(anchor_id), anchor_id, + anchor_id + 1, GFP_KERNEL) != anchor_id); + + do { + int id = 0; + void *entry = idr_get_next(&find_idr, &id); + BUG_ON(entry != xa_mk_value(id)); + } while (time(NULL) < start + 11); + + pthread_join(throbber, NULL); + + idr_remove(&find_idr, anchor_id); + BUG_ON(!idr_is_empty(&find_idr)); +} + +void idr_find_test(void) +{ + idr_find_test_1(100000, 0); + idr_find_test_1(0, 100000); +} + void idr_checks(void) { unsigned long i; @@ -360,6 +405,7 @@ void idr_checks(void) idr_u32_test(1); idr_u32_test(0); idr_align_test(&idr); + idr_find_test(); } #define module_init(x) diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore index ba919308fe30..d503b8764a8e 100644 --- a/tools/testing/selftests/powerpc/mm/.gitignore +++ b/tools/testing/selftests/powerpc/mm/.gitignore @@ -3,4 +3,5 @@ subpage_prot tempfile prot_sao segv_errors -wild_bctr
\ No newline at end of file +wild_bctr +large_vm_fork_separation
\ No newline at end of file diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile index 43d68420e363..f1fbc15800c4 100644 --- a/tools/testing/selftests/powerpc/mm/Makefile +++ b/tools/testing/selftests/powerpc/mm/Makefile @@ -2,7 +2,8 @@ noarg: $(MAKE) -C ../ -TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr +TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr \ + large_vm_fork_separation TEST_GEN_FILES := tempfile top_srcdir = ../../../../.. @@ -13,6 +14,7 @@ $(TEST_GEN_PROGS): ../harness.c $(OUTPUT)/prot_sao: ../utils.c $(OUTPUT)/wild_bctr: CFLAGS += -m64 +$(OUTPUT)/large_vm_fork_separation: CFLAGS += -m64 $(OUTPUT)/tempfile: dd if=/dev/zero of=$@ bs=64k count=1 diff --git a/tools/testing/selftests/powerpc/mm/large_vm_fork_separation.c b/tools/testing/selftests/powerpc/mm/large_vm_fork_separation.c new file mode 100644 index 000000000000..2363a7f3ab0d --- /dev/null +++ b/tools/testing/selftests/powerpc/mm/large_vm_fork_separation.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0+ +// +// Copyright 2019, Michael Ellerman, IBM Corp. +// +// Test that allocating memory beyond the memory limit and then forking is +// handled correctly, ie. the child is able to access the mappings beyond the +// memory limit and the child's writes are not visible to the parent. + +#include <stdio.h> +#include <stdlib.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "utils.h" + + +#ifndef MAP_FIXED_NOREPLACE +#define MAP_FIXED_NOREPLACE MAP_FIXED // "Should be safe" above 512TB +#endif + + +static int test(void) +{ + int p2c[2], c2p[2], rc, status, c, *p; + unsigned long page_size; + pid_t pid; + + page_size = sysconf(_SC_PAGESIZE); + SKIP_IF(page_size != 65536); + + // Create a mapping at 512TB to allocate an extended_id + p = mmap((void *)(512ul << 40), page_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE, -1, 0); + if (p == MAP_FAILED) { + perror("mmap"); + printf("Error: couldn't mmap(), confirm kernel has 4TB support?\n"); + return 1; + } + + printf("parent writing %p = 1\n", p); + *p = 1; + + FAIL_IF(pipe(p2c) == -1 || pipe(c2p) == -1); + + pid = fork(); + if (pid == 0) { + FAIL_IF(read(p2c[0], &c, 1) != 1); + + pid = getpid(); + printf("child writing %p = %d\n", p, pid); + *p = pid; + + FAIL_IF(write(c2p[1], &c, 1) != 1); + FAIL_IF(read(p2c[0], &c, 1) != 1); + exit(0); + } + + c = 0; + FAIL_IF(write(p2c[1], &c, 1) != 1); + FAIL_IF(read(c2p[0], &c, 1) != 1); + + // Prevent compiler optimisation + barrier(); + + rc = 0; + printf("parent reading %p = %d\n", p, *p); + if (*p != 1) { + printf("Error: BUG! parent saw child's write! *p = %d\n", *p); + rc = 1; + } + + FAIL_IF(write(p2c[1], &c, 1) != 1); + FAIL_IF(waitpid(pid, &status, 0) == -1); + FAIL_IF(!WIFEXITED(status) || WEXITSTATUS(status)); + + if (rc == 0) + printf("success: test completed OK\n"); + + return rc; +} + +int main(void) +{ + return test_harness(test, "large_vm_fork_separation"); +} |