diff options
407 files changed, 4390 insertions, 1950 deletions
@@ -70,6 +70,7 @@ Boris Brezillon <bbrezillon@kernel.org> <boris.brezillon@bootlin.com> Boris Brezillon <bbrezillon@kernel.org> <boris.brezillon@free-electrons.com> Brian Avery <b.avery@hp.com> Brian King <brking@us.ibm.com> +Brian Silverman <bsilver16384@gmail.com> <brian.silverman@bluerivertech.com> Changbin Du <changbin.du@intel.com> <changbin.du@gmail.com> Changbin Du <changbin.du@intel.com> <changbin.du@intel.com> Chao Yu <chao@kernel.org> <chao2.yu@samsung.com> diff --git a/Documentation/accounting/psi.rst b/Documentation/accounting/psi.rst index f2b3439edcc2..860fe651d645 100644 --- a/Documentation/accounting/psi.rst +++ b/Documentation/accounting/psi.rst @@ -92,7 +92,8 @@ Triggers can be set on more than one psi metric and more than one trigger for the same psi metric can be specified. However for each trigger a separate file descriptor is required to be able to poll it separately from others, therefore for each trigger a separate open() syscall should be made even -when opening the same psi interface file. +when opening the same psi interface file. Write operations to a file descriptor +with an already existing psi trigger will fail with EBUSY. Monitors activate only when system enters stall state for the monitored psi metric and deactivates upon exit from the stall state. While system is diff --git a/Documentation/arm/marvell.rst b/Documentation/arm/marvell.rst index 9485a5a2e2e9..2f41caa0096c 100644 --- a/Documentation/arm/marvell.rst +++ b/Documentation/arm/marvell.rst @@ -266,10 +266,12 @@ Avanta family ------------- Flavors: + - 88F6500 - 88F6510 - 88F6530P - 88F6550 - 88F6560 + - 88F6601 Homepage: https://web.archive.org/web/20181005145041/http://www.marvell.com/broadband/ diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index 5342e895fb60..0ec7b7f1524b 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -52,6 +52,12 @@ stable kernels. | Allwinner | A64/R18 | UNKNOWN1 | SUN50I_ERRATUM_UNKNOWN1 | +----------------+-----------------+-----------------+-----------------------------+ +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A510 | #2064142 | ARM64_ERRATUM_2064142 | ++----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A510 | #2038923 | ARM64_ERRATUM_2038923 | ++----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A510 | #1902691 | ARM64_ERRATUM_1902691 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A53 | #826319 | ARM64_ERRATUM_826319 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A53 | #827319 | ARM64_ERRATUM_827319 | @@ -92,12 +98,18 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A77 | #1508412 | ARM64_ERRATUM_1508412 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A510 | #2051678 | ARM64_ERRATUM_2051678 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A710 | #2119858 | ARM64_ERRATUM_2119858 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A710 | #2054223 | ARM64_ERRATUM_2054223 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A710 | #2224489 | ARM64_ERRATUM_2224489 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-X2 | #2119858 | ARM64_ERRATUM_2119858 | ++----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-X2 | #2224489 | ARM64_ERRATUM_2224489 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N1 | #1188873,1418040| ARM64_ERRATUM_1418040 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N1 | #1349291 | N/A | diff --git a/Documentation/devicetree/bindings/net/can/tcan4x5x.txt b/Documentation/devicetree/bindings/net/can/tcan4x5x.txt index 0968b40aef1e..e3501bfa22e9 100644 --- a/Documentation/devicetree/bindings/net/can/tcan4x5x.txt +++ b/Documentation/devicetree/bindings/net/can/tcan4x5x.txt @@ -31,7 +31,7 @@ tcan4x5x: tcan4x5x@0 { #address-cells = <1>; #size-cells = <1>; spi-max-frequency = <10000000>; - bosch,mram-cfg = <0x0 0 0 32 0 0 1 1>; + bosch,mram-cfg = <0x0 0 0 16 0 0 1 1>; interrupt-parent = <&gpio1>; interrupts = <14 IRQ_TYPE_LEVEL_LOW>; device-state-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>; diff --git a/Documentation/index.rst b/Documentation/index.rst index 2b4de3926858..b58692d687f6 100644 --- a/Documentation/index.rst +++ b/Documentation/index.rst @@ -166,6 +166,7 @@ to ReStructured Text format, or are simply too old. .. toctree:: :maxdepth: 2 + tools/index staging/index watch_queue diff --git a/Documentation/kernel-hacking/locking.rst b/Documentation/kernel-hacking/locking.rst index e6cd40663ea5..4cbd50edf277 100644 --- a/Documentation/kernel-hacking/locking.rst +++ b/Documentation/kernel-hacking/locking.rst @@ -295,7 +295,7 @@ Pete Zaitcev gives the following summary: - If you are in a process context (any syscall) and want to lock other process out, use a mutex. You can take a mutex and sleep - (``copy_from_user*(`` or ``kmalloc(x,GFP_KERNEL)``). + (``copy_from_user()`` or ``kmalloc(x,GFP_KERNEL)``). - Otherwise (== data can be touched in an interrupt), use spin_lock_irqsave() and diff --git a/Documentation/tools/index.rst b/Documentation/tools/index.rst new file mode 100644 index 000000000000..0bb1e61bdcc0 --- /dev/null +++ b/Documentation/tools/index.rst @@ -0,0 +1,20 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============ +Kernel tools +============ + +This book covers user-space tools that are shipped with the kernel source; +more additions are needed here: + +.. toctree:: + :maxdepth: 1 + + rtla/index + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` diff --git a/Documentation/tools/rtla/index.rst b/Documentation/tools/rtla/index.rst new file mode 100644 index 000000000000..840f0bf3e803 --- /dev/null +++ b/Documentation/tools/rtla/index.rst @@ -0,0 +1,26 @@ +.. SPDX-License-Identifier: GPL-2.0 + +================================ +The realtime Linux analysis tool +================================ + +RTLA provides a set of tools for the analysis of the kernel's realtime +behavior on specific hardware. + +.. toctree:: + :maxdepth: 1 + + rtla + rtla-osnoise + rtla-osnoise-hist + rtla-osnoise-top + rtla-timerlat + rtla-timerlat-hist + rtla-timerlat-top + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index bb8cfddbb22d..a4267104db50 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -3268,6 +3268,7 @@ number. :Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device, KVM_CAP_VCPU_ATTRIBUTES for vcpu device + KVM_CAP_SYS_ATTRIBUTES for system (/dev/kvm) device (no set) :Type: device ioctl, vm ioctl, vcpu ioctl :Parameters: struct kvm_device_attr :Returns: 0 on success, -1 on error @@ -3302,7 +3303,8 @@ transferred is defined by the particular attribute. ------------------------ :Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device, - KVM_CAP_VCPU_ATTRIBUTES for vcpu device + KVM_CAP_VCPU_ATTRIBUTES for vcpu device + KVM_CAP_SYS_ATTRIBUTES for system (/dev/kvm) device :Type: device ioctl, vm ioctl, vcpu ioctl :Parameters: struct kvm_device_attr :Returns: 0 on success, -1 on error diff --git a/Documentation/vm/page_table_check.rst b/Documentation/vm/page_table_check.rst index 81f521ff7ea7..1a09472f10a3 100644 --- a/Documentation/vm/page_table_check.rst +++ b/Documentation/vm/page_table_check.rst @@ -9,7 +9,7 @@ Page Table Check Introduction ============ -Page table check allows to hardern the kernel by ensuring that some types of +Page table check allows to harden the kernel by ensuring that some types of the memory corruptions are prevented. Page table check performs extra verifications at the time when new pages become diff --git a/MAINTAINERS b/MAINTAINERS index ea3e6c914384..f41088418aae 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -190,8 +190,9 @@ M: Johannes Berg <johannes@sipsolutions.net> L: linux-wireless@vger.kernel.org S: Maintained W: https://wireless.wiki.kernel.org/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211.git -T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211-next.git +Q: https://patchwork.kernel.org/project/linux-wireless/list/ +T: git git://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless-next.git F: Documentation/driver-api/80211/cfg80211.rst F: Documentation/networking/regulatory.rst F: include/linux/ieee80211.h @@ -7208,8 +7209,10 @@ F: drivers/net/mdio/of_mdio.c F: drivers/net/pcs/ F: drivers/net/phy/ F: include/dt-bindings/net/qca-ar803x.h +F: include/linux/linkmode.h F: include/linux/*mdio*.h F: include/linux/mdio/*.h +F: include/linux/mii.h F: include/linux/of_net.h F: include/linux/phy.h F: include/linux/phy_fixed.h @@ -11366,8 +11369,9 @@ M: Johannes Berg <johannes@sipsolutions.net> L: linux-wireless@vger.kernel.org S: Maintained W: https://wireless.wiki.kernel.org/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211.git -T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211-next.git +Q: https://patchwork.kernel.org/project/linux-wireless/list/ +T: git git://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless-next.git F: Documentation/networking/mac80211-injection.rst F: Documentation/networking/mac80211_hwsim/mac80211_hwsim.rst F: drivers/net/wireless/mac80211_hwsim.[ch] @@ -13374,9 +13378,10 @@ NETWORKING DRIVERS (WIRELESS) M: Kalle Valo <kvalo@kernel.org> L: linux-wireless@vger.kernel.org S: Maintained -Q: http://patchwork.kernel.org/project/linux-wireless/list/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/wireless-drivers.git -T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/wireless-drivers-next.git +W: https://wireless.wiki.kernel.org/ +Q: https://patchwork.kernel.org/project/linux-wireless/list/ +T: git git://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless-next.git F: Documentation/devicetree/bindings/net/wireless/ F: drivers/net/wireless/ @@ -13449,7 +13454,11 @@ L: netdev@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git F: arch/x86/net/* +F: include/linux/ip.h +F: include/linux/ipv6* +F: include/net/fib* F: include/net/ip* +F: include/net/route.h F: net/ipv4/ F: net/ipv6/ @@ -13510,10 +13519,6 @@ F: include/net/tls.h F: include/uapi/linux/tls.h F: net/tls/* -NETWORKING [WIRELESS] -L: linux-wireless@vger.kernel.org -Q: http://patchwork.kernel.org/project/linux-wireless/list/ - NETXEN (1/10) GbE SUPPORT M: Manish Chopra <manishc@marvell.com> M: Rahul Verma <rahulv@marvell.com> @@ -16532,8 +16537,9 @@ M: Johannes Berg <johannes@sipsolutions.net> L: linux-wireless@vger.kernel.org S: Maintained W: https://wireless.wiki.kernel.org/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211.git -T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211-next.git +Q: https://patchwork.kernel.org/project/linux-wireless/list/ +T: git git://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless-next.git F: Documentation/ABI/stable/sysfs-class-rfkill F: Documentation/driver-api/rfkill.rst F: include/linux/rfkill.h diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index fabe39169b12..4c97cb40eebb 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -83,6 +83,7 @@ config ARM select HAVE_EBPF_JIT if !CPU_ENDIAN_BE32 select HAVE_CONTEXT_TRACKING select HAVE_C_RECORDMCOUNT + select HAVE_BUILDTIME_MCOUNT_SORT select HAVE_DEBUG_KMEMLEAK if !XIP_KERNEL select HAVE_DMA_CONTIGUOUS if MMU select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 7d23d4bb2168..6fe67963ba5a 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -288,6 +288,7 @@ */ #define ALT_UP(instr...) \ .pushsection ".alt.smp.init", "a" ;\ + .align 2 ;\ .long 9998b - . ;\ 9997: instr ;\ .if . - 9997b == 2 ;\ @@ -299,6 +300,7 @@ .popsection #define ALT_UP_B(label) \ .pushsection ".alt.smp.init", "a" ;\ + .align 2 ;\ .long 9998b - . ;\ W(b) . + (label - 9998b) ;\ .popsection diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h index 6af68edfa53a..bdc35c0e8dfb 100644 --- a/arch/arm/include/asm/processor.h +++ b/arch/arm/include/asm/processor.h @@ -96,6 +96,7 @@ unsigned long __get_wchan(struct task_struct *p); #define __ALT_SMP_ASM(smp, up) \ "9998: " smp "\n" \ " .pushsection \".alt.smp.init\", \"a\"\n" \ + " .align 2\n" \ " .long 9998b - .\n" \ " " up "\n" \ " .popsection\n" diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 36fbc3329252..32dbfd81f42a 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -11,6 +11,7 @@ #include <linux/string.h> #include <asm/memory.h> #include <asm/domain.h> +#include <asm/unaligned.h> #include <asm/unified.h> #include <asm/compiler.h> @@ -497,7 +498,10 @@ do { \ } \ default: __err = __get_user_bad(); break; \ } \ - *(type *)(dst) = __val; \ + if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) \ + put_unaligned(__val, (type *)(dst)); \ + else \ + *(type *)(dst) = __val; /* aligned by caller */ \ if (__err) \ goto err_label; \ } while (0) @@ -507,7 +511,9 @@ do { \ const type *__pk_ptr = (dst); \ unsigned long __dst = (unsigned long)__pk_ptr; \ int __err = 0; \ - type __val = *(type *)src; \ + type __val = IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) \ + ? get_unaligned((type *)(src)) \ + : *(type *)(src); /* aligned by caller */ \ switch (sizeof(type)) { \ case 1: __put_user_asm_byte(__val, __dst, __err, ""); break; \ case 2: __put_user_asm_half(__val, __dst, __err, ""); break; \ diff --git a/arch/arm/probes/kprobes/Makefile b/arch/arm/probes/kprobes/Makefile index 14db56f49f0a..6159010dac4a 100644 --- a/arch/arm/probes/kprobes/Makefile +++ b/arch/arm/probes/kprobes/Makefile @@ -1,4 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 +KASAN_SANITIZE_actions-common.o := n +KASAN_SANITIZE_actions-arm.o := n +KASAN_SANITIZE_actions-thumb.o := n obj-$(CONFIG_KPROBES) += core.o actions-common.o checkers-common.o obj-$(CONFIG_ARM_KPROBES_TEST) += test-kprobes.o test-kprobes-objs := test-core.o diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 6978140edfa4..f2b5a4abef21 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -670,15 +670,25 @@ config ARM64_ERRATUM_1508412 config ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE bool +config ARM64_ERRATUM_2051678 + bool "Cortex-A510: 2051678: disable Hardware Update of the page table dirty bit" + help + This options adds the workaround for ARM Cortex-A510 erratum ARM64_ERRATUM_2051678. + Affected Coretex-A510 might not respect the ordering rules for + hardware update of the page table's dirty bit. The workaround + is to not enable the feature on affected CPUs. + + If unsure, say Y. + config ARM64_ERRATUM_2119858 - bool "Cortex-A710: 2119858: workaround TRBE overwriting trace data in FILL mode" + bool "Cortex-A710/X2: 2119858: workaround TRBE overwriting trace data in FILL mode" default y depends on CORESIGHT_TRBE select ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE help - This option adds the workaround for ARM Cortex-A710 erratum 2119858. + This option adds the workaround for ARM Cortex-A710/X2 erratum 2119858. - Affected Cortex-A710 cores could overwrite up to 3 cache lines of trace + Affected Cortex-A710/X2 cores could overwrite up to 3 cache lines of trace data at the base of the buffer (pointed to by TRBASER_EL1) in FILL mode in the event of a WRAP event. @@ -761,14 +771,14 @@ config ARM64_ERRATUM_2253138 If unsure, say Y. config ARM64_ERRATUM_2224489 - bool "Cortex-A710: 2224489: workaround TRBE writing to address out-of-range" + bool "Cortex-A710/X2: 2224489: workaround TRBE writing to address out-of-range" depends on CORESIGHT_TRBE default y select ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE help - This option adds the workaround for ARM Cortex-A710 erratum 2224489. + This option adds the workaround for ARM Cortex-A710/X2 erratum 2224489. - Affected Cortex-A710 cores might write to an out-of-range address, not reserved + Affected Cortex-A710/X2 cores might write to an out-of-range address, not reserved for TRBE. Under some conditions, the TRBE might generate a write to the next virtually addressed page following the last page of the TRBE address space (i.e., the TRBLIMITR_EL1.LIMIT), instead of wrapping around to the base. @@ -778,6 +788,65 @@ config ARM64_ERRATUM_2224489 If unsure, say Y. +config ARM64_ERRATUM_2064142 + bool "Cortex-A510: 2064142: workaround TRBE register writes while disabled" + depends on COMPILE_TEST # Until the CoreSight TRBE driver changes are in + default y + help + This option adds the workaround for ARM Cortex-A510 erratum 2064142. + + Affected Cortex-A510 core might fail to write into system registers after the + TRBE has been disabled. Under some conditions after the TRBE has been disabled + writes into TRBE registers TRBLIMITR_EL1, TRBPTR_EL1, TRBBASER_EL1, TRBSR_EL1, + and TRBTRG_EL1 will be ignored and will not be effected. + + Work around this in the driver by executing TSB CSYNC and DSB after collection + is stopped and before performing a system register write to one of the affected + registers. + + If unsure, say Y. + +config ARM64_ERRATUM_2038923 + bool "Cortex-A510: 2038923: workaround TRBE corruption with enable" + depends on COMPILE_TEST # Until the CoreSight TRBE driver changes are in + default y + help + This option adds the workaround for ARM Cortex-A510 erratum 2038923. + + Affected Cortex-A510 core might cause an inconsistent view on whether trace is + prohibited within the CPU. As a result, the trace buffer or trace buffer state + might be corrupted. This happens after TRBE buffer has been enabled by setting + TRBLIMITR_EL1.E, followed by just a single context synchronization event before + execution changes from a context, in which trace is prohibited to one where it + isn't, or vice versa. In these mentioned conditions, the view of whether trace + is prohibited is inconsistent between parts of the CPU, and the trace buffer or + the trace buffer state might be corrupted. + + Work around this in the driver by preventing an inconsistent view of whether the + trace is prohibited or not based on TRBLIMITR_EL1.E by immediately following a + change to TRBLIMITR_EL1.E with at least one ISB instruction before an ERET, or + two ISB instructions if no ERET is to take place. + + If unsure, say Y. + +config ARM64_ERRATUM_1902691 + bool "Cortex-A510: 1902691: workaround TRBE trace corruption" + depends on COMPILE_TEST # Until the CoreSight TRBE driver changes are in + default y + help + This option adds the workaround for ARM Cortex-A510 erratum 1902691. + + Affected Cortex-A510 core might cause trace data corruption, when being written + into the memory. Effectively TRBE is broken and hence cannot be used to capture + trace data. + + Work around this problem in the driver by just preventing TRBE initialization on + affected cpus. The firmware must have disabled the access to TRBE for the kernel + on such implementations. This will cover the kernel for any firmware that doesn't + do this already. + + If unsure, say Y. + config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 19b8441aa8f2..999b9149f856 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -73,7 +73,9 @@ #define ARM_CPU_PART_CORTEX_A76 0xD0B #define ARM_CPU_PART_NEOVERSE_N1 0xD0C #define ARM_CPU_PART_CORTEX_A77 0xD0D +#define ARM_CPU_PART_CORTEX_A510 0xD46 #define ARM_CPU_PART_CORTEX_A710 0xD47 +#define ARM_CPU_PART_CORTEX_X2 0xD48 #define ARM_CPU_PART_NEOVERSE_N2 0xD49 #define APM_CPU_PART_POTENZA 0x000 @@ -115,7 +117,9 @@ #define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76) #define MIDR_NEOVERSE_N1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N1) #define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77) +#define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510) #define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710) +#define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2) #define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 9e1c1aef9ebd..066098198c24 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -347,6 +347,7 @@ static const struct midr_range trbe_overwrite_fill_mode_cpus[] = { #endif #ifdef CONFIG_ARM64_ERRATUM_2119858 MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), + MIDR_RANGE(MIDR_CORTEX_X2, 0, 0, 2, 0), #endif {}, }; @@ -371,6 +372,7 @@ static struct midr_range trbe_write_out_of_range_cpus[] = { #endif #ifdef CONFIG_ARM64_ERRATUM_2224489 MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), + MIDR_RANGE(MIDR_CORTEX_X2, 0, 0, 2, 0), #endif {}, }; @@ -598,6 +600,33 @@ const struct arm64_cpu_capabilities arm64_errata[] = { CAP_MIDR_RANGE_LIST(trbe_write_out_of_range_cpus), }, #endif +#ifdef CONFIG_ARM64_ERRATUM_2064142 + { + .desc = "ARM erratum 2064142", + .capability = ARM64_WORKAROUND_2064142, + + /* Cortex-A510 r0p0 - r0p2 */ + ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 2) + }, +#endif +#ifdef CONFIG_ARM64_ERRATUM_2038923 + { + .desc = "ARM erratum 2038923", + .capability = ARM64_WORKAROUND_2038923, + + /* Cortex-A510 r0p0 - r0p2 */ + ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 2) + }, +#endif +#ifdef CONFIG_ARM64_ERRATUM_1902691 + { + .desc = "ARM erratum 1902691", + .capability = ARM64_WORKAROUND_1902691, + + /* Cortex-A510 r0p0 - r0p1 */ + ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 1) + }, +#endif { } }; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index a46ab3b1c4d5..e5f23dab1c8d 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1646,6 +1646,9 @@ static bool cpu_has_broken_dbm(void) /* Kryo4xx Silver (rdpe => r1p0) */ MIDR_REV(MIDR_QCOM_KRYO_4XX_SILVER, 0xd, 0xe), #endif +#ifdef CONFIG_ARM64_ERRATUM_2051678 + MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 2), +#endif {}, }; diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 0fb58fed54cb..e4103e085681 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -33,8 +33,8 @@ */ -static void start_backtrace(struct stackframe *frame, unsigned long fp, - unsigned long pc) +static notrace void start_backtrace(struct stackframe *frame, unsigned long fp, + unsigned long pc) { frame->fp = fp; frame->pc = pc; @@ -55,6 +55,7 @@ static void start_backtrace(struct stackframe *frame, unsigned long fp, frame->prev_fp = 0; frame->prev_type = STACK_TYPE_UNKNOWN; } +NOKPROBE_SYMBOL(start_backtrace); /* * Unwind from one frame record (A) to the next frame record (B). diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index 60813497a381..172452f79e46 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -29,8 +29,11 @@ ldflags-y := -shared -soname=linux-vdso.so.1 --hash-style=sysv \ ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18 ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO +# -Wmissing-prototypes and -Wmissing-declarations are removed from +# the CFLAGS of vgettimeofday.c to make possible to build the +# kernel with CONFIG_WERROR enabled. CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) $(GCC_PLUGINS_CFLAGS) \ - $(CC_FLAGS_LTO) + $(CC_FLAGS_LTO) -Wmissing-prototypes -Wmissing-declarations KASAN_SANITIZE := n KCSAN_SANITIZE := n UBSAN_SANITIZE := n diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c index 0418399e0a20..c5d009715402 100644 --- a/arch/arm64/kvm/hyp/exception.c +++ b/arch/arm64/kvm/hyp/exception.c @@ -38,7 +38,10 @@ static inline void __vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, u64 val) { - write_sysreg_el1(val, SYS_SPSR); + if (has_vhe()) + write_sysreg_el1(val, SYS_SPSR); + else + __vcpu_sys_reg(vcpu, SPSR_EL1) = val; } static void __vcpu_write_spsr_abt(struct kvm_vcpu *vcpu, u64 val) diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 844a6f003fd5..2cb3867eb7c2 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -983,13 +983,9 @@ static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, */ stage2_put_pte(ptep, mmu, addr, level, mm_ops); - if (need_flush) { - kvm_pte_t *pte_follow = kvm_pte_follow(pte, mm_ops); - - dcache_clean_inval_poc((unsigned long)pte_follow, - (unsigned long)pte_follow + - kvm_granule_size(level)); - } + if (need_flush && mm_ops->dcache_clean_inval_poc) + mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops), + kvm_granule_size(level)); if (childp) mm_ops->put_page(childp); @@ -1151,15 +1147,13 @@ static int stage2_flush_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, struct kvm_pgtable *pgt = arg; struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops; kvm_pte_t pte = *ptep; - kvm_pte_t *pte_follow; if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pgt, pte)) return 0; - pte_follow = kvm_pte_follow(pte, mm_ops); - dcache_clean_inval_poc((unsigned long)pte_follow, - (unsigned long)pte_follow + - kvm_granule_size(level)); + if (mm_ops->dcache_clean_inval_poc) + mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops), + kvm_granule_size(level)); return 0; } diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c index 20db2f281cf2..4fb419f7b8b6 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -983,6 +983,9 @@ static void __vgic_v3_read_ctlr(struct kvm_vcpu *vcpu, u32 vmcr, int rt) val = ((vtr >> 29) & 7) << ICC_CTLR_EL1_PRI_BITS_SHIFT; /* IDbits */ val |= ((vtr >> 23) & 7) << ICC_CTLR_EL1_ID_BITS_SHIFT; + /* SEIS */ + if (kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_SEIS_MASK) + val |= BIT(ICC_CTLR_EL1_SEIS_SHIFT); /* A3V */ val |= ((vtr >> 21) & 1) << ICC_CTLR_EL1_A3V_SHIFT; /* EOImode */ diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index a33d4366b326..b549af8b1dc2 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -609,6 +609,18 @@ static int __init early_gicv4_enable(char *buf) } early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable); +static const struct midr_range broken_seis[] = { + MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM), + MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM), + {}, +}; + +static bool vgic_v3_broken_seis(void) +{ + return ((kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_SEIS_MASK) && + is_midr_in_range_list(read_cpuid_id(), broken_seis)); +} + /** * vgic_v3_probe - probe for a VGICv3 compatible interrupt controller * @info: pointer to the GIC description @@ -676,9 +688,10 @@ int vgic_v3_probe(const struct gic_kvm_info *info) group1_trap = true; } - if (kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_SEIS_MASK) { - kvm_info("GICv3 with locally generated SEI\n"); + if (vgic_v3_broken_seis()) { + kvm_info("GICv3 with broken locally generated SEI\n"); + kvm_vgic_global_state.ich_vtr_el2 &= ~ICH_VTR_SEIS_MASK; group0_trap = true; group1_trap = true; if (ich_vtr_el2 & ICH_VTR_TDS_MASK) diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c index c0181e60cc98..489455309695 100644 --- a/arch/arm64/mm/extable.c +++ b/arch/arm64/mm/extable.c @@ -40,8 +40,8 @@ static bool ex_handler_load_unaligned_zeropad(const struct exception_table_entry *ex, struct pt_regs *regs) { - int reg_data = FIELD_GET(EX_DATA_REG_DATA, ex->type); - int reg_addr = FIELD_GET(EX_DATA_REG_ADDR, ex->type); + int reg_data = FIELD_GET(EX_DATA_REG_DATA, ex->data); + int reg_addr = FIELD_GET(EX_DATA_REG_ADDR, ex->data); unsigned long data, addr, offset; addr = pt_regs_read_reg(regs, reg_addr); diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 870c39537dd0..e7719e8f18de 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -55,6 +55,9 @@ WORKAROUND_1418040 WORKAROUND_1463225 WORKAROUND_1508412 WORKAROUND_1542419 +WORKAROUND_2064142 +WORKAROUND_2038923 +WORKAROUND_1902691 WORKAROUND_TRBE_OVERWRITE_FILL_MODE WORKAROUND_TSB_FLUSH_FAILURE WORKAROUND_TRBE_WRITE_OUT_OF_RANGE diff --git a/arch/mips/include/asm/asm.h b/arch/mips/include/asm/asm.h index 6ffdd4b5e1d0..336ac9b65235 100644 --- a/arch/mips/include/asm/asm.h +++ b/arch/mips/include/asm/asm.h @@ -285,7 +285,7 @@ symbol = value #define PTR_SCALESHIFT 2 -#define PTR .word +#define PTR_WD .word #define PTRSIZE 4 #define PTRLOG 2 #endif @@ -310,7 +310,7 @@ symbol = value #define PTR_SCALESHIFT 3 -#define PTR .dword +#define PTR_WD .dword #define PTRSIZE 8 #define PTRLOG 3 #endif diff --git a/arch/mips/include/asm/ftrace.h b/arch/mips/include/asm/ftrace.h index b463f2aa5a61..db497a8167da 100644 --- a/arch/mips/include/asm/ftrace.h +++ b/arch/mips/include/asm/ftrace.h @@ -32,7 +32,7 @@ do { \ ".previous\n" \ \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR) "\t1b, 3b\n\t" \ + STR(PTR_WD) "\t1b, 3b\n\t" \ ".previous\n" \ \ : [tmp_dst] "=&r" (dst), [tmp_err] "=r" (error)\ @@ -54,7 +54,7 @@ do { \ ".previous\n" \ \ ".section\t__ex_table,\"a\"\n\t"\ - STR(PTR) "\t1b, 3b\n\t" \ + STR(PTR_WD) "\t1b, 3b\n\t" \ ".previous\n" \ \ : [tmp_err] "=r" (error) \ diff --git a/arch/mips/include/asm/r4kcache.h b/arch/mips/include/asm/r4kcache.h index af3788589ee6..431a1c9d53fc 100644 --- a/arch/mips/include/asm/r4kcache.h +++ b/arch/mips/include/asm/r4kcache.h @@ -119,7 +119,7 @@ static inline void flush_scache_line(unsigned long addr) " j 2b \n" \ " .previous \n" \ " .section __ex_table,\"a\" \n" \ - " "STR(PTR)" 1b, 3b \n" \ + " "STR(PTR_WD)" 1b, 3b \n" \ " .previous" \ : "+r" (__err) \ : "i" (op), "r" (addr), "i" (-EFAULT)); \ @@ -142,7 +142,7 @@ static inline void flush_scache_line(unsigned long addr) " j 2b \n" \ " .previous \n" \ " .section __ex_table,\"a\" \n" \ - " "STR(PTR)" 1b, 3b \n" \ + " "STR(PTR_WD)" 1b, 3b \n" \ " .previous" \ : "+r" (__err) \ : "i" (op), "r" (addr), "i" (-EFAULT)); \ diff --git a/arch/mips/include/asm/unaligned-emul.h b/arch/mips/include/asm/unaligned-emul.h index 2022b18944b9..9af0f4d3d288 100644 --- a/arch/mips/include/asm/unaligned-emul.h +++ b/arch/mips/include/asm/unaligned-emul.h @@ -20,8 +20,8 @@ do { \ "j\t3b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ + STR(PTR_WD)"\t1b, 4b\n\t" \ + STR(PTR_WD)"\t2b, 4b\n\t" \ ".previous" \ : "=&r" (value), "=r" (res) \ : "r" (addr), "i" (-EFAULT)); \ @@ -41,8 +41,8 @@ do { \ "j\t3b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ + STR(PTR_WD)"\t1b, 4b\n\t" \ + STR(PTR_WD)"\t2b, 4b\n\t" \ ".previous" \ : "=&r" (value), "=r" (res) \ : "r" (addr), "i" (-EFAULT)); \ @@ -74,10 +74,10 @@ do { \ "j\t10b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 11b\n\t" \ - STR(PTR)"\t2b, 11b\n\t" \ - STR(PTR)"\t3b, 11b\n\t" \ - STR(PTR)"\t4b, 11b\n\t" \ + STR(PTR_WD)"\t1b, 11b\n\t" \ + STR(PTR_WD)"\t2b, 11b\n\t" \ + STR(PTR_WD)"\t3b, 11b\n\t" \ + STR(PTR_WD)"\t4b, 11b\n\t" \ ".previous" \ : "=&r" (value), "=r" (res) \ : "r" (addr), "i" (-EFAULT)); \ @@ -102,8 +102,8 @@ do { \ "j\t3b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ + STR(PTR_WD)"\t1b, 4b\n\t" \ + STR(PTR_WD)"\t2b, 4b\n\t" \ ".previous" \ : "=&r" (value), "=r" (res) \ : "r" (addr), "i" (-EFAULT)); \ @@ -125,8 +125,8 @@ do { \ "j\t3b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ + STR(PTR_WD)"\t1b, 4b\n\t" \ + STR(PTR_WD)"\t2b, 4b\n\t" \ ".previous" \ : "=&r" (value), "=r" (res) \ : "r" (addr), "i" (-EFAULT)); \ @@ -145,8 +145,8 @@ do { \ "j\t3b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ + STR(PTR_WD)"\t1b, 4b\n\t" \ + STR(PTR_WD)"\t2b, 4b\n\t" \ ".previous" \ : "=&r" (value), "=r" (res) \ : "r" (addr), "i" (-EFAULT)); \ @@ -178,10 +178,10 @@ do { \ "j\t10b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 11b\n\t" \ - STR(PTR)"\t2b, 11b\n\t" \ - STR(PTR)"\t3b, 11b\n\t" \ - STR(PTR)"\t4b, 11b\n\t" \ + STR(PTR_WD)"\t1b, 11b\n\t" \ + STR(PTR_WD)"\t2b, 11b\n\t" \ + STR(PTR_WD)"\t3b, 11b\n\t" \ + STR(PTR_WD)"\t4b, 11b\n\t" \ ".previous" \ : "=&r" (value), "=r" (res) \ : "r" (addr), "i" (-EFAULT)); \ @@ -223,14 +223,14 @@ do { \ "j\t10b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 11b\n\t" \ - STR(PTR)"\t2b, 11b\n\t" \ - STR(PTR)"\t3b, 11b\n\t" \ - STR(PTR)"\t4b, 11b\n\t" \ - STR(PTR)"\t5b, 11b\n\t" \ - STR(PTR)"\t6b, 11b\n\t" \ - STR(PTR)"\t7b, 11b\n\t" \ - STR(PTR)"\t8b, 11b\n\t" \ + STR(PTR_WD)"\t1b, 11b\n\t" \ + STR(PTR_WD)"\t2b, 11b\n\t" \ + STR(PTR_WD)"\t3b, 11b\n\t" \ + STR(PTR_WD)"\t4b, 11b\n\t" \ + STR(PTR_WD)"\t5b, 11b\n\t" \ + STR(PTR_WD)"\t6b, 11b\n\t" \ + STR(PTR_WD)"\t7b, 11b\n\t" \ + STR(PTR_WD)"\t8b, 11b\n\t" \ ".previous" \ : "=&r" (value), "=r" (res) \ : "r" (addr), "i" (-EFAULT)); \ @@ -255,8 +255,8 @@ do { \ "j\t3b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ + STR(PTR_WD)"\t1b, 4b\n\t" \ + STR(PTR_WD)"\t2b, 4b\n\t" \ ".previous" \ : "=r" (res) \ : "r" (value), "r" (addr), "i" (-EFAULT));\ @@ -276,8 +276,8 @@ do { \ "j\t3b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ + STR(PTR_WD)"\t1b, 4b\n\t" \ + STR(PTR_WD)"\t2b, 4b\n\t" \ ".previous" \ : "=r" (res) \ : "r" (value), "r" (addr), "i" (-EFAULT)); \ @@ -296,8 +296,8 @@ do { \ "j\t3b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ + STR(PTR_WD)"\t1b, 4b\n\t" \ + STR(PTR_WD)"\t2b, 4b\n\t" \ ".previous" \ : "=r" (res) \ : "r" (value), "r" (addr), "i" (-EFAULT)); \ @@ -325,10 +325,10 @@ do { \ "j\t10b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 11b\n\t" \ - STR(PTR)"\t2b, 11b\n\t" \ - STR(PTR)"\t3b, 11b\n\t" \ - STR(PTR)"\t4b, 11b\n\t" \ + STR(PTR_WD)"\t1b, 11b\n\t" \ + STR(PTR_WD)"\t2b, 11b\n\t" \ + STR(PTR_WD)"\t3b, 11b\n\t" \ + STR(PTR_WD)"\t4b, 11b\n\t" \ ".previous" \ : "=&r" (res) \ : "r" (value), "r" (addr), "i" (-EFAULT) \ @@ -365,14 +365,14 @@ do { \ "j\t10b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 11b\n\t" \ - STR(PTR)"\t2b, 11b\n\t" \ - STR(PTR)"\t3b, 11b\n\t" \ - STR(PTR)"\t4b, 11b\n\t" \ - STR(PTR)"\t5b, 11b\n\t" \ - STR(PTR)"\t6b, 11b\n\t" \ - STR(PTR)"\t7b, 11b\n\t" \ - STR(PTR)"\t8b, 11b\n\t" \ + STR(PTR_WD)"\t1b, 11b\n\t" \ + STR(PTR_WD)"\t2b, 11b\n\t" \ + STR(PTR_WD)"\t3b, 11b\n\t" \ + STR(PTR_WD)"\t4b, 11b\n\t" \ + STR(PTR_WD)"\t5b, 11b\n\t" \ + STR(PTR_WD)"\t6b, 11b\n\t" \ + STR(PTR_WD)"\t7b, 11b\n\t" \ + STR(PTR_WD)"\t8b, 11b\n\t" \ ".previous" \ : "=&r" (res) \ : "r" (value), "r" (addr), "i" (-EFAULT) \ @@ -398,8 +398,8 @@ do { \ "j\t3b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ + STR(PTR_WD)"\t1b, 4b\n\t" \ + STR(PTR_WD)"\t2b, 4b\n\t" \ ".previous" \ : "=&r" (value), "=r" (res) \ : "r" (addr), "i" (-EFAULT)); \ @@ -419,8 +419,8 @@ do { \ "j\t3b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ + STR(PTR_WD)"\t1b, 4b\n\t" \ + STR(PTR_WD)"\t2b, 4b\n\t" \ ".previous" \ : "=&r" (value), "=r" (res) \ : "r" (addr), "i" (-EFAULT)); \ @@ -452,10 +452,10 @@ do { \ "j\t10b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 11b\n\t" \ - STR(PTR)"\t2b, 11b\n\t" \ - STR(PTR)"\t3b, 11b\n\t" \ - STR(PTR)"\t4b, 11b\n\t" \ + STR(PTR_WD)"\t1b, 11b\n\t" \ + STR(PTR_WD)"\t2b, 11b\n\t" \ + STR(PTR_WD)"\t3b, 11b\n\t" \ + STR(PTR_WD)"\t4b, 11b\n\t" \ ".previous" \ : "=&r" (value), "=r" (res) \ : "r" (addr), "i" (-EFAULT)); \ @@ -481,8 +481,8 @@ do { \ "j\t3b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ + STR(PTR_WD)"\t1b, 4b\n\t" \ + STR(PTR_WD)"\t2b, 4b\n\t" \ ".previous" \ : "=&r" (value), "=r" (res) \ : "r" (addr), "i" (-EFAULT)); \ @@ -504,8 +504,8 @@ do { \ "j\t3b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ + STR(PTR_WD)"\t1b, 4b\n\t" \ + STR(PTR_WD)"\t2b, 4b\n\t" \ ".previous" \ : "=&r" (value), "=r" (res) \ : "r" (addr), "i" (-EFAULT)); \ @@ -524,8 +524,8 @@ do { \ "j\t3b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ + STR(PTR_WD)"\t1b, 4b\n\t" \ + STR(PTR_WD)"\t2b, 4b\n\t" \ ".previous" \ : "=&r" (value), "=r" (res) \ : "r" (addr), "i" (-EFAULT)); \ @@ -557,10 +557,10 @@ do { \ "j\t10b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 11b\n\t" \ - STR(PTR)"\t2b, 11b\n\t" \ - STR(PTR)"\t3b, 11b\n\t" \ - STR(PTR)"\t4b, 11b\n\t" \ + STR(PTR_WD)"\t1b, 11b\n\t" \ + STR(PTR_WD)"\t2b, 11b\n\t" \ + STR(PTR_WD)"\t3b, 11b\n\t" \ + STR(PTR_WD)"\t4b, 11b\n\t" \ ".previous" \ : "=&r" (value), "=r" (res) \ : "r" (addr), "i" (-EFAULT)); \ @@ -602,14 +602,14 @@ do { \ "j\t10b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 11b\n\t" \ - STR(PTR)"\t2b, 11b\n\t" \ - STR(PTR)"\t3b, 11b\n\t" \ - STR(PTR)"\t4b, 11b\n\t" \ - STR(PTR)"\t5b, 11b\n\t" \ - STR(PTR)"\t6b, 11b\n\t" \ - STR(PTR)"\t7b, 11b\n\t" \ - STR(PTR)"\t8b, 11b\n\t" \ + STR(PTR_WD)"\t1b, 11b\n\t" \ + STR(PTR_WD)"\t2b, 11b\n\t" \ + STR(PTR_WD)"\t3b, 11b\n\t" \ + STR(PTR_WD)"\t4b, 11b\n\t" \ + STR(PTR_WD)"\t5b, 11b\n\t" \ + STR(PTR_WD)"\t6b, 11b\n\t" \ + STR(PTR_WD)"\t7b, 11b\n\t" \ + STR(PTR_WD)"\t8b, 11b\n\t" \ ".previous" \ : "=&r" (value), "=r" (res) \ : "r" (addr), "i" (-EFAULT)); \ @@ -632,8 +632,8 @@ do { \ "j\t3b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ + STR(PTR_WD)"\t1b, 4b\n\t" \ + STR(PTR_WD)"\t2b, 4b\n\t" \ ".previous" \ : "=r" (res) \ : "r" (value), "r" (addr), "i" (-EFAULT));\ @@ -653,8 +653,8 @@ do { \ "j\t3b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ + STR(PTR_WD)"\t1b, 4b\n\t" \ + STR(PTR_WD)"\t2b, 4b\n\t" \ ".previous" \ : "=r" (res) \ : "r" (value), "r" (addr), "i" (-EFAULT)); \ @@ -673,8 +673,8 @@ do { \ "j\t3b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ + STR(PTR_WD)"\t1b, 4b\n\t" \ + STR(PTR_WD)"\t2b, 4b\n\t" \ ".previous" \ : "=r" (res) \ : "r" (value), "r" (addr), "i" (-EFAULT)); \ @@ -703,10 +703,10 @@ do { \ "j\t10b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 11b\n\t" \ - STR(PTR)"\t2b, 11b\n\t" \ - STR(PTR)"\t3b, 11b\n\t" \ - STR(PTR)"\t4b, 11b\n\t" \ + STR(PTR_WD)"\t1b, 11b\n\t" \ + STR(PTR_WD)"\t2b, 11b\n\t" \ + STR(PTR_WD)"\t3b, 11b\n\t" \ + STR(PTR_WD)"\t4b, 11b\n\t" \ ".previous" \ : "=&r" (res) \ : "r" (value), "r" (addr), "i" (-EFAULT) \ @@ -743,14 +743,14 @@ do { \ "j\t10b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 11b\n\t" \ - STR(PTR)"\t2b, 11b\n\t" \ - STR(PTR)"\t3b, 11b\n\t" \ - STR(PTR)"\t4b, 11b\n\t" \ - STR(PTR)"\t5b, 11b\n\t" \ - STR(PTR)"\t6b, 11b\n\t" \ - STR(PTR)"\t7b, 11b\n\t" \ - STR(PTR)"\t8b, 11b\n\t" \ + STR(PTR_WD)"\t1b, 11b\n\t" \ + STR(PTR_WD)"\t2b, 11b\n\t" \ + STR(PTR_WD)"\t3b, 11b\n\t" \ + STR(PTR_WD)"\t4b, 11b\n\t" \ + STR(PTR_WD)"\t5b, 11b\n\t" \ + STR(PTR_WD)"\t6b, 11b\n\t" \ + STR(PTR_WD)"\t7b, 11b\n\t" \ + STR(PTR_WD)"\t8b, 11b\n\t" \ ".previous" \ : "=&r" (res) \ : "r" (value), "r" (addr), "i" (-EFAULT) \ diff --git a/arch/mips/kernel/mips-r2-to-r6-emul.c b/arch/mips/kernel/mips-r2-to-r6-emul.c index a39ec755e4c2..750fe569862b 100644 --- a/arch/mips/kernel/mips-r2-to-r6-emul.c +++ b/arch/mips/kernel/mips-r2-to-r6-emul.c @@ -1258,10 +1258,10 @@ fpu_emul: " j 10b\n" " .previous\n" " .section __ex_table,\"a\"\n" - STR(PTR) " 1b,8b\n" - STR(PTR) " 2b,8b\n" - STR(PTR) " 3b,8b\n" - STR(PTR) " 4b,8b\n" + STR(PTR_WD) " 1b,8b\n" + STR(PTR_WD) " 2b,8b\n" + STR(PTR_WD) " 3b,8b\n" + STR(PTR_WD) " 4b,8b\n" " .previous\n" " .set pop\n" : "+&r"(rt), "=&r"(rs), @@ -1333,10 +1333,10 @@ fpu_emul: " j 10b\n" " .previous\n" " .section __ex_table,\"a\"\n" - STR(PTR) " 1b,8b\n" - STR(PTR) " 2b,8b\n" - STR(PTR) " 3b,8b\n" - STR(PTR) " 4b,8b\n" + STR(PTR_WD) " 1b,8b\n" + STR(PTR_WD) " 2b,8b\n" + STR(PTR_WD) " 3b,8b\n" + STR(PTR_WD) " 4b,8b\n" " .previous\n" " .set pop\n" : "+&r"(rt), "=&r"(rs), @@ -1404,10 +1404,10 @@ fpu_emul: " j 9b\n" " .previous\n" " .section __ex_table,\"a\"\n" - STR(PTR) " 1b,8b\n" - STR(PTR) " 2b,8b\n" - STR(PTR) " 3b,8b\n" - STR(PTR) " 4b,8b\n" + STR(PTR_WD) " 1b,8b\n" + STR(PTR_WD) " 2b,8b\n" + STR(PTR_WD) " 3b,8b\n" + STR(PTR_WD) " 4b,8b\n" " .previous\n" " .set pop\n" : "+&r"(rt), "=&r"(rs), @@ -1474,10 +1474,10 @@ fpu_emul: " j 9b\n" " .previous\n" " .section __ex_table,\"a\"\n" - STR(PTR) " 1b,8b\n" - STR(PTR) " 2b,8b\n" - STR(PTR) " 3b,8b\n" - STR(PTR) " 4b,8b\n" + STR(PTR_WD) " 1b,8b\n" + STR(PTR_WD) " 2b,8b\n" + STR(PTR_WD) " 3b,8b\n" + STR(PTR_WD) " 4b,8b\n" " .previous\n" " .set pop\n" : "+&r"(rt), "=&r"(rs), @@ -1589,14 +1589,14 @@ fpu_emul: " j 9b\n" " .previous\n" " .section __ex_table,\"a\"\n" - STR(PTR) " 1b,8b\n" - STR(PTR) " 2b,8b\n" - STR(PTR) " 3b,8b\n" - STR(PTR) " 4b,8b\n" - STR(PTR) " 5b,8b\n" - STR(PTR) " 6b,8b\n" - STR(PTR) " 7b,8b\n" - STR(PTR) " 0b,8b\n" + STR(PTR_WD) " 1b,8b\n" + STR(PTR_WD) " 2b,8b\n" + STR(PTR_WD) " 3b,8b\n" + STR(PTR_WD) " 4b,8b\n" + STR(PTR_WD) " 5b,8b\n" + STR(PTR_WD) " 6b,8b\n" + STR(PTR_WD) " 7b,8b\n" + STR(PTR_WD) " 0b,8b\n" " .previous\n" " .set pop\n" : "+&r"(rt), "=&r"(rs), @@ -1708,14 +1708,14 @@ fpu_emul: " j 9b\n" " .previous\n" " .section __ex_table,\"a\"\n" - STR(PTR) " 1b,8b\n" - STR(PTR) " 2b,8b\n" - STR(PTR) " 3b,8b\n" - STR(PTR) " 4b,8b\n" - STR(PTR) " 5b,8b\n" - STR(PTR) " 6b,8b\n" - STR(PTR) " 7b,8b\n" - STR(PTR) " 0b,8b\n" + STR(PTR_WD) " 1b,8b\n" + STR(PTR_WD) " 2b,8b\n" + STR(PTR_WD) " 3b,8b\n" + STR(PTR_WD) " 4b,8b\n" + STR(PTR_WD) " 5b,8b\n" + STR(PTR_WD) " 6b,8b\n" + STR(PTR_WD) " 7b,8b\n" + STR(PTR_WD) " 0b,8b\n" " .previous\n" " .set pop\n" : "+&r"(rt), "=&r"(rs), @@ -1827,14 +1827,14 @@ fpu_emul: " j 9b\n" " .previous\n" " .section __ex_table,\"a\"\n" - STR(PTR) " 1b,8b\n" - STR(PTR) " 2b,8b\n" - STR(PTR) " 3b,8b\n" - STR(PTR) " 4b,8b\n" - STR(PTR) " 5b,8b\n" - STR(PTR) " 6b,8b\n" - STR(PTR) " 7b,8b\n" - STR(PTR) " 0b,8b\n" + STR(PTR_WD) " 1b,8b\n" + STR(PTR_WD) " 2b,8b\n" + STR(PTR_WD) " 3b,8b\n" + STR(PTR_WD) " 4b,8b\n" + STR(PTR_WD) " 5b,8b\n" + STR(PTR_WD) " 6b,8b\n" + STR(PTR_WD) " 7b,8b\n" + STR(PTR_WD) " 0b,8b\n" " .previous\n" " .set pop\n" : "+&r"(rt), "=&r"(rs), @@ -1945,14 +1945,14 @@ fpu_emul: " j 9b\n" " .previous\n" " .section __ex_table,\"a\"\n" - STR(PTR) " 1b,8b\n" - STR(PTR) " 2b,8b\n" - STR(PTR) " 3b,8b\n" - STR(PTR) " 4b,8b\n" - STR(PTR) " 5b,8b\n" - STR(PTR) " 6b,8b\n" - STR(PTR) " 7b,8b\n" - STR(PTR) " 0b,8b\n" + STR(PTR_WD) " 1b,8b\n" + STR(PTR_WD) " 2b,8b\n" + STR(PTR_WD) " 3b,8b\n" + STR(PTR_WD) " 4b,8b\n" + STR(PTR_WD) " 5b,8b\n" + STR(PTR_WD) " 6b,8b\n" + STR(PTR_WD) " 7b,8b\n" + STR(PTR_WD) " 0b,8b\n" " .previous\n" " .set pop\n" : "+&r"(rt), "=&r"(rs), @@ -2007,7 +2007,7 @@ fpu_emul: "j 2b\n" ".previous\n" ".section __ex_table,\"a\"\n" - STR(PTR) " 1b,3b\n" + STR(PTR_WD) " 1b,3b\n" ".previous\n" : "=&r"(res), "+&r"(err) : "r"(vaddr), "i"(SIGSEGV) @@ -2065,7 +2065,7 @@ fpu_emul: "j 2b\n" ".previous\n" ".section __ex_table,\"a\"\n" - STR(PTR) " 1b,3b\n" + STR(PTR_WD) " 1b,3b\n" ".previous\n" : "+&r"(res), "+&r"(err) : "r"(vaddr), "i"(SIGSEGV)); @@ -2126,7 +2126,7 @@ fpu_emul: "j 2b\n" ".previous\n" ".section __ex_table,\"a\"\n" - STR(PTR) " 1b,3b\n" + STR(PTR_WD) " 1b,3b\n" ".previous\n" : "=&r"(res), "+&r"(err) : "r"(vaddr), "i"(SIGSEGV) @@ -2189,7 +2189,7 @@ fpu_emul: "j 2b\n" ".previous\n" ".section __ex_table,\"a\"\n" - STR(PTR) " 1b,3b\n" + STR(PTR_WD) " 1b,3b\n" ".previous\n" : "+&r"(res), "+&r"(err) : "r"(vaddr), "i"(SIGSEGV)); diff --git a/arch/mips/kernel/r2300_fpu.S b/arch/mips/kernel/r2300_fpu.S index cbf6db98cfb3..2748c55820c2 100644 --- a/arch/mips/kernel/r2300_fpu.S +++ b/arch/mips/kernel/r2300_fpu.S @@ -23,14 +23,14 @@ #define EX(a,b) \ 9: a,##b; \ .section __ex_table,"a"; \ - PTR 9b,fault; \ + PTR_WD 9b,fault; \ .previous #define EX2(a,b) \ 9: a,##b; \ .section __ex_table,"a"; \ - PTR 9b,fault; \ - PTR 9b+4,fault; \ + PTR_WD 9b,fault; \ + PTR_WD 9b+4,fault; \ .previous .set mips1 diff --git a/arch/mips/kernel/r4k_fpu.S b/arch/mips/kernel/r4k_fpu.S index b91e91106475..2e687c60bc4f 100644 --- a/arch/mips/kernel/r4k_fpu.S +++ b/arch/mips/kernel/r4k_fpu.S @@ -31,7 +31,7 @@ .ex\@: \insn \reg, \src .set pop .section __ex_table,"a" - PTR .ex\@, fault + PTR_WD .ex\@, fault .previous .endm diff --git a/arch/mips/kernel/relocate_kernel.S b/arch/mips/kernel/relocate_kernel.S index f3c908abdbb8..cfde14b48fd8 100644 --- a/arch/mips/kernel/relocate_kernel.S +++ b/arch/mips/kernel/relocate_kernel.S @@ -147,10 +147,10 @@ LEAF(kexec_smp_wait) kexec_args: EXPORT(kexec_args) -arg0: PTR 0x0 -arg1: PTR 0x0 -arg2: PTR 0x0 -arg3: PTR 0x0 +arg0: PTR_WD 0x0 +arg1: PTR_WD 0x0 +arg2: PTR_WD 0x0 +arg3: PTR_WD 0x0 .size kexec_args,PTRSIZE*4 #ifdef CONFIG_SMP @@ -161,10 +161,10 @@ arg3: PTR 0x0 */ secondary_kexec_args: EXPORT(secondary_kexec_args) -s_arg0: PTR 0x0 -s_arg1: PTR 0x0 -s_arg2: PTR 0x0 -s_arg3: PTR 0x0 +s_arg0: PTR_WD 0x0 +s_arg1: PTR_WD 0x0 +s_arg2: PTR_WD 0x0 +s_arg3: PTR_WD 0x0 .size secondary_kexec_args,PTRSIZE*4 kexec_flag: LONG 0x1 @@ -173,17 +173,17 @@ kexec_flag: kexec_start_address: EXPORT(kexec_start_address) - PTR 0x0 + PTR_WD 0x0 .size kexec_start_address, PTRSIZE kexec_indirection_page: EXPORT(kexec_indirection_page) - PTR 0 + PTR_WD 0 .size kexec_indirection_page, PTRSIZE relocate_new_kernel_end: relocate_new_kernel_size: EXPORT(relocate_new_kernel_size) - PTR relocate_new_kernel_end - relocate_new_kernel + PTR_WD relocate_new_kernel_end - relocate_new_kernel .size relocate_new_kernel_size, PTRSIZE diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index b1b2e106f711..9bfce5f75f60 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -72,10 +72,10 @@ loads_done: .set pop .section __ex_table,"a" - PTR load_a4, bad_stack_a4 - PTR load_a5, bad_stack_a5 - PTR load_a6, bad_stack_a6 - PTR load_a7, bad_stack_a7 + PTR_WD load_a4, bad_stack_a4 + PTR_WD load_a5, bad_stack_a5 + PTR_WD load_a6, bad_stack_a6 + PTR_WD load_a7, bad_stack_a7 .previous lw t0, TI_FLAGS($28) # syscall tracing enabled? @@ -216,7 +216,7 @@ einval: li v0, -ENOSYS #endif /* CONFIG_MIPS_MT_FPAFF */ #define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, native) -#define __SYSCALL(nr, entry) PTR entry +#define __SYSCALL(nr, entry) PTR_WD entry .align 2 .type sys_call_table, @object EXPORT(sys_call_table) diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index f650c55a17dc..97456b2ca7dc 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -101,7 +101,7 @@ not_n32_scall: END(handle_sysn32) -#define __SYSCALL(nr, entry) PTR entry +#define __SYSCALL(nr, entry) PTR_WD entry .type sysn32_call_table, @object EXPORT(sysn32_call_table) #include <asm/syscall_table_n32.h> diff --git a/arch/mips/kernel/scall64-n64.S b/arch/mips/kernel/scall64-n64.S index 5d7bfc65e4d0..5f6ed4b4c399 100644 --- a/arch/mips/kernel/scall64-n64.S +++ b/arch/mips/kernel/scall64-n64.S @@ -109,7 +109,7 @@ illegal_syscall: j n64_syscall_exit END(handle_sys64) -#define __SYSCALL(nr, entry) PTR entry +#define __SYSCALL(nr, entry) PTR_WD entry .align 3 .type sys_call_table, @object EXPORT(sys_call_table) diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index cedc8bd88804..d3c2616cba22 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -73,10 +73,10 @@ load_a7: lw a7, 28(t0) # argument #8 from usp loads_done: .section __ex_table,"a" - PTR load_a4, bad_stack_a4 - PTR load_a5, bad_stack_a5 - PTR load_a6, bad_stack_a6 - PTR load_a7, bad_stack_a7 + PTR_WD load_a4, bad_stack_a4 + PTR_WD load_a5, bad_stack_a5 + PTR_WD load_a6, bad_stack_a6 + PTR_WD load_a7, bad_stack_a7 .previous li t1, _TIF_WORK_SYSCALL_ENTRY @@ -214,7 +214,7 @@ einval: li v0, -ENOSYS END(sys32_syscall) #define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, compat) -#define __SYSCALL(nr, entry) PTR entry +#define __SYSCALL(nr, entry) PTR_WD entry .align 3 .type sys32_call_table,@object EXPORT(sys32_call_table) diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c index 5512cd586e6e..ae93a607ddf7 100644 --- a/arch/mips/kernel/syscall.c +++ b/arch/mips/kernel/syscall.c @@ -122,8 +122,8 @@ static inline int mips_atomic_set(unsigned long addr, unsigned long new) " j 3b \n" " .previous \n" " .section __ex_table,\"a\" \n" - " "STR(PTR)" 1b, 4b \n" - " "STR(PTR)" 2b, 4b \n" + " "STR(PTR_WD)" 1b, 4b \n" + " "STR(PTR_WD)" 2b, 4b \n" " .previous \n" " .set pop \n" : [old] "=&r" (old), @@ -152,8 +152,8 @@ static inline int mips_atomic_set(unsigned long addr, unsigned long new) " j 3b \n" " .previous \n" " .section __ex_table,\"a\" \n" - " "STR(PTR)" 1b, 5b \n" - " "STR(PTR)" 2b, 5b \n" + " "STR(PTR_WD)" 1b, 5b \n" + " "STR(PTR_WD)" 2b, 5b \n" " .previous \n" " .set pop \n" : [old] "=&r" (old), diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S index a46db0807195..7767137c3e49 100644 --- a/arch/mips/lib/csum_partial.S +++ b/arch/mips/lib/csum_partial.S @@ -347,7 +347,7 @@ EXPORT_SYMBOL(csum_partial) .if \mode == LEGACY_MODE; \ 9: insn reg, addr; \ .section __ex_table,"a"; \ - PTR 9b, .L_exc; \ + PTR_WD 9b, .L_exc; \ .previous; \ /* This is enabled in EVA mode */ \ .else; \ @@ -356,7 +356,7 @@ EXPORT_SYMBOL(csum_partial) ((\to == USEROP) && (type == ST_INSN)); \ 9: __BUILD_EVA_INSN(insn##e, reg, addr); \ .section __ex_table,"a"; \ - PTR 9b, .L_exc; \ + PTR_WD 9b, .L_exc; \ .previous; \ .else; \ /* EVA without exception */ \ diff --git a/arch/mips/lib/memcpy.S b/arch/mips/lib/memcpy.S index 277c32296636..18a43f2e29c8 100644 --- a/arch/mips/lib/memcpy.S +++ b/arch/mips/lib/memcpy.S @@ -116,7 +116,7 @@ .if \mode == LEGACY_MODE; \ 9: insn reg, addr; \ .section __ex_table,"a"; \ - PTR 9b, handler; \ + PTR_WD 9b, handler; \ .previous; \ /* This is assembled in EVA mode */ \ .else; \ @@ -125,7 +125,7 @@ ((\to == USEROP) && (type == ST_INSN)); \ 9: __BUILD_EVA_INSN(insn##e, reg, addr); \ .section __ex_table,"a"; \ - PTR 9b, handler; \ + PTR_WD 9b, handler; \ .previous; \ .else; \ /* \ diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S index b0baa3c79fad..0b342bae9a98 100644 --- a/arch/mips/lib/memset.S +++ b/arch/mips/lib/memset.S @@ -52,7 +52,7 @@ 9: ___BUILD_EVA_INSN(insn, reg, addr); \ .endif; \ .section __ex_table,"a"; \ - PTR 9b, handler; \ + PTR_WD 9b, handler; \ .previous .macro f_fill64 dst, offset, val, fixup, mode diff --git a/arch/mips/lib/strncpy_user.S b/arch/mips/lib/strncpy_user.S index 556acf684d7b..13aaa9927ad1 100644 --- a/arch/mips/lib/strncpy_user.S +++ b/arch/mips/lib/strncpy_user.S @@ -15,7 +15,7 @@ #define EX(insn,reg,addr,handler) \ 9: insn reg, addr; \ .section __ex_table,"a"; \ - PTR 9b, handler; \ + PTR_WD 9b, handler; \ .previous /* @@ -59,7 +59,7 @@ LEAF(__strncpy_from_user_asm) jr ra .section __ex_table,"a" - PTR 1b, .Lfault + PTR_WD 1b, .Lfault .previous EXPORT_SYMBOL(__strncpy_from_user_asm) diff --git a/arch/mips/lib/strnlen_user.S b/arch/mips/lib/strnlen_user.S index 92b63f20ec05..6de31b616f9c 100644 --- a/arch/mips/lib/strnlen_user.S +++ b/arch/mips/lib/strnlen_user.S @@ -14,7 +14,7 @@ #define EX(insn,reg,addr,handler) \ 9: insn reg, addr; \ .section __ex_table,"a"; \ - PTR 9b, handler; \ + PTR_WD 9b, handler; \ .previous /* diff --git a/arch/powerpc/include/asm/book3s/32/mmu-hash.h b/arch/powerpc/include/asm/book3s/32/mmu-hash.h index 7be27862329f..78c6a5fde1d6 100644 --- a/arch/powerpc/include/asm/book3s/32/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/32/mmu-hash.h @@ -223,6 +223,8 @@ static __always_inline void update_user_segments(u32 val) update_user_segment(15, val); } +int __init find_free_bat(void); +unsigned int bat_block_size(unsigned long base, unsigned long top); #endif /* !__ASSEMBLY__ */ /* We happily ignore the smaller BATs on 601, we don't actually use diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 609c80f67194..f8b94f78403f 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -178,6 +178,7 @@ static inline bool pte_user(pte_t pte) #ifndef __ASSEMBLY__ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); +void unmap_kernel_page(unsigned long va); #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 33e073d6b0c4..875730d5af40 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -1082,6 +1082,8 @@ static inline int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t p return hash__map_kernel_page(ea, pa, prot); } +void unmap_kernel_page(unsigned long va); + static inline int __meminit vmemmap_create_mapping(unsigned long start, unsigned long page_size, unsigned long phys) diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h index 947b5b9c4424..a832aeafe560 100644 --- a/arch/powerpc/include/asm/fixmap.h +++ b/arch/powerpc/include/asm/fixmap.h @@ -111,8 +111,10 @@ static inline void __set_fixmap(enum fixed_addresses idx, BUILD_BUG_ON(idx >= __end_of_fixed_addresses); else if (WARN_ON(idx >= __end_of_fixed_addresses)) return; - - map_kernel_page(__fix_to_virt(idx), phys, flags); + if (pgprot_val(flags)) + map_kernel_page(__fix_to_virt(idx), phys, flags); + else + unmap_kernel_page(__fix_to_virt(idx)); } #define __early_set_fixmap __set_fixmap diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index a58fb4aa6c81..674e5aaafcbd 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -473,7 +473,7 @@ static inline bool arch_irq_disabled_regs(struct pt_regs *regs) return !(regs->msr & MSR_EE); } -static inline bool should_hard_irq_enable(void) +static __always_inline bool should_hard_irq_enable(void) { return false; } diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index fe07558173ef..827038a33064 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -39,7 +39,6 @@ struct kvm_nested_guest { pgd_t *shadow_pgtable; /* our page table for this guest */ u64 l1_gr_to_hr; /* L1's addr of part'n-scoped table */ u64 process_table; /* process table entry for this guest */ - u64 hfscr; /* HFSCR that the L1 requested for this nested guest */ long refcnt; /* number of pointers to this struct */ struct mutex tlb_lock; /* serialize page faults and tlbies */ struct kvm_nested_guest *next; diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index a770443cd6e0..d9bf60bf0816 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -818,6 +818,7 @@ struct kvm_vcpu_arch { /* For support of nested guests */ struct kvm_nested_guest *nested; + u64 nested_hfscr; /* HFSCR that the L1 requested for the nested guest */ u32 nested_vcpu_id; gpa_t nested_io_gpr; #endif diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index b67742e2a9b2..d959c2a73fbf 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -64,6 +64,7 @@ extern int icache_44x_need_flush; #ifndef __ASSEMBLY__ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); +void unmap_kernel_page(unsigned long va); #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index a3313e853e5e..2816d158280a 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -308,6 +308,7 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma, #define __swp_entry_to_pte(x) __pte((x).val) int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot); +void unmap_kernel_page(unsigned long va); extern int __meminit vmemmap_create_mapping(unsigned long start, unsigned long page_size, unsigned long phys); diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index efad07081cc0..9675303b724e 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -500,6 +500,7 @@ #define PPC_RAW_LDX(r, base, b) (0x7c00002a | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b)) #define PPC_RAW_LHZ(r, base, i) (0xa0000000 | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) #define PPC_RAW_LHBRX(r, base, b) (0x7c00062c | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b)) +#define PPC_RAW_LWBRX(r, base, b) (0x7c00042c | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b)) #define PPC_RAW_LDBRX(r, base, b) (0x7c000428 | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b)) #define PPC_RAW_STWCX(s, a, b) (0x7c00012d | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_RAW_CMPWI(a, i) (0x2c000000 | ___PPC_RA(a) | IMM_L(i)) diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h index 52d05b465e3e..25fc8ad9a27a 100644 --- a/arch/powerpc/include/asm/syscall.h +++ b/arch/powerpc/include/asm/syscall.h @@ -90,7 +90,7 @@ static inline void syscall_get_arguments(struct task_struct *task, unsigned long val, mask = -1UL; unsigned int n = 6; - if (is_32bit_task()) + if (is_tsk_32bit_task(task)) mask = 0xffffffff; while (n--) { @@ -105,7 +105,7 @@ static inline void syscall_get_arguments(struct task_struct *task, static inline int syscall_get_arch(struct task_struct *task) { - if (is_32bit_task()) + if (is_tsk_32bit_task(task)) return AUDIT_ARCH_PPC; else if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN)) return AUDIT_ARCH_PPC64LE; diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 5725029aaa29..d6e649b3c70b 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -168,8 +168,10 @@ static inline bool test_thread_local_flags(unsigned int flags) #ifdef CONFIG_COMPAT #define is_32bit_task() (test_thread_flag(TIF_32BIT)) +#define is_tsk_32bit_task(tsk) (test_tsk_thread_flag(tsk, TIF_32BIT)) #else #define is_32bit_task() (IS_ENABLED(CONFIG_PPC32)) +#define is_tsk_32bit_task(tsk) (IS_ENABLED(CONFIG_PPC32)) #endif #if defined(CONFIG_PPC64) diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S index 92088f848266..7bab2d7de372 100644 --- a/arch/powerpc/kernel/interrupt_64.S +++ b/arch/powerpc/kernel/interrupt_64.S @@ -30,6 +30,7 @@ COMPAT_SYS_CALL_TABLE: .ifc \srr,srr mfspr r11,SPRN_SRR0 ld r12,_NIP(r1) + clrrdi r11,r11,2 clrrdi r12,r12,2 100: tdne r11,r12 EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) @@ -40,6 +41,7 @@ COMPAT_SYS_CALL_TABLE: .else mfspr r11,SPRN_HSRR0 ld r12,_NIP(r1) + clrrdi r11,r11,2 clrrdi r12,r12,2 100: tdne r11,r12 EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 62361cc7281c..cd0b8b71ecdd 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -649,8 +649,9 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt) __this_cpu_inc(irq_stat.timer_irqs_event); } else { now = *next_tb - now; - if (now <= decrementer_max) - set_dec_or_work(now); + if (now > decrementer_max) + now = decrementer_max; + set_dec_or_work(now); __this_cpu_inc(irq_stat.timer_irqs_others); } diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index d1817cd9a691..84c89f08ae9a 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1816,7 +1816,6 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu, static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu) { - struct kvm_nested_guest *nested = vcpu->arch.nested; int r; int srcu_idx; @@ -1922,7 +1921,7 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu) * it into a HEAI. */ if (!(vcpu->arch.hfscr_permitted & (1UL << cause)) || - (nested->hfscr & (1UL << cause))) { + (vcpu->arch.nested_hfscr & (1UL << cause))) { vcpu->arch.trap = BOOK3S_INTERRUPT_H_EMUL_ASSIST; /* diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index 8f8daaeeb3b7..9d373f8963ee 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -363,7 +363,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) /* set L1 state to L2 state */ vcpu->arch.nested = l2; vcpu->arch.nested_vcpu_id = l2_hv.vcpu_token; - l2->hfscr = l2_hv.hfscr; + vcpu->arch.nested_hfscr = l2_hv.hfscr; vcpu->arch.regs = l2_regs; /* Guest must always run with ME enabled, HV disabled. */ diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 94045b265b6b..203735caf691 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -76,7 +76,7 @@ unsigned long p_block_mapped(phys_addr_t pa) return 0; } -static int __init find_free_bat(void) +int __init find_free_bat(void) { int b; int n = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4; @@ -100,7 +100,7 @@ static int __init find_free_bat(void) * - block size has to be a power of two. This is calculated by finding the * highest bit set to 1. */ -static unsigned int block_size(unsigned long base, unsigned long top) +unsigned int bat_block_size(unsigned long base, unsigned long top) { unsigned int max_size = SZ_256M; unsigned int base_shift = (ffs(base) - 1) & 31; @@ -145,7 +145,7 @@ static unsigned long __init __mmu_mapin_ram(unsigned long base, unsigned long to int idx; while ((idx = find_free_bat()) != -1 && base != top) { - unsigned int size = block_size(base, top); + unsigned int size = bat_block_size(base, top); if (size < 128 << 10) break; @@ -201,12 +201,12 @@ void mmu_mark_initmem_nx(void) unsigned long size; for (i = 0; i < nb - 1 && base < top;) { - size = block_size(base, top); + size = bat_block_size(base, top); setibat(i++, PAGE_OFFSET + base, base, size, PAGE_KERNEL_TEXT); base += size; } if (base < top) { - size = block_size(base, top); + size = bat_block_size(base, top); if ((top - base) > size) { size <<= 1; if (strict_kernel_rwx_enabled() && base + size > border) diff --git a/arch/powerpc/mm/kasan/book3s_32.c b/arch/powerpc/mm/kasan/book3s_32.c index 35b287b0a8da..450a67ef0bbe 100644 --- a/arch/powerpc/mm/kasan/book3s_32.c +++ b/arch/powerpc/mm/kasan/book3s_32.c @@ -10,48 +10,51 @@ int __init kasan_init_region(void *start, size_t size) { unsigned long k_start = (unsigned long)kasan_mem_to_shadow(start); unsigned long k_end = (unsigned long)kasan_mem_to_shadow(start + size); - unsigned long k_cur = k_start; - int k_size = k_end - k_start; - int k_size_base = 1 << (ffs(k_size) - 1); + unsigned long k_nobat = k_start; + unsigned long k_cur; + phys_addr_t phys; int ret; - void *block; - block = memblock_alloc(k_size, k_size_base); - - if (block && k_size_base >= SZ_128K && k_start == ALIGN(k_start, k_size_base)) { - int shift = ffs(k_size - k_size_base); - int k_size_more = shift ? 1 << (shift - 1) : 0; - - setbat(-1, k_start, __pa(block), k_size_base, PAGE_KERNEL); - if (k_size_more >= SZ_128K) - setbat(-1, k_start + k_size_base, __pa(block) + k_size_base, - k_size_more, PAGE_KERNEL); - if (v_block_mapped(k_start)) - k_cur = k_start + k_size_base; - if (v_block_mapped(k_start + k_size_base)) - k_cur = k_start + k_size_base + k_size_more; - - update_bats(); + while (k_nobat < k_end) { + unsigned int k_size = bat_block_size(k_nobat, k_end); + int idx = find_free_bat(); + + if (idx == -1) + break; + if (k_size < SZ_128K) + break; + phys = memblock_phys_alloc_range(k_size, k_size, 0, + MEMBLOCK_ALLOC_ANYWHERE); + if (!phys) + break; + + setbat(idx, k_nobat, phys, k_size, PAGE_KERNEL); + k_nobat += k_size; } + if (k_nobat != k_start) + update_bats(); - if (!block) - block = memblock_alloc(k_size, PAGE_SIZE); - if (!block) - return -ENOMEM; + if (k_nobat < k_end) { + phys = memblock_phys_alloc_range(k_end - k_nobat, PAGE_SIZE, 0, + MEMBLOCK_ALLOC_ANYWHERE); + if (!phys) + return -ENOMEM; + } ret = kasan_init_shadow_page_tables(k_start, k_end); if (ret) return ret; - kasan_update_early_region(k_start, k_cur, __pte(0)); + kasan_update_early_region(k_start, k_nobat, __pte(0)); - for (; k_cur < k_end; k_cur += PAGE_SIZE) { + for (k_cur = k_nobat; k_cur < k_end; k_cur += PAGE_SIZE) { pmd_t *pmd = pmd_off_k(k_cur); - void *va = block + k_cur - k_start; - pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL); + pte_t pte = pfn_pte(PHYS_PFN(phys + k_cur - k_nobat), PAGE_KERNEL); __set_pte_at(&init_mm, k_cur, pte_offset_kernel(pmd, k_cur), pte, 0); } flush_tlb_kernel_range(k_start, k_end); + memset(kasan_mem_to_shadow(start), 0, k_end - k_start); + return 0; } diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index abb3198bd277..6ec5a7dd7913 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -206,6 +206,15 @@ void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, __set_pte_at(mm, addr, ptep, pte, 0); } +void unmap_kernel_page(unsigned long va) +{ + pmd_t *pmdp = pmd_off_k(va); + pte_t *ptep = pte_offset_kernel(pmdp, va); + + pte_clear(&init_mm, va, ptep); + flush_tlb_kernel_range(va, va + PAGE_SIZE); +} + /* * This is called when relaxing access to a PTE. It's also called in the page * fault path when we don't hit any of the major fault cases, ie, a minor diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index d6ffdd0f2309..56dd1f4e3e44 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -23,15 +23,15 @@ static void bpf_jit_fill_ill_insns(void *area, unsigned int size) memset32(area, BREAKPOINT_INSTRUCTION, size / 4); } -/* Fix the branch target addresses for subprog calls */ -static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image, - struct codegen_context *ctx, u32 *addrs) +/* Fix updated addresses (for subprog calls, ldimm64, et al) during extra pass */ +static int bpf_jit_fixup_addresses(struct bpf_prog *fp, u32 *image, + struct codegen_context *ctx, u32 *addrs) { const struct bpf_insn *insn = fp->insnsi; bool func_addr_fixed; u64 func_addr; u32 tmp_idx; - int i, ret; + int i, j, ret; for (i = 0; i < fp->len; i++) { /* @@ -66,6 +66,23 @@ static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image, * of the JITed sequence remains unchanged. */ ctx->idx = tmp_idx; + } else if (insn[i].code == (BPF_LD | BPF_IMM | BPF_DW)) { + tmp_idx = ctx->idx; + ctx->idx = addrs[i] / 4; +#ifdef CONFIG_PPC32 + PPC_LI32(ctx->b2p[insn[i].dst_reg] - 1, (u32)insn[i + 1].imm); + PPC_LI32(ctx->b2p[insn[i].dst_reg], (u32)insn[i].imm); + for (j = ctx->idx - addrs[i] / 4; j < 4; j++) + EMIT(PPC_RAW_NOP()); +#else + func_addr = ((u64)(u32)insn[i].imm) | (((u64)(u32)insn[i + 1].imm) << 32); + PPC_LI64(b2p[insn[i].dst_reg], func_addr); + /* overwrite rest with nops */ + for (j = ctx->idx - addrs[i] / 4; j < 5; j++) + EMIT(PPC_RAW_NOP()); +#endif + ctx->idx = tmp_idx; + i++; } } @@ -200,13 +217,13 @@ skip_init_ctx: /* * Do not touch the prologue and epilogue as they will remain * unchanged. Only fix the branch target address for subprog - * calls in the body. + * calls in the body, and ldimm64 instructions. * * This does not change the offsets and lengths of the subprog * call instruction sequences and hence, the size of the JITed * image as well. */ - bpf_jit_fixup_subprog_calls(fp, code_base, &cgctx, addrs); + bpf_jit_fixup_addresses(fp, code_base, &cgctx, addrs); /* There is no need to perform the usual passes. */ goto skip_codegen_passes; diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index faaebd446cad..cf8dd8aea386 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -191,6 +191,9 @@ void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 fun if (image && rel < 0x2000000 && rel >= -0x2000000) { PPC_BL_ABS(func); + EMIT(PPC_RAW_NOP()); + EMIT(PPC_RAW_NOP()); + EMIT(PPC_RAW_NOP()); } else { /* Load function address into r0 */ EMIT(PPC_RAW_LIS(_R0, IMM_H(func))); @@ -290,6 +293,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * bool func_addr_fixed; u64 func_addr; u32 true_cond; + u32 tmp_idx; + int j; /* * addrs[] maps a BPF bytecode address into a real offset from @@ -905,8 +910,12 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * * 16 byte instruction that uses two 'struct bpf_insn' */ case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */ + tmp_idx = ctx->idx; PPC_LI32(dst_reg_h, (u32)insn[i + 1].imm); PPC_LI32(dst_reg, (u32)insn[i].imm); + /* padding to allow full 4 instructions for later patching */ + for (j = ctx->idx - tmp_idx; j < 4; j++) + EMIT(PPC_RAW_NOP()); /* Adjust for two bpf instructions */ addrs[++i] = ctx->idx * 4; break; diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 9eae8d8ed340..e1e8c934308a 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -319,6 +319,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * u64 imm64; u32 true_cond; u32 tmp_idx; + int j; /* * addrs[] maps a BPF bytecode address into a real offset from @@ -633,17 +634,21 @@ bpf_alu32_trunc: EMIT(PPC_RAW_MR(dst_reg, b2p[TMP_REG_1])); break; case 64: - /* - * Way easier and faster(?) to store the value - * into stack and then use ldbrx - * - * ctx->seen will be reliable in pass2, but - * the instructions generated will remain the - * same across all passes - */ + /* Store the value to stack and then use byte-reverse loads */ PPC_BPF_STL(dst_reg, 1, bpf_jit_stack_local(ctx)); EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx))); - EMIT(PPC_RAW_LDBRX(dst_reg, 0, b2p[TMP_REG_1])); + if (cpu_has_feature(CPU_FTR_ARCH_206)) { + EMIT(PPC_RAW_LDBRX(dst_reg, 0, b2p[TMP_REG_1])); + } else { + EMIT(PPC_RAW_LWBRX(dst_reg, 0, b2p[TMP_REG_1])); + if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN)) + EMIT(PPC_RAW_SLDI(dst_reg, dst_reg, 32)); + EMIT(PPC_RAW_LI(b2p[TMP_REG_2], 4)); + EMIT(PPC_RAW_LWBRX(b2p[TMP_REG_2], b2p[TMP_REG_2], b2p[TMP_REG_1])); + if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) + EMIT(PPC_RAW_SLDI(b2p[TMP_REG_2], b2p[TMP_REG_2], 32)); + EMIT(PPC_RAW_OR(dst_reg, dst_reg, b2p[TMP_REG_2])); + } break; } break; @@ -848,9 +853,13 @@ emit_clear: case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */ imm64 = ((u64)(u32) insn[i].imm) | (((u64)(u32) insn[i+1].imm) << 32); + tmp_idx = ctx->idx; + PPC_LI64(dst_reg, imm64); + /* padding to allow full 5 instructions for later patching */ + for (j = ctx->idx - tmp_idx; j < 5; j++) + EMIT(PPC_RAW_NOP()); /* Adjust for two bpf instructions */ addrs[++i] = ctx->idx * 4; - PPC_LI64(dst_reg, imm64); break; /* diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index a684901b6965..b5b42cf0a703 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -776,6 +776,34 @@ static void pmao_restore_workaround(bool ebb) mtspr(SPRN_PMC6, pmcs[5]); } +/* + * If the perf subsystem wants performance monitor interrupts as soon as + * possible (e.g., to sample the instruction address and stack chain), + * this should return true. The IRQ masking code can then enable MSR[EE] + * in some places (e.g., interrupt handlers) that allows PMI interrupts + * through to improve accuracy of profiles, at the cost of some performance. + * + * The PMU counters can be enabled by other means (e.g., sysfs raw SPR + * access), but in that case there is no need for prompt PMI handling. + * + * This currently returns true if any perf counter is being used. It + * could possibly return false if only events are being counted rather than + * samples being taken, but for now this is good enough. + */ +bool power_pmu_wants_prompt_pmi(void) +{ + struct cpu_hw_events *cpuhw; + + /* + * This could simply test local_paca->pmcregs_in_use if that were not + * under ifdef KVM. + */ + if (!ppmu) + return false; + + cpuhw = this_cpu_ptr(&cpu_hw_events); + return cpuhw->n_events; +} #endif /* CONFIG_PPC64 */ static void perf_event_interrupt(struct pt_regs *regs); @@ -1327,9 +1355,20 @@ static void power_pmu_disable(struct pmu *pmu) * Otherwise provide a warning if there is PMI pending, but * no counter is found overflown. */ - if (any_pmc_overflown(cpuhw)) - clear_pmi_irq_pending(); - else + if (any_pmc_overflown(cpuhw)) { + /* + * Since power_pmu_disable runs under local_irq_save, it + * could happen that code hits a PMC overflow without PMI + * pending in paca. Hence only clear PMI pending if it was + * set. + * + * If a PMI is pending, then MSR[EE] must be disabled (because + * the masked PMI handler disabling EE). So it is safe to + * call clear_pmi_irq_pending(). + */ + if (pmi_irq_pending()) + clear_pmi_irq_pending(); + } else WARN_ON(pmi_irq_pending()); val = mmcra = cpuhw->mmcr.mmcra; @@ -2438,36 +2477,6 @@ static void perf_event_interrupt(struct pt_regs *regs) perf_sample_event_took(sched_clock() - start_clock); } -/* - * If the perf subsystem wants performance monitor interrupts as soon as - * possible (e.g., to sample the instruction address and stack chain), - * this should return true. The IRQ masking code can then enable MSR[EE] - * in some places (e.g., interrupt handlers) that allows PMI interrupts - * though to improve accuracy of profiles, at the cost of some performance. - * - * The PMU counters can be enabled by other means (e.g., sysfs raw SPR - * access), but in that case there is no need for prompt PMI handling. - * - * This currently returns true if any perf counter is being used. It - * could possibly return false if only events are being counted rather than - * samples being taken, but for now this is good enough. - */ -bool power_pmu_wants_prompt_pmi(void) -{ - struct cpu_hw_events *cpuhw; - - /* - * This could simply test local_paca->pmcregs_in_use if that were not - * under ifdef KVM. - */ - - if (!ppmu) - return false; - - cpuhw = this_cpu_ptr(&cpu_hw_events); - return cpuhw->n_events; -} - static int power_pmu_prepare_cpu(unsigned int cpu) { struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 9750f92380f5..be9f39fd06df 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -945,6 +945,9 @@ config S390_GUEST endmenu +config S390_MODULES_SANITY_TEST_HELPERS + def_bool n + menu "Selftests" config S390_UNWIND_SELFTEST @@ -971,4 +974,16 @@ config S390_KPROBES_SANITY_TEST Say N if you are unsure. +config S390_MODULES_SANITY_TEST + def_tristate n + depends on KUNIT + default KUNIT_ALL_TESTS + prompt "Enable s390 specific modules tests" + select S390_MODULES_SANITY_TEST_HELPERS + help + This option enables an s390 specific modules test. This option is + not useful for distributions or general kernels, but only for + kernel developers working on architecture code. + + Say N if you are unsure. endmenu diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 7fe8975b49ec..498bed9b261b 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -63,6 +63,7 @@ CONFIG_APPLDATA_BASE=y CONFIG_KVM=m CONFIG_S390_UNWIND_SELFTEST=m CONFIG_S390_KPROBES_SANITY_TEST=m +CONFIG_S390_MODULES_SANITY_TEST=m CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y CONFIG_STATIC_KEYS_SELFTEST=y @@ -96,7 +97,6 @@ CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y CONFIG_TRANSPARENT_HUGEPAGE=y -CONFIG_FRONTSWAP=y CONFIG_CMA_DEBUG=y CONFIG_CMA_DEBUGFS=y CONFIG_CMA_SYSFS=y @@ -109,6 +109,7 @@ CONFIG_DEFERRED_STRUCT_PAGE_INIT=y CONFIG_IDLE_PAGE_TRACKING=y CONFIG_PERCPU_STATS=y CONFIG_GUP_TEST=y +CONFIG_ANON_VMA_NAME=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_PACKET_DIAG=m @@ -116,7 +117,6 @@ CONFIG_UNIX=y CONFIG_UNIX_DIAG=m CONFIG_XFRM_USER=m CONFIG_NET_KEY=m -CONFIG_NET_SWITCHDEV=y CONFIG_SMC=m CONFIG_SMC_DIAG=m CONFIG_INET=y @@ -185,7 +185,6 @@ CONFIG_NF_CT_NETLINK_TIMEOUT=m CONFIG_NF_TABLES=m CONFIG_NF_TABLES_INET=y CONFIG_NFT_CT=m -CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m CONFIG_NFT_NAT=m @@ -391,6 +390,7 @@ CONFIG_OPENVSWITCH=m CONFIG_VSOCKETS=m CONFIG_VIRTIO_VSOCKETS=m CONFIG_NETLINK_DIAG=m +CONFIG_NET_SWITCHDEV=y CONFIG_CGROUP_NET_PRIO=y CONFIG_NET_PKTGEN=m CONFIG_PCI=y @@ -400,6 +400,7 @@ CONFIG_PCI_IOV=y CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_S390=y CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_SAFE=y CONFIG_CONNECTOR=y CONFIG_ZRAM=y CONFIG_BLK_DEV_LOOP=m @@ -501,6 +502,7 @@ CONFIG_NLMON=m # CONFIG_NET_VENDOR_DEC is not set # CONFIG_NET_VENDOR_DLINK is not set # CONFIG_NET_VENDOR_EMULEX is not set +# CONFIG_NET_VENDOR_ENGLEDER is not set # CONFIG_NET_VENDOR_EZCHIP is not set # CONFIG_NET_VENDOR_GOOGLE is not set # CONFIG_NET_VENDOR_HUAWEI is not set @@ -511,7 +513,6 @@ CONFIG_NLMON=m CONFIG_MLX4_EN=m CONFIG_MLX5_CORE=m CONFIG_MLX5_CORE_EN=y -CONFIG_MLX5_ESWITCH=y # CONFIG_NET_VENDOR_MICREL is not set # CONFIG_NET_VENDOR_MICROCHIP is not set # CONFIG_NET_VENDOR_MICROSEMI is not set @@ -542,6 +543,7 @@ CONFIG_MLX5_ESWITCH=y # CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_TEHUTI is not set # CONFIG_NET_VENDOR_TI is not set +# CONFIG_NET_VENDOR_VERTEXCOM is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set # CONFIG_NET_VENDOR_XILINX is not set @@ -592,6 +594,7 @@ CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_INPUT=y CONFIG_VHOST_NET=m CONFIG_VHOST_VSOCK=m +# CONFIG_SURFACE_PLATFORMS is not set CONFIG_S390_CCW_IOMMU=y CONFIG_S390_AP_IOMMU=y CONFIG_EXT4_FS=y @@ -756,9 +759,6 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_CRYPTO_STATS=y -CONFIG_CRYPTO_LIB_BLAKE2S=m -CONFIG_CRYPTO_LIB_CURVE25519=m -CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m CONFIG_ZCRYPT=m CONFIG_PKEY=m CONFIG_CRYPTO_PAES_S390=m @@ -774,6 +774,8 @@ CONFIG_CRYPTO_GHASH_S390=m CONFIG_CRYPTO_CRC32_S390=y CONFIG_CRYPTO_DEV_VIRTIO=m CONFIG_CORDIC=m +CONFIG_CRYPTO_LIB_CURVE25519=m +CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m CONFIG_CRC32_SELFTEST=y CONFIG_CRC4=m CONFIG_CRC7=m @@ -807,7 +809,6 @@ CONFIG_SLUB_DEBUG_ON=y CONFIG_SLUB_STATS=y CONFIG_DEBUG_STACK_USAGE=y CONFIG_DEBUG_VM=y -CONFIG_DEBUG_VM_VMACACHE=y CONFIG_DEBUG_VM_PGFLAGS=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m @@ -819,12 +820,11 @@ CONFIG_PANIC_ON_OOPS=y CONFIG_DETECT_HUNG_TASK=y CONFIG_WQ_WATCHDOG=y CONFIG_TEST_LOCKUP=m -CONFIG_DEBUG_TIMEKEEPING=y CONFIG_PROVE_LOCKING=y CONFIG_LOCK_STAT=y -CONFIG_DEBUG_LOCKDEP=y CONFIG_DEBUG_ATOMIC_SLEEP=y CONFIG_DEBUG_LOCKING_API_SELFTESTS=y +CONFIG_DEBUG_IRQFLAGS=y CONFIG_DEBUG_SG=y CONFIG_DEBUG_NOTIFIERS=y CONFIG_BUG_ON_DATA_CORRUPTION=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 466780c465f5..61e36b999f67 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -61,6 +61,7 @@ CONFIG_APPLDATA_BASE=y CONFIG_KVM=m CONFIG_S390_UNWIND_SELFTEST=m CONFIG_S390_KPROBES_SANITY_TEST=m +CONFIG_S390_MODULES_SANITY_TEST=m CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y # CONFIG_GCC_PLUGINS is not set @@ -91,7 +92,6 @@ CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y CONFIG_TRANSPARENT_HUGEPAGE=y -CONFIG_FRONTSWAP=y CONFIG_CMA_SYSFS=y CONFIG_CMA_AREAS=7 CONFIG_MEM_SOFT_DIRTY=y @@ -101,6 +101,7 @@ CONFIG_ZSMALLOC_STAT=y CONFIG_DEFERRED_STRUCT_PAGE_INIT=y CONFIG_IDLE_PAGE_TRACKING=y CONFIG_PERCPU_STATS=y +CONFIG_ANON_VMA_NAME=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_PACKET_DIAG=m @@ -108,7 +109,6 @@ CONFIG_UNIX=y CONFIG_UNIX_DIAG=m CONFIG_XFRM_USER=m CONFIG_NET_KEY=m -CONFIG_NET_SWITCHDEV=y CONFIG_SMC=m CONFIG_SMC_DIAG=m CONFIG_INET=y @@ -177,7 +177,6 @@ CONFIG_NF_CT_NETLINK_TIMEOUT=m CONFIG_NF_TABLES=m CONFIG_NF_TABLES_INET=y CONFIG_NFT_CT=m -CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m CONFIG_NFT_NAT=m @@ -382,6 +381,7 @@ CONFIG_OPENVSWITCH=m CONFIG_VSOCKETS=m CONFIG_VIRTIO_VSOCKETS=m CONFIG_NETLINK_DIAG=m +CONFIG_NET_SWITCHDEV=y CONFIG_CGROUP_NET_PRIO=y CONFIG_NET_PKTGEN=m CONFIG_PCI=y @@ -391,6 +391,7 @@ CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_S390=y CONFIG_UEVENT_HELPER=y CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_SAFE=y CONFIG_CONNECTOR=y CONFIG_ZRAM=y CONFIG_BLK_DEV_LOOP=m @@ -492,6 +493,7 @@ CONFIG_NLMON=m # CONFIG_NET_VENDOR_DEC is not set # CONFIG_NET_VENDOR_DLINK is not set # CONFIG_NET_VENDOR_EMULEX is not set +# CONFIG_NET_VENDOR_ENGLEDER is not set # CONFIG_NET_VENDOR_EZCHIP is not set # CONFIG_NET_VENDOR_GOOGLE is not set # CONFIG_NET_VENDOR_HUAWEI is not set @@ -502,7 +504,6 @@ CONFIG_NLMON=m CONFIG_MLX4_EN=m CONFIG_MLX5_CORE=m CONFIG_MLX5_CORE_EN=y -CONFIG_MLX5_ESWITCH=y # CONFIG_NET_VENDOR_MICREL is not set # CONFIG_NET_VENDOR_MICROCHIP is not set # CONFIG_NET_VENDOR_MICROSEMI is not set @@ -533,6 +534,7 @@ CONFIG_MLX5_ESWITCH=y # CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_TEHUTI is not set # CONFIG_NET_VENDOR_TI is not set +# CONFIG_NET_VENDOR_VERTEXCOM is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set # CONFIG_NET_VENDOR_XILINX is not set @@ -582,6 +584,7 @@ CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_INPUT=y CONFIG_VHOST_NET=m CONFIG_VHOST_VSOCK=m +# CONFIG_SURFACE_PLATFORMS is not set CONFIG_S390_CCW_IOMMU=y CONFIG_S390_AP_IOMMU=y CONFIG_EXT4_FS=y @@ -743,9 +746,6 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_CRYPTO_STATS=y -CONFIG_CRYPTO_LIB_BLAKE2S=m -CONFIG_CRYPTO_LIB_CURVE25519=m -CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m CONFIG_ZCRYPT=m CONFIG_PKEY=m CONFIG_CRYPTO_PAES_S390=m @@ -762,6 +762,8 @@ CONFIG_CRYPTO_CRC32_S390=y CONFIG_CRYPTO_DEV_VIRTIO=m CONFIG_CORDIC=m CONFIG_PRIME_NUMBERS=m +CONFIG_CRYPTO_LIB_CURVE25519=m +CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m CONFIG_CRC4=m CONFIG_CRC7=m CONFIG_CRC8=m diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index eed3b9acfa71..c55c668dc3c7 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -1,6 +1,7 @@ # CONFIG_SWAP is not set CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y +CONFIG_BPF_SYSCALL=y # CONFIG_CPU_ISOLATION is not set # CONFIG_UTS_NS is not set # CONFIG_TIME_NS is not set @@ -34,6 +35,7 @@ CONFIG_NET=y # CONFIG_PCPU_DEV_REFCNT is not set # CONFIG_ETHTOOL_NETLINK is not set CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_SAFE=y CONFIG_BLK_DEV_RAM=y # CONFIG_DCSSBLK is not set # CONFIG_DASD is not set @@ -58,6 +60,7 @@ CONFIG_ZFCP=y # CONFIG_HID is not set # CONFIG_VIRTIO_MENU is not set # CONFIG_VHOST_MENU is not set +# CONFIG_SURFACE_PLATFORMS is not set # CONFIG_IOMMU_SUPPORT is not set # CONFIG_DNOTIFY is not set # CONFIG_INOTIFY_USER is not set diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c index 33f973ff9744..e8f15dbb89d0 100644 --- a/arch/s390/hypfs/hypfs_vm.c +++ b/arch/s390/hypfs/hypfs_vm.c @@ -20,6 +20,7 @@ static char local_guest[] = " "; static char all_guests[] = "* "; +static char *all_groups = all_guests; static char *guest_query; struct diag2fc_data { @@ -62,10 +63,11 @@ static int diag2fc(int size, char* query, void *addr) memcpy(parm_list.userid, query, NAME_LEN); ASCEBC(parm_list.userid, NAME_LEN); - parm_list.addr = (unsigned long) addr ; + memcpy(parm_list.aci_grp, all_groups, NAME_LEN); + ASCEBC(parm_list.aci_grp, NAME_LEN); + parm_list.addr = (unsigned long)addr; parm_list.size = size; parm_list.fmt = 0x02; - memset(parm_list.aci_grp, 0x40, NAME_LEN); rc = -1; diag_stat_inc(DIAG_STAT_X2FC); diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 147cb3534ce4..d74e26b48604 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -47,8 +47,6 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n); int __put_user_bad(void) __attribute__((noreturn)); int __get_user_bad(void) __attribute__((noreturn)); -#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES - union oac { unsigned int val; struct { @@ -71,6 +69,8 @@ union oac { }; }; +#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES + #define __put_get_user_asm(to, from, size, oac_spec) \ ({ \ int __rc; \ diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index d52d85367bf7..b032e556eeb7 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -33,7 +33,7 @@ #define DEBUGP(fmt , ...) #endif -#define PLT_ENTRY_SIZE 20 +#define PLT_ENTRY_SIZE 22 void *module_alloc(unsigned long size) { @@ -341,27 +341,26 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, case R_390_PLTOFF32: /* 32 bit offset from GOT to PLT. */ case R_390_PLTOFF64: /* 16 bit offset from GOT to PLT. */ if (info->plt_initialized == 0) { - unsigned int insn[5]; - unsigned int *ip = me->core_layout.base + - me->arch.plt_offset + - info->plt_offset; - - insn[0] = 0x0d10e310; /* basr 1,0 */ - insn[1] = 0x100a0004; /* lg 1,10(1) */ + unsigned char insn[PLT_ENTRY_SIZE]; + char *plt_base; + char *ip; + + plt_base = me->core_layout.base + me->arch.plt_offset; + ip = plt_base + info->plt_offset; + *(int *)insn = 0x0d10e310; /* basr 1,0 */ + *(int *)&insn[4] = 0x100c0004; /* lg 1,12(1) */ if (IS_ENABLED(CONFIG_EXPOLINE) && !nospec_disable) { - unsigned int *ij; - ij = me->core_layout.base + - me->arch.plt_offset + - me->arch.plt_size - PLT_ENTRY_SIZE; - insn[2] = 0xa7f40000 + /* j __jump_r1 */ - (unsigned int)(u16) - (((unsigned long) ij - 8 - - (unsigned long) ip) / 2); + char *jump_r1; + + jump_r1 = plt_base + me->arch.plt_size - + PLT_ENTRY_SIZE; + /* brcl 0xf,__jump_r1 */ + *(short *)&insn[8] = 0xc0f4; + *(int *)&insn[10] = (jump_r1 - (ip + 8)) / 2; } else { - insn[2] = 0x07f10000; /* br %r1 */ + *(int *)&insn[8] = 0x07f10000; /* br %r1 */ } - insn[3] = (unsigned int) (val >> 32); - insn[4] = (unsigned int) val; + *(long *)&insn[14] = val; write(ip, insn, sizeof(insn)); info->plt_initialized = 1; diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 0c9e894913dc..651a51914e34 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -264,7 +264,14 @@ static int notrace s390_validate_registers(union mci mci, int umode) /* Validate vector registers */ union ctlreg0 cr0; - if (!mci.vr) { + /* + * The vector validity must only be checked if not running a + * KVM guest. For KVM guests the machine check is forwarded by + * KVM and it is the responsibility of the guest to take + * appropriate actions. The host vector or FPU values have been + * saved by KVM and will be restored by KVM. + */ + if (!mci.vr && !test_cpu_flag(CIF_MCCK_GUEST)) { /* * Vector registers can't be restored. If the kernel * currently uses vector registers the system is @@ -307,11 +314,21 @@ static int notrace s390_validate_registers(union mci mci, int umode) if (cr2.gse) { if (!mci.gs) { /* - * Guarded storage register can't be restored and - * the current processes uses guarded storage. - * It has to be terminated. + * 2 cases: + * - machine check in kernel or userspace + * - machine check while running SIE (KVM guest) + * For kernel or userspace the userspace values of + * guarded storage control can not be recreated, the + * process must be terminated. + * For SIE the guest values of guarded storage can not + * be recreated. This is either due to a bug or due to + * GS being disabled in the guest. The guest will be + * notified by KVM code and the guests machine check + * handling must take care of this. The host values + * are saved by KVM and are not affected. */ - kill_task = 1; + if (!test_cpu_flag(CIF_MCCK_GUEST)) + kill_task = 1; } else { load_gs_cb((struct gs_cb *)mcesa->guarded_storage_save_area); } diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index 707cd4622c13..69feb8ed3312 100644 --- a/arch/s390/lib/Makefile +++ b/arch/s390/lib/Makefile @@ -17,4 +17,7 @@ KASAN_SANITIZE_uaccess.o := n obj-$(CONFIG_S390_UNWIND_SELFTEST) += test_unwind.o CFLAGS_test_unwind.o += -fno-optimize-sibling-calls +obj-$(CONFIG_S390_MODULES_SANITY_TEST) += test_modules.o +obj-$(CONFIG_S390_MODULES_SANITY_TEST_HELPERS) += test_modules_helpers.o + lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o diff --git a/arch/s390/lib/test_modules.c b/arch/s390/lib/test_modules.c new file mode 100644 index 000000000000..d056baa8fbb0 --- /dev/null +++ b/arch/s390/lib/test_modules.c @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include <kunit/test.h> +#include <linux/module.h> + +#include "test_modules.h" + +#define DECLARE_RETURN(i) int test_modules_return_ ## i(void) +REPEAT_10000(DECLARE_RETURN); + +/* + * Test that modules with many relocations are loaded properly. + */ +static void test_modules_many_vmlinux_relocs(struct kunit *test) +{ + int result = 0; + +#define CALL_RETURN(i) result += test_modules_return_ ## i() + REPEAT_10000(CALL_RETURN); + KUNIT_ASSERT_EQ(test, result, 49995000); +} + +static struct kunit_case modules_testcases[] = { + KUNIT_CASE(test_modules_many_vmlinux_relocs), + {} +}; + +static struct kunit_suite modules_test_suite = { + .name = "modules_test_s390", + .test_cases = modules_testcases, +}; + +kunit_test_suites(&modules_test_suite); + +MODULE_LICENSE("GPL"); diff --git a/arch/s390/lib/test_modules.h b/arch/s390/lib/test_modules.h new file mode 100644 index 000000000000..43b5e4b4af3e --- /dev/null +++ b/arch/s390/lib/test_modules.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +#ifndef TEST_MODULES_H +#define TEST_MODULES_H + +#define __REPEAT_10000_3(f, x) \ + f(x ## 0); \ + f(x ## 1); \ + f(x ## 2); \ + f(x ## 3); \ + f(x ## 4); \ + f(x ## 5); \ + f(x ## 6); \ + f(x ## 7); \ + f(x ## 8); \ + f(x ## 9) +#define __REPEAT_10000_2(f, x) \ + __REPEAT_10000_3(f, x ## 0); \ + __REPEAT_10000_3(f, x ## 1); \ + __REPEAT_10000_3(f, x ## 2); \ + __REPEAT_10000_3(f, x ## 3); \ + __REPEAT_10000_3(f, x ## 4); \ + __REPEAT_10000_3(f, x ## 5); \ + __REPEAT_10000_3(f, x ## 6); \ + __REPEAT_10000_3(f, x ## 7); \ + __REPEAT_10000_3(f, x ## 8); \ + __REPEAT_10000_3(f, x ## 9) +#define __REPEAT_10000_1(f, x) \ + __REPEAT_10000_2(f, x ## 0); \ + __REPEAT_10000_2(f, x ## 1); \ + __REPEAT_10000_2(f, x ## 2); \ + __REPEAT_10000_2(f, x ## 3); \ + __REPEAT_10000_2(f, x ## 4); \ + __REPEAT_10000_2(f, x ## 5); \ + __REPEAT_10000_2(f, x ## 6); \ + __REPEAT_10000_2(f, x ## 7); \ + __REPEAT_10000_2(f, x ## 8); \ + __REPEAT_10000_2(f, x ## 9) +#define REPEAT_10000(f) \ + __REPEAT_10000_1(f, 0); \ + __REPEAT_10000_1(f, 1); \ + __REPEAT_10000_1(f, 2); \ + __REPEAT_10000_1(f, 3); \ + __REPEAT_10000_1(f, 4); \ + __REPEAT_10000_1(f, 5); \ + __REPEAT_10000_1(f, 6); \ + __REPEAT_10000_1(f, 7); \ + __REPEAT_10000_1(f, 8); \ + __REPEAT_10000_1(f, 9) + +#endif diff --git a/arch/s390/lib/test_modules_helpers.c b/arch/s390/lib/test_modules_helpers.c new file mode 100644 index 000000000000..1670349a03eb --- /dev/null +++ b/arch/s390/lib/test_modules_helpers.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include <linux/export.h> + +#include "test_modules.h" + +#define DEFINE_RETURN(i) \ + int test_modules_return_ ## i(void) \ + { \ + return 1 ## i - 10000; \ + } \ + EXPORT_SYMBOL_GPL(test_modules_return_ ## i) +REPEAT_10000(DEFINE_RETURN); diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ebe8fc76949a..9f5bd41bf660 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -186,6 +186,7 @@ config X86 select HAVE_CONTEXT_TRACKING_OFFSTACK if HAVE_CONTEXT_TRACKING select HAVE_C_RECORDMCOUNT select HAVE_OBJTOOL_MCOUNT if STACK_VALIDATION + select HAVE_BUILDTIME_MCOUNT_SORT select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_CONTIGUOUS select HAVE_DYNAMIC_FTRACE diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index fd9f908debe5..c91434056c29 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -6236,6 +6236,19 @@ __init int intel_pmu_init(void) pmu->num_counters = x86_pmu.num_counters; pmu->num_counters_fixed = x86_pmu.num_counters_fixed; } + + /* + * Quirk: For some Alder Lake machine, when all E-cores are disabled in + * a BIOS, the leaf 0xA will enumerate all counters of P-cores. However, + * the X86_FEATURE_HYBRID_CPU is still set. The above codes will + * mistakenly add extra counters for P-cores. Correct the number of + * counters here. + */ + if ((pmu->num_counters > 8) || (pmu->num_counters_fixed > 4)) { + pmu->num_counters = x86_pmu.num_counters; + pmu->num_counters_fixed = x86_pmu.num_counters_fixed; + } + pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters); pmu->unconstrained = (struct event_constraint) __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1, @@ -6340,6 +6353,8 @@ __init int intel_pmu_init(void) } if (x86_pmu.lbr_nr) { + intel_pmu_lbr_init(); + pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr); /* only support branch_stack snapshot for perfmon >= v2 */ diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 8043213b75a5..669c2be14784 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -8,14 +8,6 @@ #include "../perf_event.h" -static const enum { - LBR_EIP_FLAGS = 1, - LBR_TSX = 2, -} lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = { - [LBR_FORMAT_EIP_FLAGS] = LBR_EIP_FLAGS, - [LBR_FORMAT_EIP_FLAGS2] = LBR_EIP_FLAGS | LBR_TSX, -}; - /* * Intel LBR_SELECT bits * Intel Vol3a, April 2011, Section 16.7 Table 16-10 @@ -243,7 +235,7 @@ void intel_pmu_lbr_reset_64(void) for (i = 0; i < x86_pmu.lbr_nr; i++) { wrmsrl(x86_pmu.lbr_from + i, 0); wrmsrl(x86_pmu.lbr_to + i, 0); - if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO) + if (x86_pmu.lbr_has_info) wrmsrl(x86_pmu.lbr_info + i, 0); } } @@ -305,11 +297,10 @@ enum { */ static inline bool lbr_from_signext_quirk_needed(void) { - int lbr_format = x86_pmu.intel_cap.lbr_format; bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) || boot_cpu_has(X86_FEATURE_RTM); - return !tsx_support && (lbr_desc[lbr_format] & LBR_TSX); + return !tsx_support && x86_pmu.lbr_has_tsx; } static DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key); @@ -427,12 +418,12 @@ rdlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info) void intel_pmu_lbr_restore(void *ctx) { - bool need_info = x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO; struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct x86_perf_task_context *task_ctx = ctx; - int i; - unsigned lbr_idx, mask; + bool need_info = x86_pmu.lbr_has_info; u64 tos = task_ctx->tos; + unsigned lbr_idx, mask; + int i; mask = x86_pmu.lbr_nr - 1; for (i = 0; i < task_ctx->valid_lbrs; i++) { @@ -444,7 +435,7 @@ void intel_pmu_lbr_restore(void *ctx) lbr_idx = (tos - i) & mask; wrlbr_from(lbr_idx, 0); wrlbr_to(lbr_idx, 0); - if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO) + if (need_info) wrlbr_info(lbr_idx, 0); } @@ -519,9 +510,9 @@ static void __intel_pmu_lbr_restore(void *ctx) void intel_pmu_lbr_save(void *ctx) { - bool need_info = x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO; struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct x86_perf_task_context *task_ctx = ctx; + bool need_info = x86_pmu.lbr_has_info; unsigned lbr_idx, mask; u64 tos; int i; @@ -816,7 +807,6 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) { bool need_info = false, call_stack = false; unsigned long mask = x86_pmu.lbr_nr - 1; - int lbr_format = x86_pmu.intel_cap.lbr_format; u64 tos = intel_pmu_lbr_tos(); int i; int out = 0; @@ -831,9 +821,7 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) for (i = 0; i < num; i++) { unsigned long lbr_idx = (tos - i) & mask; u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0; - int skip = 0; u16 cycles = 0; - int lbr_flags = lbr_desc[lbr_format]; from = rdlbr_from(lbr_idx, NULL); to = rdlbr_to(lbr_idx, NULL); @@ -845,37 +833,39 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) if (call_stack && !from) break; - if (lbr_format == LBR_FORMAT_INFO && need_info) { - u64 info; - - info = rdlbr_info(lbr_idx, NULL); - mis = !!(info & LBR_INFO_MISPRED); - pred = !mis; - in_tx = !!(info & LBR_INFO_IN_TX); - abort = !!(info & LBR_INFO_ABORT); - cycles = (info & LBR_INFO_CYCLES); - } - - if (lbr_format == LBR_FORMAT_TIME) { - mis = !!(from & LBR_FROM_FLAG_MISPRED); - pred = !mis; - skip = 1; - cycles = ((to >> 48) & LBR_INFO_CYCLES); - - to = (u64)((((s64)to) << 16) >> 16); - } - - if (lbr_flags & LBR_EIP_FLAGS) { - mis = !!(from & LBR_FROM_FLAG_MISPRED); - pred = !mis; - skip = 1; - } - if (lbr_flags & LBR_TSX) { - in_tx = !!(from & LBR_FROM_FLAG_IN_TX); - abort = !!(from & LBR_FROM_FLAG_ABORT); - skip = 3; + if (x86_pmu.lbr_has_info) { + if (need_info) { + u64 info; + + info = rdlbr_info(lbr_idx, NULL); + mis = !!(info & LBR_INFO_MISPRED); + pred = !mis; + cycles = (info & LBR_INFO_CYCLES); + if (x86_pmu.lbr_has_tsx) { + in_tx = !!(info & LBR_INFO_IN_TX); + abort = !!(info & LBR_INFO_ABORT); + } + } + } else { + int skip = 0; + + if (x86_pmu.lbr_from_flags) { + mis = !!(from & LBR_FROM_FLAG_MISPRED); + pred = !mis; + skip = 1; + } + if (x86_pmu.lbr_has_tsx) { + in_tx = !!(from & LBR_FROM_FLAG_IN_TX); + abort = !!(from & LBR_FROM_FLAG_ABORT); + skip = 3; + } + from = (u64)((((s64)from) << skip) >> skip); + + if (x86_pmu.lbr_to_cycles) { + cycles = ((to >> 48) & LBR_INFO_CYCLES); + to = (u64)((((s64)to) << 16) >> 16); + } } - from = (u64)((((s64)from) << skip) >> skip); /* * Some CPUs report duplicated abort records, @@ -903,37 +893,40 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) cpuc->lbr_stack.hw_idx = tos; } +static DEFINE_STATIC_KEY_FALSE(x86_lbr_mispred); +static DEFINE_STATIC_KEY_FALSE(x86_lbr_cycles); +static DEFINE_STATIC_KEY_FALSE(x86_lbr_type); + static __always_inline int get_lbr_br_type(u64 info) { - if (!static_cpu_has(X86_FEATURE_ARCH_LBR) || !x86_pmu.lbr_br_type) - return 0; + int type = 0; - return (info & LBR_INFO_BR_TYPE) >> LBR_INFO_BR_TYPE_OFFSET; + if (static_branch_likely(&x86_lbr_type)) + type = (info & LBR_INFO_BR_TYPE) >> LBR_INFO_BR_TYPE_OFFSET; + + return type; } static __always_inline bool get_lbr_mispred(u64 info) { - if (static_cpu_has(X86_FEATURE_ARCH_LBR) && !x86_pmu.lbr_mispred) - return 0; + bool mispred = 0; - return !!(info & LBR_INFO_MISPRED); -} + if (static_branch_likely(&x86_lbr_mispred)) + mispred = !!(info & LBR_INFO_MISPRED); -static __always_inline bool get_lbr_predicted(u64 info) -{ - if (static_cpu_has(X86_FEATURE_ARCH_LBR) && !x86_pmu.lbr_mispred) - return 0; - - return !(info & LBR_INFO_MISPRED); + return mispred; } static __always_inline u16 get_lbr_cycles(u64 info) { + u16 cycles = info & LBR_INFO_CYCLES; + if (static_cpu_has(X86_FEATURE_ARCH_LBR) && - !(x86_pmu.lbr_timed_lbr && info & LBR_INFO_CYC_CNT_VALID)) - return 0; + (!static_branch_likely(&x86_lbr_cycles) || + !(info & LBR_INFO_CYC_CNT_VALID))) + cycles = 0; - return info & LBR_INFO_CYCLES; + return cycles; } static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc, @@ -961,7 +954,7 @@ static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc, e->from = from; e->to = to; e->mispred = get_lbr_mispred(info); - e->predicted = get_lbr_predicted(info); + e->predicted = !e->mispred; e->in_tx = !!(info & LBR_INFO_IN_TX); e->abort = !!(info & LBR_INFO_ABORT); e->cycles = get_lbr_cycles(info); @@ -1120,7 +1113,7 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event) if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) && (br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) && - (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)) + x86_pmu.lbr_has_info) reg->config |= LBR_NO_INFO; return 0; @@ -1706,6 +1699,38 @@ void intel_pmu_lbr_init_knl(void) x86_pmu.intel_cap.lbr_format = LBR_FORMAT_EIP_FLAGS; } +void intel_pmu_lbr_init(void) +{ + switch (x86_pmu.intel_cap.lbr_format) { + case LBR_FORMAT_EIP_FLAGS2: + x86_pmu.lbr_has_tsx = 1; + fallthrough; + case LBR_FORMAT_EIP_FLAGS: + x86_pmu.lbr_from_flags = 1; + break; + + case LBR_FORMAT_INFO: + x86_pmu.lbr_has_tsx = 1; + fallthrough; + case LBR_FORMAT_INFO2: + x86_pmu.lbr_has_info = 1; + break; + + case LBR_FORMAT_TIME: + x86_pmu.lbr_from_flags = 1; + x86_pmu.lbr_to_cycles = 1; + break; + } + + if (x86_pmu.lbr_has_info) { + /* + * Only used in combination with baseline pebs. + */ + static_branch_enable(&x86_lbr_mispred); + static_branch_enable(&x86_lbr_cycles); + } +} + /* * LBR state size is variable based on the max number of registers. * This calculates the expected state size, which should match @@ -1726,6 +1751,9 @@ static bool is_arch_lbr_xsave_available(void) * Check the LBR state with the corresponding software structure. * Disable LBR XSAVES support if the size doesn't match. */ + if (xfeature_size(XFEATURE_LBR) == 0) + return false; + if (WARN_ON(xfeature_size(XFEATURE_LBR) != get_lbr_state_size())) return false; @@ -1765,6 +1793,12 @@ void __init intel_pmu_arch_lbr_init(void) x86_pmu.lbr_br_type = ecx.split.lbr_br_type; x86_pmu.lbr_nr = lbr_nr; + if (x86_pmu.lbr_mispred) + static_branch_enable(&x86_lbr_mispred); + if (x86_pmu.lbr_timed_lbr) + static_branch_enable(&x86_lbr_cycles); + if (x86_pmu.lbr_br_type) + static_branch_enable(&x86_lbr_type); arch_lbr_xsave = is_arch_lbr_xsave_available(); if (arch_lbr_xsave) { diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index f1ba6ab2e97e..e497da9bf427 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1762,7 +1762,7 @@ static const struct intel_uncore_init_fun rkl_uncore_init __initconst = { static const struct intel_uncore_init_fun adl_uncore_init __initconst = { .cpu_init = adl_uncore_cpu_init, - .mmio_init = tgl_uncore_mmio_init, + .mmio_init = adl_uncore_mmio_init, }; static const struct intel_uncore_init_fun icx_uncore_init __initconst = { diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index b9687980aab6..2adeaf4de4df 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -584,10 +584,11 @@ void snb_uncore_cpu_init(void); void nhm_uncore_cpu_init(void); void skl_uncore_cpu_init(void); void icl_uncore_cpu_init(void); -void adl_uncore_cpu_init(void); void tgl_uncore_cpu_init(void); +void adl_uncore_cpu_init(void); void tgl_uncore_mmio_init(void); void tgl_l_uncore_mmio_init(void); +void adl_uncore_mmio_init(void); int snb_pci2phy_map_init(int devid); /* uncore_snbep.c */ diff --git a/arch/x86/events/intel/uncore_discovery.c b/arch/x86/events/intel/uncore_discovery.c index 3049c646fa20..6ddadb482f68 100644 --- a/arch/x86/events/intel/uncore_discovery.c +++ b/arch/x86/events/intel/uncore_discovery.c @@ -494,8 +494,8 @@ void intel_generic_uncore_mmio_enable_box(struct intel_uncore_box *box) writel(0, box->io_addr); } -static void intel_generic_uncore_mmio_enable_event(struct intel_uncore_box *box, - struct perf_event *event) +void intel_generic_uncore_mmio_enable_event(struct intel_uncore_box *box, + struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; diff --git a/arch/x86/events/intel/uncore_discovery.h b/arch/x86/events/intel/uncore_discovery.h index 6d735611c281..cfaf558bdb6b 100644 --- a/arch/x86/events/intel/uncore_discovery.h +++ b/arch/x86/events/intel/uncore_discovery.h @@ -139,6 +139,8 @@ void intel_generic_uncore_mmio_disable_box(struct intel_uncore_box *box); void intel_generic_uncore_mmio_enable_box(struct intel_uncore_box *box); void intel_generic_uncore_mmio_disable_event(struct intel_uncore_box *box, struct perf_event *event); +void intel_generic_uncore_mmio_enable_event(struct intel_uncore_box *box, + struct perf_event *event); void intel_generic_uncore_pci_init_box(struct intel_uncore_box *box); void intel_generic_uncore_pci_disable_box(struct intel_uncore_box *box); diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index 0f63706cdadf..f698a55bde81 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Nehalem/SandBridge/Haswell/Broadwell/Skylake uncore support */ #include "uncore.h" +#include "uncore_discovery.h" /* Uncore IMC PCI IDs */ #define PCI_DEVICE_ID_INTEL_SNB_IMC 0x0100 @@ -64,6 +65,20 @@ #define PCI_DEVICE_ID_INTEL_RKL_2_IMC 0x4c53 #define PCI_DEVICE_ID_INTEL_ADL_1_IMC 0x4660 #define PCI_DEVICE_ID_INTEL_ADL_2_IMC 0x4641 +#define PCI_DEVICE_ID_INTEL_ADL_3_IMC 0x4601 +#define PCI_DEVICE_ID_INTEL_ADL_4_IMC 0x4602 +#define PCI_DEVICE_ID_INTEL_ADL_5_IMC 0x4609 +#define PCI_DEVICE_ID_INTEL_ADL_6_IMC 0x460a +#define PCI_DEVICE_ID_INTEL_ADL_7_IMC 0x4621 +#define PCI_DEVICE_ID_INTEL_ADL_8_IMC 0x4623 +#define PCI_DEVICE_ID_INTEL_ADL_9_IMC 0x4629 +#define PCI_DEVICE_ID_INTEL_ADL_10_IMC 0x4637 +#define PCI_DEVICE_ID_INTEL_ADL_11_IMC 0x463b +#define PCI_DEVICE_ID_INTEL_ADL_12_IMC 0x4648 +#define PCI_DEVICE_ID_INTEL_ADL_13_IMC 0x4649 +#define PCI_DEVICE_ID_INTEL_ADL_14_IMC 0x4650 +#define PCI_DEVICE_ID_INTEL_ADL_15_IMC 0x4668 +#define PCI_DEVICE_ID_INTEL_ADL_16_IMC 0x4670 /* SNB event control */ #define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff @@ -155,6 +170,7 @@ DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7"); DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15"); +DEFINE_UNCORE_FORMAT_ATTR(chmask, chmask, "config:8-11"); DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23"); DEFINE_UNCORE_FORMAT_ATTR(cmask5, cmask, "config:24-28"); @@ -1334,6 +1350,62 @@ static const struct pci_device_id tgl_uncore_pci_ids[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_2_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_3_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_4_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_5_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_6_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_7_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_8_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_9_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_10_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_11_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_12_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_13_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_14_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_15_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_16_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, { /* end: all zeroes */ } }; @@ -1390,7 +1462,8 @@ static struct pci_dev *tgl_uncore_get_mc_dev(void) #define TGL_UNCORE_MMIO_IMC_MEM_OFFSET 0x10000 #define TGL_UNCORE_PCI_IMC_MAP_SIZE 0xe000 -static void tgl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box) +static void __uncore_imc_init_box(struct intel_uncore_box *box, + unsigned int base_offset) { struct pci_dev *pdev = tgl_uncore_get_mc_dev(); struct intel_uncore_pmu *pmu = box->pmu; @@ -1417,11 +1490,17 @@ static void tgl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box) addr |= ((resource_size_t)mch_bar << 32); #endif + addr += base_offset; box->io_addr = ioremap(addr, type->mmio_map_size); if (!box->io_addr) pr_warn("perf uncore: Failed to ioremap for %s.\n", type->name); } +static void tgl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box) +{ + __uncore_imc_init_box(box, 0); +} + static struct intel_uncore_ops tgl_uncore_imc_freerunning_ops = { .init_box = tgl_uncore_imc_freerunning_init_box, .exit_box = uncore_mmio_exit_box, @@ -1469,3 +1548,136 @@ void tgl_uncore_mmio_init(void) } /* end of Tiger Lake MMIO uncore support */ + +/* Alder Lake MMIO uncore support */ +#define ADL_UNCORE_IMC_BASE 0xd900 +#define ADL_UNCORE_IMC_MAP_SIZE 0x200 +#define ADL_UNCORE_IMC_CTR 0xe8 +#define ADL_UNCORE_IMC_CTRL 0xd0 +#define ADL_UNCORE_IMC_GLOBAL_CTL 0xc0 +#define ADL_UNCORE_IMC_BOX_CTL 0xc4 +#define ADL_UNCORE_IMC_FREERUNNING_BASE 0xd800 +#define ADL_UNCORE_IMC_FREERUNNING_MAP_SIZE 0x100 + +#define ADL_UNCORE_IMC_CTL_FRZ (1 << 0) +#define ADL_UNCORE_IMC_CTL_RST_CTRL (1 << 1) +#define ADL_UNCORE_IMC_CTL_RST_CTRS (1 << 2) +#define ADL_UNCORE_IMC_CTL_INT (ADL_UNCORE_IMC_CTL_RST_CTRL | \ + ADL_UNCORE_IMC_CTL_RST_CTRS) + +static void adl_uncore_imc_init_box(struct intel_uncore_box *box) +{ + __uncore_imc_init_box(box, ADL_UNCORE_IMC_BASE); + + /* The global control in MC1 can control both MCs. */ + if (box->io_addr && (box->pmu->pmu_idx == 1)) + writel(ADL_UNCORE_IMC_CTL_INT, box->io_addr + ADL_UNCORE_IMC_GLOBAL_CTL); +} + +static void adl_uncore_mmio_disable_box(struct intel_uncore_box *box) +{ + if (!box->io_addr) + return; + + writel(ADL_UNCORE_IMC_CTL_FRZ, box->io_addr + uncore_mmio_box_ctl(box)); +} + +static void adl_uncore_mmio_enable_box(struct intel_uncore_box *box) +{ + if (!box->io_addr) + return; + + writel(0, box->io_addr + uncore_mmio_box_ctl(box)); +} + +static struct intel_uncore_ops adl_uncore_mmio_ops = { + .init_box = adl_uncore_imc_init_box, + .exit_box = uncore_mmio_exit_box, + .disable_box = adl_uncore_mmio_disable_box, + .enable_box = adl_uncore_mmio_enable_box, + .disable_event = intel_generic_uncore_mmio_disable_event, + .enable_event = intel_generic_uncore_mmio_enable_event, + .read_counter = uncore_mmio_read_counter, +}; + +#define ADL_UNC_CTL_CHMASK_MASK 0x00000f00 +#define ADL_UNC_IMC_EVENT_MASK (SNB_UNC_CTL_EV_SEL_MASK | \ + ADL_UNC_CTL_CHMASK_MASK | \ + SNB_UNC_CTL_EDGE_DET) + +static struct attribute *adl_uncore_imc_formats_attr[] = { + &format_attr_event.attr, + &format_attr_chmask.attr, + &format_attr_edge.attr, + NULL, +}; + +static const struct attribute_group adl_uncore_imc_format_group = { + .name = "format", + .attrs = adl_uncore_imc_formats_attr, +}; + +static struct intel_uncore_type adl_uncore_imc = { + .name = "imc", + .num_counters = 5, + .num_boxes = 2, + .perf_ctr_bits = 64, + .perf_ctr = ADL_UNCORE_IMC_CTR, + .event_ctl = ADL_UNCORE_IMC_CTRL, + .event_mask = ADL_UNC_IMC_EVENT_MASK, + .box_ctl = ADL_UNCORE_IMC_BOX_CTL, + .mmio_offset = 0, + .mmio_map_size = ADL_UNCORE_IMC_MAP_SIZE, + .ops = &adl_uncore_mmio_ops, + .format_group = &adl_uncore_imc_format_group, +}; + +enum perf_adl_uncore_imc_freerunning_types { + ADL_MMIO_UNCORE_IMC_DATA_TOTAL, + ADL_MMIO_UNCORE_IMC_DATA_READ, + ADL_MMIO_UNCORE_IMC_DATA_WRITE, + ADL_MMIO_UNCORE_IMC_FREERUNNING_TYPE_MAX +}; + +static struct freerunning_counters adl_uncore_imc_freerunning[] = { + [ADL_MMIO_UNCORE_IMC_DATA_TOTAL] = { 0x40, 0x0, 0x0, 1, 64 }, + [ADL_MMIO_UNCORE_IMC_DATA_READ] = { 0x58, 0x0, 0x0, 1, 64 }, + [ADL_MMIO_UNCORE_IMC_DATA_WRITE] = { 0xA0, 0x0, 0x0, 1, 64 }, +}; + +static void adl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box) +{ + __uncore_imc_init_box(box, ADL_UNCORE_IMC_FREERUNNING_BASE); +} + +static struct intel_uncore_ops adl_uncore_imc_freerunning_ops = { + .init_box = adl_uncore_imc_freerunning_init_box, + .exit_box = uncore_mmio_exit_box, + .read_counter = uncore_mmio_read_counter, + .hw_config = uncore_freerunning_hw_config, +}; + +static struct intel_uncore_type adl_uncore_imc_free_running = { + .name = "imc_free_running", + .num_counters = 3, + .num_boxes = 2, + .num_freerunning_types = ADL_MMIO_UNCORE_IMC_FREERUNNING_TYPE_MAX, + .mmio_map_size = ADL_UNCORE_IMC_FREERUNNING_MAP_SIZE, + .freerunning = adl_uncore_imc_freerunning, + .ops = &adl_uncore_imc_freerunning_ops, + .event_descs = tgl_uncore_imc_events, + .format_group = &tgl_uncore_imc_format_group, +}; + +static struct intel_uncore_type *adl_mmio_uncores[] = { + &adl_uncore_imc, + &adl_uncore_imc_free_running, + NULL +}; + +void adl_uncore_mmio_init(void) +{ + uncore_mmio_uncores = adl_mmio_uncores; +} + +/* end of Alder Lake MMIO uncore support */ diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 3660f698fb2a..ed869443efb2 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -5482,7 +5482,7 @@ static struct intel_uncore_type icx_uncore_imc = { .fixed_ctr_bits = 48, .fixed_ctr = SNR_IMC_MMIO_PMON_FIXED_CTR, .fixed_ctl = SNR_IMC_MMIO_PMON_FIXED_CTL, - .event_descs = hswep_uncore_imc_events, + .event_descs = snr_uncore_imc_events, .perf_ctr = SNR_IMC_MMIO_PMON_CTR0, .event_ctl = SNR_IMC_MMIO_PMON_CTL0, .event_mask = SNBEP_PMON_RAW_EVENT_MASK, diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 9d376e528dfc..150261d929b9 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -215,7 +215,8 @@ enum { LBR_FORMAT_EIP_FLAGS2 = 0x04, LBR_FORMAT_INFO = 0x05, LBR_FORMAT_TIME = 0x06, - LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_TIME, + LBR_FORMAT_INFO2 = 0x07, + LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_INFO2, }; enum { @@ -840,6 +841,11 @@ struct x86_pmu { bool lbr_double_abort; /* duplicated lbr aborts */ bool lbr_pt_coexist; /* (LBR|BTS) may coexist with PT */ + unsigned int lbr_has_info:1; + unsigned int lbr_has_tsx:1; + unsigned int lbr_from_flags:1; + unsigned int lbr_to_cycles:1; + /* * Intel Architectural LBR CPUID Enumeration */ @@ -1392,6 +1398,8 @@ void intel_pmu_lbr_init_skl(void); void intel_pmu_lbr_init_knl(void); +void intel_pmu_lbr_init(void); + void intel_pmu_arch_lbr_init(void); void intel_pmu_pebs_data_source_nhm(void); diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c index 85feafacc445..77e3a47af5ad 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -536,11 +536,14 @@ static struct perf_msr intel_rapl_spr_msrs[] = { * - perf_msr_probe(PERF_RAPL_MAX) * - want to use same event codes across both architectures */ -static struct perf_msr amd_rapl_msrs[PERF_RAPL_MAX] = { - [PERF_RAPL_PKG] = { MSR_AMD_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr }, +static struct perf_msr amd_rapl_msrs[] = { + [PERF_RAPL_PP0] = { 0, &rapl_events_cores_group, 0, false, 0 }, + [PERF_RAPL_PKG] = { MSR_AMD_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr, false, RAPL_MSR_MASK }, + [PERF_RAPL_RAM] = { 0, &rapl_events_ram_group, 0, false, 0 }, + [PERF_RAPL_PP1] = { 0, &rapl_events_gpu_group, 0, false, 0 }, + [PERF_RAPL_PSYS] = { 0, &rapl_events_psys_group, 0, false, 0 }, }; - static int rapl_cpu_offline(unsigned int cpu) { struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu); diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 1384517d7709..6e7c545bc7ee 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1483,7 +1483,8 @@ struct kvm_x86_ops { int (*get_msr_feature)(struct kvm_msr_entry *entry); - bool (*can_emulate_instruction)(struct kvm_vcpu *vcpu, void *insn, int insn_len); + bool (*can_emulate_instruction)(struct kvm_vcpu *vcpu, int emul_type, + void *insn, int insn_len); bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu); int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu); @@ -1496,6 +1497,7 @@ struct kvm_x86_ops { }; struct kvm_x86_nested_ops { + void (*leave_nested)(struct kvm_vcpu *vcpu); int (*check_events)(struct kvm_vcpu *vcpu); bool (*hv_timer_pending)(struct kvm_vcpu *vcpu); void (*triple_fault)(struct kvm_vcpu *vcpu); @@ -1861,7 +1863,6 @@ int kvm_cpu_has_extint(struct kvm_vcpu *v); int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); int kvm_cpu_get_interrupt(struct kvm_vcpu *v); void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event); -void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu); int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low, unsigned long ipi_bitmap_high, u32 min, diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 2da3316bb559..bf6e96011dfe 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -452,6 +452,9 @@ struct kvm_sync_regs { #define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001 +/* attributes for system fd (group 0) */ +#define KVM_X86_XCOMP_GUEST_SUPP 0 + struct kvm_vmx_nested_state_data { __u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; __u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 3902c28fb6cb..28be02adc669 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -133,6 +133,7 @@ static int kvm_cpuid_check_equal(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 orig = &vcpu->arch.cpuid_entries[i]; if (e2[i].function != orig->function || e2[i].index != orig->index || + e2[i].flags != orig->flags || e2[i].eax != orig->eax || e2[i].ebx != orig->ebx || e2[i].ecx != orig->ecx || e2[i].edx != orig->edx) return -EINVAL; @@ -196,10 +197,26 @@ void kvm_update_pv_runtime(struct kvm_vcpu *vcpu) vcpu->arch.pv_cpuid.features = best->eax; } +/* + * Calculate guest's supported XCR0 taking into account guest CPUID data and + * supported_xcr0 (comprised of host configuration and KVM_SUPPORTED_XCR0). + */ +static u64 cpuid_get_supported_xcr0(struct kvm_cpuid_entry2 *entries, int nent) +{ + struct kvm_cpuid_entry2 *best; + + best = cpuid_entry2_find(entries, nent, 0xd, 0); + if (!best) + return 0; + + return (best->eax | ((u64)best->edx << 32)) & supported_xcr0; +} + static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entries, int nent) { struct kvm_cpuid_entry2 *best; + u64 guest_supported_xcr0 = cpuid_get_supported_xcr0(entries, nent); best = cpuid_entry2_find(entries, nent, 1, 0); if (best) { @@ -238,6 +255,21 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e vcpu->arch.ia32_misc_enable_msr & MSR_IA32_MISC_ENABLE_MWAIT); } + + /* + * Bits 127:0 of the allowed SECS.ATTRIBUTES (CPUID.0x12.0x1) enumerate + * the supported XSAVE Feature Request Mask (XFRM), i.e. the enclave's + * requested XCR0 value. The enclave's XFRM must be a subset of XCRO + * at the time of EENTER, thus adjust the allowed XFRM by the guest's + * supported XCR0. Similar to XCR0 handling, FP and SSE are forced to + * '1' even on CPUs that don't support XSAVE. + */ + best = cpuid_entry2_find(entries, nent, 0x12, 0x1); + if (best) { + best->ecx &= guest_supported_xcr0 & 0xffffffff; + best->edx &= guest_supported_xcr0 >> 32; + best->ecx |= XFEATURE_MASK_FPSSE; + } } void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu) @@ -261,27 +293,8 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) kvm_apic_set_version(vcpu); } - best = kvm_find_cpuid_entry(vcpu, 0xD, 0); - if (!best) - vcpu->arch.guest_supported_xcr0 = 0; - else - vcpu->arch.guest_supported_xcr0 = - (best->eax | ((u64)best->edx << 32)) & supported_xcr0; - - /* - * Bits 127:0 of the allowed SECS.ATTRIBUTES (CPUID.0x12.0x1) enumerate - * the supported XSAVE Feature Request Mask (XFRM), i.e. the enclave's - * requested XCR0 value. The enclave's XFRM must be a subset of XCRO - * at the time of EENTER, thus adjust the allowed XFRM by the guest's - * supported XCR0. Similar to XCR0 handling, FP and SSE are forced to - * '1' even on CPUs that don't support XSAVE. - */ - best = kvm_find_cpuid_entry(vcpu, 0x12, 0x1); - if (best) { - best->ecx &= vcpu->arch.guest_supported_xcr0 & 0xffffffff; - best->edx &= vcpu->arch.guest_supported_xcr0 >> 32; - best->ecx |= XFEATURE_MASK_FPSSE; - } + vcpu->arch.guest_supported_xcr0 = + cpuid_get_supported_xcr0(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent); kvm_update_pv_runtime(vcpu); @@ -346,8 +359,14 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2, * KVM_SET_CPUID{,2} again. To support this legacy behavior, check * whether the supplied CPUID data is equal to what's already set. */ - if (vcpu->arch.last_vmentry_cpu != -1) - return kvm_cpuid_check_equal(vcpu, e2, nent); + if (vcpu->arch.last_vmentry_cpu != -1) { + r = kvm_cpuid_check_equal(vcpu, e2, nent); + if (r) + return r; + + kvfree(e2); + return 0; + } r = kvm_check_cpuid(vcpu, e2, nent); if (r) @@ -887,13 +906,14 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) } break; case 0xd: { - u64 guest_perm = xstate_get_guest_group_perm(); + u64 permitted_xcr0 = supported_xcr0 & xstate_get_guest_group_perm(); + u64 permitted_xss = supported_xss; - entry->eax &= supported_xcr0 & guest_perm; - entry->ebx = xstate_required_size(supported_xcr0, false); + entry->eax &= permitted_xcr0; + entry->ebx = xstate_required_size(permitted_xcr0, false); entry->ecx = entry->ebx; - entry->edx &= (supported_xcr0 & guest_perm) >> 32; - if (!supported_xcr0) + entry->edx &= permitted_xcr0 >> 32; + if (!permitted_xcr0) break; entry = do_host_cpuid(array, function, 1); @@ -902,20 +922,20 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) cpuid_entry_override(entry, CPUID_D_1_EAX); if (entry->eax & (F(XSAVES)|F(XSAVEC))) - entry->ebx = xstate_required_size(supported_xcr0 | supported_xss, + entry->ebx = xstate_required_size(permitted_xcr0 | permitted_xss, true); else { - WARN_ON_ONCE(supported_xss != 0); + WARN_ON_ONCE(permitted_xss != 0); entry->ebx = 0; } - entry->ecx &= supported_xss; - entry->edx &= supported_xss >> 32; + entry->ecx &= permitted_xss; + entry->edx &= permitted_xss >> 32; for (i = 2; i < 64; ++i) { bool s_state; - if (supported_xcr0 & BIT_ULL(i)) + if (permitted_xcr0 & BIT_ULL(i)) s_state = false; - else if (supported_xss & BIT_ULL(i)) + else if (permitted_xss & BIT_ULL(i)) s_state = true; else continue; @@ -929,7 +949,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) * invalid sub-leafs. Only valid sub-leafs should * reach this point, and they should have a non-zero * save state size. Furthermore, check whether the - * processor agrees with supported_xcr0/supported_xss + * processor agrees with permitted_xcr0/permitted_xss * on whether this is an XCR0- or IA32_XSS-managed area. */ if (WARN_ON_ONCE(!entry->eax || (entry->ecx & 0x1) != s_state)) { diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index baca9fa37a91..4662469240bc 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2629,7 +2629,7 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) kvm_apic_set_version(vcpu); apic_update_ppr(apic); - hrtimer_cancel(&apic->lapic_timer.timer); + cancel_apic_timer(apic); apic->lapic_timer.expired_tscdeadline = 0; apic_update_lvtt(apic); apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0)); diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index cf206855ebf0..1218b5a342fc 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -983,9 +983,9 @@ void svm_free_nested(struct vcpu_svm *svm) /* * Forcibly leave nested mode in order to be able to reset the VCPU later on. */ -void svm_leave_nested(struct vcpu_svm *svm) +void svm_leave_nested(struct kvm_vcpu *vcpu) { - struct kvm_vcpu *vcpu = &svm->vcpu; + struct vcpu_svm *svm = to_svm(vcpu); if (is_guest_mode(vcpu)) { svm->nested.nested_run_pending = 0; @@ -1411,7 +1411,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, return -EINVAL; if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) { - svm_leave_nested(svm); + svm_leave_nested(vcpu); svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET)); return 0; } @@ -1478,7 +1478,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, */ if (is_guest_mode(vcpu)) - svm_leave_nested(svm); + svm_leave_nested(vcpu); else svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save; @@ -1532,6 +1532,7 @@ static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu) } struct kvm_x86_nested_ops svm_nested_ops = { + .leave_nested = svm_leave_nested, .check_events = svm_check_nested_events, .triple_fault = nested_svm_triple_fault, .get_nested_state_pages = svm_get_nested_state_pages, diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 6a22798eaaee..17b53457d866 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -2100,8 +2100,13 @@ void __init sev_hardware_setup(void) if (!sev_enabled || !npt_enabled) goto out; - /* Does the CPU support SEV? */ - if (!boot_cpu_has(X86_FEATURE_SEV)) + /* + * SEV must obviously be supported in hardware. Sanity check that the + * CPU supports decode assists, which is mandatory for SEV guests to + * support instruction emulation. + */ + if (!boot_cpu_has(X86_FEATURE_SEV) || + WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_DECODEASSISTS))) goto out; /* Retrieve SEV CPUID information */ diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 2c99b18d76c0..6d97629655e3 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -290,7 +290,7 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) if ((old_efer & EFER_SVME) != (efer & EFER_SVME)) { if (!(efer & EFER_SVME)) { - svm_leave_nested(svm); + svm_leave_nested(vcpu); svm_set_gif(svm, true); /* #GP intercept is still needed for vmware backdoor */ if (!enable_vmware_backdoor) @@ -312,7 +312,11 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) return ret; } - if (svm_gp_erratum_intercept) + /* + * Never intercept #GP for SEV guests, KVM can't + * decrypt guest memory to workaround the erratum. + */ + if (svm_gp_erratum_intercept && !sev_guest(vcpu->kvm)) set_exception_intercept(svm, GP_VECTOR); } } @@ -1010,9 +1014,10 @@ static void init_vmcb(struct kvm_vcpu *vcpu) * Guest access to VMware backdoor ports could legitimately * trigger #GP because of TSS I/O permission bitmap. * We intercept those #GP and allow access to them anyway - * as VMware does. + * as VMware does. Don't intercept #GP for SEV guests as KVM can't + * decrypt guest memory to decode the faulting instruction. */ - if (enable_vmware_backdoor) + if (enable_vmware_backdoor && !sev_guest(vcpu->kvm)) set_exception_intercept(svm, GP_VECTOR); svm_set_intercept(svm, INTERCEPT_INTR); @@ -2091,10 +2096,6 @@ static int gp_interception(struct kvm_vcpu *vcpu) if (error_code) goto reinject; - /* All SVM instructions expect page aligned RAX */ - if (svm->vmcb->save.rax & ~PAGE_MASK) - goto reinject; - /* Decode the instruction for usage later */ if (x86_decode_emulated_instruction(vcpu, 0, NULL, 0) != EMULATION_OK) goto reinject; @@ -2112,8 +2113,13 @@ static int gp_interception(struct kvm_vcpu *vcpu) if (!is_guest_mode(vcpu)) return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP | EMULTYPE_NO_DECODE); - } else + } else { + /* All SVM instructions expect page aligned RAX */ + if (svm->vmcb->save.rax & ~PAGE_MASK) + goto reinject; + return emulate_svm_instr(vcpu, opcode); + } reinject: kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); @@ -4252,79 +4258,140 @@ static void svm_enable_smi_window(struct kvm_vcpu *vcpu) } } -static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int insn_len) +static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, + void *insn, int insn_len) { bool smep, smap, is_user; unsigned long cr4; + u64 error_code; + + /* Emulation is always possible when KVM has access to all guest state. */ + if (!sev_guest(vcpu->kvm)) + return true; + + /* #UD and #GP should never be intercepted for SEV guests. */ + WARN_ON_ONCE(emul_type & (EMULTYPE_TRAP_UD | + EMULTYPE_TRAP_UD_FORCED | + EMULTYPE_VMWARE_GP)); /* - * When the guest is an SEV-ES guest, emulation is not possible. + * Emulation is impossible for SEV-ES guests as KVM doesn't have access + * to guest register state. */ if (sev_es_guest(vcpu->kvm)) return false; /* + * Emulation is possible if the instruction is already decoded, e.g. + * when completing I/O after returning from userspace. + */ + if (emul_type & EMULTYPE_NO_DECODE) + return true; + + /* + * Emulation is possible for SEV guests if and only if a prefilled + * buffer containing the bytes of the intercepted instruction is + * available. SEV guest memory is encrypted with a guest specific key + * and cannot be decrypted by KVM, i.e. KVM would read cyphertext and + * decode garbage. + * + * Inject #UD if KVM reached this point without an instruction buffer. + * In practice, this path should never be hit by a well-behaved guest, + * e.g. KVM doesn't intercept #UD or #GP for SEV guests, but this path + * is still theoretically reachable, e.g. via unaccelerated fault-like + * AVIC access, and needs to be handled by KVM to avoid putting the + * guest into an infinite loop. Injecting #UD is somewhat arbitrary, + * but its the least awful option given lack of insight into the guest. + */ + if (unlikely(!insn)) { + kvm_queue_exception(vcpu, UD_VECTOR); + return false; + } + + /* + * Emulate for SEV guests if the insn buffer is not empty. The buffer + * will be empty if the DecodeAssist microcode cannot fetch bytes for + * the faulting instruction because the code fetch itself faulted, e.g. + * the guest attempted to fetch from emulated MMIO or a guest page + * table used to translate CS:RIP resides in emulated MMIO. + */ + if (likely(insn_len)) + return true; + + /* * Detect and workaround Errata 1096 Fam_17h_00_0Fh. * * Errata: - * When CPU raise #NPF on guest data access and vCPU CR4.SMAP=1, it is - * possible that CPU microcode implementing DecodeAssist will fail - * to read bytes of instruction which caused #NPF. In this case, - * GuestIntrBytes field of the VMCB on a VMEXIT will incorrectly - * return 0 instead of the correct guest instruction bytes. - * - * This happens because CPU microcode reading instruction bytes - * uses a special opcode which attempts to read data using CPL=0 - * privileges. The microcode reads CS:RIP and if it hits a SMAP - * fault, it gives up and returns no instruction bytes. + * When CPU raises #NPF on guest data access and vCPU CR4.SMAP=1, it is + * possible that CPU microcode implementing DecodeAssist will fail to + * read guest memory at CS:RIP and vmcb.GuestIntrBytes will incorrectly + * be '0'. This happens because microcode reads CS:RIP using a _data_ + * loap uop with CPL=0 privileges. If the load hits a SMAP #PF, ucode + * gives up and does not fill the instruction bytes buffer. * - * Detection: - * We reach here in case CPU supports DecodeAssist, raised #NPF and - * returned 0 in GuestIntrBytes field of the VMCB. - * First, errata can only be triggered in case vCPU CR4.SMAP=1. - * Second, if vCPU CR4.SMEP=1, errata could only be triggered - * in case vCPU CPL==3 (Because otherwise guest would have triggered - * a SMEP fault instead of #NPF). - * Otherwise, vCPU CR4.SMEP=0, errata could be triggered by any vCPU CPL. - * As most guests enable SMAP if they have also enabled SMEP, use above - * logic in order to attempt minimize false-positive of detecting errata - * while still preserving all cases semantic correctness. + * As above, KVM reaches this point iff the VM is an SEV guest, the CPU + * supports DecodeAssist, a #NPF was raised, KVM's page fault handler + * triggered emulation (e.g. for MMIO), and the CPU returned 0 in the + * GuestIntrBytes field of the VMCB. * - * Workaround: - * To determine what instruction the guest was executing, the hypervisor - * will have to decode the instruction at the instruction pointer. + * This does _not_ mean that the erratum has been encountered, as the + * DecodeAssist will also fail if the load for CS:RIP hits a legitimate + * #PF, e.g. if the guest attempt to execute from emulated MMIO and + * encountered a reserved/not-present #PF. * - * In non SEV guest, hypervisor will be able to read the guest - * memory to decode the instruction pointer when insn_len is zero - * so we return true to indicate that decoding is possible. + * To hit the erratum, the following conditions must be true: + * 1. CR4.SMAP=1 (obviously). + * 2. CR4.SMEP=0 || CPL=3. If SMEP=1 and CPL<3, the erratum cannot + * have been hit as the guest would have encountered a SMEP + * violation #PF, not a #NPF. + * 3. The #NPF is not due to a code fetch, in which case failure to + * retrieve the instruction bytes is legitimate (see abvoe). * - * But in the SEV guest, the guest memory is encrypted with the - * guest specific key and hypervisor will not be able to decode the - * instruction pointer so we will not able to workaround it. Lets - * print the error and request to kill the guest. + * In addition, don't apply the erratum workaround if the #NPF occurred + * while translating guest page tables (see below). */ - if (likely(!insn || insn_len)) - return true; - - /* - * If RIP is invalid, go ahead with emulation which will cause an - * internal error exit. - */ - if (!kvm_vcpu_gfn_to_memslot(vcpu, kvm_rip_read(vcpu) >> PAGE_SHIFT)) - return true; + error_code = to_svm(vcpu)->vmcb->control.exit_info_1; + if (error_code & (PFERR_GUEST_PAGE_MASK | PFERR_FETCH_MASK)) + goto resume_guest; cr4 = kvm_read_cr4(vcpu); smep = cr4 & X86_CR4_SMEP; smap = cr4 & X86_CR4_SMAP; is_user = svm_get_cpl(vcpu) == 3; if (smap && (!smep || is_user)) { - if (!sev_guest(vcpu->kvm)) - return true; - pr_err_ratelimited("KVM: SEV Guest triggered AMD Erratum 1096\n"); - kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); + + /* + * If the fault occurred in userspace, arbitrarily inject #GP + * to avoid killing the guest and to hopefully avoid confusing + * the guest kernel too much, e.g. injecting #PF would not be + * coherent with respect to the guest's page tables. Request + * triple fault if the fault occurred in the kernel as there's + * no fault that KVM can inject without confusing the guest. + * In practice, the triple fault is moot as no sane SEV kernel + * will execute from user memory while also running with SMAP=1. + */ + if (is_user) + kvm_inject_gp(vcpu, 0); + else + kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); } +resume_guest: + /* + * If the erratum was not hit, simply resume the guest and let it fault + * again. While awful, e.g. the vCPU may get stuck in an infinite loop + * if the fault is at CPL=0, it's the lesser of all evils. Exiting to + * userspace will kill the guest, and letting the emulator read garbage + * will yield random behavior and potentially corrupt the guest. + * + * Simply resuming the guest is technically not a violation of the SEV + * architecture. AMD's APM states that all code fetches and page table + * accesses for SEV guest are encrypted, regardless of the C-Bit. The + * APM also states that encrypted accesses to MMIO are "ignored", but + * doesn't explicitly define "ignored", i.e. doing nothing and letting + * the guest spin is technically "ignoring" the access. + */ return false; } diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 47ef8f4a9358..73525353e424 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -304,11 +304,6 @@ static inline void vmcb_mark_all_clean(struct vmcb *vmcb) & ~VMCB_ALWAYS_DIRTY_MASK; } -static inline bool vmcb_is_clean(struct vmcb *vmcb, int bit) -{ - return (vmcb->control.clean & (1 << bit)); -} - static inline void vmcb_mark_dirty(struct vmcb *vmcb, int bit) { vmcb->control.clean &= ~(1 << bit); @@ -525,7 +520,7 @@ static inline bool nested_exit_on_nmi(struct vcpu_svm *svm) int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb_gpa, struct vmcb *vmcb12, bool from_vmrun); -void svm_leave_nested(struct vcpu_svm *svm); +void svm_leave_nested(struct kvm_vcpu *vcpu); void svm_free_nested(struct vcpu_svm *svm); int svm_allocate_nested(struct vcpu_svm *svm); int nested_svm_vmrun(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/svm/svm_onhyperv.h b/arch/x86/kvm/svm/svm_onhyperv.h index c53b8bf8d013..489ca56212c6 100644 --- a/arch/x86/kvm/svm/svm_onhyperv.h +++ b/arch/x86/kvm/svm/svm_onhyperv.h @@ -46,6 +46,9 @@ static inline void svm_hv_init_vmcb(struct vmcb *vmcb) if (npt_enabled && ms_hyperv.nested_features & HV_X64_NESTED_ENLIGHTENED_TLB) hve->hv_enlightenments_control.enlightened_npt_tlb = 1; + + if (ms_hyperv.nested_features & HV_X64_NESTED_MSR_BITMAP) + hve->hv_enlightenments_control.msr_bitmap = 1; } static inline void svm_hv_hardware_setup(void) @@ -83,14 +86,7 @@ static inline void svm_hv_vmcb_dirty_nested_enlightenments( struct hv_enlightenments *hve = (struct hv_enlightenments *)vmcb->control.reserved_sw; - /* - * vmcb can be NULL if called during early vcpu init. - * And its okay not to mark vmcb dirty during vcpu init - * as we mark it dirty unconditionally towards end of vcpu - * init phase. - */ - if (vmcb_is_clean(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS) && - hve->hv_enlightenments_control.msr_bitmap) + if (hve->hv_enlightenments_control.msr_bitmap) vmcb_mark_dirty(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS); } diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index 959b59d13b5a..3f430e218375 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -54,7 +54,6 @@ struct nested_vmx_msrs { struct vmcs_config { int size; - int order; u32 basic_cap; u32 revision_id; u32 pin_based_exec_ctrl; diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c index ba6f99f584ac..87e3dc10edf4 100644 --- a/arch/x86/kvm/vmx/evmcs.c +++ b/arch/x86/kvm/vmx/evmcs.c @@ -12,8 +12,6 @@ DEFINE_STATIC_KEY_FALSE(enable_evmcs); -#if IS_ENABLED(CONFIG_HYPERV) - #define EVMCS1_OFFSET(x) offsetof(struct hv_enlightened_vmcs, x) #define EVMCS1_FIELD(number, name, clean_field)[ROL16(number, 6)] = \ {EVMCS1_OFFSET(name), clean_field} @@ -296,6 +294,7 @@ const struct evmcs_field vmcs_field_to_evmcs_1[] = { }; const unsigned int nr_evmcs_1_fields = ARRAY_SIZE(vmcs_field_to_evmcs_1); +#if IS_ENABLED(CONFIG_HYPERV) __init void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) { vmcs_conf->pin_based_exec_ctrl &= ~EVMCS1_UNSUPPORTED_PINCTRL; @@ -362,6 +361,7 @@ void nested_evmcs_filter_control_msr(u32 msr_index, u64 *pdata) case MSR_IA32_VMX_PROCBASED_CTLS2: ctl_high &= ~EVMCS1_UNSUPPORTED_2NDEXEC; break; + case MSR_IA32_VMX_TRUE_PINBASED_CTLS: case MSR_IA32_VMX_PINBASED_CTLS: ctl_high &= ~EVMCS1_UNSUPPORTED_PINCTRL; break; diff --git a/arch/x86/kvm/vmx/evmcs.h b/arch/x86/kvm/vmx/evmcs.h index 16731d2cf231..8d70f9aea94b 100644 --- a/arch/x86/kvm/vmx/evmcs.h +++ b/arch/x86/kvm/vmx/evmcs.h @@ -59,12 +59,12 @@ DECLARE_STATIC_KEY_FALSE(enable_evmcs); SECONDARY_EXEC_SHADOW_VMCS | \ SECONDARY_EXEC_TSC_SCALING | \ SECONDARY_EXEC_PAUSE_LOOP_EXITING) -#define EVMCS1_UNSUPPORTED_VMEXIT_CTRL (VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) +#define EVMCS1_UNSUPPORTED_VMEXIT_CTRL \ + (VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | \ + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER) #define EVMCS1_UNSUPPORTED_VMENTRY_CTRL (VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) #define EVMCS1_UNSUPPORTED_VMFUNC (VMX_VMFUNC_EPTP_SWITCHING) -#if IS_ENABLED(CONFIG_HYPERV) - struct evmcs_field { u16 offset; u16 clean_field; @@ -73,26 +73,56 @@ struct evmcs_field { extern const struct evmcs_field vmcs_field_to_evmcs_1[]; extern const unsigned int nr_evmcs_1_fields; -static __always_inline int get_evmcs_offset(unsigned long field, - u16 *clean_field) +static __always_inline int evmcs_field_offset(unsigned long field, + u16 *clean_field) { unsigned int index = ROL16(field, 6); const struct evmcs_field *evmcs_field; - if (unlikely(index >= nr_evmcs_1_fields)) { - WARN_ONCE(1, "KVM: accessing unsupported EVMCS field %lx\n", - field); + if (unlikely(index >= nr_evmcs_1_fields)) return -ENOENT; - } evmcs_field = &vmcs_field_to_evmcs_1[index]; + /* + * Use offset=0 to detect holes in eVMCS. This offset belongs to + * 'revision_id' but this field has no encoding and is supposed to + * be accessed directly. + */ + if (unlikely(!evmcs_field->offset)) + return -ENOENT; + if (clean_field) *clean_field = evmcs_field->clean_field; return evmcs_field->offset; } +static inline u64 evmcs_read_any(struct hv_enlightened_vmcs *evmcs, + unsigned long field, u16 offset) +{ + /* + * vmcs12_read_any() doesn't care whether the supplied structure + * is 'struct vmcs12' or 'struct hv_enlightened_vmcs' as it takes + * the exact offset of the required field, use it for convenience + * here. + */ + return vmcs12_read_any((void *)evmcs, field, offset); +} + +#if IS_ENABLED(CONFIG_HYPERV) + +static __always_inline int get_evmcs_offset(unsigned long field, + u16 *clean_field) +{ + int offset = evmcs_field_offset(field, clean_field); + + WARN_ONCE(offset < 0, "KVM: accessing unsupported EVMCS field %lx\n", + field); + + return offset; +} + static __always_inline void evmcs_write64(unsigned long field, u64 value) { u16 clean_field; diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index f235f77cbc03..ba34e94049c7 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -7,6 +7,7 @@ #include <asm/mmu_context.h> #include "cpuid.h" +#include "evmcs.h" #include "hyperv.h" #include "mmu.h" #include "nested.h" @@ -4851,18 +4852,20 @@ static struct vmcs *alloc_shadow_vmcs(struct kvm_vcpu *vcpu) struct loaded_vmcs *loaded_vmcs = vmx->loaded_vmcs; /* - * We should allocate a shadow vmcs for vmcs01 only when L1 - * executes VMXON and free it when L1 executes VMXOFF. - * As it is invalid to execute VMXON twice, we shouldn't reach - * here when vmcs01 already have an allocated shadow vmcs. + * KVM allocates a shadow VMCS only when L1 executes VMXON and frees it + * when L1 executes VMXOFF or the vCPU is forced out of nested + * operation. VMXON faults if the CPU is already post-VMXON, so it + * should be impossible to already have an allocated shadow VMCS. KVM + * doesn't support virtualization of VMCS shadowing, so vmcs01 should + * always be the loaded VMCS. */ - WARN_ON(loaded_vmcs == &vmx->vmcs01 && loaded_vmcs->shadow_vmcs); + if (WARN_ON(loaded_vmcs != &vmx->vmcs01 || loaded_vmcs->shadow_vmcs)) + return loaded_vmcs->shadow_vmcs; + + loaded_vmcs->shadow_vmcs = alloc_vmcs(true); + if (loaded_vmcs->shadow_vmcs) + vmcs_clear(loaded_vmcs->shadow_vmcs); - if (!loaded_vmcs->shadow_vmcs) { - loaded_vmcs->shadow_vmcs = alloc_vmcs(true); - if (loaded_vmcs->shadow_vmcs) - vmcs_clear(loaded_vmcs->shadow_vmcs); - } return loaded_vmcs->shadow_vmcs; } @@ -5099,27 +5102,49 @@ static int handle_vmread(struct kvm_vcpu *vcpu) if (!nested_vmx_check_permission(vcpu)) return 1; - /* - * In VMX non-root operation, when the VMCS-link pointer is INVALID_GPA, - * any VMREAD sets the ALU flags for VMfailInvalid. - */ - if (vmx->nested.current_vmptr == INVALID_GPA || - (is_guest_mode(vcpu) && - get_vmcs12(vcpu)->vmcs_link_pointer == INVALID_GPA)) - return nested_vmx_failInvalid(vcpu); - /* Decode instruction info and find the field to read */ field = kvm_register_read(vcpu, (((instr_info) >> 28) & 0xf)); - offset = vmcs_field_to_offset(field); - if (offset < 0) - return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); + if (!evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) { + /* + * In VMX non-root operation, when the VMCS-link pointer is INVALID_GPA, + * any VMREAD sets the ALU flags for VMfailInvalid. + */ + if (vmx->nested.current_vmptr == INVALID_GPA || + (is_guest_mode(vcpu) && + get_vmcs12(vcpu)->vmcs_link_pointer == INVALID_GPA)) + return nested_vmx_failInvalid(vcpu); - if (!is_guest_mode(vcpu) && is_vmcs12_ext_field(field)) - copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12); + offset = get_vmcs12_field_offset(field); + if (offset < 0) + return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); + + if (!is_guest_mode(vcpu) && is_vmcs12_ext_field(field)) + copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12); - /* Read the field, zero-extended to a u64 value */ - value = vmcs12_read_any(vmcs12, field, offset); + /* Read the field, zero-extended to a u64 value */ + value = vmcs12_read_any(vmcs12, field, offset); + } else { + /* + * Hyper-V TLFS (as of 6.0b) explicitly states, that while an + * enlightened VMCS is active VMREAD/VMWRITE instructions are + * unsupported. Unfortunately, certain versions of Windows 11 + * don't comply with this requirement which is not enforced in + * genuine Hyper-V. Allow VMREAD from an enlightened VMCS as a + * workaround, as misbehaving guests will panic on VM-Fail. + * Note, enlightened VMCS is incompatible with shadow VMCS so + * all VMREADs from L2 should go to L1. + */ + if (WARN_ON_ONCE(is_guest_mode(vcpu))) + return nested_vmx_failInvalid(vcpu); + + offset = evmcs_field_offset(field, NULL); + if (offset < 0) + return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); + + /* Read the field, zero-extended to a u64 value */ + value = evmcs_read_any(vmx->nested.hv_evmcs, field, offset); + } /* * Now copy part of this value to register or memory, as requested. @@ -5214,7 +5239,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) field = kvm_register_read(vcpu, (((instr_info) >> 28) & 0xf)); - offset = vmcs_field_to_offset(field); + offset = get_vmcs12_field_offset(field); if (offset < 0) return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); @@ -6462,7 +6487,7 @@ static u64 nested_vmx_calc_vmcs_enum_msr(void) max_idx = 0; for (i = 0; i < nr_vmcs12_fields; i++) { /* The vmcs12 table is very, very sparsely populated. */ - if (!vmcs_field_to_offset_table[i]) + if (!vmcs12_field_offsets[i]) continue; idx = vmcs_field_index(VMCS12_IDX_TO_ENC(i)); @@ -6771,6 +6796,7 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *)) } struct kvm_x86_nested_ops vmx_nested_ops = { + .leave_nested = vmx_leave_nested, .check_events = vmx_check_nested_events, .hv_timer_pending = nested_vmx_preemption_timer_pending, .triple_fault = nested_vmx_triple_fault, diff --git a/arch/x86/kvm/vmx/vmcs12.c b/arch/x86/kvm/vmx/vmcs12.c index cab6ba7a5005..2251b60920f8 100644 --- a/arch/x86/kvm/vmx/vmcs12.c +++ b/arch/x86/kvm/vmx/vmcs12.c @@ -8,7 +8,7 @@ FIELD(number, name), \ [ROL16(number##_HIGH, 6)] = VMCS12_OFFSET(name) + sizeof(u32) -const unsigned short vmcs_field_to_offset_table[] = { +const unsigned short vmcs12_field_offsets[] = { FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id), FIELD(POSTED_INTR_NV, posted_intr_nv), FIELD(GUEST_ES_SELECTOR, guest_es_selector), @@ -151,4 +151,4 @@ const unsigned short vmcs_field_to_offset_table[] = { FIELD(HOST_RSP, host_rsp), FIELD(HOST_RIP, host_rip), }; -const unsigned int nr_vmcs12_fields = ARRAY_SIZE(vmcs_field_to_offset_table); +const unsigned int nr_vmcs12_fields = ARRAY_SIZE(vmcs12_field_offsets); diff --git a/arch/x86/kvm/vmx/vmcs12.h b/arch/x86/kvm/vmx/vmcs12.h index 2a45f026ee11..746129ddd5ae 100644 --- a/arch/x86/kvm/vmx/vmcs12.h +++ b/arch/x86/kvm/vmx/vmcs12.h @@ -361,10 +361,10 @@ static inline void vmx_check_vmcs12_offsets(void) CHECK_OFFSET(guest_pml_index, 996); } -extern const unsigned short vmcs_field_to_offset_table[]; +extern const unsigned short vmcs12_field_offsets[]; extern const unsigned int nr_vmcs12_fields; -static inline short vmcs_field_to_offset(unsigned long field) +static inline short get_vmcs12_field_offset(unsigned long field) { unsigned short offset; unsigned int index; @@ -377,7 +377,7 @@ static inline short vmcs_field_to_offset(unsigned long field) return -ENOENT; index = array_index_nospec(index, nr_vmcs12_fields); - offset = vmcs_field_to_offset_table[index]; + offset = vmcs12_field_offsets[index]; if (offset == 0) return -ENOENT; return offset; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 4ac676066d60..aca3ae2a02f3 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1487,11 +1487,12 @@ static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data) return 0; } -static bool vmx_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int insn_len) +static bool vmx_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, + void *insn, int insn_len) { /* * Emulation of instructions in SGX enclaves is impossible as RIP does - * not point tthe failing instruction, and even if it did, the code + * not point at the failing instruction, and even if it did, the code * stream is inaccessible. Inject #UD instead of exiting to userspace * so that guest userspace can't DoS the guest simply by triggering * emulation (enclaves are CPL3 only). @@ -2603,7 +2604,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, return -EIO; vmcs_conf->size = vmx_msr_high & 0x1fff; - vmcs_conf->order = get_order(vmcs_conf->size); vmcs_conf->basic_cap = vmx_msr_high & ~0x1fff; vmcs_conf->revision_id = vmx_msr_low; @@ -2628,7 +2628,7 @@ struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags) struct page *pages; struct vmcs *vmcs; - pages = __alloc_pages_node(node, flags, vmcs_config.order); + pages = __alloc_pages_node(node, flags, 0); if (!pages) return NULL; vmcs = page_address(pages); @@ -2647,7 +2647,7 @@ struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags) void free_vmcs(struct vmcs *vmcs) { - free_pages((unsigned long)vmcs, vmcs_config.order); + free_page((unsigned long)vmcs); } /* @@ -4094,10 +4094,14 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx) vmcs_write32(HOST_IA32_SYSENTER_CS, low32); /* - * If 32-bit syscall is enabled, vmx_vcpu_load_vcms rewrites - * HOST_IA32_SYSENTER_ESP. + * SYSENTER is used for 32-bit system calls on either 32-bit or + * 64-bit kernels. It is always zero If neither is allowed, otherwise + * vmx_vcpu_load_vmcs loads it with the per-CPU entry stack (and may + * have already done so!). */ - vmcs_writel(HOST_IA32_SYSENTER_ESP, 0); + if (!IS_ENABLED(CONFIG_IA32_EMULATION) && !IS_ENABLED(CONFIG_X86_32)) + vmcs_writel(HOST_IA32_SYSENTER_ESP, 0); + rdmsrl(MSR_IA32_SYSENTER_EIP, tmpl); vmcs_writel(HOST_IA32_SYSENTER_EIP, tmpl); /* 22.2.3 */ @@ -4901,8 +4905,33 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) dr6 = vmx_get_exit_qual(vcpu); if (!(vcpu->guest_debug & (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { + /* + * If the #DB was due to ICEBP, a.k.a. INT1, skip the + * instruction. ICEBP generates a trap-like #DB, but + * despite its interception control being tied to #DB, + * is an instruction intercept, i.e. the VM-Exit occurs + * on the ICEBP itself. Note, skipping ICEBP also + * clears STI and MOVSS blocking. + * + * For all other #DBs, set vmcs.PENDING_DBG_EXCEPTIONS.BS + * if single-step is enabled in RFLAGS and STI or MOVSS + * blocking is active, as the CPU doesn't set the bit + * on VM-Exit due to #DB interception. VM-Entry has a + * consistency check that a single-step #DB is pending + * in this scenario as the previous instruction cannot + * have toggled RFLAGS.TF 0=>1 (because STI and POP/MOV + * don't modify RFLAGS), therefore the one instruction + * delay when activating single-step breakpoints must + * have already expired. Note, the CPU sets/clears BS + * as appropriate for all other VM-Exits types. + */ if (is_icebp(intr_info)) WARN_ON(!skip_emulated_instruction(vcpu)); + else if ((vmx_get_rflags(vcpu) & X86_EFLAGS_TF) && + (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & + (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS))) + vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, + vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS) | DR6_BS); kvm_queue_exception_p(vcpu, DB_VECTOR, dr6); return 1; @@ -5397,7 +5426,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) { gpa_t gpa; - if (!vmx_can_emulate_instruction(vcpu, NULL, 0)) + if (!vmx_can_emulate_instruction(vcpu, EMULTYPE_PF, NULL, 0)) return 1; /* diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9e43d756312f..74b53a16f38a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3535,6 +3535,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (data & ~supported_xss) return 1; vcpu->arch.ia32_xss = data; + kvm_update_cpuid_runtime(vcpu); break; case MSR_SMI_COUNT: if (!msr_info->host_initiated) @@ -4229,6 +4230,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_SREGS2: case KVM_CAP_EXIT_ON_EMULATION_FAILURE: case KVM_CAP_VCPU_ATTRIBUTES: + case KVM_CAP_SYS_ATTRIBUTES: r = 1; break; case KVM_CAP_EXIT_HYPERCALL: @@ -4331,7 +4333,49 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) break; } return r; +} + +static inline void __user *kvm_get_attr_addr(struct kvm_device_attr *attr) +{ + void __user *uaddr = (void __user*)(unsigned long)attr->addr; + if ((u64)(unsigned long)uaddr != attr->addr) + return ERR_PTR(-EFAULT); + return uaddr; +} + +static int kvm_x86_dev_get_attr(struct kvm_device_attr *attr) +{ + u64 __user *uaddr = kvm_get_attr_addr(attr); + + if (attr->group) + return -ENXIO; + + if (IS_ERR(uaddr)) + return PTR_ERR(uaddr); + + switch (attr->attr) { + case KVM_X86_XCOMP_GUEST_SUPP: + if (put_user(supported_xcr0, uaddr)) + return -EFAULT; + return 0; + default: + return -ENXIO; + break; + } +} + +static int kvm_x86_dev_has_attr(struct kvm_device_attr *attr) +{ + if (attr->group) + return -ENXIO; + + switch (attr->attr) { + case KVM_X86_XCOMP_GUEST_SUPP: + return 0; + default: + return -ENXIO; + } } long kvm_arch_dev_ioctl(struct file *filp, @@ -4422,6 +4466,22 @@ long kvm_arch_dev_ioctl(struct file *filp, case KVM_GET_SUPPORTED_HV_CPUID: r = kvm_ioctl_get_supported_hv_cpuid(NULL, argp); break; + case KVM_GET_DEVICE_ATTR: { + struct kvm_device_attr attr; + r = -EFAULT; + if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) + break; + r = kvm_x86_dev_get_attr(&attr); + break; + } + case KVM_HAS_DEVICE_ATTR: { + struct kvm_device_attr attr; + r = -EFAULT; + if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) + break; + r = kvm_x86_dev_has_attr(&attr); + break; + } default: r = -EINVAL; break; @@ -4860,8 +4920,10 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, vcpu->arch.apic->sipi_vector = events->sipi_vector; if (events->flags & KVM_VCPUEVENT_VALID_SMM) { - if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) + if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) { + kvm_x86_ops.nested_ops->leave_nested(vcpu); kvm_smm_changed(vcpu, events->smi.smm); + } vcpu->arch.smi_pending = events->smi.pending; @@ -5022,11 +5084,11 @@ static int kvm_arch_tsc_has_attr(struct kvm_vcpu *vcpu, static int kvm_arch_tsc_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { - u64 __user *uaddr = (u64 __user *)(unsigned long)attr->addr; + u64 __user *uaddr = kvm_get_attr_addr(attr); int r; - if ((u64)(unsigned long)uaddr != attr->addr) - return -EFAULT; + if (IS_ERR(uaddr)) + return PTR_ERR(uaddr); switch (attr->attr) { case KVM_VCPU_TSC_OFFSET: @@ -5045,12 +5107,12 @@ static int kvm_arch_tsc_get_attr(struct kvm_vcpu *vcpu, static int kvm_arch_tsc_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { - u64 __user *uaddr = (u64 __user *)(unsigned long)attr->addr; + u64 __user *uaddr = kvm_get_attr_addr(attr); struct kvm *kvm = vcpu->kvm; int r; - if ((u64)(unsigned long)uaddr != attr->addr) - return -EFAULT; + if (IS_ERR(uaddr)) + return PTR_ERR(uaddr); switch (attr->attr) { case KVM_VCPU_TSC_OFFSET: { @@ -6810,6 +6872,13 @@ int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val, } EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system); +static int kvm_can_emulate_insn(struct kvm_vcpu *vcpu, int emul_type, + void *insn, int insn_len) +{ + return static_call(kvm_x86_can_emulate_instruction)(vcpu, emul_type, + insn, insn_len); +} + int handle_ud(struct kvm_vcpu *vcpu) { static const char kvm_emulate_prefix[] = { __KVM_EMULATE_PREFIX }; @@ -6817,7 +6886,7 @@ int handle_ud(struct kvm_vcpu *vcpu) char sig[5]; /* ud2; .ascii "kvm" */ struct x86_exception e; - if (unlikely(!static_call(kvm_x86_can_emulate_instruction)(vcpu, NULL, 0))) + if (unlikely(!kvm_can_emulate_insn(vcpu, emul_type, NULL, 0))) return 1; if (force_emulation_prefix && @@ -8193,7 +8262,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, bool writeback = true; bool write_fault_to_spt; - if (unlikely(!static_call(kvm_x86_can_emulate_instruction)(vcpu, insn, insn_len))) + if (unlikely(!kvm_can_emulate_insn(vcpu, emulation_type, insn, insn_len))) return 1; vcpu->arch.l1tf_flush_l1d = true; @@ -9706,7 +9775,7 @@ void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD); } -void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) +static void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) { if (!lapic_in_kernel(vcpu)) return; @@ -11209,7 +11278,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vcpu->arch.msr_misc_features_enables = 0; - vcpu->arch.xcr0 = XFEATURE_MASK_FP; + __kvm_set_xcr(vcpu, 0, XFEATURE_MASK_FP); + __kvm_set_msr(vcpu, MSR_IA32_XSS, 0, true); } /* All GPRs except RDX (handled below) are zeroed on RESET/INIT. */ @@ -11226,8 +11296,6 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) cpuid_0x1 = kvm_find_cpuid_entry(vcpu, 1, 0); kvm_rdx_write(vcpu, cpuid_0x1 ? cpuid_0x1->eax : 0x600); - vcpu->arch.ia32_xss = 0; - static_call(kvm_x86_vcpu_reset)(vcpu, init_event); kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index 0e3f7d6e9fd7..bad57535fad0 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -316,10 +316,7 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v) "\tnotq %0\n" "\t" LOCK_PREFIX "andq %0, %2\n" "2:\n" - "\t.section .fixup,\"ax\"\n" - "3:\tjmp\t2b\n" - "\t.previous\n" - _ASM_EXTABLE_UA(1b, 3b) + _ASM_EXTABLE_UA(1b, 2b) : "=r" (evtchn_pending_sel), "+m" (vi->evtchn_pending_sel), "+m" (v->arch.xen.evtchn_pending_sel) @@ -335,10 +332,7 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v) "\tnotl %0\n" "\t" LOCK_PREFIX "andl %0, %2\n" "2:\n" - "\t.section .fixup,\"ax\"\n" - "3:\tjmp\t2b\n" - "\t.previous\n" - _ASM_EXTABLE_UA(1b, 3b) + _ASM_EXTABLE_UA(1b, 2b) : "=r" (evtchn_pending_sel32), "+m" (vi->evtchn_pending_sel), "+m" (v->arch.xen.evtchn_pending_sel) diff --git a/block/blk-core.c b/block/blk-core.c index 97f8bc8d3a79..d93e3bb9a769 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1061,20 +1061,32 @@ again: } static unsigned long __part_start_io_acct(struct block_device *part, - unsigned int sectors, unsigned int op) + unsigned int sectors, unsigned int op, + unsigned long start_time) { const int sgrp = op_stat_group(op); - unsigned long now = READ_ONCE(jiffies); part_stat_lock(); - update_io_ticks(part, now, false); + update_io_ticks(part, start_time, false); part_stat_inc(part, ios[sgrp]); part_stat_add(part, sectors[sgrp], sectors); part_stat_local_inc(part, in_flight[op_is_write(op)]); part_stat_unlock(); - return now; + return start_time; +} + +/** + * bio_start_io_acct_time - start I/O accounting for bio based drivers + * @bio: bio to start account for + * @start_time: start time that should be passed back to bio_end_io_acct(). + */ +void bio_start_io_acct_time(struct bio *bio, unsigned long start_time) +{ + __part_start_io_acct(bio->bi_bdev, bio_sectors(bio), + bio_op(bio), start_time); } +EXPORT_SYMBOL_GPL(bio_start_io_acct_time); /** * bio_start_io_acct - start I/O accounting for bio based drivers @@ -1084,14 +1096,15 @@ static unsigned long __part_start_io_acct(struct block_device *part, */ unsigned long bio_start_io_acct(struct bio *bio) { - return __part_start_io_acct(bio->bi_bdev, bio_sectors(bio), bio_op(bio)); + return __part_start_io_acct(bio->bi_bdev, bio_sectors(bio), + bio_op(bio), jiffies); } EXPORT_SYMBOL_GPL(bio_start_io_acct); unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors, unsigned int op) { - return __part_start_io_acct(disk->part0, sectors, op); + return __part_start_io_acct(disk->part0, sectors, op, jiffies); } EXPORT_SYMBOL(disk_start_io_acct); diff --git a/block/blk-ia-ranges.c b/block/blk-ia-ranges.c index b925f3db3ab7..18c68d8b9138 100644 --- a/block/blk-ia-ranges.c +++ b/block/blk-ia-ranges.c @@ -144,7 +144,7 @@ int disk_register_independent_access_ranges(struct gendisk *disk, &q->kobj, "%s", "independent_access_ranges"); if (ret) { q->ia_ranges = NULL; - kfree(iars); + kobject_put(&iars->kobj); return ret; } diff --git a/block/blk-mq.c b/block/blk-mq.c index f3bf3358a3bb..1adfe4824ef5 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2922,6 +2922,8 @@ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request * */ blk_mq_run_dispatch_ops(rq->q, ret = blk_mq_request_issue_directly(rq, true)); + if (ret) + blk_account_io_done(rq, ktime_get_ns()); return ret; } EXPORT_SYMBOL_GPL(blk_insert_cloned_request); diff --git a/drivers/ata/pata_platform.c b/drivers/ata/pata_platform.c index 028329428b75..87c7c90676ca 100644 --- a/drivers/ata/pata_platform.c +++ b/drivers/ata/pata_platform.c @@ -128,6 +128,8 @@ int __pata_platform_probe(struct device *dev, struct resource *io_res, ap = host->ports[0]; ap->ops = devm_kzalloc(dev, sizeof(*ap->ops), GFP_KERNEL); + if (!ap->ops) + return -ENOMEM; ap->ops->inherits = &ata_sff_port_ops; ap->ops->cable_detect = ata_cable_unknown; ap->ops->set_mode = pata_platform_set_mode; diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c index 646ad385e490..ccac1c453080 100644 --- a/drivers/connector/cn_proc.c +++ b/drivers/connector/cn_proc.c @@ -358,7 +358,7 @@ static void cn_proc_mcast_ctl(struct cn_msg *msg, * other namespaces. */ if ((current_user_ns() != &init_user_ns) || - (task_active_pid_ns(current) != &init_pid_ns)) + !task_is_in_init_pid_ns(current)) return; /* Can only change if privileged. */ diff --git a/drivers/counter/counter-core.c b/drivers/counter/counter-core.c index 7e0957eea094..869894b74741 100644 --- a/drivers/counter/counter-core.c +++ b/drivers/counter/counter-core.c @@ -90,10 +90,8 @@ struct counter_device *counter_alloc(size_t sizeof_priv) int err; ch = kzalloc(sizeof(*ch) + sizeof_priv, GFP_KERNEL); - if (!ch) { - err = -ENOMEM; - goto err_alloc_ch; - } + if (!ch) + return NULL; counter = &ch->counter; dev = &counter->dev; @@ -123,9 +121,8 @@ err_chrdev_add: err_ida_alloc: kfree(ch); -err_alloc_ch: - return ERR_PTR(err); + return NULL; } EXPORT_SYMBOL_GPL(counter_alloc); @@ -208,12 +205,12 @@ struct counter_device *devm_counter_alloc(struct device *dev, size_t sizeof_priv int err; counter = counter_alloc(sizeof_priv); - if (IS_ERR(counter)) - return counter; + if (!counter) + return NULL; err = devm_add_action_or_reset(dev, devm_counter_put, counter); if (err < 0) - return ERR_PTR(err); + return NULL; return counter; } diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index ae79c3300129..7de3f5b6e8d0 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -722,6 +722,13 @@ void __init efi_systab_report_header(const efi_table_hdr_t *systab_hdr, systab_hdr->revision >> 16, systab_hdr->revision & 0xffff, vendor); + + if (IS_ENABLED(CONFIG_X86_64) && + systab_hdr->revision > EFI_1_10_SYSTEM_TABLE_REVISION && + !strcmp(vendor, "Apple")) { + pr_info("Apple Mac detected, using EFI v1.10 runtime services only\n"); + efi.runtime_version = EFI_1_10_SYSTEM_TABLE_REVISION; + } } static __initdata char memory_type_name[][13] = { diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c index 2363fee9211c..9cc556013d08 100644 --- a/drivers/firmware/efi/libstub/arm64-stub.c +++ b/drivers/firmware/efi/libstub/arm64-stub.c @@ -119,9 +119,9 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, if (image->image_base != _text) efi_err("FIRMWARE BUG: efi_loaded_image_t::image_base has bogus value\n"); - if (!IS_ALIGNED((u64)_text, EFI_KIMG_ALIGN)) - efi_err("FIRMWARE BUG: kernel image not aligned on %ldk boundary\n", - EFI_KIMG_ALIGN >> 10); + if (!IS_ALIGNED((u64)_text, SEGMENT_ALIGN)) + efi_err("FIRMWARE BUG: kernel image not aligned on %dk boundary\n", + SEGMENT_ALIGN >> 10); kernel_size = _edata - _text; kernel_memsize = kernel_size + (_end - _edata); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index b21bcdc97460..4c83f1db8a24 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1525,6 +1525,87 @@ static const u16 amdgpu_unsupported_pciidlist[] = { 0x99A0, 0x99A2, 0x99A4, + /* radeon secondary ids */ + 0x3171, + 0x3e70, + 0x4164, + 0x4165, + 0x4166, + 0x4168, + 0x4170, + 0x4171, + 0x4172, + 0x4173, + 0x496e, + 0x4a69, + 0x4a6a, + 0x4a6b, + 0x4a70, + 0x4a74, + 0x4b69, + 0x4b6b, + 0x4b6c, + 0x4c6e, + 0x4e64, + 0x4e65, + 0x4e66, + 0x4e67, + 0x4e68, + 0x4e69, + 0x4e6a, + 0x4e71, + 0x4f73, + 0x5569, + 0x556b, + 0x556d, + 0x556f, + 0x5571, + 0x5854, + 0x5874, + 0x5940, + 0x5941, + 0x5b72, + 0x5b73, + 0x5b74, + 0x5b75, + 0x5d44, + 0x5d45, + 0x5d6d, + 0x5d6f, + 0x5d72, + 0x5d77, + 0x5e6b, + 0x5e6d, + 0x7120, + 0x7124, + 0x7129, + 0x712e, + 0x712f, + 0x7162, + 0x7163, + 0x7166, + 0x7167, + 0x7172, + 0x7173, + 0x71a0, + 0x71a1, + 0x71a3, + 0x71a7, + 0x71bb, + 0x71e0, + 0x71e1, + 0x71e2, + 0x71e6, + 0x71e7, + 0x71f2, + 0x7269, + 0x726b, + 0x726e, + 0x72a0, + 0x72a8, + 0x72b1, + 0x72b3, + 0x793f, }; static const struct pci_device_id pciidlist[] = { diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c index ff5bb152ef49..e6ef36de0825 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c @@ -2033,10 +2033,10 @@ static void calculate_bandwidth( kfree(surface_type); free_tiling_mode: kfree(tiling_mode); -free_yclk: - kfree(yclk); free_sclk: kfree(sclk); +free_yclk: + kfree(yclk); } /******************************************************************************* diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c index ec19678a0702..e447c74be713 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c @@ -503,7 +503,6 @@ static void dcn_bw_calc_rq_dlg_ttu( //input[in_idx].dout.output_standard; /*todo: soc->sr_enter_plus_exit_time??*/ - dlg_sys_param->t_srx_delay_us = dc->dcn_ip->dcfclk_cstate_latency / v->dcf_clk_deep_sleep; dml1_rq_dlg_get_rq_params(dml, rq_param, &input->pipe.src); dml1_extract_rq_regs(dml, rq_regs, rq_param); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 01c8849b9db2..6f5528d34093 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1404,20 +1404,34 @@ static void program_timing_sync( status->timing_sync_info.master = false; } - /* remove any other unblanked pipes as they have already been synced */ - for (j = j + 1; j < group_size; j++) { - bool is_blanked; - if (pipe_set[j]->stream_res.opp->funcs->dpg_is_blanked) - is_blanked = - pipe_set[j]->stream_res.opp->funcs->dpg_is_blanked(pipe_set[j]->stream_res.opp); - else - is_blanked = - pipe_set[j]->stream_res.tg->funcs->is_blanked(pipe_set[j]->stream_res.tg); - if (!is_blanked) { - group_size--; - pipe_set[j] = pipe_set[group_size]; - j--; + /* remove any other pipes that are already been synced */ + if (dc->config.use_pipe_ctx_sync_logic) { + /* check pipe's syncd to decide which pipe to be removed */ + for (j = 1; j < group_size; j++) { + if (pipe_set[j]->pipe_idx_syncd == pipe_set[0]->pipe_idx_syncd) { + group_size--; + pipe_set[j] = pipe_set[group_size]; + j--; + } else + /* link slave pipe's syncd with master pipe */ + pipe_set[j]->pipe_idx_syncd = pipe_set[0]->pipe_idx_syncd; + } + } else { + for (j = j + 1; j < group_size; j++) { + bool is_blanked; + + if (pipe_set[j]->stream_res.opp->funcs->dpg_is_blanked) + is_blanked = + pipe_set[j]->stream_res.opp->funcs->dpg_is_blanked(pipe_set[j]->stream_res.opp); + else + is_blanked = + pipe_set[j]->stream_res.tg->funcs->is_blanked(pipe_set[j]->stream_res.tg); + if (!is_blanked) { + group_size--; + pipe_set[j] = pipe_set[group_size]; + j--; + } } } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 05e216524370..4c3ab2575e4b 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -202,7 +202,7 @@ void dp_wait_for_training_aux_rd_interval( uint32_t wait_in_micro_secs) { #if defined(CONFIG_DRM_AMD_DC_DCN) - if (wait_in_micro_secs > 16000) + if (wait_in_micro_secs > 1000) msleep(wait_in_micro_secs/1000); else udelay(wait_in_micro_secs); @@ -6935,7 +6935,7 @@ bool dpcd_write_128b_132b_sst_payload_allocation_table( } } retries++; - udelay(5000); + msleep(5); } if (!result && retries == max_retries) { @@ -6987,7 +6987,7 @@ bool dpcd_poll_for_allocation_change_trigger(struct dc_link *link) break; } - udelay(5000); + msleep(5); } if (result == ACT_FAILED) { diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index d4ff6cc6b8d9..b3912ff9dc91 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -3217,6 +3217,60 @@ struct hpo_dp_link_encoder *resource_get_hpo_dp_link_enc_for_det_lt( } #endif +void reset_syncd_pipes_from_disabled_pipes(struct dc *dc, + struct dc_state *context) +{ + int i, j; + struct pipe_ctx *pipe_ctx_old, *pipe_ctx, *pipe_ctx_syncd; + + /* If pipe backend is reset, need to reset pipe syncd status */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + pipe_ctx_old = &dc->current_state->res_ctx.pipe_ctx[i]; + pipe_ctx = &context->res_ctx.pipe_ctx[i]; + + if (!pipe_ctx_old->stream) + continue; + + if (pipe_ctx_old->top_pipe || pipe_ctx_old->prev_odm_pipe) + continue; + + if (!pipe_ctx->stream || + pipe_need_reprogram(pipe_ctx_old, pipe_ctx)) { + + /* Reset all the syncd pipes from the disabled pipe */ + for (j = 0; j < dc->res_pool->pipe_count; j++) { + pipe_ctx_syncd = &context->res_ctx.pipe_ctx[j]; + if ((GET_PIPE_SYNCD_FROM_PIPE(pipe_ctx_syncd) == pipe_ctx_old->pipe_idx) || + !IS_PIPE_SYNCD_VALID(pipe_ctx_syncd)) + SET_PIPE_SYNCD_TO_PIPE(pipe_ctx_syncd, j); + } + } + } +} + +void check_syncd_pipes_for_disabled_master_pipe(struct dc *dc, + struct dc_state *context, + uint8_t disabled_master_pipe_idx) +{ + int i; + struct pipe_ctx *pipe_ctx, *pipe_ctx_check; + + pipe_ctx = &context->res_ctx.pipe_ctx[disabled_master_pipe_idx]; + if ((GET_PIPE_SYNCD_FROM_PIPE(pipe_ctx) != disabled_master_pipe_idx) || + !IS_PIPE_SYNCD_VALID(pipe_ctx)) + SET_PIPE_SYNCD_TO_PIPE(pipe_ctx, disabled_master_pipe_idx); + + /* for the pipe disabled, check if any slave pipe exists and assert */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + pipe_ctx_check = &context->res_ctx.pipe_ctx[i]; + + if ((GET_PIPE_SYNCD_FROM_PIPE(pipe_ctx_check) == disabled_master_pipe_idx) && + IS_PIPE_SYNCD_VALID(pipe_ctx_check) && (i != disabled_master_pipe_idx)) + DC_ERR("DC: Failure: pipe_idx[%d] syncd with disabled master pipe_idx[%d]\n", + i, disabled_master_pipe_idx); + } +} + uint8_t resource_transmitter_to_phy_idx(const struct dc *dc, enum transmitter transmitter) { /* TODO - get transmitter to phy idx mapping from DMUB */ diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index da2c78ce14d6..288e7b01f561 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -344,6 +344,7 @@ struct dc_config { uint8_t vblank_alignment_max_frame_time_diff; bool is_asymmetric_memory; bool is_single_rank_dimm; + bool use_pipe_ctx_sync_logic; }; enum visual_confirm { diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 78192ecba102..f3ff141b706a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -1566,6 +1566,10 @@ static enum dc_status apply_single_controller_ctx_to_hw( &pipe_ctx->stream->audio_info); } + /* make sure no pipes syncd to the pipe being enabled */ + if (!pipe_ctx->stream->apply_seamless_boot_optimization && dc->config.use_pipe_ctx_sync_logic) + check_syncd_pipes_for_disabled_master_pipe(dc, context, pipe_ctx->pipe_idx); + #if defined(CONFIG_DRM_AMD_DC_DCN) /* DCN3.1 FPGA Workaround * Need to enable HPO DP Stream Encoder before setting OTG master enable. @@ -1604,7 +1608,7 @@ static enum dc_status apply_single_controller_ctx_to_hw( pipe_ctx->stream_res.stream_enc, pipe_ctx->stream_res.tg->inst); - if (dc_is_dp_signal(pipe_ctx->stream->signal) && + if (dc_is_embedded_signal(pipe_ctx->stream->signal) && pipe_ctx->stream_res.stream_enc->funcs->reset_fifo) pipe_ctx->stream_res.stream_enc->funcs->reset_fifo( pipe_ctx->stream_res.stream_enc); @@ -2297,6 +2301,10 @@ enum dc_status dce110_apply_ctx_to_hw( enum dc_status status; int i; + /* reset syncd pipes from disabled pipes */ + if (dc->config.use_pipe_ctx_sync_logic) + reset_syncd_pipes_from_disabled_pipes(dc, context); + /* Reset old context */ /* look up the targets that have been removed since last commit */ hws->funcs.reset_hw_ctx_wrap(dc, context); diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c index 602ec9a08549..8ca26383b568 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c @@ -1878,7 +1878,6 @@ noinline bool dcn30_internal_validate_bw( dc->res_pool->funcs->update_soc_for_wm_a(dc, context); pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate); - DC_FP_START(); if (!pipe_cnt) { out = true; goto validate_out; @@ -2104,7 +2103,6 @@ validate_fail: out = false; validate_out: - DC_FP_END(); return out; } @@ -2306,7 +2304,9 @@ bool dcn30_validate_bandwidth(struct dc *dc, BW_VAL_TRACE_COUNT(); + DC_FP_START(); out = dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, fast_validate); + DC_FP_END(); if (pipe_cnt == 0) goto validate_out; diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c index c1c6e602b06c..5d9637b07429 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c @@ -686,7 +686,7 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_clock_gate = true, .disable_pplib_clock_request = true, .disable_pplib_wm_range = true, - .pipe_split_policy = MPC_SPLIT_DYNAMIC, + .pipe_split_policy = MPC_SPLIT_AVOID, .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, .vsr_support = true, @@ -1380,6 +1380,17 @@ static void set_wm_ranges( pp_smu->nv_funcs.set_wm_ranges(&pp_smu->nv_funcs.pp_smu, &ranges); } +static void dcn301_calculate_wm_and_dlg( + struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int vlevel) +{ + DC_FP_START(); + dcn301_calculate_wm_and_dlg_fp(dc, context, pipes, pipe_cnt, vlevel); + DC_FP_END(); +} + static struct resource_funcs dcn301_res_pool_funcs = { .destroy = dcn301_destroy_resource_pool, .link_enc_create = dcn301_link_encoder_create, diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index 42ed47e8133d..8d64187478e4 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -2260,6 +2260,9 @@ static bool dcn31_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + /* Use pipe context based otg sync logic */ + dc->config.use_pipe_ctx_sync_logic = true; + /* read VBIOS LTTPR caps */ { if (ctx->dc_bios->funcs->get_lttpr_caps) { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c index 246071c72f6b..548cdef8a8ad 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c @@ -1576,8 +1576,6 @@ void dml20_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib, dlg_sys_param.total_flip_bytes = get_total_immediate_flip_bytes(mode_lib, e2e_pipe_param, num_pipes); - dlg_sys_param.t_srx_delay_us = mode_lib->ip.dcfclk_cstate_latency - / dlg_sys_param.deepsleep_dcfclk_mhz; // TODO: Deprecated print__dlg_sys_params_st(mode_lib, &dlg_sys_param); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c index 015e7f2c0b16..0fc9f3e3ffae 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c @@ -1577,8 +1577,6 @@ void dml20v2_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib, dlg_sys_param.total_flip_bytes = get_total_immediate_flip_bytes(mode_lib, e2e_pipe_param, num_pipes); - dlg_sys_param.t_srx_delay_us = mode_lib->ip.dcfclk_cstate_latency - / dlg_sys_param.deepsleep_dcfclk_mhz; // TODO: Deprecated print__dlg_sys_params_st(mode_lib, &dlg_sys_param); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c index 8bc27de4c104..618f4b682ab1 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c @@ -1688,8 +1688,6 @@ void dml21_rq_dlg_get_dlg_reg( mode_lib, e2e_pipe_param, num_pipes); - dlg_sys_param.t_srx_delay_us = mode_lib->ip.dcfclk_cstate_latency - / dlg_sys_param.deepsleep_dcfclk_mhz; // TODO: Deprecated print__dlg_sys_params_st(mode_lib, &dlg_sys_param); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c index aef854270054..747167083dea 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c @@ -1858,8 +1858,6 @@ void dml30_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib, dlg_sys_param.total_flip_bytes = get_total_immediate_flip_bytes(mode_lib, e2e_pipe_param, num_pipes); - dlg_sys_param.t_srx_delay_us = mode_lib->ip.dcfclk_cstate_latency - / dlg_sys_param.deepsleep_dcfclk_mhz; // TODO: Deprecated print__dlg_sys_params_st(mode_lib, &dlg_sys_param); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c index 94c32832a0e7..0a7a33864973 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c @@ -327,7 +327,7 @@ void dcn301_fpu_init_soc_bounding_box(struct bp_soc_bb_info bb_info) dcn3_01_soc.sr_exit_time_us = bb_info.dram_sr_exit_latency_100ns * 10; } -void dcn301_calculate_wm_and_dlg(struct dc *dc, +void dcn301_calculate_wm_and_dlg_fp(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, int pipe_cnt, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.h index fc7065d17842..774b0fdfc80b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.h @@ -34,7 +34,7 @@ void dcn301_fpu_set_wm_ranges(int i, void dcn301_fpu_init_soc_bounding_box(struct bp_soc_bb_info bb_info); -void dcn301_calculate_wm_and_dlg(struct dc *dc, +void dcn301_calculate_wm_and_dlg_fp(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, int pipe_cnt, diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h index d46a2733024c..8f9f1d607f7c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h @@ -546,7 +546,6 @@ struct _vcs_dpi_display_dlg_sys_params_st { double t_sr_wm_us; double t_extra_us; double mem_trip_us; - double t_srx_delay_us; double deepsleep_dcfclk_mhz; double total_flip_bw; unsigned int total_flip_bytes; diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.c b/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.c index 71ea503cb32f..412e75eb4704 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.c @@ -142,9 +142,6 @@ void print__dlg_sys_params_st(struct display_mode_lib *mode_lib, const struct _v dml_print("DML_RQ_DLG_CALC: t_sr_wm_us = %3.2f\n", dlg_sys_param->t_sr_wm_us); dml_print("DML_RQ_DLG_CALC: t_extra_us = %3.2f\n", dlg_sys_param->t_extra_us); dml_print( - "DML_RQ_DLG_CALC: t_srx_delay_us = %3.2f\n", - dlg_sys_param->t_srx_delay_us); - dml_print( "DML_RQ_DLG_CALC: deepsleep_dcfclk_mhz = %3.2f\n", dlg_sys_param->deepsleep_dcfclk_mhz); dml_print( diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c b/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c index 59dc2c5b58dd..3df559c591f8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c @@ -1331,10 +1331,6 @@ void dml1_rq_dlg_get_dlg_params( if (dual_plane) DTRACE("DLG: %s: swath_height_c = %d", __func__, swath_height_c); - DTRACE( - "DLG: %s: t_srx_delay_us = %3.2f", - __func__, - (double) dlg_sys_param->t_srx_delay_us); DTRACE("DLG: %s: line_time_in_us = %3.2f", __func__, (double) line_time_in_us); DTRACE("DLG: %s: vupdate_offset = %d", __func__, vupdate_offset); DTRACE("DLG: %s: vupdate_width = %d", __func__, vupdate_width); diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index 890280026e69..943240e2809e 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -382,6 +382,7 @@ struct pipe_ctx { struct pll_settings pll_settings; uint8_t pipe_idx; + uint8_t pipe_idx_syncd; struct pipe_ctx *top_pipe; struct pipe_ctx *bottom_pipe; diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h index 4249bf306e09..dbfe6690ded8 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/resource.h +++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h @@ -34,6 +34,10 @@ #define MEMORY_TYPE_HBM 2 +#define IS_PIPE_SYNCD_VALID(pipe) ((((pipe)->pipe_idx_syncd) & 0x80)?1:0) +#define GET_PIPE_SYNCD_FROM_PIPE(pipe) ((pipe)->pipe_idx_syncd & 0x7F) +#define SET_PIPE_SYNCD_TO_PIPE(pipe, pipe_syncd) ((pipe)->pipe_idx_syncd = (0x80 | pipe_syncd)) + enum dce_version resource_parse_asic_id( struct hw_asic_id asic_id); @@ -208,6 +212,13 @@ struct hpo_dp_link_encoder *resource_get_hpo_dp_link_enc_for_det_lt( const struct dc_link *link); #endif +void reset_syncd_pipes_from_disabled_pipes(struct dc *dc, + struct dc_state *context); + +void check_syncd_pipes_for_disabled_master_pipe(struct dc *dc, + struct dc_state *context, + uint8_t disabled_master_pipe_idx); + uint8_t resource_transmitter_to_phy_idx(const struct dc *dc, enum transmitter transmitter); #endif /* DRIVERS_GPU_DRM_AMD_DC_DEV_DC_INC_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/ast/ast_tables.h b/drivers/gpu/drm/ast/ast_tables.h index d9eb353a4bf0..dbe1cc620f6e 100644 --- a/drivers/gpu/drm/ast/ast_tables.h +++ b/drivers/gpu/drm/ast/ast_tables.h @@ -282,8 +282,6 @@ static const struct ast_vbios_enhtable res_1360x768[] = { }; static const struct ast_vbios_enhtable res_1600x900[] = { - {1800, 1600, 24, 80, 1000, 900, 1, 3, VCLK108, /* 60Hz */ - (SyncPP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 60, 3, 0x3A }, {1760, 1600, 48, 32, 926, 900, 3, 5, VCLK97_75, /* 60Hz CVT RB */ (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo | AST2500PreCatchCRT), 60, 1, 0x3A }, diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 21174efd91be..88cd992df356 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -1327,8 +1327,10 @@ int drm_atomic_check_only(struct drm_atomic_state *state) drm_dbg_atomic(dev, "checking %p\n", state); - for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) - requested_crtc |= drm_crtc_mask(crtc); + for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) { + if (new_crtc_state->enable) + requested_crtc |= drm_crtc_mask(crtc); + } for_each_oldnew_plane_in_state(state, plane, old_plane_state, new_plane_state, i) { ret = drm_atomic_plane_check(old_plane_state, new_plane_state); @@ -1377,8 +1379,10 @@ int drm_atomic_check_only(struct drm_atomic_state *state) } } - for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) - affected_crtc |= drm_crtc_mask(crtc); + for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) { + if (new_crtc_state->enable) + affected_crtc |= drm_crtc_mask(crtc); + } /* * For commits that allow modesets drivers can add other CRTCs to the diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index 042bb80383c9..b910978d3e48 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -115,6 +115,12 @@ static const struct drm_dmi_panel_orientation_data lcd1280x1920_rightside_up = { .orientation = DRM_MODE_PANEL_ORIENTATION_RIGHT_UP, }; +static const struct drm_dmi_panel_orientation_data lcd1600x2560_leftside_up = { + .width = 1600, + .height = 2560, + .orientation = DRM_MODE_PANEL_ORIENTATION_LEFT_UP, +}; + static const struct dmi_system_id orientation_data[] = { { /* Acer One 10 (S1003) */ .matches = { @@ -275,6 +281,12 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "Default string"), }, .driver_data = (void *)&onegx1_pro, + }, { /* OneXPlayer */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ONE-NETBOOK TECHNOLOGY CO., LTD."), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "ONE XPLAYER"), + }, + .driver_data = (void *)&lcd1600x2560_leftside_up, }, { /* Samsung GalaxyBook 10.6 */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."), diff --git a/drivers/gpu/drm/drm_privacy_screen_x86.c b/drivers/gpu/drm/drm_privacy_screen_x86.c index a2cafb294ca6..e7aa74ad0b24 100644 --- a/drivers/gpu/drm/drm_privacy_screen_x86.c +++ b/drivers/gpu/drm/drm_privacy_screen_x86.c @@ -33,6 +33,9 @@ static bool __init detect_thinkpad_privacy_screen(void) unsigned long long output; acpi_status status; + if (acpi_disabled) + return false; + /* Get embedded-controller handle */ status = acpi_get_devices("PNP0C09", acpi_set_handle, NULL, &ec_handle); if (ACPI_FAILURE(status) || !ec_handle) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c index b03c20c14ca1..a17313282e8b 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c @@ -469,8 +469,8 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data, return -EINVAL; } - if (args->stream_size > SZ_64K || args->nr_relocs > SZ_64K || - args->nr_bos > SZ_64K || args->nr_pmrs > 128) { + if (args->stream_size > SZ_128K || args->nr_relocs > SZ_128K || + args->nr_bos > SZ_128K || args->nr_pmrs > 128) { DRM_ERROR("submit arguments out of size limits\n"); return -EINVAL; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 4b4829eb16c2..0dd107dcecc2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -311,6 +311,7 @@ struct drm_i915_gem_object { #define I915_BO_READONLY BIT(6) #define I915_TILING_QUIRK_BIT 7 /* unknown swizzling; do not release! */ #define I915_BO_PROTECTED BIT(8) +#define I915_BO_WAS_BOUND_BIT 9 /** * @mem_flags - Mutable placement-related flags * diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 9f429ed6e78a..a50f884973bc 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -10,6 +10,8 @@ #include "i915_gem_lmem.h" #include "i915_gem_mman.h" +#include "gt/intel_gt.h" + void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, struct sg_table *pages, unsigned int sg_page_sizes) @@ -221,6 +223,14 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj) __i915_gem_object_reset_page_iter(obj); obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0; + if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags)) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); + intel_wakeref_t wakeref; + + with_intel_runtime_pm_if_active(&i915->runtime_pm, wakeref) + intel_gt_invalidate_tlbs(to_gt(i915)); + } + return pages; } diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index f98f0fb21efb..35d0fcd3a86c 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -29,6 +29,8 @@ void __intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) { spin_lock_init(>->irq_lock); + mutex_init(>->tlb_invalidate_lock); + INIT_LIST_HEAD(>->closed_vma); spin_lock_init(>->closed_lock); @@ -912,3 +914,109 @@ void intel_gt_info_print(const struct intel_gt_info *info, intel_sseu_dump(&info->sseu, p); } + +struct reg_and_bit { + i915_reg_t reg; + u32 bit; +}; + +static struct reg_and_bit +get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8, + const i915_reg_t *regs, const unsigned int num) +{ + const unsigned int class = engine->class; + struct reg_and_bit rb = { }; + + if (drm_WARN_ON_ONCE(&engine->i915->drm, + class >= num || !regs[class].reg)) + return rb; + + rb.reg = regs[class]; + if (gen8 && class == VIDEO_DECODE_CLASS) + rb.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */ + else + rb.bit = engine->instance; + + rb.bit = BIT(rb.bit); + + return rb; +} + +void intel_gt_invalidate_tlbs(struct intel_gt *gt) +{ + static const i915_reg_t gen8_regs[] = { + [RENDER_CLASS] = GEN8_RTCR, + [VIDEO_DECODE_CLASS] = GEN8_M1TCR, /* , GEN8_M2TCR */ + [VIDEO_ENHANCEMENT_CLASS] = GEN8_VTCR, + [COPY_ENGINE_CLASS] = GEN8_BTCR, + }; + static const i915_reg_t gen12_regs[] = { + [RENDER_CLASS] = GEN12_GFX_TLB_INV_CR, + [VIDEO_DECODE_CLASS] = GEN12_VD_TLB_INV_CR, + [VIDEO_ENHANCEMENT_CLASS] = GEN12_VE_TLB_INV_CR, + [COPY_ENGINE_CLASS] = GEN12_BLT_TLB_INV_CR, + }; + struct drm_i915_private *i915 = gt->i915; + struct intel_uncore *uncore = gt->uncore; + struct intel_engine_cs *engine; + enum intel_engine_id id; + const i915_reg_t *regs; + unsigned int num = 0; + + if (I915_SELFTEST_ONLY(gt->awake == -ENODEV)) + return; + + if (GRAPHICS_VER(i915) == 12) { + regs = gen12_regs; + num = ARRAY_SIZE(gen12_regs); + } else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) { + regs = gen8_regs; + num = ARRAY_SIZE(gen8_regs); + } else if (GRAPHICS_VER(i915) < 8) { + return; + } + + if (drm_WARN_ONCE(&i915->drm, !num, + "Platform does not implement TLB invalidation!")) + return; + + GEM_TRACE("\n"); + + assert_rpm_wakelock_held(&i915->runtime_pm); + + mutex_lock(>->tlb_invalidate_lock); + intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + + for_each_engine(engine, gt, id) { + /* + * HW architecture suggest typical invalidation time at 40us, + * with pessimistic cases up to 100us and a recommendation to + * cap at 1ms. We go a bit higher just in case. + */ + const unsigned int timeout_us = 100; + const unsigned int timeout_ms = 4; + struct reg_and_bit rb; + + rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num); + if (!i915_mmio_reg_offset(rb.reg)) + continue; + + intel_uncore_write_fw(uncore, rb.reg, rb.bit); + if (__intel_wait_for_register_fw(uncore, + rb.reg, rb.bit, 0, + timeout_us, timeout_ms, + NULL)) + drm_err_ratelimited(>->i915->drm, + "%s TLB invalidation did not complete in %ums!\n", + engine->name, timeout_ms); + } + + /* + * Use delayed put since a) we mostly expect a flurry of TLB + * invalidations so it is good to avoid paying the forcewake cost and + * b) it works around a bug in Icelake which cannot cope with too rapid + * transitions. + */ + intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL); + mutex_unlock(>->tlb_invalidate_lock); +} diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 3ace129eb2af..a913fb6ffec3 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -91,4 +91,6 @@ void intel_gt_info_print(const struct intel_gt_info *info, void intel_gt_watchdog_work(struct work_struct *work); +void intel_gt_invalidate_tlbs(struct intel_gt *gt); + #endif /* __INTEL_GT_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index 14216cc471b1..f20687796490 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -73,6 +73,8 @@ struct intel_gt { struct intel_uc uc; + struct mutex tlb_invalidate_lock; + struct i915_wa_list wa_list; struct intel_gt_timelines { diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 971d601fe751..c32420cb8ed5 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2721,6 +2721,12 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING (1 << 28) #define GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT (1 << 24) +#define GEN8_RTCR _MMIO(0x4260) +#define GEN8_M1TCR _MMIO(0x4264) +#define GEN8_M2TCR _MMIO(0x4268) +#define GEN8_BTCR _MMIO(0x426c) +#define GEN8_VTCR _MMIO(0x4270) + #if 0 #define PRB0_TAIL _MMIO(0x2030) #define PRB0_HEAD _MMIO(0x2034) @@ -2819,6 +2825,11 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define FAULT_VA_HIGH_BITS (0xf << 0) #define FAULT_GTT_SEL (1 << 4) +#define GEN12_GFX_TLB_INV_CR _MMIO(0xced8) +#define GEN12_VD_TLB_INV_CR _MMIO(0xcedc) +#define GEN12_VE_TLB_INV_CR _MMIO(0xcee0) +#define GEN12_BLT_TLB_INV_CR _MMIO(0xcee4) + #define GEN12_AUX_ERR_DBG _MMIO(0x43f4) #define FPGA_DBG _MMIO(0x42300) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 29a858c53bdd..c0d6d5526abe 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -457,6 +457,9 @@ int i915_vma_bind(struct i915_vma *vma, vma->ops->bind_vma(vma->vm, NULL, vma, cache_level, bind_flags); } + if (vma->obj) + set_bit(I915_BO_WAS_BOUND_BIT, &vma->obj->flags); + atomic_or(bind_flags, &vma->flags); return 0; } diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index fc25ebf1a593..778da3179b3c 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -724,7 +724,8 @@ void intel_uncore_forcewake_get__locked(struct intel_uncore *uncore, } static void __intel_uncore_forcewake_put(struct intel_uncore *uncore, - enum forcewake_domains fw_domains) + enum forcewake_domains fw_domains, + bool delayed) { struct intel_uncore_forcewake_domain *domain; unsigned int tmp; @@ -739,7 +740,11 @@ static void __intel_uncore_forcewake_put(struct intel_uncore *uncore, continue; } - fw_domains_put(uncore, domain->mask); + if (delayed && + !(domain->uncore->fw_domains_timer & domain->mask)) + fw_domain_arm_timer(domain); + else + fw_domains_put(uncore, domain->mask); } } @@ -760,7 +765,20 @@ void intel_uncore_forcewake_put(struct intel_uncore *uncore, return; spin_lock_irqsave(&uncore->lock, irqflags); - __intel_uncore_forcewake_put(uncore, fw_domains); + __intel_uncore_forcewake_put(uncore, fw_domains, false); + spin_unlock_irqrestore(&uncore->lock, irqflags); +} + +void intel_uncore_forcewake_put_delayed(struct intel_uncore *uncore, + enum forcewake_domains fw_domains) +{ + unsigned long irqflags; + + if (!uncore->fw_get_funcs) + return; + + spin_lock_irqsave(&uncore->lock, irqflags); + __intel_uncore_forcewake_put(uncore, fw_domains, true); spin_unlock_irqrestore(&uncore->lock, irqflags); } @@ -802,7 +820,7 @@ void intel_uncore_forcewake_put__locked(struct intel_uncore *uncore, if (!uncore->fw_get_funcs) return; - __intel_uncore_forcewake_put(uncore, fw_domains); + __intel_uncore_forcewake_put(uncore, fw_domains, false); } void assert_forcewakes_inactive(struct intel_uncore *uncore) diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h index 210fe2a71612..2a15b2b2e2fc 100644 --- a/drivers/gpu/drm/i915/intel_uncore.h +++ b/drivers/gpu/drm/i915/intel_uncore.h @@ -246,6 +246,8 @@ void intel_uncore_forcewake_get(struct intel_uncore *uncore, enum forcewake_domains domains); void intel_uncore_forcewake_put(struct intel_uncore *uncore, enum forcewake_domains domains); +void intel_uncore_forcewake_put_delayed(struct intel_uncore *uncore, + enum forcewake_domains domains); void intel_uncore_forcewake_flush(struct intel_uncore *uncore, enum forcewake_domains fw_domains); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 51b83776951b..17cfad6424db 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -1560,6 +1560,8 @@ static int a6xx_pm_suspend(struct msm_gpu *gpu) for (i = 0; i < gpu->nr_rings; i++) a6xx_gpu->shadow[i] = 0; + gpu->suspend_count++; + return 0; } diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index 93005839b5da..fb261930ad1c 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -608,9 +608,27 @@ static int adreno_resume(struct device *dev) return gpu->funcs->pm_resume(gpu); } +static int active_submits(struct msm_gpu *gpu) +{ + int active_submits; + mutex_lock(&gpu->active_lock); + active_submits = gpu->active_submits; + mutex_unlock(&gpu->active_lock); + return active_submits; +} + static int adreno_suspend(struct device *dev) { struct msm_gpu *gpu = dev_to_gpu(dev); + int remaining; + + remaining = wait_event_timeout(gpu->retire_event, + active_submits(gpu) == 0, + msecs_to_jiffies(1000)); + if (remaining == 0) { + dev_err(dev, "Timeout waiting for GPU to suspend\n"); + return -EBUSY; + } return gpu->funcs->pm_suspend(gpu); } diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c index a98e964c3b6f..355894a3b48c 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c @@ -26,9 +26,16 @@ static void dpu_setup_dspp_pcc(struct dpu_hw_dspp *ctx, struct dpu_hw_pcc_cfg *cfg) { - u32 base = ctx->cap->sblk->pcc.base; + u32 base; - if (!ctx || !base) { + if (!ctx) { + DRM_ERROR("invalid ctx %pK\n", ctx); + return; + } + + base = ctx->cap->sblk->pcc.base; + + if (!base) { DRM_ERROR("invalid ctx %pK pcc base 0x%x\n", ctx, base); return; } diff --git a/drivers/gpu/drm/msm/dsi/dsi.c b/drivers/gpu/drm/msm/dsi/dsi.c index 052548883d27..0fe02529b5e7 100644 --- a/drivers/gpu/drm/msm/dsi/dsi.c +++ b/drivers/gpu/drm/msm/dsi/dsi.c @@ -40,7 +40,12 @@ static int dsi_get_phy(struct msm_dsi *msm_dsi) of_node_put(phy_node); - if (!phy_pdev || !msm_dsi->phy) { + if (!phy_pdev) { + DRM_DEV_ERROR(&pdev->dev, "%s: phy driver is not ready\n", __func__); + return -EPROBE_DEFER; + } + if (!msm_dsi->phy) { + put_device(&phy_pdev->dev); DRM_DEV_ERROR(&pdev->dev, "%s: phy driver is not ready\n", __func__); return -EPROBE_DEFER; } diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c index c2ed177717c7..2027b38617ab 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c @@ -808,12 +808,14 @@ int msm_dsi_phy_enable(struct msm_dsi_phy *phy, struct msm_dsi_phy_clk_request *clk_req, struct msm_dsi_phy_shared_timings *shared_timings) { - struct device *dev = &phy->pdev->dev; + struct device *dev; int ret; if (!phy || !phy->cfg->ops.enable) return -EINVAL; + dev = &phy->pdev->dev; + ret = dsi_phy_enable_resource(phy); if (ret) { DRM_DEV_ERROR(dev, "%s: resource enable failed, %d\n", diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.c b/drivers/gpu/drm/msm/hdmi/hdmi.c index 3acdeae25caf..719720709e9e 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi.c @@ -97,10 +97,15 @@ static int msm_hdmi_get_phy(struct hdmi *hdmi) of_node_put(phy_node); - if (!phy_pdev || !hdmi->phy) { + if (!phy_pdev) { DRM_DEV_ERROR(&pdev->dev, "phy driver is not ready\n"); return -EPROBE_DEFER; } + if (!hdmi->phy) { + DRM_DEV_ERROR(&pdev->dev, "phy driver is not ready\n"); + put_device(&phy_pdev->dev); + return -EPROBE_DEFER; + } hdmi->phy_dev = get_device(&phy_pdev->dev); diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index ad35a5d94053..555666e3f960 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -461,7 +461,7 @@ static int msm_init_vram(struct drm_device *dev) of_node_put(node); if (ret) return ret; - size = r.end - r.start; + size = r.end - r.start + 1; DRM_INFO("using VRAM carveout: %lx@%pa\n", size, &r.start); /* if we have no IOMMU, then we need to use carveout allocator. @@ -510,7 +510,6 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv) struct msm_drm_private *priv = dev_get_drvdata(dev); struct drm_device *ddev; struct msm_kms *kms; - struct msm_mdss *mdss; int ret, i; ddev = drm_dev_alloc(drv, dev); @@ -521,8 +520,6 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv) ddev->dev_private = priv; priv->dev = ddev; - mdss = priv->mdss; - priv->wq = alloc_ordered_workqueue("msm", 0); priv->hangcheck_period = DRM_MSM_HANGCHECK_DEFAULT_PERIOD; diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 0f78c2615272..2c1049c0ea14 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -703,6 +703,8 @@ static void retire_submits(struct msm_gpu *gpu) } } } + + wake_up_all(&gpu->retire_event); } static void retire_worker(struct kthread_work *work) @@ -848,6 +850,7 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, INIT_LIST_HEAD(&gpu->active_list); mutex_init(&gpu->active_lock); mutex_init(&gpu->lock); + init_waitqueue_head(&gpu->retire_event); kthread_init_work(&gpu->retire_work, retire_worker); kthread_init_work(&gpu->recover_work, recover_worker); kthread_init_work(&gpu->fault_work, fault_worker); diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index 445c6bfd4b6b..92aa1e9196c6 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -230,6 +230,9 @@ struct msm_gpu { /* work for handling GPU recovery: */ struct kthread_work recover_work; + /** retire_event: notified when submits are retired: */ + wait_queue_head_t retire_event; + /* work for handling active-list retiring: */ struct kthread_work retire_work; diff --git a/drivers/gpu/drm/msm/msm_gpu_devfreq.c b/drivers/gpu/drm/msm/msm_gpu_devfreq.c index 62405e980925..9bf319be11f6 100644 --- a/drivers/gpu/drm/msm/msm_gpu_devfreq.c +++ b/drivers/gpu/drm/msm/msm_gpu_devfreq.c @@ -133,6 +133,18 @@ void msm_devfreq_init(struct msm_gpu *gpu) CLOCK_MONOTONIC, HRTIMER_MODE_REL); } +static void cancel_idle_work(struct msm_gpu_devfreq *df) +{ + hrtimer_cancel(&df->idle_work.timer); + kthread_cancel_work_sync(&df->idle_work.work); +} + +static void cancel_boost_work(struct msm_gpu_devfreq *df) +{ + hrtimer_cancel(&df->boost_work.timer); + kthread_cancel_work_sync(&df->boost_work.work); +} + void msm_devfreq_cleanup(struct msm_gpu *gpu) { struct msm_gpu_devfreq *df = &gpu->devfreq; @@ -152,7 +164,12 @@ void msm_devfreq_resume(struct msm_gpu *gpu) void msm_devfreq_suspend(struct msm_gpu *gpu) { - devfreq_suspend_device(gpu->devfreq.devfreq); + struct msm_gpu_devfreq *df = &gpu->devfreq; + + devfreq_suspend_device(df->devfreq); + + cancel_idle_work(df); + cancel_boost_work(df); } static void msm_devfreq_boost_work(struct kthread_work *work) @@ -196,7 +213,7 @@ void msm_devfreq_active(struct msm_gpu *gpu) /* * Cancel any pending transition to idle frequency: */ - hrtimer_cancel(&df->idle_work.timer); + cancel_idle_work(df); idle_time = ktime_to_ms(ktime_sub(ktime_get(), df->idle_time)); diff --git a/drivers/gpu/drm/vc4/vc4_dsi.c b/drivers/gpu/drm/vc4/vc4_dsi.c index a229da58962a..9300d3354c51 100644 --- a/drivers/gpu/drm/vc4/vc4_dsi.c +++ b/drivers/gpu/drm/vc4/vc4_dsi.c @@ -1262,7 +1262,6 @@ static int vc4_dsi_host_attach(struct mipi_dsi_host *host, struct mipi_dsi_device *device) { struct vc4_dsi *dsi = host_to_dsi(host); - int ret; dsi->lanes = device->lanes; dsi->channel = device->channel; @@ -1297,18 +1296,15 @@ static int vc4_dsi_host_attach(struct mipi_dsi_host *host, return 0; } - ret = component_add(&dsi->pdev->dev, &vc4_dsi_ops); - if (ret) { - mipi_dsi_host_unregister(&dsi->dsi_host); - return ret; - } - - return 0; + return component_add(&dsi->pdev->dev, &vc4_dsi_ops); } static int vc4_dsi_host_detach(struct mipi_dsi_host *host, struct mipi_dsi_device *device) { + struct vc4_dsi *dsi = host_to_dsi(host); + + component_del(&dsi->pdev->dev, &vc4_dsi_ops); return 0; } @@ -1686,9 +1682,7 @@ static int vc4_dsi_dev_remove(struct platform_device *pdev) struct device *dev = &pdev->dev; struct vc4_dsi *dsi = dev_get_drvdata(dev); - component_del(&pdev->dev, &vc4_dsi_ops); mipi_dsi_host_unregister(&dsi->dsi_host); - return 0; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index d6b66636a19b..ea3ecdda561d 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -1140,15 +1140,14 @@ extern int vmw_execbuf_fence_commands(struct drm_file *file_priv, struct vmw_private *dev_priv, struct vmw_fence_obj **p_fence, uint32_t *p_handle); -extern void vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv, +extern int vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv, struct vmw_fpriv *vmw_fp, int ret, struct drm_vmw_fence_rep __user *user_fence_rep, struct vmw_fence_obj *fence, uint32_t fence_handle, - int32_t out_fence_fd, - struct sync_file *sync_file); + int32_t out_fence_fd); bool vmw_cmd_describe(const void *buf, u32 *size, char const **cmd); /** diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index 44ca23b0ea4e..dd2ff441068e 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -3879,17 +3879,17 @@ int vmw_execbuf_fence_commands(struct drm_file *file_priv, * Also if copying fails, user-space will be unable to signal the fence object * so we wait for it immediately, and then unreference the user-space reference. */ -void +int vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv, struct vmw_fpriv *vmw_fp, int ret, struct drm_vmw_fence_rep __user *user_fence_rep, struct vmw_fence_obj *fence, uint32_t fence_handle, - int32_t out_fence_fd, struct sync_file *sync_file) + int32_t out_fence_fd) { struct drm_vmw_fence_rep fence_rep; if (user_fence_rep == NULL) - return; + return 0; memset(&fence_rep, 0, sizeof(fence_rep)); @@ -3917,19 +3917,13 @@ vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv, * handle. */ if (unlikely(ret != 0) && (fence_rep.error == 0)) { - if (sync_file) - fput(sync_file->file); - - if (fence_rep.fd != -1) { - put_unused_fd(fence_rep.fd); - fence_rep.fd = -1; - } - ttm_ref_object_base_unref(vmw_fp->tfile, fence_handle); VMW_DEBUG_USER("Fence copy error. Syncing.\n"); (void) vmw_fence_obj_wait(fence, false, false, VMW_FENCE_WAIT_TIMEOUT); } + + return ret ? -EFAULT : 0; } /** @@ -4266,16 +4260,23 @@ int vmw_execbuf_process(struct drm_file *file_priv, (void) vmw_fence_obj_wait(fence, false, false, VMW_FENCE_WAIT_TIMEOUT); + } + } + + ret = vmw_execbuf_copy_fence_user(dev_priv, vmw_fpriv(file_priv), ret, + user_fence_rep, fence, handle, out_fence_fd); + + if (sync_file) { + if (ret) { + /* usercopy of fence failed, put the file object */ + fput(sync_file->file); + put_unused_fd(out_fence_fd); } else { /* Link the fence with the FD created earlier */ fd_install(out_fence_fd, sync_file->file); } } - vmw_execbuf_copy_fence_user(dev_priv, vmw_fpriv(file_priv), ret, - user_fence_rep, fence, handle, out_fence_fd, - sync_file); - /* Don't unreference when handing fence out */ if (unlikely(out_fence != NULL)) { *out_fence = fence; @@ -4293,7 +4294,7 @@ int vmw_execbuf_process(struct drm_file *file_priv, */ vmw_validation_unref_lists(&val_ctx); - return 0; + return ret; out_unlock_binding: mutex_unlock(&dev_priv->binding_mutex); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c index c60d395f9e2e..5001b87aebe8 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c @@ -1128,7 +1128,7 @@ int vmw_fence_event_ioctl(struct drm_device *dev, void *data, } vmw_execbuf_copy_fence_user(dev_priv, vmw_fp, 0, user_fence_rep, fence, - handle, -1, NULL); + handle, -1); vmw_fence_obj_unreference(&fence); return 0; out_no_create: diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 4e693e8de2c3..bbd2f4ec08ec 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -2501,7 +2501,7 @@ void vmw_kms_helper_validation_finish(struct vmw_private *dev_priv, if (file_priv) vmw_execbuf_copy_fence_user(dev_priv, vmw_fpriv(file_priv), ret, user_fence_rep, fence, - handle, -1, NULL); + handle, -1); if (out_fence) *out_fence = fence; else diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index ca873a3b98db..f2d05bff4245 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -1660,6 +1660,13 @@ static int balloon_connect_vsp(struct hv_device *dev) unsigned long t; int ret; + /* + * max_pkt_size should be large enough for one vmbus packet header plus + * our receive buffer size. Hyper-V sends messages up to + * HV_HYP_PAGE_SIZE bytes long on balloon channel. + */ + dev->channel->max_pkt_size = HV_HYP_PAGE_SIZE * 2; + ret = vmbus_open(dev->channel, dm_ring_size, dm_ring_size, NULL, 0, balloon_onchannelcallback, dev); if (ret) diff --git a/drivers/hwmon/adt7470.c b/drivers/hwmon/adt7470.c index d519aca4a9d6..fb6d14d213a1 100644 --- a/drivers/hwmon/adt7470.c +++ b/drivers/hwmon/adt7470.c @@ -662,6 +662,9 @@ static int adt7470_fan_write(struct device *dev, u32 attr, int channel, long val struct adt7470_data *data = dev_get_drvdata(dev); int err; + if (val <= 0) + return -EINVAL; + val = FAN_RPM_TO_PERIOD(val); val = clamp_val(val, 1, 65534); diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c index 74019dff2550..1c9493c70813 100644 --- a/drivers/hwmon/lm90.c +++ b/drivers/hwmon/lm90.c @@ -373,7 +373,7 @@ static const struct lm90_params lm90_params[] = { .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT | LM90_HAVE_BROKEN_ALERT | LM90_HAVE_CRIT, .alert_alarms = 0x7c, - .max_convrate = 8, + .max_convrate = 7, }, [lm86] = { .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT @@ -394,12 +394,13 @@ static const struct lm90_params lm90_params[] = { .max_convrate = 9, }, [max6646] = { - .flags = LM90_HAVE_CRIT, + .flags = LM90_HAVE_CRIT | LM90_HAVE_BROKEN_ALERT, .alert_alarms = 0x7c, .max_convrate = 6, .reg_local_ext = MAX6657_REG_R_LOCAL_TEMPL, }, [max6654] = { + .flags = LM90_HAVE_BROKEN_ALERT, .alert_alarms = 0x7c, .max_convrate = 7, .reg_local_ext = MAX6657_REG_R_LOCAL_TEMPL, @@ -418,7 +419,7 @@ static const struct lm90_params lm90_params[] = { }, [max6680] = { .flags = LM90_HAVE_OFFSET | LM90_HAVE_CRIT - | LM90_HAVE_CRIT_ALRM_SWP, + | LM90_HAVE_CRIT_ALRM_SWP | LM90_HAVE_BROKEN_ALERT, .alert_alarms = 0x7c, .max_convrate = 7, }, @@ -848,7 +849,7 @@ static int lm90_update_device(struct device *dev) * Re-enable ALERT# output if it was originally enabled and * relevant alarms are all clear */ - if (!(data->config_orig & 0x80) && + if ((client->irq || !(data->config_orig & 0x80)) && !(data->alarms & data->alert_alarms)) { if (data->config & 0x80) { dev_dbg(&client->dev, "Re-enabling ALERT#\n"); @@ -1807,22 +1808,22 @@ static bool lm90_is_tripped(struct i2c_client *client, u16 *status) if (st & LM90_STATUS_LLOW) hwmon_notify_event(data->hwmon_dev, hwmon_temp, - hwmon_temp_min, 0); + hwmon_temp_min_alarm, 0); if (st & LM90_STATUS_RLOW) hwmon_notify_event(data->hwmon_dev, hwmon_temp, - hwmon_temp_min, 1); + hwmon_temp_min_alarm, 1); if (st2 & MAX6696_STATUS2_R2LOW) hwmon_notify_event(data->hwmon_dev, hwmon_temp, - hwmon_temp_min, 2); + hwmon_temp_min_alarm, 2); if (st & LM90_STATUS_LHIGH) hwmon_notify_event(data->hwmon_dev, hwmon_temp, - hwmon_temp_max, 0); + hwmon_temp_max_alarm, 0); if (st & LM90_STATUS_RHIGH) hwmon_notify_event(data->hwmon_dev, hwmon_temp, - hwmon_temp_max, 1); + hwmon_temp_max_alarm, 1); if (st2 & MAX6696_STATUS2_R2HIGH) hwmon_notify_event(data->hwmon_dev, hwmon_temp, - hwmon_temp_max, 2); + hwmon_temp_max_alarm, 2); return true; } diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c index fd3f91cb01c6..098d12b9ecda 100644 --- a/drivers/hwmon/nct6775.c +++ b/drivers/hwmon/nct6775.c @@ -1175,7 +1175,7 @@ static inline u8 in_to_reg(u32 val, u8 nr) struct nct6775_data { int addr; /* IO base of hw monitor block */ - int sioreg; /* SIO register address */ + struct nct6775_sio_data *sio_data; enum kinds kind; const char *name; @@ -3559,7 +3559,7 @@ clear_caseopen(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct nct6775_data *data = dev_get_drvdata(dev); - struct nct6775_sio_data *sio_data = dev_get_platdata(dev); + struct nct6775_sio_data *sio_data = data->sio_data; int nr = to_sensor_dev_attr(attr)->index - INTRUSION_ALARM_BASE; unsigned long val; u8 reg; @@ -3967,7 +3967,7 @@ static int nct6775_probe(struct platform_device *pdev) return -ENOMEM; data->kind = sio_data->kind; - data->sioreg = sio_data->sioreg; + data->sio_data = sio_data; if (sio_data->access == access_direct) { data->addr = res->start; diff --git a/drivers/hwmon/pmbus/ir38064.c b/drivers/hwmon/pmbus/ir38064.c index 0ea7e1c18bdc..09276e397194 100644 --- a/drivers/hwmon/pmbus/ir38064.c +++ b/drivers/hwmon/pmbus/ir38064.c @@ -62,7 +62,7 @@ static const struct i2c_device_id ir38064_id[] = { MODULE_DEVICE_TABLE(i2c, ir38064_id); -static const struct of_device_id ir38064_of_match[] = { +static const struct of_device_id __maybe_unused ir38064_of_match[] = { { .compatible = "infineon,ir38060" }, { .compatible = "infineon,ir38064" }, { .compatible = "infineon,ir38164" }, diff --git a/drivers/md/dm.c b/drivers/md/dm.c index c0ae8087c602..dcbd6d201619 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -489,7 +489,7 @@ static void start_io_acct(struct dm_io *io) struct mapped_device *md = io->md; struct bio *bio = io->orig_bio; - io->start_time = bio_start_io_acct(bio); + bio_start_io_acct_time(bio, io->start_time); if (unlikely(dm_stats_used(&md->stats))) dm_stats_account_io(&md->stats, bio_data_dir(bio), bio->bi_iter.bi_sector, bio_sectors(bio), @@ -535,7 +535,7 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio) io->md = md; spin_lock_init(&io->endio_lock); - start_io_acct(io); + io->start_time = jiffies; return io; } @@ -1442,9 +1442,6 @@ static void init_clone_info(struct clone_info *ci, struct mapped_device *md, ci->sector = bio->bi_iter.bi_sector; } -#define __dm_part_stat_sub(part, field, subnd) \ - (part_stat_get(part, field) -= (subnd)) - /* * Entry point to split a bio into clones and submit them to the targets. */ @@ -1480,23 +1477,12 @@ static void __split_and_process_bio(struct mapped_device *md, GFP_NOIO, &md->queue->bio_split); ci.io->orig_bio = b; - /* - * Adjust IO stats for each split, otherwise upon queue - * reentry there will be redundant IO accounting. - * NOTE: this is a stop-gap fix, a proper fix involves - * significant refactoring of DM core's bio splitting - * (by eliminating DM's splitting and just using bio_split) - */ - part_stat_lock(); - __dm_part_stat_sub(dm_disk(md)->part0, - sectors[op_stat_group(bio_op(bio))], ci.sector_count); - part_stat_unlock(); - bio_chain(b, bio); trace_block_split(b, bio->bi_iter.bi_sector); submit_bio_noacct(bio); } } + start_io_acct(ci.io); /* drop the extra reference count */ dm_io_dec_pending(ci.io, errno_to_blk_status(error)); diff --git a/drivers/misc/eeprom/at25.c b/drivers/misc/eeprom/at25.c index c3305bdda69c..bee727ed98db 100644 --- a/drivers/misc/eeprom/at25.c +++ b/drivers/misc/eeprom/at25.c @@ -440,6 +440,10 @@ static int at25_probe(struct spi_device *spi) return -ENXIO; } + at25 = devm_kzalloc(&spi->dev, sizeof(*at25), GFP_KERNEL); + if (!at25) + return -ENOMEM; + mutex_init(&at25->lock); at25->spi = spi; spi_set_drvdata(spi, at25); diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index ec498ce70f35..238b56d77c36 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4133,9 +4133,7 @@ static int bond_eth_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cm fallthrough; case SIOCGHWTSTAMP: - rcu_read_lock(); real_dev = bond_option_active_slave_get_rcu(bond); - rcu_read_unlock(); if (!real_dev) return -EOPNOTSUPP; @@ -5382,9 +5380,7 @@ static int bond_ethtool_get_ts_info(struct net_device *bond_dev, struct net_device *real_dev; struct phy_device *phydev; - rcu_read_lock(); real_dev = bond_option_active_slave_get_rcu(bond); - rcu_read_unlock(); if (real_dev) { ops = real_dev->ethtool_ops; phydev = real_dev->phydev; diff --git a/drivers/net/can/flexcan/flexcan-core.c b/drivers/net/can/flexcan/flexcan-core.c index 0bff1884d5cc..74d7fcbfd065 100644 --- a/drivers/net/can/flexcan/flexcan-core.c +++ b/drivers/net/can/flexcan/flexcan-core.c @@ -296,6 +296,7 @@ static_assert(sizeof(struct flexcan_regs) == 0x4 * 18 + 0xfb8); static const struct flexcan_devtype_data fsl_mcf5441x_devtype_data = { .quirks = FLEXCAN_QUIRK_BROKEN_PERR_STATE | FLEXCAN_QUIRK_NR_IRQ_3 | FLEXCAN_QUIRK_NR_MB_16 | + FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX | FLEXCAN_QUIRK_SUPPPORT_RX_FIFO, }; diff --git a/drivers/net/can/flexcan/flexcan.h b/drivers/net/can/flexcan/flexcan.h index fccdff8b1f0f..23fc09a7e10f 100644 --- a/drivers/net/can/flexcan/flexcan.h +++ b/drivers/net/can/flexcan/flexcan.h @@ -21,7 +21,7 @@ * Below is some version info we got: * SOC Version IP-Version Glitch- [TR]WRN_INT IRQ Err Memory err RTR rece- FD Mode MB * Filter? connected? Passive detection ption in MB Supported? - * MCF5441X FlexCAN2 ? no yes no no yes no 16 + * MCF5441X FlexCAN2 ? no yes no no no no 16 * MX25 FlexCAN2 03.00.00.00 no no no no no no 64 * MX28 FlexCAN2 03.00.04.00 yes yes no no no no 64 * MX35 FlexCAN2 03.00.00.00 no no no no no no 64 diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 5b47cd867783..1a4b56f6fa8c 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -336,6 +336,9 @@ m_can_fifo_read(struct m_can_classdev *cdev, u32 addr_offset = cdev->mcfg[MRAM_RXF0].off + fgi * RXF0_ELEMENT_SIZE + offset; + if (val_count == 0) + return 0; + return cdev->ops->read_fifo(cdev, addr_offset, val, val_count); } @@ -346,6 +349,9 @@ m_can_fifo_write(struct m_can_classdev *cdev, u32 addr_offset = cdev->mcfg[MRAM_TXB].off + fpi * TXB_ELEMENT_SIZE + offset; + if (val_count == 0) + return 0; + return cdev->ops->write_fifo(cdev, addr_offset, val, val_count); } diff --git a/drivers/net/can/m_can/tcan4x5x-regmap.c b/drivers/net/can/m_can/tcan4x5x-regmap.c index ca80dbaf7a3f..26e212b8ca7a 100644 --- a/drivers/net/can/m_can/tcan4x5x-regmap.c +++ b/drivers/net/can/m_can/tcan4x5x-regmap.c @@ -12,7 +12,7 @@ #define TCAN4X5X_SPI_INSTRUCTION_WRITE (0x61 << 24) #define TCAN4X5X_SPI_INSTRUCTION_READ (0x41 << 24) -#define TCAN4X5X_MAX_REGISTER 0x8ffc +#define TCAN4X5X_MAX_REGISTER 0x87fc static int tcan4x5x_regmap_gather_write(void *context, const void *reg, size_t reg_len, diff --git a/drivers/net/ethernet/3com/typhoon.c b/drivers/net/ethernet/3com/typhoon.c index 481f1df3106c..8aec5d9fbfef 100644 --- a/drivers/net/ethernet/3com/typhoon.c +++ b/drivers/net/ethernet/3com/typhoon.c @@ -2278,6 +2278,7 @@ typhoon_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) struct net_device *dev; struct typhoon *tp; int card_id = (int) ent->driver_data; + u8 addr[ETH_ALEN] __aligned(4); void __iomem *ioaddr; void *shared; dma_addr_t shared_dma; @@ -2409,8 +2410,9 @@ typhoon_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) goto error_out_reset; } - *(__be16 *)&dev->dev_addr[0] = htons(le16_to_cpu(xp_resp[0].parm1)); - *(__be32 *)&dev->dev_addr[2] = htonl(le32_to_cpu(xp_resp[0].parm2)); + *(__be16 *)&addr[0] = htons(le16_to_cpu(xp_resp[0].parm1)); + *(__be32 *)&addr[2] = htonl(le32_to_cpu(xp_resp[0].parm2)); + eth_hw_addr_set(dev, addr); if (!is_valid_ether_addr(dev->dev_addr)) { err_msg = "Could not obtain valid ethernet address, aborting"; diff --git a/drivers/net/ethernet/8390/etherh.c b/drivers/net/ethernet/8390/etherh.c index bd22a534b1c0..e7b879123bb1 100644 --- a/drivers/net/ethernet/8390/etherh.c +++ b/drivers/net/ethernet/8390/etherh.c @@ -655,6 +655,7 @@ etherh_probe(struct expansion_card *ec, const struct ecard_id *id) struct ei_device *ei_local; struct net_device *dev; struct etherh_priv *eh; + u8 addr[ETH_ALEN]; int ret; ret = ecard_request_resources(ec); @@ -724,12 +725,13 @@ etherh_probe(struct expansion_card *ec, const struct ecard_id *id) spin_lock_init(&ei_local->page_lock); if (ec->cid.product == PROD_ANT_ETHERM) { - etherm_addr(dev->dev_addr); + etherm_addr(addr); ei_local->reg_offset = etherm_regoffsets; } else { - etherh_addr(dev->dev_addr, ec); + etherh_addr(addr, ec); ei_local->reg_offset = etherh_regoffsets; } + eth_hw_addr_set(dev, addr); ei_local->name = dev->name; ei_local->word16 = 1; diff --git a/drivers/net/ethernet/amd/declance.c b/drivers/net/ethernet/amd/declance.c index 493b0cefcc2a..ec8df05e7bf6 100644 --- a/drivers/net/ethernet/amd/declance.c +++ b/drivers/net/ethernet/amd/declance.c @@ -1032,6 +1032,7 @@ static int dec_lance_probe(struct device *bdev, const int type) int i, ret; unsigned long esar_base; unsigned char *esar; + u8 addr[ETH_ALEN]; const char *desc; if (dec_lance_debug && version_printed++ == 0) @@ -1228,7 +1229,8 @@ static int dec_lance_probe(struct device *bdev, const int type) break; } for (i = 0; i < 6; i++) - dev->dev_addr[i] = esar[i * 4]; + addr[i] = esar[i * 4]; + eth_hw_addr_set(dev, addr); printk("%s: %s, addr = %pM, irq = %d\n", name, desc, dev->dev_addr, dev->irq); diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_filters.c b/drivers/net/ethernet/aquantia/atlantic/aq_filters.c index 1bc4d33a0ce5..30a573db02bb 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_filters.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_filters.c @@ -826,7 +826,6 @@ int aq_filters_vlans_update(struct aq_nic_s *aq_nic) struct aq_hw_s *aq_hw = aq_nic->aq_hw; int hweight = 0; int err = 0; - int i; if (unlikely(!aq_hw_ops->hw_filter_vlan_set)) return -EOPNOTSUPP; @@ -837,8 +836,7 @@ int aq_filters_vlans_update(struct aq_nic_s *aq_nic) aq_nic->aq_hw_rx_fltrs.fl2.aq_vlans); if (aq_nic->ndev->features & NETIF_F_HW_VLAN_CTAG_FILTER) { - for (i = 0; i < BITS_TO_LONGS(VLAN_N_VID); i++) - hweight += hweight_long(aq_nic->active_vlans[i]); + hweight = bitmap_weight(aq_nic->active_vlans, VLAN_N_VID); err = aq_hw_ops->hw_filter_vlan_ctrl(aq_hw, false); if (err) @@ -871,7 +869,7 @@ int aq_filters_vlan_offload_off(struct aq_nic_s *aq_nic) struct aq_hw_s *aq_hw = aq_nic->aq_hw; int err = 0; - memset(aq_nic->active_vlans, 0, sizeof(aq_nic->active_vlans)); + bitmap_zero(aq_nic->active_vlans, VLAN_N_VID); aq_fvlan_rebuild(aq_nic, aq_nic->active_vlans, aq_nic->aq_hw_rx_fltrs.fl2.aq_vlans); diff --git a/drivers/net/ethernet/broadcom/sb1250-mac.c b/drivers/net/ethernet/broadcom/sb1250-mac.c index f38f40eb966e..a1a38456c9a3 100644 --- a/drivers/net/ethernet/broadcom/sb1250-mac.c +++ b/drivers/net/ethernet/broadcom/sb1250-mac.c @@ -2183,9 +2183,7 @@ static int sbmac_init(struct platform_device *pldev, long long base) ea_reg >>= 8; } - for (i = 0; i < 6; i++) { - dev->dev_addr[i] = eaddr[i]; - } + eth_hw_addr_set(dev, eaddr); /* * Initialize context (get pointers to registers and stuff), then diff --git a/drivers/net/ethernet/freescale/fec_mpc52xx.c b/drivers/net/ethernet/freescale/fec_mpc52xx.c index bbbde9f701c2..be0bd4b44926 100644 --- a/drivers/net/ethernet/freescale/fec_mpc52xx.c +++ b/drivers/net/ethernet/freescale/fec_mpc52xx.c @@ -99,13 +99,13 @@ static void mpc52xx_fec_tx_timeout(struct net_device *dev, unsigned int txqueue) netif_wake_queue(dev); } -static void mpc52xx_fec_set_paddr(struct net_device *dev, u8 *mac) +static void mpc52xx_fec_set_paddr(struct net_device *dev, const u8 *mac) { struct mpc52xx_fec_priv *priv = netdev_priv(dev); struct mpc52xx_fec __iomem *fec = priv->fec; - out_be32(&fec->paddr1, *(u32 *)(&mac[0])); - out_be32(&fec->paddr2, (*(u16 *)(&mac[4]) << 16) | FEC_PADDR2_TYPE); + out_be32(&fec->paddr1, *(const u32 *)(&mac[0])); + out_be32(&fec->paddr2, (*(const u16 *)(&mac[4]) << 16) | FEC_PADDR2_TYPE); } static int mpc52xx_fec_set_mac_address(struct net_device *dev, void *addr) @@ -893,13 +893,15 @@ static int mpc52xx_fec_probe(struct platform_device *op) rv = of_get_ethdev_address(np, ndev); if (rv) { struct mpc52xx_fec __iomem *fec = priv->fec; + u8 addr[ETH_ALEN] __aligned(4); /* * If the MAC addresse is not provided via DT then read * it back from the controller regs */ - *(u32 *)(&ndev->dev_addr[0]) = in_be32(&fec->paddr1); - *(u16 *)(&ndev->dev_addr[4]) = in_be32(&fec->paddr2) >> 16; + *(u32 *)(&addr[0]) = in_be32(&fec->paddr1); + *(u16 *)(&addr[4]) = in_be32(&fec->paddr2) >> 16; + eth_hw_addr_set(ndev, addr); } /* diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index 5f5d4f7aa813..160735484465 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -843,7 +843,7 @@ static inline bool gve_is_gqi(struct gve_priv *priv) /* buffers */ int gve_alloc_page(struct gve_priv *priv, struct device *dev, struct page **page, dma_addr_t *dma, - enum dma_data_direction); + enum dma_data_direction, gfp_t gfp_flags); void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma, enum dma_data_direction); /* tx handling */ diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index f7f65c4bf993..54e51c8221b8 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -766,9 +766,9 @@ static void gve_free_rings(struct gve_priv *priv) int gve_alloc_page(struct gve_priv *priv, struct device *dev, struct page **page, dma_addr_t *dma, - enum dma_data_direction dir) + enum dma_data_direction dir, gfp_t gfp_flags) { - *page = alloc_page(GFP_KERNEL); + *page = alloc_page(gfp_flags); if (!*page) { priv->page_alloc_fail++; return -ENOMEM; @@ -811,7 +811,7 @@ static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id, for (i = 0; i < pages; i++) { err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i], &qpl->page_buses[i], - gve_qpl_dma_dir(priv, id)); + gve_qpl_dma_dir(priv, id), GFP_KERNEL); /* caller handles clean up */ if (err) return -ENOMEM; diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c index 9ddcc497f48e..2068199445bd 100644 --- a/drivers/net/ethernet/google/gve/gve_rx.c +++ b/drivers/net/ethernet/google/gve/gve_rx.c @@ -86,7 +86,8 @@ static int gve_rx_alloc_buffer(struct gve_priv *priv, struct device *dev, dma_addr_t dma; int err; - err = gve_alloc_page(priv, dev, &page, &dma, DMA_FROM_DEVICE); + err = gve_alloc_page(priv, dev, &page, &dma, DMA_FROM_DEVICE, + GFP_ATOMIC); if (err) return err; diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c index beb8bb079023..8c939628e2d8 100644 --- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c @@ -157,7 +157,7 @@ static int gve_alloc_page_dqo(struct gve_priv *priv, int err; err = gve_alloc_page(priv, &priv->pdev->dev, &buf_state->page_info.page, - &buf_state->addr, DMA_FROM_DEVICE); + &buf_state->addr, DMA_FROM_DEVICE, GFP_KERNEL); if (err) return err; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 7df87610ad96..21442a9bb996 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -2043,8 +2043,7 @@ static irqreturn_t hclgevf_misc_irq_handle(int irq, void *data) break; } - if (event_cause != HCLGEVF_VECTOR0_EVENT_OTHER) - hclgevf_enable_vector(&hdev->misc_vector, true); + hclgevf_enable_vector(&hdev->misc_vector, true); return IRQ_HANDLED; } diff --git a/drivers/net/ethernet/i825xx/ether1.c b/drivers/net/ethernet/i825xx/ether1.c index c612ef526d16..3e7d7c4bafdc 100644 --- a/drivers/net/ethernet/i825xx/ether1.c +++ b/drivers/net/ethernet/i825xx/ether1.c @@ -986,6 +986,7 @@ static int ether1_probe(struct expansion_card *ec, const struct ecard_id *id) { struct net_device *dev; + u8 addr[ETH_ALEN]; int i, ret = 0; ether1_banner(); @@ -1015,7 +1016,8 @@ ether1_probe(struct expansion_card *ec, const struct ecard_id *id) } for (i = 0; i < 6; i++) - dev->dev_addr[i] = readb(IDPROM_ADDRESS + (i << 2)); + addr[i] = readb(IDPROM_ADDRESS + (i << 2)); + eth_hw_addr_set(dev, addr); if (ether1_init_2(dev)) { ret = -ENODEV; diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 59536bd5cab1..bda7a2a9d211 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -2602,6 +2602,7 @@ static void __ibmvnic_reset(struct work_struct *work) struct ibmvnic_rwi *rwi; unsigned long flags; u32 reset_state; + int num_fails = 0; int rc = 0; adapter = container_of(work, struct ibmvnic_adapter, ibmvnic_reset); @@ -2655,11 +2656,23 @@ static void __ibmvnic_reset(struct work_struct *work) rc = do_hard_reset(adapter, rwi, reset_state); rtnl_unlock(); } - if (rc) { - /* give backing device time to settle down */ + if (rc) + num_fails++; + else + num_fails = 0; + + /* If auto-priority-failover is enabled we can get + * back to back failovers during resets, resulting + * in at least two failed resets (from high-priority + * backing device to low-priority one and then back) + * If resets continue to fail beyond that, give the + * adapter some time to settle down before retrying. + */ + if (num_fails >= 3) { netdev_dbg(adapter->netdev, - "[S:%s] Hard reset failed, waiting 60 secs\n", - adapter_state_to_string(adapter->state)); + "[S:%s] Hard reset failed %d times, waiting 60 secs\n", + adapter_state_to_string(adapter->state), + num_fails); set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(60 * HZ); } @@ -3844,11 +3857,25 @@ static void send_request_cap(struct ibmvnic_adapter *adapter, int retry) struct device *dev = &adapter->vdev->dev; union ibmvnic_crq crq; int max_entries; + int cap_reqs; + + /* We send out 6 or 7 REQUEST_CAPABILITY CRQs below (depending on + * the PROMISC flag). Initialize this count upfront. When the tasklet + * receives a response to all of these, it will send the next protocol + * message (QUERY_IP_OFFLOAD). + */ + if (!(adapter->netdev->flags & IFF_PROMISC) || + adapter->promisc_supported) + cap_reqs = 7; + else + cap_reqs = 6; if (!retry) { /* Sub-CRQ entries are 32 byte long */ int entries_page = 4 * PAGE_SIZE / (sizeof(u64) * 4); + atomic_set(&adapter->running_cap_crqs, cap_reqs); + if (adapter->min_tx_entries_per_subcrq > entries_page || adapter->min_rx_add_entries_per_subcrq > entries_page) { dev_err(dev, "Fatal, invalid entries per sub-crq\n"); @@ -3909,44 +3936,45 @@ static void send_request_cap(struct ibmvnic_adapter *adapter, int retry) adapter->opt_rx_comp_queues; adapter->req_rx_add_queues = adapter->max_rx_add_queues; + } else { + atomic_add(cap_reqs, &adapter->running_cap_crqs); } - memset(&crq, 0, sizeof(crq)); crq.request_capability.first = IBMVNIC_CRQ_CMD; crq.request_capability.cmd = REQUEST_CAPABILITY; crq.request_capability.capability = cpu_to_be16(REQ_TX_QUEUES); crq.request_capability.number = cpu_to_be64(adapter->req_tx_queues); - atomic_inc(&adapter->running_cap_crqs); + cap_reqs--; ibmvnic_send_crq(adapter, &crq); crq.request_capability.capability = cpu_to_be16(REQ_RX_QUEUES); crq.request_capability.number = cpu_to_be64(adapter->req_rx_queues); - atomic_inc(&adapter->running_cap_crqs); + cap_reqs--; ibmvnic_send_crq(adapter, &crq); crq.request_capability.capability = cpu_to_be16(REQ_RX_ADD_QUEUES); crq.request_capability.number = cpu_to_be64(adapter->req_rx_add_queues); - atomic_inc(&adapter->running_cap_crqs); + cap_reqs--; ibmvnic_send_crq(adapter, &crq); crq.request_capability.capability = cpu_to_be16(REQ_TX_ENTRIES_PER_SUBCRQ); crq.request_capability.number = cpu_to_be64(adapter->req_tx_entries_per_subcrq); - atomic_inc(&adapter->running_cap_crqs); + cap_reqs--; ibmvnic_send_crq(adapter, &crq); crq.request_capability.capability = cpu_to_be16(REQ_RX_ADD_ENTRIES_PER_SUBCRQ); crq.request_capability.number = cpu_to_be64(adapter->req_rx_add_entries_per_subcrq); - atomic_inc(&adapter->running_cap_crqs); + cap_reqs--; ibmvnic_send_crq(adapter, &crq); crq.request_capability.capability = cpu_to_be16(REQ_MTU); crq.request_capability.number = cpu_to_be64(adapter->req_mtu); - atomic_inc(&adapter->running_cap_crqs); + cap_reqs--; ibmvnic_send_crq(adapter, &crq); if (adapter->netdev->flags & IFF_PROMISC) { @@ -3954,16 +3982,21 @@ static void send_request_cap(struct ibmvnic_adapter *adapter, int retry) crq.request_capability.capability = cpu_to_be16(PROMISC_REQUESTED); crq.request_capability.number = cpu_to_be64(1); - atomic_inc(&adapter->running_cap_crqs); + cap_reqs--; ibmvnic_send_crq(adapter, &crq); } } else { crq.request_capability.capability = cpu_to_be16(PROMISC_REQUESTED); crq.request_capability.number = cpu_to_be64(0); - atomic_inc(&adapter->running_cap_crqs); + cap_reqs--; ibmvnic_send_crq(adapter, &crq); } + + /* Keep at end to catch any discrepancy between expected and actual + * CRQs sent. + */ + WARN_ON(cap_reqs != 0); } static int pending_scrq(struct ibmvnic_adapter *adapter, @@ -4357,118 +4390,132 @@ static void send_query_map(struct ibmvnic_adapter *adapter) static void send_query_cap(struct ibmvnic_adapter *adapter) { union ibmvnic_crq crq; + int cap_reqs; + + /* We send out 25 QUERY_CAPABILITY CRQs below. Initialize this count + * upfront. When the tasklet receives a response to all of these, it + * can send out the next protocol messaage (REQUEST_CAPABILITY). + */ + cap_reqs = 25; + + atomic_set(&adapter->running_cap_crqs, cap_reqs); - atomic_set(&adapter->running_cap_crqs, 0); memset(&crq, 0, sizeof(crq)); crq.query_capability.first = IBMVNIC_CRQ_CMD; crq.query_capability.cmd = QUERY_CAPABILITY; crq.query_capability.capability = cpu_to_be16(MIN_TX_QUEUES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MIN_RX_QUEUES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MIN_RX_ADD_QUEUES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MAX_TX_QUEUES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MAX_RX_QUEUES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MAX_RX_ADD_QUEUES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MIN_TX_ENTRIES_PER_SUBCRQ); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MIN_RX_ADD_ENTRIES_PER_SUBCRQ); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MAX_TX_ENTRIES_PER_SUBCRQ); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MAX_RX_ADD_ENTRIES_PER_SUBCRQ); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(TCP_IP_OFFLOAD); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(PROMISC_SUPPORTED); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MIN_MTU); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MAX_MTU); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MAX_MULTICAST_FILTERS); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(VLAN_HEADER_INSERTION); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(RX_VLAN_HEADER_INSERTION); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MAX_TX_SG_ENTRIES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(RX_SG_SUPPORTED); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(OPT_TX_COMP_SUB_QUEUES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(OPT_RX_COMP_QUEUES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(OPT_RX_BUFADD_Q_PER_RX_COMP_Q); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(OPT_TX_ENTRIES_PER_SUBCRQ); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(OPT_RXBA_ENTRIES_PER_SUBCRQ); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(TX_RX_DESC_REQ); - atomic_inc(&adapter->running_cap_crqs); + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + /* Keep at end to catch any discrepancy between expected and actual + * CRQs sent. + */ + WARN_ON(cap_reqs != 0); } static void send_query_ip_offload(struct ibmvnic_adapter *adapter) @@ -4772,6 +4819,8 @@ static void handle_request_cap_rsp(union ibmvnic_crq *crq, char *name; atomic_dec(&adapter->running_cap_crqs); + netdev_dbg(adapter->netdev, "Outstanding request-caps: %d\n", + atomic_read(&adapter->running_cap_crqs)); switch (be16_to_cpu(crq->request_capability_rsp.capability)) { case REQ_TX_QUEUES: req_value = &adapter->req_tx_queues; @@ -4835,10 +4884,8 @@ static void handle_request_cap_rsp(union ibmvnic_crq *crq, } /* Done receiving requested capabilities, query IP offload support */ - if (atomic_read(&adapter->running_cap_crqs) == 0) { - adapter->wait_capability = false; + if (atomic_read(&adapter->running_cap_crqs) == 0) send_query_ip_offload(adapter); - } } static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq, @@ -5136,10 +5183,8 @@ static void handle_query_cap_rsp(union ibmvnic_crq *crq, } out: - if (atomic_read(&adapter->running_cap_crqs) == 0) { - adapter->wait_capability = false; + if (atomic_read(&adapter->running_cap_crqs) == 0) send_request_cap(adapter, 0); - } } static int send_query_phys_parms(struct ibmvnic_adapter *adapter) @@ -5435,33 +5480,21 @@ static void ibmvnic_tasklet(struct tasklet_struct *t) struct ibmvnic_crq_queue *queue = &adapter->crq; union ibmvnic_crq *crq; unsigned long flags; - bool done = false; spin_lock_irqsave(&queue->lock, flags); - while (!done) { - /* Pull all the valid messages off the CRQ */ - while ((crq = ibmvnic_next_crq(adapter)) != NULL) { - /* This barrier makes sure ibmvnic_next_crq()'s - * crq->generic.first & IBMVNIC_CRQ_CMD_RSP is loaded - * before ibmvnic_handle_crq()'s - * switch(gen_crq->first) and switch(gen_crq->cmd). - */ - dma_rmb(); - ibmvnic_handle_crq(crq, adapter); - crq->generic.first = 0; - } - /* remain in tasklet until all - * capabilities responses are received + /* Pull all the valid messages off the CRQ */ + while ((crq = ibmvnic_next_crq(adapter)) != NULL) { + /* This barrier makes sure ibmvnic_next_crq()'s + * crq->generic.first & IBMVNIC_CRQ_CMD_RSP is loaded + * before ibmvnic_handle_crq()'s + * switch(gen_crq->first) and switch(gen_crq->cmd). */ - if (!adapter->wait_capability) - done = true; + dma_rmb(); + ibmvnic_handle_crq(crq, adapter); + crq->generic.first = 0; } - /* if capabilities CRQ's were sent in this tasklet, the following - * tasklet must wait until all responses are received - */ - if (atomic_read(&adapter->running_cap_crqs) != 0) - adapter->wait_capability = true; + spin_unlock_irqrestore(&queue->lock, flags); } diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index 4a8f36e0ab07..4a7a56ff74ce 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -919,7 +919,6 @@ struct ibmvnic_adapter { int login_rsp_buf_sz; atomic_t running_cap_crqs; - bool wait_capability; struct ibmvnic_sub_crq_queue **tx_scrq ____cacheline_aligned; struct ibmvnic_sub_crq_queue **rx_scrq ____cacheline_aligned; diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 4d939af0a626..2e02cc68cd3f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -174,7 +174,6 @@ enum i40e_interrupt_policy { struct i40e_lump_tracking { u16 num_entries; - u16 search_hint; u16 list[0]; #define I40E_PILE_VALID_BIT 0x8000 #define I40E_IWARP_IRQ_PILE_ID (I40E_PILE_VALID_BIT - 2) @@ -848,12 +847,12 @@ struct i40e_vsi { struct rtnl_link_stats64 net_stats_offsets; struct i40e_eth_stats eth_stats; struct i40e_eth_stats eth_stats_offsets; - u32 tx_restart; - u32 tx_busy; + u64 tx_restart; + u64 tx_busy; u64 tx_linearize; u64 tx_force_wb; - u32 rx_buf_failed; - u32 rx_page_failed; + u64 rx_buf_failed; + u64 rx_page_failed; /* These are containers of ring pointers, allocated at run-time */ struct i40e_ring **rx_rings; diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 2c1b1da1220e..1e57cc8c47d7 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -240,7 +240,7 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) (unsigned long int)vsi->net_stats_offsets.rx_compressed, (unsigned long int)vsi->net_stats_offsets.tx_compressed); dev_info(&pf->pdev->dev, - " tx_restart = %d, tx_busy = %d, rx_buf_failed = %d, rx_page_failed = %d\n", + " tx_restart = %llu, tx_busy = %llu, rx_buf_failed = %llu, rx_page_failed = %llu\n", vsi->tx_restart, vsi->tx_busy, vsi->rx_buf_failed, vsi->rx_page_failed); rcu_read_lock(); diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 2a3d8aef7f4e..f70c478dafdb 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -196,10 +196,6 @@ int i40e_free_virt_mem_d(struct i40e_hw *hw, struct i40e_virt_mem *mem) * @id: an owner id to stick on the items assigned * * Returns the base item index of the lump, or negative for error - * - * The search_hint trick and lack of advanced fit-finding only work - * because we're highly likely to have all the same size lump requests. - * Linear search time and any fragmentation should be minimal. **/ static int i40e_get_lump(struct i40e_pf *pf, struct i40e_lump_tracking *pile, u16 needed, u16 id) @@ -214,8 +210,21 @@ static int i40e_get_lump(struct i40e_pf *pf, struct i40e_lump_tracking *pile, return -EINVAL; } - /* start the linear search with an imperfect hint */ - i = pile->search_hint; + /* Allocate last queue in the pile for FDIR VSI queue + * so it doesn't fragment the qp_pile + */ + if (pile == pf->qp_pile && pf->vsi[id]->type == I40E_VSI_FDIR) { + if (pile->list[pile->num_entries - 1] & I40E_PILE_VALID_BIT) { + dev_err(&pf->pdev->dev, + "Cannot allocate queue %d for I40E_VSI_FDIR\n", + pile->num_entries - 1); + return -ENOMEM; + } + pile->list[pile->num_entries - 1] = id | I40E_PILE_VALID_BIT; + return pile->num_entries - 1; + } + + i = 0; while (i < pile->num_entries) { /* skip already allocated entries */ if (pile->list[i] & I40E_PILE_VALID_BIT) { @@ -234,7 +243,6 @@ static int i40e_get_lump(struct i40e_pf *pf, struct i40e_lump_tracking *pile, for (j = 0; j < needed; j++) pile->list[i+j] = id | I40E_PILE_VALID_BIT; ret = i; - pile->search_hint = i + j; break; } @@ -257,7 +265,7 @@ static int i40e_put_lump(struct i40e_lump_tracking *pile, u16 index, u16 id) { int valid_id = (id | I40E_PILE_VALID_BIT); int count = 0; - int i; + u16 i; if (!pile || index >= pile->num_entries) return -EINVAL; @@ -269,8 +277,6 @@ static int i40e_put_lump(struct i40e_lump_tracking *pile, u16 index, u16 id) count++; } - if (count && index < pile->search_hint) - pile->search_hint = index; return count; } @@ -772,9 +778,9 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) struct rtnl_link_stats64 *ns; /* netdev stats */ struct i40e_eth_stats *oes; struct i40e_eth_stats *es; /* device's eth stats */ - u32 tx_restart, tx_busy; + u64 tx_restart, tx_busy; struct i40e_ring *p; - u32 rx_page, rx_buf; + u64 rx_page, rx_buf; u64 bytes, packets; unsigned int start; u64 tx_linearize; @@ -10574,15 +10580,9 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) } i40e_get_oem_version(&pf->hw); - if (test_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state) && - ((hw->aq.fw_maj_ver == 4 && hw->aq.fw_min_ver <= 33) || - hw->aq.fw_maj_ver < 4) && hw->mac.type == I40E_MAC_XL710) { - /* The following delay is necessary for 4.33 firmware and older - * to recover after EMP reset. 200 ms should suffice but we - * put here 300 ms to be sure that FW is ready to operate - * after reset. - */ - mdelay(300); + if (test_and_clear_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state)) { + /* The following delay is necessary for firmware update. */ + mdelay(1000); } /* re-verify the eeprom if we just had an EMP reset */ @@ -11792,7 +11792,6 @@ static int i40e_init_interrupt_scheme(struct i40e_pf *pf) return -ENOMEM; pf->irq_pile->num_entries = vectors; - pf->irq_pile->search_hint = 0; /* track first vector for misc interrupts, ignore return */ (void)i40e_get_lump(pf, pf->irq_pile, 1, I40E_PILE_VALID_BIT - 1); @@ -12595,7 +12594,6 @@ static int i40e_sw_init(struct i40e_pf *pf) goto sw_init_done; } pf->qp_pile->num_entries = pf->hw.func_caps.num_tx_qp; - pf->qp_pile->search_hint = 0; pf->tx_timeout_recovery_level = 1; diff --git a/drivers/net/ethernet/intel/i40e/i40e_register.h b/drivers/net/ethernet/intel/i40e/i40e_register.h index 8d0588a27a05..1908eed4fa5e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_register.h +++ b/drivers/net/ethernet/intel/i40e/i40e_register.h @@ -413,6 +413,9 @@ #define I40E_VFINT_DYN_CTLN(_INTVF) (0x00024800 + ((_INTVF) * 4)) /* _i=0...511 */ /* Reset: VFR */ #define I40E_VFINT_DYN_CTLN_CLEARPBA_SHIFT 1 #define I40E_VFINT_DYN_CTLN_CLEARPBA_MASK I40E_MASK(0x1, I40E_VFINT_DYN_CTLN_CLEARPBA_SHIFT) +#define I40E_VFINT_ICR0_ADMINQ_SHIFT 30 +#define I40E_VFINT_ICR0_ADMINQ_MASK I40E_MASK(0x1, I40E_VFINT_ICR0_ADMINQ_SHIFT) +#define I40E_VFINT_ICR0_ENA(_VF) (0x0002C000 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: CORER */ #define I40E_VPINT_AEQCTL(_VF) (0x0002B800 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: CORER */ #define I40E_VPINT_AEQCTL_MSIX_INDX_SHIFT 0 #define I40E_VPINT_AEQCTL_ITR_INDX_SHIFT 11 diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index b785d09c19f8..dfdb6e786461 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -1377,6 +1377,32 @@ static i40e_status i40e_config_vf_promiscuous_mode(struct i40e_vf *vf, } /** + * i40e_sync_vfr_reset + * @hw: pointer to hw struct + * @vf_id: VF identifier + * + * Before trigger hardware reset, we need to know if no other process has + * reserved the hardware for any reset operations. This check is done by + * examining the status of the RSTAT1 register used to signal the reset. + **/ +static int i40e_sync_vfr_reset(struct i40e_hw *hw, int vf_id) +{ + u32 reg; + int i; + + for (i = 0; i < I40E_VFR_WAIT_COUNT; i++) { + reg = rd32(hw, I40E_VFINT_ICR0_ENA(vf_id)) & + I40E_VFINT_ICR0_ADMINQ_MASK; + if (reg) + return 0; + + usleep_range(100, 200); + } + + return -EAGAIN; +} + +/** * i40e_trigger_vf_reset * @vf: pointer to the VF structure * @flr: VFLR was issued or not @@ -1390,9 +1416,11 @@ static void i40e_trigger_vf_reset(struct i40e_vf *vf, bool flr) struct i40e_pf *pf = vf->pf; struct i40e_hw *hw = &pf->hw; u32 reg, reg_idx, bit_idx; + bool vf_active; + u32 radq; /* warn the VF */ - clear_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states); + vf_active = test_and_clear_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states); /* Disable VF's configuration API during reset. The flag is re-enabled * in i40e_alloc_vf_res(), when it's safe again to access VF's VSI. @@ -1406,7 +1434,19 @@ static void i40e_trigger_vf_reset(struct i40e_vf *vf, bool flr) * just need to clean up, so don't hit the VFRTRIG register. */ if (!flr) { - /* reset VF using VPGEN_VFRTRIG reg */ + /* Sync VFR reset before trigger next one */ + radq = rd32(hw, I40E_VFINT_ICR0_ENA(vf->vf_id)) & + I40E_VFINT_ICR0_ADMINQ_MASK; + if (vf_active && !radq) + /* waiting for finish reset by virtual driver */ + if (i40e_sync_vfr_reset(hw, vf->vf_id)) + dev_info(&pf->pdev->dev, + "Reset VF %d never finished\n", + vf->vf_id); + + /* Reset VF using VPGEN_VFRTRIG reg. It is also setting + * in progress state in rstat1 register. + */ reg = rd32(hw, I40E_VPGEN_VFRTRIG(vf->vf_id)); reg |= I40E_VPGEN_VFRTRIG_VFSWR_MASK; wr32(hw, I40E_VPGEN_VFRTRIG(vf->vf_id), reg); @@ -2618,6 +2658,59 @@ error_param: } /** + * i40e_check_enough_queue - find big enough queue number + * @vf: pointer to the VF info + * @needed: the number of items needed + * + * Returns the base item index of the queue, or negative for error + **/ +static int i40e_check_enough_queue(struct i40e_vf *vf, u16 needed) +{ + unsigned int i, cur_queues, more, pool_size; + struct i40e_lump_tracking *pile; + struct i40e_pf *pf = vf->pf; + struct i40e_vsi *vsi; + + vsi = pf->vsi[vf->lan_vsi_idx]; + cur_queues = vsi->alloc_queue_pairs; + + /* if current allocated queues are enough for need */ + if (cur_queues >= needed) + return vsi->base_queue; + + pile = pf->qp_pile; + if (cur_queues > 0) { + /* if the allocated queues are not zero + * just check if there are enough queues for more + * behind the allocated queues. + */ + more = needed - cur_queues; + for (i = vsi->base_queue + cur_queues; + i < pile->num_entries; i++) { + if (pile->list[i] & I40E_PILE_VALID_BIT) + break; + + if (more-- == 1) + /* there is enough */ + return vsi->base_queue; + } + } + + pool_size = 0; + for (i = 0; i < pile->num_entries; i++) { + if (pile->list[i] & I40E_PILE_VALID_BIT) { + pool_size = 0; + continue; + } + if (needed <= ++pool_size) + /* there is enough */ + return i; + } + + return -ENOMEM; +} + +/** * i40e_vc_request_queues_msg * @vf: pointer to the VF info * @msg: pointer to the msg buffer @@ -2651,6 +2744,12 @@ static int i40e_vc_request_queues_msg(struct i40e_vf *vf, u8 *msg) req_pairs - cur_pairs, pf->queues_left); vfres->num_queue_pairs = pf->queues_left + cur_pairs; + } else if (i40e_check_enough_queue(vf, req_pairs) < 0) { + dev_warn(&pf->pdev->dev, + "VF %d requested %d more queues, but there is not enough for it.\n", + vf->vf_id, + req_pairs - cur_pairs); + vfres->num_queue_pairs = cur_pairs; } else { /* successful request */ vf->num_req_queues = req_pairs; diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h index 49575a640a84..03c42fd0fea1 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h @@ -19,6 +19,7 @@ #define I40E_MAX_VF_PROMISC_FLAGS 3 #define I40E_VF_STATE_WAIT_COUNT 20 +#define I40E_VFR_WAIT_COUNT 100 /* Various queue ctrls */ enum i40e_queue_ctrl { diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index 186d00a9ab35..3631d612aaca 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -1570,6 +1570,8 @@ static struct mac_ops cgx_mac_ops = { .mac_enadis_pause_frm = cgx_lmac_enadis_pause_frm, .mac_pause_frm_config = cgx_lmac_pause_frm_config, .mac_enadis_ptp_config = cgx_lmac_ptp_config, + .mac_rx_tx_enable = cgx_lmac_rx_tx_enable, + .mac_tx_enable = cgx_lmac_tx_enable, }; static int cgx_probe(struct pci_dev *pdev, const struct pci_device_id *id) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h b/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h index fc6e7423cbd8..b33e7d1d0851 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h @@ -107,6 +107,9 @@ struct mac_ops { void (*mac_enadis_ptp_config)(void *cgxd, int lmac_id, bool enable); + + int (*mac_rx_tx_enable)(void *cgxd, int lmac_id, bool enable); + int (*mac_tx_enable)(void *cgxd, int lmac_id, bool enable); }; struct cgx { diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index 4e79e918a161..58e2aeebc14f 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -732,6 +732,7 @@ enum nix_af_status { NIX_AF_ERR_BANDPROF_INVAL_REQ = -428, NIX_AF_ERR_CQ_CTX_WRITE_ERR = -429, NIX_AF_ERR_AQ_CTX_RETRY_WRITE = -430, + NIX_AF_ERR_LINK_CREDITS = -431, }; /* For NIX RX vtag action */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h b/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h index 0fe7ad35e36f..4180376fa676 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h @@ -185,7 +185,6 @@ enum npc_kpu_parser_state { NPC_S_KPU2_QINQ, NPC_S_KPU2_ETAG, NPC_S_KPU2_EXDSA, - NPC_S_KPU2_NGIO, NPC_S_KPU2_CPT_CTAG, NPC_S_KPU2_CPT_QINQ, NPC_S_KPU3_CTAG, @@ -212,6 +211,7 @@ enum npc_kpu_parser_state { NPC_S_KPU5_NSH, NPC_S_KPU5_CPT_IP, NPC_S_KPU5_CPT_IP6, + NPC_S_KPU5_NGIO, NPC_S_KPU6_IP6_EXT, NPC_S_KPU6_IP6_HOP_DEST, NPC_S_KPU6_IP6_ROUT, @@ -1124,15 +1124,6 @@ static struct npc_kpu_profile_cam kpu1_cam_entries[] = { NPC_S_KPU1_ETHER, 0xff, NPC_ETYPE_CTAG, 0xffff, - NPC_ETYPE_NGIO, - 0xffff, - 0x0000, - 0x0000, - }, - { - NPC_S_KPU1_ETHER, 0xff, - NPC_ETYPE_CTAG, - 0xffff, NPC_ETYPE_CTAG, 0xffff, 0x0000, @@ -1968,6 +1959,15 @@ static struct npc_kpu_profile_cam kpu2_cam_entries[] = { }, { NPC_S_KPU2_CTAG, 0xff, + NPC_ETYPE_NGIO, + 0xffff, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + }, + { + NPC_S_KPU2_CTAG, 0xff, NPC_ETYPE_PPPOE, 0xffff, 0x0000, @@ -2750,15 +2750,6 @@ static struct npc_kpu_profile_cam kpu2_cam_entries[] = { 0x0000, }, { - NPC_S_KPU2_NGIO, 0xff, - 0x0000, - 0x0000, - 0x0000, - 0x0000, - 0x0000, - 0x0000, - }, - { NPC_S_KPU2_CPT_CTAG, 0xff, NPC_ETYPE_IP, 0xffff, @@ -5090,6 +5081,15 @@ static struct npc_kpu_profile_cam kpu5_cam_entries[] = { 0x0000, }, { + NPC_S_KPU5_NGIO, 0xff, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + }, + { NPC_S_NA, 0X00, 0x0000, 0x0000, @@ -8425,14 +8425,6 @@ static struct npc_kpu_profile_action kpu1_action_entries[] = { { NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 12, 0, 0, 0, - NPC_S_KPU2_NGIO, 12, 1, - NPC_LID_LA, NPC_LT_LA_ETHER, - 0, - 0, 0, 0, 0, - }, - { - NPC_ERRLEV_RE, NPC_EC_NOERR, - 8, 12, 0, 0, 0, NPC_S_KPU2_CTAG2, 12, 1, NPC_LID_LA, NPC_LT_LA_ETHER, NPC_F_LA_U_HAS_TAG | NPC_F_LA_L_WITH_VLAN, @@ -9196,6 +9188,14 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = { }, { NPC_ERRLEV_RE, NPC_EC_NOERR, + 0, 0, 0, 2, 0, + NPC_S_KPU5_NGIO, 6, 1, + NPC_LID_LB, NPC_LT_LB_CTAG, + 0, + 0, 0, 0, 0, + }, + { + NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 6, 2, 0, NPC_S_KPU5_IP, 14, 1, NPC_LID_LB, NPC_LT_LB_PPPOE, @@ -9892,14 +9892,6 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = { }, { NPC_ERRLEV_RE, NPC_EC_NOERR, - 0, 0, 0, 0, 1, - NPC_S_NA, 0, 1, - NPC_LID_LC, NPC_LT_LC_NGIO, - 0, - 0, 0, 0, 0, - }, - { - NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 6, 2, 0, NPC_S_KPU5_CPT_IP, 6, 1, NPC_LID_LB, NPC_LT_LB_CTAG, @@ -11974,6 +11966,14 @@ static struct npc_kpu_profile_action kpu5_action_entries[] = { 0, 0, 0, 0, }, { + NPC_ERRLEV_RE, NPC_EC_NOERR, + 0, 0, 0, 0, 1, + NPC_S_NA, 0, 1, + NPC_LID_LC, NPC_LT_LC_NGIO, + 0, + 0, 0, 0, 0, + }, + { NPC_ERRLEV_LC, NPC_EC_UNK, 0, 0, 0, 0, 1, NPC_S_NA, 0, 0, diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c index e695fa0e82a9..9ea2f6ac38ec 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c @@ -30,6 +30,8 @@ static struct mac_ops rpm_mac_ops = { .mac_enadis_pause_frm = rpm_lmac_enadis_pause_frm, .mac_pause_frm_config = rpm_lmac_pause_frm_config, .mac_enadis_ptp_config = rpm_lmac_ptp_config, + .mac_rx_tx_enable = rpm_lmac_rx_tx_enable, + .mac_tx_enable = rpm_lmac_tx_enable, }; struct mac_ops *rpm_get_mac_ops(void) @@ -54,6 +56,43 @@ int rpm_get_nr_lmacs(void *rpmd) return hweight8(rpm_read(rpm, 0, CGXX_CMRX_RX_LMACS) & 0xFULL); } +int rpm_lmac_tx_enable(void *rpmd, int lmac_id, bool enable) +{ + rpm_t *rpm = rpmd; + u64 cfg, last; + + if (!is_lmac_valid(rpm, lmac_id)) + return -ENODEV; + + cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); + last = cfg; + if (enable) + cfg |= RPM_TX_EN; + else + cfg &= ~(RPM_TX_EN); + + if (cfg != last) + rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); + return !!(last & RPM_TX_EN); +} + +int rpm_lmac_rx_tx_enable(void *rpmd, int lmac_id, bool enable) +{ + rpm_t *rpm = rpmd; + u64 cfg; + + if (!is_lmac_valid(rpm, lmac_id)) + return -ENODEV; + + cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); + if (enable) + cfg |= RPM_RX_EN | RPM_TX_EN; + else + cfg &= ~(RPM_RX_EN | RPM_TX_EN); + rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); + return 0; +} + void rpm_lmac_enadis_rx_pause_fwding(void *rpmd, int lmac_id, bool enable) { rpm_t *rpm = rpmd; @@ -252,23 +291,20 @@ int rpm_lmac_internal_loopback(void *rpmd, int lmac_id, bool enable) if (!rpm || lmac_id >= rpm->lmac_count) return -ENODEV; lmac_type = rpm->mac_ops->get_lmac_type(rpm, lmac_id); - if (lmac_type == LMAC_MODE_100G_R) { - cfg = rpm_read(rpm, lmac_id, RPMX_MTI_PCS100X_CONTROL1); - - if (enable) - cfg |= RPMX_MTI_PCS_LBK; - else - cfg &= ~RPMX_MTI_PCS_LBK; - rpm_write(rpm, lmac_id, RPMX_MTI_PCS100X_CONTROL1, cfg); - } else { - cfg = rpm_read(rpm, lmac_id, RPMX_MTI_LPCSX_CONTROL1); - if (enable) - cfg |= RPMX_MTI_PCS_LBK; - else - cfg &= ~RPMX_MTI_PCS_LBK; - rpm_write(rpm, lmac_id, RPMX_MTI_LPCSX_CONTROL1, cfg); + + if (lmac_type == LMAC_MODE_QSGMII || lmac_type == LMAC_MODE_SGMII) { + dev_err(&rpm->pdev->dev, "loopback not supported for LPC mode\n"); + return 0; } + cfg = rpm_read(rpm, lmac_id, RPMX_MTI_PCS100X_CONTROL1); + + if (enable) + cfg |= RPMX_MTI_PCS_LBK; + else + cfg &= ~RPMX_MTI_PCS_LBK; + rpm_write(rpm, lmac_id, RPMX_MTI_PCS100X_CONTROL1, cfg); + return 0; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.h b/drivers/net/ethernet/marvell/octeontx2/af/rpm.h index 57c8a687b488..ff580311edd0 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.h @@ -43,6 +43,8 @@ #define RPMX_MTI_STAT_DATA_HI_CDC 0x10038 #define RPM_LMAC_FWI 0xa +#define RPM_TX_EN BIT_ULL(0) +#define RPM_RX_EN BIT_ULL(1) /* Function Declarations */ int rpm_get_nr_lmacs(void *rpmd); @@ -57,4 +59,6 @@ int rpm_lmac_enadis_pause_frm(void *rpmd, int lmac_id, u8 tx_pause, int rpm_get_tx_stats(void *rpmd, int lmac_id, int idx, u64 *tx_stat); int rpm_get_rx_stats(void *rpmd, int lmac_id, int idx, u64 *rx_stat); void rpm_lmac_ptp_config(void *rpmd, int lmac_id, bool enable); +int rpm_lmac_rx_tx_enable(void *rpmd, int lmac_id, bool enable); +int rpm_lmac_tx_enable(void *rpmd, int lmac_id, bool enable); #endif /* RPM_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index 3ca6b942ebe2..54e1b27a7dfe 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -520,8 +520,11 @@ static void rvu_block_reset(struct rvu *rvu, int blkaddr, u64 rst_reg) rvu_write64(rvu, blkaddr, rst_reg, BIT_ULL(0)); err = rvu_poll_reg(rvu, blkaddr, rst_reg, BIT_ULL(63), true); - if (err) - dev_err(rvu->dev, "HW block:%d reset failed\n", blkaddr); + if (err) { + dev_err(rvu->dev, "HW block:%d reset timeout retrying again\n", blkaddr); + while (rvu_poll_reg(rvu, blkaddr, rst_reg, BIT_ULL(63), true) == -EBUSY) + ; + } } static void rvu_reset_all_blocks(struct rvu *rvu) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index 66e45d733824..5ed94cfb47d2 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -806,6 +806,7 @@ bool is_mac_feature_supported(struct rvu *rvu, int pf, int feature); u32 rvu_cgx_get_fifolen(struct rvu *rvu); void *rvu_first_cgx_pdata(struct rvu *rvu); int cgxlmac_to_pf(struct rvu *rvu, int cgx_id, int lmac_id); +int rvu_cgx_config_tx(void *cgxd, int lmac_id, bool enable); int npc_get_nixlf_mcam_index(struct npc_mcam *mcam, u16 pcifunc, int nixlf, int type); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index 2ca182a4ce82..8a7ac5a8b821 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -441,16 +441,26 @@ void rvu_cgx_enadis_rx_bp(struct rvu *rvu, int pf, bool enable) int rvu_cgx_config_rxtx(struct rvu *rvu, u16 pcifunc, bool start) { int pf = rvu_get_pf(pcifunc); + struct mac_ops *mac_ops; u8 cgx_id, lmac_id; + void *cgxd; if (!is_cgx_config_permitted(rvu, pcifunc)) return LMAC_AF_ERR_PERM_DENIED; rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); + cgxd = rvu_cgx_pdata(cgx_id, rvu); + mac_ops = get_mac_ops(cgxd); + + return mac_ops->mac_rx_tx_enable(cgxd, lmac_id, start); +} - cgx_lmac_rx_tx_enable(rvu_cgx_pdata(cgx_id, rvu), lmac_id, start); +int rvu_cgx_config_tx(void *cgxd, int lmac_id, bool enable) +{ + struct mac_ops *mac_ops; - return 0; + mac_ops = get_mac_ops(cgxd); + return mac_ops->mac_tx_enable(cgxd, lmac_id, enable); } void rvu_cgx_disable_dmac_entries(struct rvu *rvu, u16 pcifunc) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index a09a507369ac..d1eddb769a41 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -1224,6 +1224,8 @@ static void print_nix_cn10k_sq_ctx(struct seq_file *m, seq_printf(m, "W3: head_offset\t\t\t%d\nW3: smenq_next_sqb_vld\t\t%d\n\n", sq_ctx->head_offset, sq_ctx->smenq_next_sqb_vld); + seq_printf(m, "W3: smq_next_sq_vld\t\t%d\nW3: smq_pend\t\t\t%d\n", + sq_ctx->smq_next_sq_vld, sq_ctx->smq_pend); seq_printf(m, "W4: next_sqb \t\t\t%llx\n\n", sq_ctx->next_sqb); seq_printf(m, "W5: tail_sqb \t\t\t%llx\n\n", sq_ctx->tail_sqb); seq_printf(m, "W6: smenq_sqb \t\t\t%llx\n\n", sq_ctx->smenq_sqb); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index d8b1948aaa0a..97fb61915379 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -512,11 +512,11 @@ static int rvu_nix_get_bpid(struct rvu *rvu, struct nix_bp_cfg_req *req, cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST); lmac_chan_cnt = cfg & 0xFF; - cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST1); - sdp_chan_cnt = cfg & 0xFFF; - cgx_bpid_cnt = hw->cgx_links * lmac_chan_cnt; lbk_bpid_cnt = hw->lbk_links * ((cfg >> 16) & 0xFF); + + cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST1); + sdp_chan_cnt = cfg & 0xFFF; sdp_bpid_cnt = hw->sdp_links * sdp_chan_cnt; pfvf = rvu_get_pfvf(rvu, req->hdr.pcifunc); @@ -2068,8 +2068,8 @@ static int nix_smq_flush(struct rvu *rvu, int blkaddr, /* enable cgx tx if disabled */ if (is_pf_cgxmapped(rvu, pf)) { rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); - restore_tx_en = !cgx_lmac_tx_enable(rvu_cgx_pdata(cgx_id, rvu), - lmac_id, true); + restore_tx_en = !rvu_cgx_config_tx(rvu_cgx_pdata(cgx_id, rvu), + lmac_id, true); } cfg = rvu_read64(rvu, blkaddr, NIX_AF_SMQX_CFG(smq)); @@ -2092,7 +2092,7 @@ static int nix_smq_flush(struct rvu *rvu, int blkaddr, rvu_cgx_enadis_rx_bp(rvu, pf, true); /* restore cgx tx state */ if (restore_tx_en) - cgx_lmac_tx_enable(rvu_cgx_pdata(cgx_id, rvu), lmac_id, false); + rvu_cgx_config_tx(rvu_cgx_pdata(cgx_id, rvu), lmac_id, false); return err; } @@ -3878,7 +3878,7 @@ nix_config_link_credits(struct rvu *rvu, int blkaddr, int link, /* Enable cgx tx if disabled for credits to be back */ if (is_pf_cgxmapped(rvu, pf)) { rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); - restore_tx_en = !cgx_lmac_tx_enable(rvu_cgx_pdata(cgx_id, rvu), + restore_tx_en = !rvu_cgx_config_tx(rvu_cgx_pdata(cgx_id, rvu), lmac_id, true); } @@ -3891,8 +3891,8 @@ nix_config_link_credits(struct rvu *rvu, int blkaddr, int link, NIX_AF_TL1X_SW_XOFF(schq), BIT_ULL(0)); } - rc = -EBUSY; - poll_tmo = jiffies + usecs_to_jiffies(10000); + rc = NIX_AF_ERR_LINK_CREDITS; + poll_tmo = jiffies + usecs_to_jiffies(200000); /* Wait for credits to return */ do { if (time_after(jiffies, poll_tmo)) @@ -3918,7 +3918,7 @@ exit: /* Restore state of cgx tx */ if (restore_tx_en) - cgx_lmac_tx_enable(rvu_cgx_pdata(cgx_id, rvu), lmac_id, false); + rvu_cgx_config_tx(rvu_cgx_pdata(cgx_id, rvu), lmac_id, false); mutex_unlock(&rvu->rsrc_lock); return rc; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index c0005a1feee6..91f86d77cd41 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -402,6 +402,7 @@ static void npc_fixup_vf_rule(struct rvu *rvu, struct npc_mcam *mcam, int blkaddr, int index, struct mcam_entry *entry, bool *enable) { + struct rvu_npc_mcam_rule *rule; u16 owner, target_func; struct rvu_pfvf *pfvf; u64 rx_action; @@ -423,6 +424,12 @@ static void npc_fixup_vf_rule(struct rvu *rvu, struct npc_mcam *mcam, test_bit(NIXLF_INITIALIZED, &pfvf->flags))) *enable = false; + /* fix up not needed for the rules added by user(ntuple filters) */ + list_for_each_entry(rule, &mcam->mcam_rules, list) { + if (rule->entry == index) + return; + } + /* copy VF default entry action to the VF mcam entry */ rx_action = npc_get_default_entry_action(rvu, mcam, blkaddr, target_func); @@ -489,8 +496,8 @@ static void npc_config_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam, } /* PF installing VF rule */ - if (intf == NIX_INTF_RX && actindex < mcam->bmap_entries) - npc_fixup_vf_rule(rvu, mcam, blkaddr, index, entry, &enable); + if (is_npc_intf_rx(intf) && actindex < mcam->bmap_entries) + npc_fixup_vf_rule(rvu, mcam, blkaddr, actindex, entry, &enable); /* Set 'action' */ rvu_write64(rvu, blkaddr, @@ -916,7 +923,8 @@ static void npc_update_vf_flow_entry(struct rvu *rvu, struct npc_mcam *mcam, int blkaddr, u16 pcifunc, u64 rx_action) { int actindex, index, bank, entry; - bool enable; + struct rvu_npc_mcam_rule *rule; + bool enable, update; if (!(pcifunc & RVU_PFVF_FUNC_MASK)) return; @@ -924,6 +932,14 @@ static void npc_update_vf_flow_entry(struct rvu *rvu, struct npc_mcam *mcam, mutex_lock(&mcam->lock); for (index = 0; index < mcam->bmap_entries; index++) { if (mcam->entry2target_pffunc[index] == pcifunc) { + update = true; + /* update not needed for the rules added via ntuple filters */ + list_for_each_entry(rule, &mcam->mcam_rules, list) { + if (rule->entry == index) + update = false; + } + if (!update) + continue; bank = npc_get_bank(mcam, index); actindex = index; entry = index & (mcam->banksize - 1); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c index ff2b21999f36..19c53e591d0d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c @@ -1098,14 +1098,6 @@ find_rule: write_req.cntr = rule->cntr; } - err = rvu_mbox_handler_npc_mcam_write_entry(rvu, &write_req, - &write_rsp); - if (err) { - rvu_mcam_remove_counter_from_rule(rvu, owner, rule); - if (new) - kfree(rule); - return err; - } /* update rule */ memcpy(&rule->packet, &dummy.packet, sizeof(rule->packet)); memcpy(&rule->mask, &dummy.mask, sizeof(rule->mask)); @@ -1132,6 +1124,18 @@ find_rule: if (req->default_rule) pfvf->def_ucast_rule = rule; + /* write to mcam entry registers */ + err = rvu_mbox_handler_npc_mcam_write_entry(rvu, &write_req, + &write_rsp); + if (err) { + rvu_mcam_remove_counter_from_rule(rvu, owner, rule); + if (new) { + list_del(&rule->list); + kfree(rule); + } + return err; + } + /* VF's MAC address is being changed via PF */ if (pf_set_vfs_mac) { ether_addr_copy(pfvf->default_mac, req->packet.dmac); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index 61e52812983f..14509fc64cce 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -603,6 +603,7 @@ static inline void __cn10k_aura_freeptr(struct otx2_nic *pfvf, u64 aura, size++; tar_addr |= ((size - 1) & 0x7) << 4; } + dma_wmb(); memcpy((u64 *)lmt_info->lmt_addr, ptrs, sizeof(u64) * num_ptrs); /* Perform LMTST flush */ cn10k_lmt_flush(val, tar_addr); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 6080ebd9bd94..d39341e4ab37 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -394,7 +394,12 @@ static int otx2_forward_vf_mbox_msgs(struct otx2_nic *pf, dst_mdev->msg_size = mbox_hdr->msg_size; dst_mdev->num_msgs = num_msgs; err = otx2_sync_mbox_msg(dst_mbox); - if (err) { + /* Error code -EIO indicate there is a communication failure + * to the AF. Rest of the error codes indicate that AF processed + * VF messages and set the error codes in response messages + * (if any) so simply forward responses to VF. + */ + if (err == -EIO) { dev_warn(pf->dev, "AF not responding to VF%d messages\n", vf); /* restore PF mbase and exit */ diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c b/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c index ca5f1177963d..ce5970bdcc6a 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c @@ -40,11 +40,12 @@ static int lan966x_mac_wait_for_completion(struct lan966x *lan966x) { u32 val; - return readx_poll_timeout(lan966x_mac_get_status, - lan966x, val, - (ANA_MACACCESS_MAC_TABLE_CMD_GET(val)) == - MACACCESS_CMD_IDLE, - TABLE_UPDATE_SLEEP_US, TABLE_UPDATE_TIMEOUT_US); + return readx_poll_timeout_atomic(lan966x_mac_get_status, + lan966x, val, + (ANA_MACACCESS_MAC_TABLE_CMD_GET(val)) == + MACACCESS_CMD_IDLE, + TABLE_UPDATE_SLEEP_US, + TABLE_UPDATE_TIMEOUT_US); } static void lan966x_mac_select(struct lan966x *lan966x, diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c index 2cb70da63db3..1f60fd125a1d 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c @@ -182,9 +182,9 @@ static int lan966x_port_inj_ready(struct lan966x *lan966x, u8 grp) { u32 val; - return readx_poll_timeout(lan966x_port_inj_status, lan966x, val, - QS_INJ_STATUS_FIFO_RDY_GET(val) & BIT(grp), - READL_SLEEP_US, READL_TIMEOUT_US); + return readx_poll_timeout_atomic(lan966x_port_inj_status, lan966x, val, + QS_INJ_STATUS_FIFO_RDY_GET(val) & BIT(grp), + READL_SLEEP_US, READL_TIMEOUT_US); } static int lan966x_port_ifh_xmit(struct sk_buff *skb, diff --git a/drivers/net/ethernet/seeq/ether3.c b/drivers/net/ethernet/seeq/ether3.c index 16a4cbae9326..c672f92d65e9 100644 --- a/drivers/net/ethernet/seeq/ether3.c +++ b/drivers/net/ethernet/seeq/ether3.c @@ -749,6 +749,7 @@ ether3_probe(struct expansion_card *ec, const struct ecard_id *id) const struct ether3_data *data = id->data; struct net_device *dev; int bus_type, ret; + u8 addr[ETH_ALEN]; ether3_banner(); @@ -776,7 +777,8 @@ ether3_probe(struct expansion_card *ec, const struct ecard_id *id) priv(dev)->seeq = priv(dev)->base + data->base_offset; dev->irq = ec->irq; - ether3_addr(dev->dev_addr, ec); + ether3_addr(addr, ec); + eth_hw_addr_set(dev, addr); priv(dev)->dev = dev; timer_setup(&priv(dev)->timer, ether3_ledoff, 0); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index 617d0e4c6495..09644ab0d87a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -756,7 +756,7 @@ static int sun8i_dwmac_reset(struct stmmac_priv *priv) if (err) { dev_err(priv->device, "EMAC reset timeout\n"); - return -EFAULT; + return err; } return 0; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c index e2e0f977875d..dde5b772a5af 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c @@ -22,21 +22,21 @@ #define ETHER_CLK_SEL_RMII_CLK_EN BIT(2) #define ETHER_CLK_SEL_RMII_CLK_RST BIT(3) #define ETHER_CLK_SEL_DIV_SEL_2 BIT(4) -#define ETHER_CLK_SEL_DIV_SEL_20 BIT(0) +#define ETHER_CLK_SEL_DIV_SEL_20 0 #define ETHER_CLK_SEL_FREQ_SEL_125M (BIT(9) | BIT(8)) #define ETHER_CLK_SEL_FREQ_SEL_50M BIT(9) #define ETHER_CLK_SEL_FREQ_SEL_25M BIT(8) #define ETHER_CLK_SEL_FREQ_SEL_2P5M 0 -#define ETHER_CLK_SEL_TX_CLK_EXT_SEL_IN BIT(0) +#define ETHER_CLK_SEL_TX_CLK_EXT_SEL_IN 0 #define ETHER_CLK_SEL_TX_CLK_EXT_SEL_TXC BIT(10) #define ETHER_CLK_SEL_TX_CLK_EXT_SEL_DIV BIT(11) -#define ETHER_CLK_SEL_RX_CLK_EXT_SEL_IN BIT(0) +#define ETHER_CLK_SEL_RX_CLK_EXT_SEL_IN 0 #define ETHER_CLK_SEL_RX_CLK_EXT_SEL_RXC BIT(12) #define ETHER_CLK_SEL_RX_CLK_EXT_SEL_DIV BIT(13) -#define ETHER_CLK_SEL_TX_CLK_O_TX_I BIT(0) +#define ETHER_CLK_SEL_TX_CLK_O_TX_I 0 #define ETHER_CLK_SEL_TX_CLK_O_RMII_I BIT(14) #define ETHER_CLK_SEL_TX_O_E_N_IN BIT(15) -#define ETHER_CLK_SEL_RMII_CLK_SEL_IN BIT(0) +#define ETHER_CLK_SEL_RMII_CLK_SEL_IN 0 #define ETHER_CLK_SEL_RMII_CLK_SEL_RX_C BIT(16) #define ETHER_CLK_SEL_RX_TX_CLK_EN (ETHER_CLK_SEL_RX_CLK_EN | ETHER_CLK_SEL_TX_CLK_EN) @@ -96,31 +96,41 @@ static void visconti_eth_fix_mac_speed(void *priv, unsigned int speed) val |= ETHER_CLK_SEL_TX_O_E_N_IN; writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL); + /* Set Clock-Mux, Start clock, Set TX_O direction */ switch (dwmac->phy_intf_sel) { case ETHER_CONFIG_INTF_RGMII: val = clk_sel_val | ETHER_CLK_SEL_RX_CLK_EXT_SEL_RXC; + writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL); + + val |= ETHER_CLK_SEL_RX_TX_CLK_EN; + writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL); + + val &= ~ETHER_CLK_SEL_TX_O_E_N_IN; + writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL); break; case ETHER_CONFIG_INTF_RMII: val = clk_sel_val | ETHER_CLK_SEL_RX_CLK_EXT_SEL_DIV | - ETHER_CLK_SEL_TX_CLK_EXT_SEL_TXC | ETHER_CLK_SEL_TX_O_E_N_IN | + ETHER_CLK_SEL_TX_CLK_EXT_SEL_DIV | ETHER_CLK_SEL_TX_O_E_N_IN | ETHER_CLK_SEL_RMII_CLK_SEL_RX_C; + writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL); + + val |= ETHER_CLK_SEL_RMII_CLK_RST; + writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL); + + val |= ETHER_CLK_SEL_RMII_CLK_EN | ETHER_CLK_SEL_RX_TX_CLK_EN; + writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL); break; case ETHER_CONFIG_INTF_MII: default: val = clk_sel_val | ETHER_CLK_SEL_RX_CLK_EXT_SEL_RXC | - ETHER_CLK_SEL_TX_CLK_EXT_SEL_DIV | ETHER_CLK_SEL_TX_O_E_N_IN | - ETHER_CLK_SEL_RMII_CLK_EN; + ETHER_CLK_SEL_TX_CLK_EXT_SEL_TXC | ETHER_CLK_SEL_TX_O_E_N_IN; + writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL); + + val |= ETHER_CLK_SEL_RX_TX_CLK_EN; + writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL); break; } - /* Start clock */ - writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL); - val |= ETHER_CLK_SEL_RX_TX_CLK_EN; - writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL); - - val &= ~ETHER_CLK_SEL_TX_O_E_N_IN; - writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL); - spin_unlock_irqrestore(&dwmac->lock, flags); } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 40b5ed94cb54..5b195d5051d6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -194,7 +194,6 @@ struct stmmac_priv { u32 tx_coal_timer[MTL_MAX_TX_QUEUES]; u32 rx_coal_frames[MTL_MAX_TX_QUEUES]; - int tx_coalesce; int hwts_tx_en; bool tx_path_in_lpi_mode; bool tso; @@ -229,7 +228,6 @@ struct stmmac_priv { unsigned int flow_ctrl; unsigned int pause; struct mii_bus *mii; - int mii_irq[PHY_MAX_ADDR]; struct phylink_config phylink_config; struct phylink *phylink; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 6708ca2aa4f7..639a753266e6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -402,7 +402,7 @@ static void stmmac_lpi_entry_timer_config(struct stmmac_priv *priv, bool en) * Description: this function is to verify and enter in LPI mode in case of * EEE. */ -static void stmmac_enable_eee_mode(struct stmmac_priv *priv) +static int stmmac_enable_eee_mode(struct stmmac_priv *priv) { u32 tx_cnt = priv->plat->tx_queues_to_use; u32 queue; @@ -412,13 +412,14 @@ static void stmmac_enable_eee_mode(struct stmmac_priv *priv) struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue]; if (tx_q->dirty_tx != tx_q->cur_tx) - return; /* still unfinished work */ + return -EBUSY; /* still unfinished work */ } /* Check and enter in LPI mode */ if (!priv->tx_path_in_lpi_mode) stmmac_set_eee_mode(priv, priv->hw, priv->plat->en_tx_lpi_clockgating); + return 0; } /** @@ -450,8 +451,8 @@ static void stmmac_eee_ctrl_timer(struct timer_list *t) { struct stmmac_priv *priv = from_timer(priv, t, eee_ctrl_timer); - stmmac_enable_eee_mode(priv); - mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(priv->tx_lpi_timer)); + if (stmmac_enable_eee_mode(priv)) + mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(priv->tx_lpi_timer)); } /** @@ -889,6 +890,9 @@ static int stmmac_init_ptp(struct stmmac_priv *priv) bool xmac = priv->plat->has_gmac4 || priv->plat->has_xgmac; int ret; + if (priv->plat->ptp_clk_freq_config) + priv->plat->ptp_clk_freq_config(priv); + ret = stmmac_init_tstamp_counter(priv, STMMAC_HWTS_ACTIVE); if (ret) return ret; @@ -911,8 +915,6 @@ static int stmmac_init_ptp(struct stmmac_priv *priv) priv->hwts_tx_en = 0; priv->hwts_rx_en = 0; - stmmac_ptp_register(priv); - return 0; } @@ -2647,8 +2649,8 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue) if (priv->eee_enabled && !priv->tx_path_in_lpi_mode && priv->eee_sw_timer_en) { - stmmac_enable_eee_mode(priv); - mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(priv->tx_lpi_timer)); + if (stmmac_enable_eee_mode(priv)) + mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(priv->tx_lpi_timer)); } /* We still have pending packets, let's call for a new scheduling */ @@ -3238,7 +3240,7 @@ static int stmmac_fpe_start_wq(struct stmmac_priv *priv) /** * stmmac_hw_setup - setup mac in a usable state. * @dev : pointer to the device structure. - * @init_ptp: initialize PTP if set + * @ptp_register: register PTP if set * Description: * this is the main function to setup the HW in a usable state because the * dma engine is reset, the core registers are configured (e.g. AXI, @@ -3248,7 +3250,7 @@ static int stmmac_fpe_start_wq(struct stmmac_priv *priv) * 0 on success and an appropriate (-)ve integer as defined in errno.h * file on failure. */ -static int stmmac_hw_setup(struct net_device *dev, bool init_ptp) +static int stmmac_hw_setup(struct net_device *dev, bool ptp_register) { struct stmmac_priv *priv = netdev_priv(dev); u32 rx_cnt = priv->plat->rx_queues_to_use; @@ -3305,13 +3307,13 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp) stmmac_mmc_setup(priv); - if (init_ptp) { - ret = stmmac_init_ptp(priv); - if (ret == -EOPNOTSUPP) - netdev_warn(priv->dev, "PTP not supported by HW\n"); - else if (ret) - netdev_warn(priv->dev, "PTP init failed\n"); - } + ret = stmmac_init_ptp(priv); + if (ret == -EOPNOTSUPP) + netdev_warn(priv->dev, "PTP not supported by HW\n"); + else if (ret) + netdev_warn(priv->dev, "PTP init failed\n"); + else if (ptp_register) + stmmac_ptp_register(priv); priv->eee_tw_timer = STMMAC_DEFAULT_TWT_LS; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c index 0d24ebd37873..1c9f02f9c317 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -297,9 +297,6 @@ void stmmac_ptp_register(struct stmmac_priv *priv) { int i; - if (priv->plat->ptp_clk_freq_config) - priv->plat->ptp_clk_freq_config(priv); - for (i = 0; i < priv->dma_cap.pps_out_num; i++) { if (i >= STMMAC_PPS_MAX) break; diff --git a/drivers/net/ethernet/ti/cpsw_priv.c b/drivers/net/ethernet/ti/cpsw_priv.c index ba220593e6db..8f6817f346ba 100644 --- a/drivers/net/ethernet/ti/cpsw_priv.c +++ b/drivers/net/ethernet/ti/cpsw_priv.c @@ -1146,7 +1146,7 @@ int cpsw_fill_rx_channels(struct cpsw_priv *priv) static struct page_pool *cpsw_create_page_pool(struct cpsw_common *cpsw, int size) { - struct page_pool_params pp_params; + struct page_pool_params pp_params = {}; struct page_pool *pool; pp_params.order = 0; diff --git a/drivers/net/ethernet/tundra/tsi108_eth.c b/drivers/net/ethernet/tundra/tsi108_eth.c index cf0917b29e30..5251fc324221 100644 --- a/drivers/net/ethernet/tundra/tsi108_eth.c +++ b/drivers/net/ethernet/tundra/tsi108_eth.c @@ -1091,20 +1091,22 @@ static int tsi108_get_mac(struct net_device *dev) struct tsi108_prv_data *data = netdev_priv(dev); u32 word1 = TSI_READ(TSI108_MAC_ADDR1); u32 word2 = TSI_READ(TSI108_MAC_ADDR2); + u8 addr[ETH_ALEN]; /* Note that the octets are reversed from what the manual says, * producing an even weirder ordering... */ if (word2 == 0 && word1 == 0) { - dev->dev_addr[0] = 0x00; - dev->dev_addr[1] = 0x06; - dev->dev_addr[2] = 0xd2; - dev->dev_addr[3] = 0x00; - dev->dev_addr[4] = 0x00; + addr[0] = 0x00; + addr[1] = 0x06; + addr[2] = 0xd2; + addr[3] = 0x00; + addr[4] = 0x00; if (0x8 == data->phy) - dev->dev_addr[5] = 0x01; + addr[5] = 0x01; else - dev->dev_addr[5] = 0x02; + addr[5] = 0x02; + eth_hw_addr_set(dev, addr); word2 = (dev->dev_addr[0] << 16) | (dev->dev_addr[1] << 24); @@ -1114,12 +1116,13 @@ static int tsi108_get_mac(struct net_device *dev) TSI_WRITE(TSI108_MAC_ADDR1, word1); TSI_WRITE(TSI108_MAC_ADDR2, word2); } else { - dev->dev_addr[0] = (word2 >> 16) & 0xff; - dev->dev_addr[1] = (word2 >> 24) & 0xff; - dev->dev_addr[2] = (word1 >> 0) & 0xff; - dev->dev_addr[3] = (word1 >> 8) & 0xff; - dev->dev_addr[4] = (word1 >> 16) & 0xff; - dev->dev_addr[5] = (word1 >> 24) & 0xff; + addr[0] = (word2 >> 16) & 0xff; + addr[1] = (word2 >> 24) & 0xff; + addr[2] = (word1 >> 0) & 0xff; + addr[3] = (word1 >> 8) & 0xff; + addr[4] = (word1 >> 16) & 0xff; + addr[5] = (word1 >> 24) & 0xff; + eth_hw_addr_set(dev, addr); } if (!is_valid_ether_addr(dev->dev_addr)) { @@ -1136,14 +1139,12 @@ static int tsi108_set_mac(struct net_device *dev, void *addr) { struct tsi108_prv_data *data = netdev_priv(dev); u32 word1, word2; - int i; if (!is_valid_ether_addr(addr)) return -EADDRNOTAVAIL; - for (i = 0; i < 6; i++) - /* +2 is for the offset of the HW addr type */ - dev->dev_addr[i] = ((unsigned char *)addr)[i + 2]; + /* +2 is for the offset of the HW addr type */ + eth_hw_addr_set(dev, ((unsigned char *)addr) + 2); word2 = (dev->dev_addr[0] << 16) | (dev->dev_addr[1] << 24); diff --git a/drivers/net/hamradio/yam.c b/drivers/net/hamradio/yam.c index 6376b8485976..980f2be32f05 100644 --- a/drivers/net/hamradio/yam.c +++ b/drivers/net/hamradio/yam.c @@ -950,9 +950,7 @@ static int yam_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __ ym = memdup_user(data, sizeof(struct yamdrv_ioctl_mcs)); if (IS_ERR(ym)) return PTR_ERR(ym); - if (ym->cmd != SIOCYAMSMCS) - return -EINVAL; - if (ym->bitrate > YAM_MAXBITRATE) { + if (ym->cmd != SIOCYAMSMCS || ym->bitrate > YAM_MAXBITRATE) { kfree(ym); return -EINVAL; } diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index bb5104ae4610..3c683e0e40e9 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -854,6 +854,7 @@ static struct phy_driver broadcom_drivers[] = { .phy_id_mask = 0xfffffff0, .name = "Broadcom BCM54616S", /* PHY_GBIT_FEATURES */ + .soft_reset = genphy_soft_reset, .config_init = bcm54xx_config_init, .config_aneg = bcm54616s_config_aneg, .config_intr = bcm_phy_config_intr, diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 74d8e1dc125f..ce0bb5951b81 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1746,6 +1746,9 @@ void phy_detach(struct phy_device *phydev) phy_driver_is_genphy_10g(phydev)) device_release_driver(&phydev->mdio.dev); + /* Assert the reset signal */ + phy_device_reset(phydev, 1); + /* * The phydev might go away on the put_device() below, so avoid * a use-after-free bug by reading the underlying bus first. @@ -1757,9 +1760,6 @@ void phy_detach(struct phy_device *phydev) ndev_owner = dev->dev.parent->driver->owner; if (ndev_owner != bus->owner) module_put(bus->owner); - - /* Assert the reset signal */ - phy_device_reset(phydev, 1); } EXPORT_SYMBOL(phy_detach); diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c index 0c6c0d1843bc..c1512c9925a6 100644 --- a/drivers/net/phy/sfp-bus.c +++ b/drivers/net/phy/sfp-bus.c @@ -651,6 +651,11 @@ struct sfp_bus *sfp_bus_find_fwnode(struct fwnode_handle *fwnode) else if (ret < 0) return ERR_PTR(ret); + if (!fwnode_device_is_available(ref.fwnode)) { + fwnode_handle_put(ref.fwnode); + return NULL; + } + bus = sfp_bus_get(ref.fwnode); fwnode_handle_put(ref.fwnode); if (!bus) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 7ae041e2b3fb..f79a66d4e22c 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -1092,7 +1092,6 @@ static void __nvmf_concat_opt_tokens(struct seq_file *seq_file) static int nvmf_dev_show(struct seq_file *seq_file, void *private) { struct nvme_ctrl *ctrl; - int ret = 0; mutex_lock(&nvmf_dev_mutex); ctrl = seq_file->private; @@ -1106,7 +1105,7 @@ static int nvmf_dev_show(struct seq_file *seq_file, void *private) out_unlock: mutex_unlock(&nvmf_dev_mutex); - return ret; + return 0; } static int nvmf_dev_open(struct inode *inode, struct file *file) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index d8585df2c2fd..6a99ed680915 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3391,7 +3391,8 @@ static const struct pci_device_id nvme_id_table[] = { NVME_QUIRK_DEALLOCATE_ZEROES, }, { PCI_VDEVICE(INTEL, 0x0a54), /* Intel P4500/P4600 */ .driver_data = NVME_QUIRK_STRIPE_SIZE | - NVME_QUIRK_DEALLOCATE_ZEROES, }, + NVME_QUIRK_DEALLOCATE_ZEROES | + NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_VDEVICE(INTEL, 0x0a55), /* Dell Express Flash P4600 */ .driver_data = NVME_QUIRK_STRIPE_SIZE | NVME_QUIRK_DEALLOCATE_ZEROES, }, diff --git a/drivers/pci/msi/irqdomain.c b/drivers/pci/msi/irqdomain.c index 0d63541c4052..e9cf318e6670 100644 --- a/drivers/pci/msi/irqdomain.c +++ b/drivers/pci/msi/irqdomain.c @@ -28,6 +28,7 @@ void pci_msi_teardown_msi_irqs(struct pci_dev *dev) msi_domain_free_irqs_descs_locked(domain, &dev->dev); else pci_msi_legacy_teardown_msi_irqs(dev); + msi_free_msi_descs(&dev->dev); } /** @@ -171,8 +172,7 @@ struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode, if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS) pci_msi_domain_update_chip_ops(info); - info->flags |= MSI_FLAG_ACTIVATE_EARLY | MSI_FLAG_DEV_SYSFS | - MSI_FLAG_FREE_MSI_DESCS; + info->flags |= MSI_FLAG_ACTIVATE_EARLY | MSI_FLAG_DEV_SYSFS; if (IS_ENABLED(CONFIG_GENERIC_IRQ_RESERVATION_MODE)) info->flags |= MSI_FLAG_MUST_REACTIVATE; diff --git a/drivers/pci/msi/legacy.c b/drivers/pci/msi/legacy.c index cdbb4689db78..db761adef652 100644 --- a/drivers/pci/msi/legacy.c +++ b/drivers/pci/msi/legacy.c @@ -77,5 +77,4 @@ void pci_msi_legacy_teardown_msi_irqs(struct pci_dev *dev) { msi_device_destroy_sysfs(&dev->dev); arch_teardown_msi_irqs(dev); - msi_free_msi_descs(&dev->dev); } diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig index 3ddd426fc969..166019786653 100644 --- a/drivers/remoteproc/Kconfig +++ b/drivers/remoteproc/Kconfig @@ -180,6 +180,7 @@ config QCOM_Q6V5_ADSP depends on RPMSG_QCOM_GLINK_SMEM || RPMSG_QCOM_GLINK_SMEM=n depends on QCOM_SYSMON || QCOM_SYSMON=n depends on RPMSG_QCOM_GLINK || RPMSG_QCOM_GLINK=n + depends on QCOM_AOSS_QMP || QCOM_AOSS_QMP=n select MFD_SYSCON select QCOM_PIL_INFO select QCOM_MDT_LOADER @@ -199,6 +200,7 @@ config QCOM_Q6V5_MSS depends on RPMSG_QCOM_GLINK_SMEM || RPMSG_QCOM_GLINK_SMEM=n depends on QCOM_SYSMON || QCOM_SYSMON=n depends on RPMSG_QCOM_GLINK || RPMSG_QCOM_GLINK=n + depends on QCOM_AOSS_QMP || QCOM_AOSS_QMP=n select MFD_SYSCON select QCOM_MDT_LOADER select QCOM_PIL_INFO @@ -218,6 +220,7 @@ config QCOM_Q6V5_PAS depends on RPMSG_QCOM_GLINK_SMEM || RPMSG_QCOM_GLINK_SMEM=n depends on QCOM_SYSMON || QCOM_SYSMON=n depends on RPMSG_QCOM_GLINK || RPMSG_QCOM_GLINK=n + depends on QCOM_AOSS_QMP || QCOM_AOSS_QMP=n select MFD_SYSCON select QCOM_PIL_INFO select QCOM_MDT_LOADER @@ -239,6 +242,7 @@ config QCOM_Q6V5_WCSS depends on RPMSG_QCOM_GLINK_SMEM || RPMSG_QCOM_GLINK_SMEM=n depends on QCOM_SYSMON || QCOM_SYSMON=n depends on RPMSG_QCOM_GLINK || RPMSG_QCOM_GLINK=n + depends on QCOM_AOSS_QMP || QCOM_AOSS_QMP=n select MFD_SYSCON select QCOM_MDT_LOADER select QCOM_PIL_INFO diff --git a/drivers/remoteproc/qcom_q6v5.c b/drivers/remoteproc/qcom_q6v5.c index eada7e34f3af..442a388f8102 100644 --- a/drivers/remoteproc/qcom_q6v5.c +++ b/drivers/remoteproc/qcom_q6v5.c @@ -10,6 +10,7 @@ #include <linux/platform_device.h> #include <linux/interrupt.h> #include <linux/module.h> +#include <linux/soc/qcom/qcom_aoss.h> #include <linux/soc/qcom/smem.h> #include <linux/soc/qcom/smem_state.h> #include <linux/remoteproc.h> diff --git a/drivers/rpmsg/rpmsg_char.c b/drivers/rpmsg/rpmsg_char.c index d6214cb66026..5663cf799c95 100644 --- a/drivers/rpmsg/rpmsg_char.c +++ b/drivers/rpmsg/rpmsg_char.c @@ -93,7 +93,7 @@ static int rpmsg_eptdev_destroy(struct device *dev, void *data) /* wake up any blocked readers */ wake_up_interruptible(&eptdev->readq); - device_del(&eptdev->dev); + cdev_device_del(&eptdev->cdev, &eptdev->dev); put_device(&eptdev->dev); return 0; @@ -336,7 +336,6 @@ static void rpmsg_eptdev_release_device(struct device *dev) ida_simple_remove(&rpmsg_ept_ida, dev->id); ida_simple_remove(&rpmsg_minor_ida, MINOR(eptdev->dev.devt)); - cdev_del(&eptdev->cdev); kfree(eptdev); } @@ -381,19 +380,13 @@ static int rpmsg_eptdev_create(struct rpmsg_ctrldev *ctrldev, dev->id = ret; dev_set_name(dev, "rpmsg%d", ret); - ret = cdev_add(&eptdev->cdev, dev->devt, 1); + ret = cdev_device_add(&eptdev->cdev, &eptdev->dev); if (ret) goto free_ept_ida; /* We can now rely on the release function for cleanup */ dev->release = rpmsg_eptdev_release_device; - ret = device_add(dev); - if (ret) { - dev_err(dev, "device_add failed: %d\n", ret); - put_device(dev); - } - return ret; free_ept_ida: @@ -462,7 +455,6 @@ static void rpmsg_ctrldev_release_device(struct device *dev) ida_simple_remove(&rpmsg_ctrl_ida, dev->id); ida_simple_remove(&rpmsg_minor_ida, MINOR(dev->devt)); - cdev_del(&ctrldev->cdev); kfree(ctrldev); } @@ -497,19 +489,13 @@ static int rpmsg_chrdev_probe(struct rpmsg_device *rpdev) dev->id = ret; dev_set_name(&ctrldev->dev, "rpmsg_ctrl%d", ret); - ret = cdev_add(&ctrldev->cdev, dev->devt, 1); + ret = cdev_device_add(&ctrldev->cdev, &ctrldev->dev); if (ret) goto free_ctrl_ida; /* We can now rely on the release function for cleanup */ dev->release = rpmsg_ctrldev_release_device; - ret = device_add(dev); - if (ret) { - dev_err(&rpdev->dev, "device_add failed: %d\n", ret); - put_device(dev); - } - dev_set_drvdata(&rpdev->dev, ctrldev); return ret; @@ -535,7 +521,7 @@ static void rpmsg_chrdev_remove(struct rpmsg_device *rpdev) if (ret) dev_warn(&rpdev->dev, "failed to nuke endpoints: %d\n", ret); - device_del(&ctrldev->dev); + cdev_device_del(&ctrldev->cdev, &ctrldev->dev); put_device(&ctrldev->dev); } diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c index d24cafe02708..511bf8e0a436 100644 --- a/drivers/s390/scsi/zfcp_fc.c +++ b/drivers/s390/scsi/zfcp_fc.c @@ -521,6 +521,8 @@ static void zfcp_fc_adisc_handler(void *data) goto out; } + /* re-init to undo drop from zfcp_fc_adisc() */ + port->d_id = ntoh24(adisc_resp->adisc_port_id); /* port is good, unblock rport without going through erp */ zfcp_scsi_schedule_rport_register(port); out: @@ -534,6 +536,7 @@ static int zfcp_fc_adisc(struct zfcp_port *port) struct zfcp_fc_req *fc_req; struct zfcp_adapter *adapter = port->adapter; struct Scsi_Host *shost = adapter->scsi_host; + u32 d_id; int ret; fc_req = kmem_cache_zalloc(zfcp_fc_req_cache, GFP_ATOMIC); @@ -558,7 +561,15 @@ static int zfcp_fc_adisc(struct zfcp_port *port) fc_req->u.adisc.req.adisc_cmd = ELS_ADISC; hton24(fc_req->u.adisc.req.adisc_port_id, fc_host_port_id(shost)); - ret = zfcp_fsf_send_els(adapter, port->d_id, &fc_req->ct_els, + d_id = port->d_id; /* remember as destination for send els below */ + /* + * Force fresh GID_PN lookup on next port recovery. + * Must happen after request setup and before sending request, + * to prevent race with port->d_id re-init in zfcp_fc_adisc_handler(). + */ + port->d_id = 0; + + ret = zfcp_fsf_send_els(adapter, d_id, &fc_req->ct_els, ZFCP_FC_CTELS_TMO); if (ret) kmem_cache_free(zfcp_fc_req_cache, fc_req); diff --git a/drivers/scsi/3w-sas.c b/drivers/scsi/3w-sas.c index b9482da79512..3ebe66151dcb 100644 --- a/drivers/scsi/3w-sas.c +++ b/drivers/scsi/3w-sas.c @@ -1567,8 +1567,6 @@ static int twl_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id) pci_try_set_mwi(pdev); retval = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); - if (retval) - retval = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); if (retval) { TW_PRINTK(host, TW_DRIVER, 0x18, "Failed to set dma mask"); retval = -ENODEV; @@ -1786,8 +1784,6 @@ static int __maybe_unused twl_resume(struct device *dev) pci_try_set_mwi(pdev); retval = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); - if (retval) - retval = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); if (retval) { TW_PRINTK(host, TW_DRIVER, 0x25, "Failed to set dma mask during resume"); retval = -ENODEV; diff --git a/drivers/scsi/53c700.c b/drivers/scsi/53c700.c index 3ad3ebaca8e9..ad4972c0fc53 100644 --- a/drivers/scsi/53c700.c +++ b/drivers/scsi/53c700.c @@ -1507,7 +1507,6 @@ NCR_700_intr(int irq, void *dev_id) struct scsi_cmnd *SCp = hostdata->cmd; handled = 1; - SCp = hostdata->cmd; if(istat & SCSI_INT_PENDING) { udelay(10); diff --git a/drivers/scsi/bfa/bfad.c b/drivers/scsi/bfa/bfad.c index 440ef32be048..e5aa982ffedc 100644 --- a/drivers/scsi/bfa/bfad.c +++ b/drivers/scsi/bfa/bfad.c @@ -732,9 +732,6 @@ bfad_pci_init(struct pci_dev *pdev, struct bfad_s *bfad) pci_set_master(pdev); rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); - if (rc) - rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); - if (rc) { rc = -ENODEV; printk(KERN_ERR "dma_set_mask_and_coherent fail %p\n", pdev); @@ -1560,9 +1557,6 @@ bfad_pci_slot_reset(struct pci_dev *pdev) rc = dma_set_mask_and_coherent(&bfad->pcidev->dev, DMA_BIT_MASK(64)); if (rc) - rc = dma_set_mask_and_coherent(&bfad->pcidev->dev, - DMA_BIT_MASK(32)); - if (rc) goto out_disable_device; if (restart_bfa(bfad) == -1) diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c index 71fa62bd3083..9be273c320e2 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c +++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c @@ -82,7 +82,7 @@ static int bnx2fc_bind_pcidev(struct bnx2fc_hba *hba); static void bnx2fc_unbind_pcidev(struct bnx2fc_hba *hba); static struct fc_lport *bnx2fc_if_create(struct bnx2fc_interface *interface, struct device *parent, int npiv); -static void bnx2fc_destroy_work(struct work_struct *work); +static void bnx2fc_port_destroy(struct fcoe_port *port); static struct bnx2fc_hba *bnx2fc_hba_lookup(struct net_device *phys_dev); static struct bnx2fc_interface *bnx2fc_interface_lookup(struct net_device @@ -907,9 +907,6 @@ static void bnx2fc_indicate_netevent(void *context, unsigned long event, __bnx2fc_destroy(interface); } mutex_unlock(&bnx2fc_dev_lock); - - /* Ensure ALL destroy work has been completed before return */ - flush_workqueue(bnx2fc_wq); return; default: @@ -1215,8 +1212,8 @@ static int bnx2fc_vport_destroy(struct fc_vport *vport) mutex_unlock(&n_port->lp_mutex); bnx2fc_free_vport(interface->hba, port->lport); bnx2fc_port_shutdown(port->lport); + bnx2fc_port_destroy(port); bnx2fc_interface_put(interface); - queue_work(bnx2fc_wq, &port->destroy_work); return 0; } @@ -1525,7 +1522,6 @@ static struct fc_lport *bnx2fc_if_create(struct bnx2fc_interface *interface, port->lport = lport; port->priv = interface; port->get_netdev = bnx2fc_netdev; - INIT_WORK(&port->destroy_work, bnx2fc_destroy_work); /* Configure fcoe_port */ rc = bnx2fc_lport_config(lport); @@ -1653,8 +1649,8 @@ static void __bnx2fc_destroy(struct bnx2fc_interface *interface) bnx2fc_interface_cleanup(interface); bnx2fc_stop(interface); list_del(&interface->list); + bnx2fc_port_destroy(port); bnx2fc_interface_put(interface); - queue_work(bnx2fc_wq, &port->destroy_work); } /** @@ -1694,15 +1690,12 @@ netdev_err: return rc; } -static void bnx2fc_destroy_work(struct work_struct *work) +static void bnx2fc_port_destroy(struct fcoe_port *port) { - struct fcoe_port *port; struct fc_lport *lport; - port = container_of(work, struct fcoe_port, destroy_work); lport = port->lport; - - BNX2FC_HBA_DBG(lport, "Entered bnx2fc_destroy_work\n"); + BNX2FC_HBA_DBG(lport, "Entered %s, destroying lport %p\n", __func__, lport); bnx2fc_if_destroy(lport); } @@ -2556,9 +2549,6 @@ static void bnx2fc_ulp_exit(struct cnic_dev *dev) __bnx2fc_destroy(interface); mutex_unlock(&bnx2fc_dev_lock); - /* Ensure ALL destroy work has been completed before return */ - flush_workqueue(bnx2fc_wq); - bnx2fc_ulp_stop(hba); /* unregister cnic device */ if (test_and_clear_bit(BNX2FC_CNIC_REGISTERED, &hba->reg_with_cnic)) diff --git a/drivers/scsi/elx/libefc/efc_els.c b/drivers/scsi/elx/libefc/efc_els.c index 7bb4f9aad2c8..84bc81d7ce76 100644 --- a/drivers/scsi/elx/libefc/efc_els.c +++ b/drivers/scsi/elx/libefc/efc_els.c @@ -46,18 +46,14 @@ efc_els_io_alloc_size(struct efc_node *node, u32 reqlen, u32 rsplen) efc = node->efc; - spin_lock_irqsave(&node->els_ios_lock, flags); - if (!node->els_io_enabled) { efc_log_err(efc, "els io alloc disabled\n"); - spin_unlock_irqrestore(&node->els_ios_lock, flags); return NULL; } els = mempool_alloc(efc->els_io_pool, GFP_ATOMIC); if (!els) { atomic_add_return(1, &efc->els_io_alloc_failed_count); - spin_unlock_irqrestore(&node->els_ios_lock, flags); return NULL; } @@ -74,7 +70,6 @@ efc_els_io_alloc_size(struct efc_node *node, u32 reqlen, u32 rsplen) &els->io.req.phys, GFP_KERNEL); if (!els->io.req.virt) { mempool_free(els, efc->els_io_pool); - spin_unlock_irqrestore(&node->els_ios_lock, flags); return NULL; } @@ -94,10 +89,11 @@ efc_els_io_alloc_size(struct efc_node *node, u32 reqlen, u32 rsplen) /* add els structure to ELS IO list */ INIT_LIST_HEAD(&els->list_entry); + spin_lock_irqsave(&node->els_ios_lock, flags); list_add_tail(&els->list_entry, &node->els_ios_list); + spin_unlock_irqrestore(&node->els_ios_lock, flags); } - spin_unlock_irqrestore(&node->els_ios_lock, flags); return els; } diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index a05ec7aece5a..2f53a2ee024a 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -2666,9 +2666,6 @@ static struct Scsi_Host *hisi_sas_shost_alloc(struct platform_device *pdev, goto err_out; error = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)); - if (error) - error = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)); - if (error) { dev_err(dev, "No usable DMA addressing method\n"); goto err_out; diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index a45ef9a5e12e..a01a3a7b706b 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -4695,8 +4695,6 @@ hisi_sas_v3_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err_out; rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); - if (rc) - rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); if (rc) { dev_err(dev, "No usable DMA addressing method\n"); rc = -ENODEV; diff --git a/drivers/scsi/myrs.c b/drivers/scsi/myrs.c index 253ceca54a84..7eb8c39da366 100644 --- a/drivers/scsi/myrs.c +++ b/drivers/scsi/myrs.c @@ -2267,7 +2267,8 @@ static void myrs_cleanup(struct myrs_hba *cs) myrs_unmap(cs); if (cs->mmio_base) { - cs->disable_intr(cs); + if (cs->disable_intr) + cs->disable_intr(cs); iounmap(cs->mmio_base); cs->mmio_base = NULL; } diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c index bbf538fe15b3..2530d1365556 100644 --- a/drivers/scsi/pm8001/pm80xx_hwi.c +++ b/drivers/scsi/pm8001/pm80xx_hwi.c @@ -4151,10 +4151,22 @@ static int process_oq(struct pm8001_hba_info *pm8001_ha, u8 vec) u32 ret = MPI_IO_STATUS_FAIL; u32 regval; + /* + * Fatal errors are programmed to be signalled in irq vector + * pm8001_ha->max_q_num - 1 through pm8001_ha->main_cfg_tbl.pm80xx_tbl. + * fatal_err_interrupt + */ if (vec == (pm8001_ha->max_q_num - 1)) { + u32 mipsall_ready; + + if (pm8001_ha->chip_id == chip_8008 || + pm8001_ha->chip_id == chip_8009) + mipsall_ready = SCRATCH_PAD_MIPSALL_READY_8PORT; + else + mipsall_ready = SCRATCH_PAD_MIPSALL_READY_16PORT; + regval = pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_1); - if ((regval & SCRATCH_PAD_MIPSALL_READY) != - SCRATCH_PAD_MIPSALL_READY) { + if ((regval & mipsall_ready) != mipsall_ready) { pm8001_ha->controller_fatal_error = true; pm8001_dbg(pm8001_ha, FAIL, "Firmware Fatal error! Regval:0x%x\n", diff --git a/drivers/scsi/pm8001/pm80xx_hwi.h b/drivers/scsi/pm8001/pm80xx_hwi.h index c7e5d93bea92..c41ed039c92a 100644 --- a/drivers/scsi/pm8001/pm80xx_hwi.h +++ b/drivers/scsi/pm8001/pm80xx_hwi.h @@ -1405,8 +1405,12 @@ typedef struct SASProtocolTimerConfig SASProtocolTimerConfig_t; #define SCRATCH_PAD_BOOT_LOAD_SUCCESS 0x0 #define SCRATCH_PAD_IOP0_READY 0xC00 #define SCRATCH_PAD_IOP1_READY 0x3000 -#define SCRATCH_PAD_MIPSALL_READY (SCRATCH_PAD_IOP1_READY | \ +#define SCRATCH_PAD_MIPSALL_READY_16PORT (SCRATCH_PAD_IOP1_READY | \ SCRATCH_PAD_IOP0_READY | \ + SCRATCH_PAD_ILA_READY | \ + SCRATCH_PAD_RAAE_READY) +#define SCRATCH_PAD_MIPSALL_READY_8PORT (SCRATCH_PAD_IOP0_READY | \ + SCRATCH_PAD_ILA_READY | \ SCRATCH_PAD_RAAE_READY) /* boot loader state */ diff --git a/drivers/scsi/qedf/qedf_io.c b/drivers/scsi/qedf/qedf_io.c index 99a56ca1fb16..fab43dabe5b3 100644 --- a/drivers/scsi/qedf/qedf_io.c +++ b/drivers/scsi/qedf/qedf_io.c @@ -2250,6 +2250,7 @@ process_els: io_req->tm_flags == FCP_TMF_TGT_RESET) { clear_bit(QEDF_CMD_OUTSTANDING, &io_req->flags); io_req->sc_cmd = NULL; + kref_put(&io_req->refcount, qedf_release_cmd); complete(&io_req->tm_done); } diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c index cdc66e2a9488..6ad28bc8e948 100644 --- a/drivers/scsi/qedf/qedf_main.c +++ b/drivers/scsi/qedf/qedf_main.c @@ -911,7 +911,7 @@ void qedf_ctx_soft_reset(struct fc_lport *lport) struct qed_link_output if_link; if (lport->vport) { - QEDF_ERR(NULL, "Cannot issue host reset on NPIV port.\n"); + printk_ratelimited("Cannot issue host reset on NPIV port.\n"); return; } @@ -1864,6 +1864,7 @@ static int qedf_vport_create(struct fc_vport *vport, bool disabled) vport_qedf->cmd_mgr = base_qedf->cmd_mgr; init_completion(&vport_qedf->flogi_compl); INIT_LIST_HEAD(&vport_qedf->fcports); + INIT_DELAYED_WORK(&vport_qedf->stag_work, qedf_stag_change_work); rc = qedf_vport_libfc_config(vport, vn_port); if (rc) { @@ -3980,7 +3981,9 @@ void qedf_stag_change_work(struct work_struct *work) struct qedf_ctx *qedf = container_of(work, struct qedf_ctx, stag_work.work); - QEDF_ERR(&qedf->dbg_ctx, "Performing software context reset.\n"); + printk_ratelimited("[%s]:[%s:%d]:%d: Performing software context reset.", + dev_name(&qedf->pdev->dev), __func__, __LINE__, + qedf->dbg_ctx.host_no); qedf_ctx_soft_reset(qedf->lport); } diff --git a/drivers/scsi/ufs/ufshcd-pltfrm.c b/drivers/scsi/ufs/ufshcd-pltfrm.c index 8b16bbbcb806..87975d1a21c8 100644 --- a/drivers/scsi/ufs/ufshcd-pltfrm.c +++ b/drivers/scsi/ufs/ufshcd-pltfrm.c @@ -92,6 +92,11 @@ static int ufshcd_parse_clock_info(struct ufs_hba *hba) clki->min_freq = clkfreq[i]; clki->max_freq = clkfreq[i+1]; clki->name = devm_kstrdup(dev, name, GFP_KERNEL); + if (!clki->name) { + ret = -ENOMEM; + goto out; + } + if (!strcmp(name, "ref_clk")) clki->keep_link_active = true; dev_dbg(dev, "%s: min %u max %u name %s\n", "freq-table-hz", @@ -127,6 +132,8 @@ static int ufshcd_populate_vreg(struct device *dev, const char *name, return -ENOMEM; vreg->name = devm_kstrdup(dev, name, GFP_KERNEL); + if (!vreg->name) + return -ENOMEM; snprintf(prop_name, MAX_PROP_SIZE, "%s-max-microamp", name); if (of_property_read_u32(np, prop_name, &vreg->max_uA)) { diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 460d2b440d2e..50b12d60dc1b 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -8613,7 +8613,7 @@ static void ufshcd_hba_exit(struct ufs_hba *hba) * @pwr_mode: device power mode to set * * Returns 0 if requested power mode is set successfully - * Returns non-zero if failed to set the requested power mode + * Returns < 0 if failed to set the requested power mode */ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba, enum ufs_dev_pwr_mode pwr_mode) @@ -8667,8 +8667,11 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba, sdev_printk(KERN_WARNING, sdp, "START_STOP failed for power mode: %d, result %x\n", pwr_mode, ret); - if (ret > 0 && scsi_sense_valid(&sshdr)) - scsi_print_sense_hdr(sdp, NULL, &sshdr); + if (ret > 0) { + if (scsi_sense_valid(&sshdr)) + scsi_print_sense_hdr(sdp, NULL, &sshdr); + ret = -EIO; + } } if (!ret) diff --git a/drivers/scsi/ufs/ufshci.h b/drivers/scsi/ufs/ufshci.h index 6a295c88d850..a7ff0e5b5494 100644 --- a/drivers/scsi/ufs/ufshci.h +++ b/drivers/scsi/ufs/ufshci.h @@ -142,7 +142,8 @@ static inline u32 ufshci_version(u32 major, u32 minor) #define INT_FATAL_ERRORS (DEVICE_FATAL_ERROR |\ CONTROLLER_FATAL_ERROR |\ SYSTEM_BUS_FATAL_ERROR |\ - CRYPTO_ENGINE_FATAL_ERROR) + CRYPTO_ENGINE_FATAL_ERROR |\ + UIC_LINK_LOST) /* HCS - Host Controller Status 30h */ #define DEVICE_PRESENT 0x1 diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c index 8075f60fd02c..2d5cf1714ae0 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.c +++ b/drivers/target/iscsi/iscsi_target_tpg.c @@ -443,6 +443,9 @@ static bool iscsit_tpg_check_network_portal( break; } spin_unlock(&tpg->tpg_np_lock); + + if (match) + break; } spin_unlock(&tiqn->tiqn_tpg_lock); diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index ba27b274c967..0b1808e3a912 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -322,6 +322,7 @@ static int addr_cnt; #define GSM1_ESCAPE_BITS 0x20 #define XON 0x11 #define XOFF 0x13 +#define ISO_IEC_646_MASK 0x7F static const struct tty_port_operations gsm_port_ops; @@ -531,7 +532,8 @@ static int gsm_stuff_frame(const u8 *input, u8 *output, int len) int olen = 0; while (len--) { if (*input == GSM1_SOF || *input == GSM1_ESCAPE - || *input == XON || *input == XOFF) { + || (*input & ISO_IEC_646_MASK) == XON + || (*input & ISO_IEC_646_MASK) == XOFF) { *output++ = GSM1_ESCAPE; *output++ = *input++ ^ GSM1_ESCAPE_BITS; olen++; diff --git a/drivers/tty/rpmsg_tty.c b/drivers/tty/rpmsg_tty.c index dae2a4e44f38..29db413bbc03 100644 --- a/drivers/tty/rpmsg_tty.c +++ b/drivers/tty/rpmsg_tty.c @@ -50,10 +50,17 @@ static int rpmsg_tty_cb(struct rpmsg_device *rpdev, void *data, int len, void *p static int rpmsg_tty_install(struct tty_driver *driver, struct tty_struct *tty) { struct rpmsg_tty_port *cport = idr_find(&tty_idr, tty->index); + struct tty_port *port; tty->driver_data = cport; - return tty_port_install(&cport->port, driver, tty); + port = tty_port_get(&cport->port); + return tty_port_install(port, driver, tty); +} + +static void rpmsg_tty_cleanup(struct tty_struct *tty) +{ + tty_port_put(tty->port); } static int rpmsg_tty_open(struct tty_struct *tty, struct file *filp) @@ -106,12 +113,19 @@ static unsigned int rpmsg_tty_write_room(struct tty_struct *tty) return size; } +static void rpmsg_tty_hangup(struct tty_struct *tty) +{ + tty_port_hangup(tty->port); +} + static const struct tty_operations rpmsg_tty_ops = { .install = rpmsg_tty_install, .open = rpmsg_tty_open, .close = rpmsg_tty_close, .write = rpmsg_tty_write, .write_room = rpmsg_tty_write_room, + .hangup = rpmsg_tty_hangup, + .cleanup = rpmsg_tty_cleanup, }; static struct rpmsg_tty_port *rpmsg_tty_alloc_cport(void) @@ -137,8 +151,10 @@ static struct rpmsg_tty_port *rpmsg_tty_alloc_cport(void) return cport; } -static void rpmsg_tty_release_cport(struct rpmsg_tty_port *cport) +static void rpmsg_tty_destruct_port(struct tty_port *port) { + struct rpmsg_tty_port *cport = container_of(port, struct rpmsg_tty_port, port); + mutex_lock(&idr_lock); idr_remove(&tty_idr, cport->id); mutex_unlock(&idr_lock); @@ -146,7 +162,10 @@ static void rpmsg_tty_release_cport(struct rpmsg_tty_port *cport) kfree(cport); } -static const struct tty_port_operations rpmsg_tty_port_ops = { }; +static const struct tty_port_operations rpmsg_tty_port_ops = { + .destruct = rpmsg_tty_destruct_port, +}; + static int rpmsg_tty_probe(struct rpmsg_device *rpdev) { @@ -166,7 +185,8 @@ static int rpmsg_tty_probe(struct rpmsg_device *rpdev) cport->id, dev); if (IS_ERR(tty_dev)) { ret = dev_err_probe(dev, PTR_ERR(tty_dev), "Failed to register tty port\n"); - goto err_destroy; + tty_port_put(&cport->port); + return ret; } cport->rpdev = rpdev; @@ -177,12 +197,6 @@ static int rpmsg_tty_probe(struct rpmsg_device *rpdev) rpdev->src, rpdev->dst, cport->id); return 0; - -err_destroy: - tty_port_destroy(&cport->port); - rpmsg_tty_release_cport(cport); - - return ret; } static void rpmsg_tty_remove(struct rpmsg_device *rpdev) @@ -192,13 +206,11 @@ static void rpmsg_tty_remove(struct rpmsg_device *rpdev) dev_dbg(&rpdev->dev, "Removing rpmsg tty device %d\n", cport->id); /* User hang up to release the tty */ - if (tty_port_initialized(&cport->port)) - tty_port_tty_hangup(&cport->port, false); + tty_port_tty_hangup(&cport->port, false); tty_unregister_device(rpmsg_tty_driver, cport->id); - tty_port_destroy(&cport->port); - rpmsg_tty_release_cport(cport); + tty_port_put(&cport->port); } static struct rpmsg_device_id rpmsg_driver_tty_id_table[] = { diff --git a/drivers/tty/serial/8250/8250_of.c b/drivers/tty/serial/8250/8250_of.c index bce28729dd7b..be8626234627 100644 --- a/drivers/tty/serial/8250/8250_of.c +++ b/drivers/tty/serial/8250/8250_of.c @@ -83,8 +83,17 @@ static int of_platform_serial_setup(struct platform_device *ofdev, port->mapsize = resource_size(&resource); /* Check for shifted address mapping */ - if (of_property_read_u32(np, "reg-offset", &prop) == 0) + if (of_property_read_u32(np, "reg-offset", &prop) == 0) { + if (prop >= port->mapsize) { + dev_warn(&ofdev->dev, "reg-offset %u exceeds region size %pa\n", + prop, &port->mapsize); + ret = -EINVAL; + goto err_unprepare; + } + port->mapbase += prop; + port->mapsize -= prop; + } port->iotype = UPIO_MEM; if (of_property_read_u32(np, "reg-io-width", &prop) == 0) { diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index e8b5469e9dfa..e17e97ea86fa 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -4779,8 +4779,30 @@ static const struct pci_device_id serial_pci_tbl[] = { { PCI_VENDOR_ID_INTASHIELD, PCI_DEVICE_ID_INTASHIELD_IS400, PCI_ANY_ID, PCI_ANY_ID, 0, 0, /* 135a.0dc0 */ pbn_b2_4_115200 }, + /* Brainboxes Devices */ /* - * BrainBoxes UC-260 + * Brainboxes UC-101 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x0BA1, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_2_115200 }, + /* + * Brainboxes UC-235/246 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x0AA1, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_1_115200 }, + /* + * Brainboxes UC-257 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x0861, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_2_115200 }, + /* + * Brainboxes UC-260/271/701/756 */ { PCI_VENDOR_ID_INTASHIELD, 0x0D21, PCI_ANY_ID, PCI_ANY_ID, @@ -4788,7 +4810,81 @@ static const struct pci_device_id serial_pci_tbl[] = { pbn_b2_4_115200 }, { PCI_VENDOR_ID_INTASHIELD, 0x0E34, PCI_ANY_ID, PCI_ANY_ID, - PCI_CLASS_COMMUNICATION_MULTISERIAL << 8, 0xffff00, + PCI_CLASS_COMMUNICATION_MULTISERIAL << 8, 0xffff00, + pbn_b2_4_115200 }, + /* + * Brainboxes UC-268 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x0841, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_4_115200 }, + /* + * Brainboxes UC-275/279 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x0881, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_8_115200 }, + /* + * Brainboxes UC-302 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x08E1, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_2_115200 }, + /* + * Brainboxes UC-310 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x08C1, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_2_115200 }, + /* + * Brainboxes UC-313 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x08A3, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_2_115200 }, + /* + * Brainboxes UC-320/324 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x0A61, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_1_115200 }, + /* + * Brainboxes UC-346 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x0B02, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_4_115200 }, + /* + * Brainboxes UC-357 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x0A81, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_2_115200 }, + { PCI_VENDOR_ID_INTASHIELD, 0x0A83, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_2_115200 }, + /* + * Brainboxes UC-368 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x0C41, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_4_115200 }, + /* + * Brainboxes UC-420/431 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x0921, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, pbn_b2_4_115200 }, /* * Perle PCI-RAS cards diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 2abb3de11a48..3b12bfc1ed67 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -2056,7 +2056,10 @@ static void serial8250_break_ctl(struct uart_port *port, int break_state) serial8250_rpm_put(up); } -static void wait_for_lsr(struct uart_8250_port *up, int bits) +/* + * Wait for transmitter & holding register to empty + */ +static void wait_for_xmitr(struct uart_8250_port *up, int bits) { unsigned int status, tmout = 10000; @@ -2073,16 +2076,6 @@ static void wait_for_lsr(struct uart_8250_port *up, int bits) udelay(1); touch_nmi_watchdog(); } -} - -/* - * Wait for transmitter & holding register to empty - */ -static void wait_for_xmitr(struct uart_8250_port *up, int bits) -{ - unsigned int tmout; - - wait_for_lsr(up, bits); /* Wait up to 1s for flow control if necessary */ if (up->port.flags & UPF_CONS_FLOW) { @@ -3333,35 +3326,6 @@ static void serial8250_console_restore(struct uart_8250_port *up) } /* - * Print a string to the serial port using the device FIFO - * - * It sends fifosize bytes and then waits for the fifo - * to get empty. - */ -static void serial8250_console_fifo_write(struct uart_8250_port *up, - const char *s, unsigned int count) -{ - int i; - const char *end = s + count; - unsigned int fifosize = up->port.fifosize; - bool cr_sent = false; - - while (s != end) { - wait_for_lsr(up, UART_LSR_THRE); - - for (i = 0; i < fifosize && s != end; ++i) { - if (*s == '\n' && !cr_sent) { - serial_out(up, UART_TX, '\r'); - cr_sent = true; - } else { - serial_out(up, UART_TX, *s++); - cr_sent = false; - } - } - } -} - -/* * Print a string to the serial port trying not to disturb * any possible real use of the port... * @@ -3376,7 +3340,7 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s, struct uart_8250_em485 *em485 = up->em485; struct uart_port *port = &up->port; unsigned long flags; - unsigned int ier, use_fifo; + unsigned int ier; int locked = 1; touch_nmi_watchdog(); @@ -3408,20 +3372,7 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s, mdelay(port->rs485.delay_rts_before_send); } - use_fifo = (up->capabilities & UART_CAP_FIFO) && - port->fifosize > 1 && - (serial_port_in(port, UART_FCR) & UART_FCR_ENABLE_FIFO) && - /* - * After we put a data in the fifo, the controller will send - * it regardless of the CTS state. Therefore, only use fifo - * if we don't use control flow. - */ - !(up->port.flags & UPF_CONS_FLOW); - - if (likely(use_fifo)) - serial8250_console_fifo_write(up, s, count); - else - uart_console_write(port, s, count, serial8250_console_putchar); + uart_console_write(port, s, count, serial8250_console_putchar); /* * Finally, wait for transmitter to become empty diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index 1f1df46242f9..ba053a68529f 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -1582,9 +1582,6 @@ static void pl011_set_mctrl(struct uart_port *port, unsigned int mctrl) container_of(port, struct uart_amba_port, port); unsigned int cr; - if (port->rs485.flags & SER_RS485_ENABLED) - mctrl &= ~TIOCM_RTS; - cr = pl011_read(uap, REG_CR); #define TIOCMBIT(tiocmbit, uartbit) \ @@ -1808,14 +1805,8 @@ static int pl011_startup(struct uart_port *port) cr &= UART011_CR_RTS | UART011_CR_DTR; cr |= UART01x_CR_UARTEN | UART011_CR_RXE; - if (port->rs485.flags & SER_RS485_ENABLED) { - if (port->rs485.flags & SER_RS485_RTS_AFTER_SEND) - cr &= ~UART011_CR_RTS; - else - cr |= UART011_CR_RTS; - } else { + if (!(port->rs485.flags & SER_RS485_ENABLED)) cr |= UART011_CR_TXE; - } pl011_write(cr, uap, REG_CR); diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index dc40c4155356..0db90be4c3bc 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -144,6 +144,11 @@ uart_update_mctrl(struct uart_port *port, unsigned int set, unsigned int clear) unsigned long flags; unsigned int old; + if (port->rs485.flags & SER_RS485_ENABLED) { + set &= ~TIOCM_RTS; + clear &= ~TIOCM_RTS; + } + spin_lock_irqsave(&port->lock, flags); old = port->mctrl; port->mctrl = (old & ~clear) | set; @@ -157,23 +162,10 @@ uart_update_mctrl(struct uart_port *port, unsigned int set, unsigned int clear) static void uart_port_dtr_rts(struct uart_port *uport, int raise) { - int rs485_on = uport->rs485_config && - (uport->rs485.flags & SER_RS485_ENABLED); - int RTS_after_send = !!(uport->rs485.flags & SER_RS485_RTS_AFTER_SEND); - - if (raise) { - if (rs485_on && RTS_after_send) { - uart_set_mctrl(uport, TIOCM_DTR); - uart_clear_mctrl(uport, TIOCM_RTS); - } else { - uart_set_mctrl(uport, TIOCM_DTR | TIOCM_RTS); - } - } else { - unsigned int clear = TIOCM_DTR; - - clear |= (!rs485_on || RTS_after_send) ? TIOCM_RTS : 0; - uart_clear_mctrl(uport, clear); - } + if (raise) + uart_set_mctrl(uport, TIOCM_DTR | TIOCM_RTS); + else + uart_clear_mctrl(uport, TIOCM_DTR | TIOCM_RTS); } /* @@ -1075,11 +1067,6 @@ uart_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear) goto out; if (!tty_io_error(tty)) { - if (uport->rs485.flags & SER_RS485_ENABLED) { - set &= ~TIOCM_RTS; - clear &= ~TIOCM_RTS; - } - uart_update_mctrl(uport, set, clear); ret = 0; } @@ -2390,6 +2377,9 @@ uart_configure_port(struct uart_driver *drv, struct uart_state *state, */ spin_lock_irqsave(&port->lock, flags); port->mctrl &= TIOCM_DTR; + if (port->rs485.flags & SER_RS485_ENABLED && + !(port->rs485.flags & SER_RS485_RTS_AFTER_SEND)) + port->mctrl |= TIOCM_RTS; port->ops->set_mctrl(port, port->mctrl); spin_unlock_irqrestore(&port->lock, flags); diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c index 1f89ab0e49ac..9570002d07e7 100644 --- a/drivers/tty/serial/stm32-usart.c +++ b/drivers/tty/serial/stm32-usart.c @@ -550,11 +550,23 @@ static void stm32_usart_transmit_chars(struct uart_port *port) struct stm32_port *stm32_port = to_stm32_port(port); const struct stm32_usart_offsets *ofs = &stm32_port->info->ofs; struct circ_buf *xmit = &port->state->xmit; + u32 isr; + int ret; if (port->x_char) { if (stm32_usart_tx_dma_started(stm32_port) && stm32_usart_tx_dma_enabled(stm32_port)) stm32_usart_clr_bits(port, ofs->cr3, USART_CR3_DMAT); + + /* Check that TDR is empty before filling FIFO */ + ret = + readl_relaxed_poll_timeout_atomic(port->membase + ofs->isr, + isr, + (isr & USART_SR_TXE), + 10, 1000); + if (ret) + dev_warn(port->dev, "1 character may be erased\n"); + writel_relaxed(port->x_char, port->membase + ofs->tdr); port->x_char = 0; port->icount.tx++; @@ -730,7 +742,7 @@ static void stm32_usart_start_tx(struct uart_port *port) struct serial_rs485 *rs485conf = &port->rs485; struct circ_buf *xmit = &port->state->xmit; - if (uart_circ_empty(xmit)) + if (uart_circ_empty(xmit) && !port->x_char) return; if (rs485conf->flags & SER_RS485_ENABLED) { diff --git a/drivers/usb/cdns3/drd.c b/drivers/usb/cdns3/drd.c index 55c73b1d8704..d00ff98dffab 100644 --- a/drivers/usb/cdns3/drd.c +++ b/drivers/usb/cdns3/drd.c @@ -483,11 +483,11 @@ int cdns_drd_exit(struct cdns *cdns) /* Indicate the cdns3 core was power lost before */ bool cdns_power_is_lost(struct cdns *cdns) { - if (cdns->version == CDNS3_CONTROLLER_V1) { - if (!(readl(&cdns->otg_v1_regs->simulate) & BIT(0))) + if (cdns->version == CDNS3_CONTROLLER_V0) { + if (!(readl(&cdns->otg_v0_regs->simulate) & BIT(0))) return true; } else { - if (!(readl(&cdns->otg_v0_regs->simulate) & BIT(0))) + if (!(readl(&cdns->otg_v1_regs->simulate) & BIT(0))) return true; } return false; diff --git a/drivers/usb/common/ulpi.c b/drivers/usb/common/ulpi.c index 4169cf40a03b..8f8405b0d608 100644 --- a/drivers/usb/common/ulpi.c +++ b/drivers/usb/common/ulpi.c @@ -39,8 +39,11 @@ static int ulpi_match(struct device *dev, struct device_driver *driver) struct ulpi *ulpi = to_ulpi_dev(dev); const struct ulpi_device_id *id; - /* Some ULPI devices don't have a vendor id so rely on OF match */ - if (ulpi->id.vendor == 0) + /* + * Some ULPI devices don't have a vendor id + * or provide an id_table so rely on OF match. + */ + if (ulpi->id.vendor == 0 || !drv->id_table) return of_driver_match_device(dev, driver); for (id = drv->id_table; id->vendor; id++) diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 3e01dd6e509b..d9712c2602af 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -1563,6 +1563,13 @@ int usb_hcd_submit_urb (struct urb *urb, gfp_t mem_flags) urb->hcpriv = NULL; INIT_LIST_HEAD(&urb->urb_list); atomic_dec(&urb->use_count); + /* + * Order the write of urb->use_count above before the read + * of urb->reject below. Pairs with the memory barriers in + * usb_kill_urb() and usb_poison_urb(). + */ + smp_mb__after_atomic(); + atomic_dec(&urb->dev->urbnum); if (atomic_read(&urb->reject)) wake_up(&usb_kill_urb_queue); @@ -1665,6 +1672,13 @@ static void __usb_hcd_giveback_urb(struct urb *urb) usb_anchor_resume_wakeups(anchor); atomic_dec(&urb->use_count); + /* + * Order the write of urb->use_count above before the read + * of urb->reject below. Pairs with the memory barriers in + * usb_kill_urb() and usb_poison_urb(). + */ + smp_mb__after_atomic(); + if (unlikely(atomic_read(&urb->reject))) wake_up(&usb_kill_urb_queue); usb_put_urb(urb); diff --git a/drivers/usb/core/urb.c b/drivers/usb/core/urb.c index 30727729a44c..33d62d7e3929 100644 --- a/drivers/usb/core/urb.c +++ b/drivers/usb/core/urb.c @@ -715,6 +715,12 @@ void usb_kill_urb(struct urb *urb) if (!(urb && urb->dev && urb->ep)) return; atomic_inc(&urb->reject); + /* + * Order the write of urb->reject above before the read + * of urb->use_count below. Pairs with the barriers in + * __usb_hcd_giveback_urb() and usb_hcd_submit_urb(). + */ + smp_mb__after_atomic(); usb_hcd_unlink_urb(urb, -ENOENT); wait_event(usb_kill_urb_queue, atomic_read(&urb->use_count) == 0); @@ -756,6 +762,12 @@ void usb_poison_urb(struct urb *urb) if (!urb) return; atomic_inc(&urb->reject); + /* + * Order the write of urb->reject above before the read + * of urb->use_count below. Pairs with the barriers in + * __usb_hcd_giveback_urb() and usb_hcd_submit_urb(). + */ + smp_mb__after_atomic(); if (!urb->dev || !urb->ep) return; diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index 2bc03f41c70a..eee3504397e6 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -5097,7 +5097,7 @@ int dwc2_hsotg_suspend(struct dwc2_hsotg *hsotg) hsotg->gadget.speed = USB_SPEED_UNKNOWN; spin_unlock_irqrestore(&hsotg->lock, flags); - for (ep = 0; ep < hsotg->num_of_eps; ep++) { + for (ep = 1; ep < hsotg->num_of_eps; ep++) { if (hsotg->eps_in[ep]) dwc2_hsotg_ep_disable_lock(&hsotg->eps_in[ep]->ep); if (hsotg->eps_out[ep]) diff --git a/drivers/usb/dwc3/dwc3-xilinx.c b/drivers/usb/dwc3/dwc3-xilinx.c index 9cc3ad701a29..e14ac15e24c3 100644 --- a/drivers/usb/dwc3/dwc3-xilinx.c +++ b/drivers/usb/dwc3/dwc3-xilinx.c @@ -102,14 +102,26 @@ static int dwc3_xlnx_init_zynqmp(struct dwc3_xlnx *priv_data) int ret; u32 reg; - usb3_phy = devm_phy_get(dev, "usb3-phy"); - if (PTR_ERR(usb3_phy) == -EPROBE_DEFER) { - ret = -EPROBE_DEFER; + usb3_phy = devm_phy_optional_get(dev, "usb3-phy"); + if (IS_ERR(usb3_phy)) { + ret = PTR_ERR(usb3_phy); + dev_err_probe(dev, ret, + "failed to get USB3 PHY\n"); goto err; - } else if (IS_ERR(usb3_phy)) { - usb3_phy = NULL; } + /* + * The following core resets are not required unless a USB3 PHY + * is used, and the subsequent register settings are not required + * unless a core reset is performed (they should be set properly + * by the first-stage boot loader, but may be reverted by a core + * reset). They may also break the configuration if USB3 is actually + * in use but the usb3-phy entry is missing from the device tree. + * Therefore, skip these operations in this case. + */ + if (!usb3_phy) + goto skip_usb3_phy; + crst = devm_reset_control_get_exclusive(dev, "usb_crst"); if (IS_ERR(crst)) { ret = PTR_ERR(crst); @@ -188,6 +200,7 @@ static int dwc3_xlnx_init_zynqmp(struct dwc3_xlnx *priv_data) goto err; } +skip_usb3_phy: /* * This routes the USB DMA traffic to go through FPD path instead * of reaching DDR directly. This traffic routing is needed to diff --git a/drivers/usb/gadget/function/f_sourcesink.c b/drivers/usb/gadget/function/f_sourcesink.c index 1abf08e5164a..6803cd60cc6d 100644 --- a/drivers/usb/gadget/function/f_sourcesink.c +++ b/drivers/usb/gadget/function/f_sourcesink.c @@ -584,6 +584,7 @@ static int source_sink_start_ep(struct f_sourcesink *ss, bool is_in, if (is_iso) { switch (speed) { + case USB_SPEED_SUPER_PLUS: case USB_SPEED_SUPER: size = ss->isoc_maxpacket * (ss->isoc_mult + 1) * diff --git a/drivers/usb/gadget/udc/at91_udc.c b/drivers/usb/gadget/udc/at91_udc.c index dd0819df096e..9040a0561466 100644 --- a/drivers/usb/gadget/udc/at91_udc.c +++ b/drivers/usb/gadget/udc/at91_udc.c @@ -1895,7 +1895,7 @@ static int at91udc_probe(struct platform_device *pdev) at91_vbus_irq, 0, driver_name, udc); if (retval) { DBG("request vbus irq %d failed\n", - udc->board.vbus_pin); + desc_to_gpio(udc->board.vbus_pin)); goto err_unprepare_iclk; } } diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index c1edcc9b13ce..dc570ce4e831 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -437,6 +437,9 @@ static int __maybe_unused xhci_plat_suspend(struct device *dev) struct xhci_hcd *xhci = hcd_to_xhci(hcd); int ret; + if (pm_runtime_suspended(dev)) + pm_runtime_resume(dev); + ret = xhci_priv_suspend_quirk(hcd); if (ret) return ret; diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index 29191d33c0e3..1a05e3dcfec8 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -2301,6 +2301,16 @@ UNUSUAL_DEV( 0x2027, 0xa001, 0x0000, 0x9999, USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_euscsi_init, US_FL_SCM_MULT_TARG ), +/* + * Reported by DocMAX <mail@vacharakis.de> + * and Thomas Weißschuh <linux@weissschuh.net> + */ +UNUSUAL_DEV( 0x2109, 0x0715, 0x9999, 0x9999, + "VIA Labs, Inc.", + "VL817 SATA Bridge", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_IGNORE_UAS), + UNUSUAL_DEV( 0x2116, 0x0320, 0x0001, 0x0001, "ST", "2A", diff --git a/drivers/usb/typec/port-mapper.c b/drivers/usb/typec/port-mapper.c index 07d307418b47..a7d507802509 100644 --- a/drivers/usb/typec/port-mapper.c +++ b/drivers/usb/typec/port-mapper.c @@ -56,7 +56,12 @@ int typec_link_ports(struct typec_port *con) { struct each_port_arg arg = { .port = con, .match = NULL }; + if (!has_acpi_companion(&con->dev)) + return 0; + bus_for_each_dev(&acpi_bus_type, NULL, &arg, typec_port_match); + if (!arg.match) + return 0; /* * REVISIT: Now each connector can have only a single component master. @@ -74,5 +79,6 @@ int typec_link_ports(struct typec_port *con) void typec_unlink_ports(struct typec_port *con) { - component_master_del(&con->dev, &typec_aggregate_ops); + if (has_acpi_companion(&con->dev)) + component_master_del(&con->dev, &typec_aggregate_ops); } diff --git a/drivers/usb/typec/tcpm/tcpci.c b/drivers/usb/typec/tcpm/tcpci.c index 35a1307349a2..e07d26a3cd8e 100644 --- a/drivers/usb/typec/tcpm/tcpci.c +++ b/drivers/usb/typec/tcpm/tcpci.c @@ -75,9 +75,25 @@ static int tcpci_write16(struct tcpci *tcpci, unsigned int reg, u16 val) static int tcpci_set_cc(struct tcpc_dev *tcpc, enum typec_cc_status cc) { struct tcpci *tcpci = tcpc_to_tcpci(tcpc); + bool vconn_pres; + enum typec_cc_polarity polarity = TYPEC_POLARITY_CC1; unsigned int reg; int ret; + ret = regmap_read(tcpci->regmap, TCPC_POWER_STATUS, ®); + if (ret < 0) + return ret; + + vconn_pres = !!(reg & TCPC_POWER_STATUS_VCONN_PRES); + if (vconn_pres) { + ret = regmap_read(tcpci->regmap, TCPC_TCPC_CTRL, ®); + if (ret < 0) + return ret; + + if (reg & TCPC_TCPC_CTRL_ORIENTATION) + polarity = TYPEC_POLARITY_CC2; + } + switch (cc) { case TYPEC_CC_RA: reg = (TCPC_ROLE_CTRL_CC_RA << TCPC_ROLE_CTRL_CC1_SHIFT) | @@ -112,6 +128,16 @@ static int tcpci_set_cc(struct tcpc_dev *tcpc, enum typec_cc_status cc) break; } + if (vconn_pres) { + if (polarity == TYPEC_POLARITY_CC2) { + reg &= ~(TCPC_ROLE_CTRL_CC1_MASK << TCPC_ROLE_CTRL_CC1_SHIFT); + reg |= (TCPC_ROLE_CTRL_CC_OPEN << TCPC_ROLE_CTRL_CC1_SHIFT); + } else { + reg &= ~(TCPC_ROLE_CTRL_CC2_MASK << TCPC_ROLE_CTRL_CC2_SHIFT); + reg |= (TCPC_ROLE_CTRL_CC_OPEN << TCPC_ROLE_CTRL_CC2_SHIFT); + } + } + ret = regmap_write(tcpci->regmap, TCPC_ROLE_CTRL, reg); if (ret < 0) return ret; diff --git a/drivers/usb/typec/tcpm/tcpci.h b/drivers/usb/typec/tcpm/tcpci.h index 2be7a77d400e..b2edd45f13c6 100644 --- a/drivers/usb/typec/tcpm/tcpci.h +++ b/drivers/usb/typec/tcpm/tcpci.h @@ -98,6 +98,7 @@ #define TCPC_POWER_STATUS_SOURCING_VBUS BIT(4) #define TCPC_POWER_STATUS_VBUS_DET BIT(3) #define TCPC_POWER_STATUS_VBUS_PRES BIT(2) +#define TCPC_POWER_STATUS_VCONN_PRES BIT(1) #define TCPC_POWER_STATUS_SINKING_VBUS BIT(0) #define TCPC_FAULT_STATUS 0x1f diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 59d4fa2443f2..5fce795b69c7 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -5156,7 +5156,8 @@ static void _tcpm_pd_vbus_off(struct tcpm_port *port) case SNK_TRYWAIT_DEBOUNCE: break; case SNK_ATTACH_WAIT: - tcpm_set_state(port, SNK_UNATTACHED, 0); + case SNK_DEBOUNCED: + /* Do nothing, as TCPM is still waiting for vbus to reaach VSAFE5V to connect */ break; case SNK_NEGOTIATE_CAPABILITIES: @@ -5263,6 +5264,10 @@ static void _tcpm_pd_vbus_vsafe0v(struct tcpm_port *port) case PR_SWAP_SNK_SRC_SOURCE_ON: /* Do nothing, vsafe0v is expected during transition */ break; + case SNK_ATTACH_WAIT: + case SNK_DEBOUNCED: + /*Do nothing, still waiting for VSAFE5V for connect */ + break; default: if (port->pwr_role == TYPEC_SINK && port->auto_vbus_discharge_enabled) tcpm_set_state(port, SNK_UNATTACHED, 0); diff --git a/drivers/usb/typec/ucsi/ucsi_ccg.c b/drivers/usb/typec/ucsi/ucsi_ccg.c index bff96d64dddf..6db7c8ddd51c 100644 --- a/drivers/usb/typec/ucsi/ucsi_ccg.c +++ b/drivers/usb/typec/ucsi/ucsi_ccg.c @@ -325,7 +325,7 @@ static int ucsi_ccg_init(struct ucsi_ccg *uc) if (status < 0) return status; - if (!data) + if (!(data & DEV_INT)) return 0; status = ccg_write(uc, CCGX_RAB_INTR_REG, &data, sizeof(data)); diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c index 23999df52739..c8e0ea27caf1 100644 --- a/drivers/video/fbdev/hyperv_fb.c +++ b/drivers/video/fbdev/hyperv_fb.c @@ -287,8 +287,6 @@ struct hvfb_par { static uint screen_width = HVFB_WIDTH; static uint screen_height = HVFB_HEIGHT; -static uint screen_width_max = HVFB_WIDTH; -static uint screen_height_max = HVFB_HEIGHT; static uint screen_depth; static uint screen_fb_size; static uint dio_fb_size; /* FB size for deferred IO */ @@ -582,7 +580,6 @@ static int synthvid_get_supported_resolution(struct hv_device *hdev) int ret = 0; unsigned long t; u8 index; - int i; memset(msg, 0, sizeof(struct synthvid_msg)); msg->vid_hdr.type = SYNTHVID_RESOLUTION_REQUEST; @@ -613,13 +610,6 @@ static int synthvid_get_supported_resolution(struct hv_device *hdev) goto out; } - for (i = 0; i < msg->resolution_resp.resolution_count; i++) { - screen_width_max = max_t(unsigned int, screen_width_max, - msg->resolution_resp.supported_resolution[i].width); - screen_height_max = max_t(unsigned int, screen_height_max, - msg->resolution_resp.supported_resolution[i].height); - } - screen_width = msg->resolution_resp.supported_resolution[index].width; screen_height = @@ -941,7 +931,7 @@ static void hvfb_get_option(struct fb_info *info) if (x < HVFB_WIDTH_MIN || y < HVFB_HEIGHT_MIN || (synthvid_ver_ge(par->synthvid_version, SYNTHVID_VERSION_WIN10) && - (x > screen_width_max || y > screen_height_max)) || + (x * y * screen_depth / 8 > screen_fb_size)) || (par->synthvid_version == SYNTHVID_VERSION_WIN8 && x * y * screen_depth / 8 > SYNTHVID_FB_SIZE_WIN8) || (par->synthvid_version == SYNTHVID_VERSION_WIN7 && @@ -1194,8 +1184,8 @@ static int hvfb_probe(struct hv_device *hdev, } hvfb_get_option(info); - pr_info("Screen resolution: %dx%d, Color depth: %d\n", - screen_width, screen_height, screen_depth); + pr_info("Screen resolution: %dx%d, Color depth: %d, Frame buffer size: %d\n", + screen_width, screen_height, screen_depth, screen_fb_size); ret = hvfb_getmem(hdev, info); if (ret) { diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a5bd6926f7ff..d8af6620a941 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1214,6 +1214,35 @@ static int defrag_collect_targets(struct btrfs_inode *inode, goto next; /* + * Our start offset might be in the middle of an existing extent + * map, so take that into account. + */ + range_len = em->len - (cur - em->start); + /* + * If this range of the extent map is already flagged for delalloc, + * skip it, because: + * + * 1) We could deadlock later, when trying to reserve space for + * delalloc, because in case we can't immediately reserve space + * the flusher can start delalloc and wait for the respective + * ordered extents to complete. The deadlock would happen + * because we do the space reservation while holding the range + * locked, and starting writeback, or finishing an ordered + * extent, requires locking the range; + * + * 2) If there's delalloc there, it means there's dirty pages for + * which writeback has not started yet (we clean the delalloc + * flag when starting writeback and after creating an ordered + * extent). If we mark pages in an adjacent range for defrag, + * then we will have a larger contiguous range for delalloc, + * very likely resulting in a larger extent after writeback is + * triggered (except in a case of free space fragmentation). + */ + if (test_range_bit(&inode->io_tree, cur, cur + range_len - 1, + EXTENT_DELALLOC, 0, NULL)) + goto next; + + /* * For do_compress case, we want to compress all valid file * extents, thus no @extent_thresh or mergeable check. */ @@ -1221,7 +1250,7 @@ static int defrag_collect_targets(struct btrfs_inode *inode, goto add; /* Skip too large extent */ - if (em->len >= extent_thresh) + if (range_len >= extent_thresh) goto next; next_mergeable = defrag_check_next_extent(&inode->vfs_inode, em, @@ -1442,9 +1471,11 @@ static int defrag_one_cluster(struct btrfs_inode *inode, list_for_each_entry(entry, &target_list, list) { u32 range_len = entry->len; - /* Reached the limit */ - if (max_sectors && max_sectors == *sectors_defragged) + /* Reached or beyond the limit */ + if (max_sectors && *sectors_defragged >= max_sectors) { + ret = 1; break; + } if (max_sectors) range_len = min_t(u32, range_len, @@ -1465,7 +1496,8 @@ static int defrag_one_cluster(struct btrfs_inode *inode, extent_thresh, newer_than, do_compress); if (ret < 0) break; - *sectors_defragged += range_len; + *sectors_defragged += range_len >> + inode->root->fs_info->sectorsize_bits; } out: list_for_each_entry_safe(entry, tmp, &target_list, list) { @@ -1484,6 +1516,12 @@ out: * @newer_than: minimum transid to defrag * @max_to_defrag: max number of sectors to be defragged, if 0, the whole inode * will be defragged. + * + * Return <0 for error. + * Return >=0 for the number of sectors defragged, and range->start will be updated + * to indicate the file offset where next defrag should be started at. + * (Mostly for autodefrag, which sets @max_to_defrag thus we may exit early without + * defragging all the range). */ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra, struct btrfs_ioctl_defrag_range_args *range, @@ -1499,6 +1537,7 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra, int compress_type = BTRFS_COMPRESS_ZLIB; int ret = 0; u32 extent_thresh = range->extent_thresh; + pgoff_t start_index; if (isize == 0) return 0; @@ -1518,12 +1557,16 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra, if (range->start + range->len > range->start) { /* Got a specific range */ - last_byte = min(isize, range->start + range->len) - 1; + last_byte = min(isize, range->start + range->len); } else { /* Defrag until file end */ - last_byte = isize - 1; + last_byte = isize; } + /* Align the range */ + cur = round_down(range->start, fs_info->sectorsize); + last_byte = round_up(last_byte, fs_info->sectorsize) - 1; + /* * If we were not given a ra, allocate a readahead context. As * readahead is just an optimization, defrag will work without it so @@ -1536,16 +1579,26 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra, file_ra_state_init(ra, inode->i_mapping); } - /* Align the range */ - cur = round_down(range->start, fs_info->sectorsize); - last_byte = round_up(last_byte, fs_info->sectorsize) - 1; + /* + * Make writeback start from the beginning of the range, so that the + * defrag range can be written sequentially. + */ + start_index = cur >> PAGE_SHIFT; + if (start_index < inode->i_mapping->writeback_index) + inode->i_mapping->writeback_index = start_index; while (cur < last_byte) { + const unsigned long prev_sectors_defragged = sectors_defragged; u64 cluster_end; /* The cluster size 256K should always be page aligned */ BUILD_BUG_ON(!IS_ALIGNED(CLUSTER_SIZE, PAGE_SIZE)); + if (btrfs_defrag_cancelled(fs_info)) { + ret = -EAGAIN; + break; + } + /* We want the cluster end at page boundary when possible */ cluster_end = (((cur >> PAGE_SHIFT) + (SZ_256K >> PAGE_SHIFT)) << PAGE_SHIFT) - 1; @@ -1567,14 +1620,27 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra, cluster_end + 1 - cur, extent_thresh, newer_than, do_compress, §ors_defragged, max_to_defrag); + + if (sectors_defragged > prev_sectors_defragged) + balance_dirty_pages_ratelimited(inode->i_mapping); + btrfs_inode_unlock(inode, 0); if (ret < 0) break; cur = cluster_end + 1; + if (ret > 0) { + ret = 0; + break; + } } if (ra_allocated) kfree(ra); + /* + * Update range.start for autodefrag, this will indicate where to start + * in next run. + */ + range->start = cur; if (sectors_defragged) { /* * We have defragged some sectors, for compression case they @@ -3086,10 +3152,8 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, btrfs_inode_lock(inode, 0); err = btrfs_delete_subvolume(dir, dentry); btrfs_inode_unlock(inode, 0); - if (!err) { - fsnotify_rmdir(dir, dentry); - d_delete(dentry); - } + if (!err) + d_delete_notify(dir, dentry); out_dput: dput(dentry); diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 7d305b974824..b472cd066d1c 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2218,6 +2218,7 @@ static int unsafe_request_wait(struct inode *inode) struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_request *req1 = NULL, *req2 = NULL; + unsigned int max_sessions; int ret, err = 0; spin_lock(&ci->i_unsafe_lock); @@ -2236,36 +2237,44 @@ static int unsafe_request_wait(struct inode *inode) spin_unlock(&ci->i_unsafe_lock); /* + * The mdsc->max_sessions is unlikely to be changed + * mostly, here we will retry it by reallocating the + * sessions array memory to get rid of the mdsc->mutex + * lock. + */ +retry: + max_sessions = mdsc->max_sessions; + + /* * Trigger to flush the journal logs in all the relevant MDSes * manually, or in the worst case we must wait at most 5 seconds * to wait the journal logs to be flushed by the MDSes periodically. */ - if (req1 || req2) { + if ((req1 || req2) && likely(max_sessions)) { struct ceph_mds_session **sessions = NULL; struct ceph_mds_session *s; struct ceph_mds_request *req; - unsigned int max; int i; - /* - * The mdsc->max_sessions is unlikely to be changed - * mostly, here we will retry it by reallocating the - * sessions arrary memory to get rid of the mdsc->mutex - * lock. - */ -retry: - max = mdsc->max_sessions; - sessions = krealloc(sessions, max * sizeof(s), __GFP_ZERO); - if (!sessions) - return -ENOMEM; + sessions = kzalloc(max_sessions * sizeof(s), GFP_KERNEL); + if (!sessions) { + err = -ENOMEM; + goto out; + } spin_lock(&ci->i_unsafe_lock); if (req1) { list_for_each_entry(req, &ci->i_unsafe_dirops, r_unsafe_dir_item) { s = req->r_session; - if (unlikely(s->s_mds >= max)) { + if (unlikely(s->s_mds >= max_sessions)) { spin_unlock(&ci->i_unsafe_lock); + for (i = 0; i < max_sessions; i++) { + s = sessions[i]; + if (s) + ceph_put_mds_session(s); + } + kfree(sessions); goto retry; } if (!sessions[s->s_mds]) { @@ -2278,8 +2287,14 @@ retry: list_for_each_entry(req, &ci->i_unsafe_iops, r_unsafe_target_item) { s = req->r_session; - if (unlikely(s->s_mds >= max)) { + if (unlikely(s->s_mds >= max_sessions)) { spin_unlock(&ci->i_unsafe_lock); + for (i = 0; i < max_sessions; i++) { + s = sessions[i]; + if (s) + ceph_put_mds_session(s); + } + kfree(sessions); goto retry; } if (!sessions[s->s_mds]) { @@ -2300,7 +2315,7 @@ retry: spin_unlock(&ci->i_ceph_lock); /* send flush mdlog request to MDSes */ - for (i = 0; i < max; i++) { + for (i = 0; i < max_sessions; i++) { s = sessions[i]; if (s) { send_flush_mdlog(s); @@ -2317,15 +2332,19 @@ retry: ceph_timeout_jiffies(req1->r_timeout)); if (ret) err = -EIO; - ceph_mdsc_put_request(req1); } if (req2) { ret = !wait_for_completion_timeout(&req2->r_safe_completion, ceph_timeout_jiffies(req2->r_timeout)); if (ret) err = -EIO; - ceph_mdsc_put_request(req2); } + +out: + if (req1) + ceph_mdsc_put_request(req1); + if (req2) + ceph_mdsc_put_request(req2); return err; } diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 5b9104b8e453..bbed3224ad68 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -583,6 +583,7 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry, struct ceph_inode_info *ci = ceph_inode(dir); struct inode *inode; struct timespec64 now; + struct ceph_string *pool_ns; struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); struct ceph_vino vino = { .ino = req->r_deleg_ino, .snap = CEPH_NOSNAP }; @@ -632,6 +633,12 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry, in.max_size = cpu_to_le64(lo->stripe_unit); ceph_file_layout_to_legacy(lo, &in.layout); + /* lo is private, so pool_ns can't change */ + pool_ns = rcu_dereference_raw(lo->pool_ns); + if (pool_ns) { + iinfo.pool_ns_len = pool_ns->len; + iinfo.pool_ns_data = pool_ns->str; + } down_read(&mdsc->snap_rwsem); ret = ceph_fill_inode(inode, NULL, &iinfo, NULL, req->r_session, @@ -750,8 +757,10 @@ retry: restore_deleg_ino(dir, req->r_deleg_ino); ceph_mdsc_put_request(req); try_async = false; + ceph_put_string(rcu_dereference_raw(lo.pool_ns)); goto retry; } + ceph_put_string(rcu_dereference_raw(lo.pool_ns)); goto out_req; } } diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 1466b5d01cbb..d3cd2a94d1e8 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -1780,8 +1780,8 @@ void configfs_unregister_group(struct config_group *group) configfs_detach_group(&group->cg_item); d_inode(dentry)->i_flags |= S_DEAD; dont_mount(dentry); + d_drop(dentry); fsnotify_rmdir(d_inode(parent), dentry); - d_delete(dentry); inode_unlock(d_inode(parent)); dput(dentry); @@ -1922,10 +1922,10 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys) configfs_detach_group(&group->cg_item); d_inode(dentry)->i_flags |= S_DEAD; dont_mount(dentry); - fsnotify_rmdir(d_inode(root), dentry); inode_unlock(d_inode(dentry)); - d_delete(dentry); + d_drop(dentry); + fsnotify_rmdir(d_inode(root), dentry); inode_unlock(d_inode(root)); diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 42e5a766d33c..4f25015aa534 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -621,8 +621,8 @@ void devpts_pty_kill(struct dentry *dentry) dentry->d_fsdata = NULL; drop_nlink(dentry->d_inode); - fsnotify_unlink(d_inode(dentry->d_parent), dentry); d_drop(dentry); + fsnotify_unlink(d_inode(dentry->d_parent), dentry); dput(dentry); /* d_alloc_name() in devpts_pty_new() */ } diff --git a/fs/io_uring.c b/fs/io_uring.c index e54c4127422e..2e04f718319d 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7822,10 +7822,15 @@ static __cold void io_rsrc_node_ref_zero(struct percpu_ref *ref) struct io_ring_ctx *ctx = node->rsrc_data->ctx; unsigned long flags; bool first_add = false; + unsigned long delay = HZ; spin_lock_irqsave(&ctx->rsrc_ref_lock, flags); node->done = true; + /* if we are mid-quiesce then do not delay */ + if (node->rsrc_data->quiesce) + delay = 0; + while (!list_empty(&ctx->rsrc_ref_list)) { node = list_first_entry(&ctx->rsrc_ref_list, struct io_rsrc_node, node); @@ -7838,10 +7843,10 @@ static __cold void io_rsrc_node_ref_zero(struct percpu_ref *ref) spin_unlock_irqrestore(&ctx->rsrc_ref_lock, flags); if (first_add) - mod_delayed_work(system_wq, &ctx->rsrc_put_work, HZ); + mod_delayed_work(system_wq, &ctx->rsrc_put_work, delay); } -static struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx) +static struct io_rsrc_node *io_rsrc_node_alloc(void) { struct io_rsrc_node *ref_node; @@ -7892,7 +7897,7 @@ static int io_rsrc_node_switch_start(struct io_ring_ctx *ctx) { if (ctx->rsrc_backup_node) return 0; - ctx->rsrc_backup_node = io_rsrc_node_alloc(ctx); + ctx->rsrc_backup_node = io_rsrc_node_alloc(); return ctx->rsrc_backup_node ? 0 : -ENOMEM; } diff --git a/fs/namei.c b/fs/namei.c index b867a92c078e..3f1829b3ab5b 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4024,13 +4024,12 @@ int vfs_rmdir(struct user_namespace *mnt_userns, struct inode *dir, dentry->d_inode->i_flags |= S_DEAD; dont_mount(dentry); detach_mounts(dentry); - fsnotify_rmdir(dir, dentry); out: inode_unlock(dentry->d_inode); dput(dentry); if (!error) - d_delete(dentry); + d_delete_notify(dir, dentry); return error; } EXPORT_SYMBOL(vfs_rmdir); @@ -4152,7 +4151,6 @@ int vfs_unlink(struct user_namespace *mnt_userns, struct inode *dir, if (!error) { dont_mount(dentry); detach_mounts(dentry); - fsnotify_unlink(dir, dentry); } } } @@ -4160,9 +4158,11 @@ out: inode_unlock(target); /* We don't d_delete() NFS sillyrenamed files--they still exist. */ - if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) { + if (!error && dentry->d_flags & DCACHE_NFSFS_RENAMED) { + fsnotify_unlink(dir, dentry); + } else if (!error) { fsnotify_link_count(target); - d_delete(dentry); + d_delete_notify(dir, dentry); } return error; diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index 6a2033131c06..ccd4f245cae2 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -170,7 +170,7 @@ struct cb_devicenotifyitem { }; struct cb_devicenotifyargs { - int ndevs; + uint32_t ndevs; struct cb_devicenotifyitem *devs; }; diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 09c5b1cb3e07..c343666d9a42 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -358,7 +358,7 @@ __be32 nfs4_callback_devicenotify(void *argp, void *resp, struct cb_process_state *cps) { struct cb_devicenotifyargs *args = argp; - int i; + uint32_t i; __be32 res = 0; struct nfs_client *clp = cps->clp; struct nfs_server *server = NULL; diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index a67c41ec545f..f90de8043b0f 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -258,11 +258,9 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp, void *argp) { struct cb_devicenotifyargs *args = argp; + uint32_t tmp, n, i; __be32 *p; __be32 status = 0; - u32 tmp; - int n, i; - args->ndevs = 0; /* Num of device notifications */ p = xdr_inline_decode(xdr, sizeof(uint32_t)); @@ -271,7 +269,7 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp, goto out; } n = ntohl(*p++); - if (n <= 0) + if (n == 0) goto out; if (n > ULONG_MAX / sizeof(*args->devs)) { status = htonl(NFS4ERR_BADXDR); @@ -330,19 +328,21 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp, dev->cbd_immediate = 0; } - args->ndevs++; - dprintk("%s: type %d layout 0x%x immediate %d\n", __func__, dev->cbd_notify_type, dev->cbd_layout_type, dev->cbd_immediate); } + args->ndevs = n; + dprintk("%s: ndevs %d\n", __func__, args->ndevs); + return 0; +err: + kfree(args->devs); out: + args->devs = NULL; + args->ndevs = 0; dprintk("%s: status %d ndevs %d\n", __func__, ntohl(status), args->ndevs); return status; -err: - kfree(args->devs); - goto out; } static __be32 decode_sessionid(struct xdr_stream *xdr, diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 8d8b85b5a641..f18e80fda9bf 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -856,6 +856,13 @@ static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, str server->namelen = pathinfo.max_namelen; } + if (clp->rpc_ops->discover_trunking != NULL && + (server->caps & NFS_CAP_FS_LOCATIONS)) { + error = clp->rpc_ops->discover_trunking(server, mntfh); + if (error < 0) + return error; + } + return 0; } diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 347793626f19..848f3b8fb821 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1325,6 +1325,14 @@ void nfs_clear_verifier_delegated(struct inode *inode) EXPORT_SYMBOL_GPL(nfs_clear_verifier_delegated); #endif /* IS_ENABLED(CONFIG_NFS_V4) */ +static int nfs_dentry_verify_change(struct inode *dir, struct dentry *dentry) +{ + if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE) && + d_really_is_negative(dentry)) + return dentry->d_time == inode_peek_iversion_raw(dir); + return nfs_verify_change_attribute(dir, dentry->d_time); +} + /* * A check for whether or not the parent directory has changed. * In the case it has, we assume that the dentries are untrustworthy @@ -1338,7 +1346,7 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry, return 1; if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONE) return 0; - if (!nfs_verify_change_attribute(dir, dentry->d_time)) + if (!nfs_dentry_verify_change(dir, dentry)) return 0; /* Revalidate nfsi->cache_change_attribute before we declare a match */ if (nfs_mapping_need_revalidate_inode(dir)) { @@ -1347,7 +1355,7 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry, if (__nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0) return 0; } - if (!nfs_verify_change_attribute(dir, dentry->d_time)) + if (!nfs_dentry_verify_change(dir, dentry)) return 0; return 1; } @@ -1437,6 +1445,9 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, return 0; if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG) return 1; + /* Case insensitive server? Revalidate negative dentries */ + if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE)) + return 1; return !nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU); } @@ -1537,7 +1548,7 @@ out: * If the lookup failed despite the dentry change attribute being * a match, then we should revalidate the directory cache. */ - if (!ret && nfs_verify_change_attribute(dir, dentry->d_time)) + if (!ret && nfs_dentry_verify_change(dir, dentry)) nfs_mark_dir_for_revalidate(dir); return nfs_lookup_revalidate_done(dir, dentry, inode, ret); } @@ -1776,8 +1787,11 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in dir_verifier = nfs_save_change_attribute(dir); trace_nfs_lookup_enter(dir, dentry, flags); error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr); - if (error == -ENOENT) + if (error == -ENOENT) { + if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE)) + dir_verifier = inode_peek_iversion_raw(dir); goto no_entry; + } if (error < 0) { res = ERR_PTR(error); goto out; @@ -1806,6 +1820,14 @@ out: } EXPORT_SYMBOL_GPL(nfs_lookup); +void nfs_d_prune_case_insensitive_aliases(struct inode *inode) +{ + /* Case insensitive server? Revalidate dentries */ + if (inode && nfs_server_capable(inode, NFS_CAP_CASE_INSENSITIVE)) + d_prune_aliases(inode); +} +EXPORT_SYMBOL_GPL(nfs_d_prune_case_insensitive_aliases); + #if IS_ENABLED(CONFIG_NFS_V4) static int nfs4_lookup_revalidate(struct dentry *, unsigned int); @@ -1867,6 +1889,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, struct iattr attr = { .ia_valid = ATTR_OPEN }; struct inode *inode; unsigned int lookup_flags = 0; + unsigned long dir_verifier; bool switched = false; int created = 0; int err; @@ -1940,7 +1963,11 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, switch (err) { case -ENOENT: d_splice_alias(NULL, dentry); - nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE)) + dir_verifier = inode_peek_iversion_raw(dir); + else + dir_verifier = nfs_save_change_attribute(dir); + nfs_set_verifier(dentry, dir_verifier); break; case -EISDIR: case -ENOTDIR: @@ -1968,6 +1995,24 @@ out: no_open: res = nfs_lookup(dir, dentry, lookup_flags); + if (!res) { + inode = d_inode(dentry); + if ((lookup_flags & LOOKUP_DIRECTORY) && inode && + !S_ISDIR(inode->i_mode)) + res = ERR_PTR(-ENOTDIR); + else if (inode && S_ISREG(inode->i_mode)) + res = ERR_PTR(-EOPENSTALE); + } else if (!IS_ERR(res)) { + inode = d_inode(res); + if ((lookup_flags & LOOKUP_DIRECTORY) && inode && + !S_ISDIR(inode->i_mode)) { + dput(res); + res = ERR_PTR(-ENOTDIR); + } else if (inode && S_ISREG(inode->i_mode)) { + dput(res); + res = ERR_PTR(-EOPENSTALE); + } + } if (switched) { d_lookup_done(dentry); if (!res) @@ -2186,8 +2231,10 @@ static void nfs_dentry_remove_handle_error(struct inode *dir, switch (error) { case -ENOENT: d_delete(dentry); - fallthrough; + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + break; case 0: + nfs_d_prune_case_insensitive_aliases(d_inode(dentry)); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); } } @@ -2380,6 +2427,8 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) trace_nfs_link_enter(inode, dir, dentry); d_drop(dentry); + if (S_ISREG(inode->i_mode)) + nfs_sync_inode(inode); error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name); if (error == 0) { nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); @@ -2469,6 +2518,8 @@ int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir, } } + if (S_ISREG(old_inode->i_mode)) + nfs_sync_inode(old_inode); task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, NULL); if (IS_ERR(task)) { error = PTR_ERR(task); @@ -2529,7 +2580,7 @@ MODULE_PARM_DESC(nfs_access_max_cachesize, "NFS access maximum total cache lengt static void nfs_access_free_entry(struct nfs_access_entry *entry) { - put_cred(entry->cred); + put_group_info(entry->group_info); kfree_rcu(entry, rcu_head); smp_mb__before_atomic(); atomic_long_dec(&nfs_access_nr_entries); @@ -2655,6 +2706,43 @@ void nfs_access_zap_cache(struct inode *inode) } EXPORT_SYMBOL_GPL(nfs_access_zap_cache); +static int access_cmp(const struct cred *a, const struct nfs_access_entry *b) +{ + struct group_info *ga, *gb; + int g; + + if (uid_lt(a->fsuid, b->fsuid)) + return -1; + if (uid_gt(a->fsuid, b->fsuid)) + return 1; + + if (gid_lt(a->fsgid, b->fsgid)) + return -1; + if (gid_gt(a->fsgid, b->fsgid)) + return 1; + + ga = a->group_info; + gb = b->group_info; + if (ga == gb) + return 0; + if (ga == NULL) + return -1; + if (gb == NULL) + return 1; + if (ga->ngroups < gb->ngroups) + return -1; + if (ga->ngroups > gb->ngroups) + return 1; + + for (g = 0; g < ga->ngroups; g++) { + if (gid_lt(ga->gid[g], gb->gid[g])) + return -1; + if (gid_gt(ga->gid[g], gb->gid[g])) + return 1; + } + return 0; +} + static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, const struct cred *cred) { struct rb_node *n = NFS_I(inode)->access_cache.rb_node; @@ -2662,7 +2750,7 @@ static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, co while (n != NULL) { struct nfs_access_entry *entry = rb_entry(n, struct nfs_access_entry, rb_node); - int cmp = cred_fscmp(cred, entry->cred); + int cmp = access_cmp(cred, entry); if (cmp < 0) n = n->rb_left; @@ -2674,7 +2762,7 @@ static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, co return NULL; } -static int nfs_access_get_cached_locked(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res, bool may_block) +static int nfs_access_get_cached_locked(struct inode *inode, const struct cred *cred, u32 *mask, bool may_block) { struct nfs_inode *nfsi = NFS_I(inode); struct nfs_access_entry *cache; @@ -2704,8 +2792,7 @@ static int nfs_access_get_cached_locked(struct inode *inode, const struct cred * spin_lock(&inode->i_lock); retry = false; } - res->cred = cache->cred; - res->mask = cache->mask; + *mask = cache->mask; list_move_tail(&cache->lru, &nfsi->access_cache_entry_lru); err = 0; out: @@ -2717,7 +2804,7 @@ out_zap: return -ENOENT; } -static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res) +static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cred, u32 *mask) { /* Only check the most recently returned cache entry, * but do it without locking. @@ -2733,35 +2820,36 @@ static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cre lh = rcu_dereference(list_tail_rcu(&nfsi->access_cache_entry_lru)); cache = list_entry(lh, struct nfs_access_entry, lru); if (lh == &nfsi->access_cache_entry_lru || - cred_fscmp(cred, cache->cred) != 0) + access_cmp(cred, cache) != 0) cache = NULL; if (cache == NULL) goto out; if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS)) goto out; - res->cred = cache->cred; - res->mask = cache->mask; + *mask = cache->mask; err = 0; out: rcu_read_unlock(); return err; } -int nfs_access_get_cached(struct inode *inode, const struct cred *cred, struct -nfs_access_entry *res, bool may_block) +int nfs_access_get_cached(struct inode *inode, const struct cred *cred, + u32 *mask, bool may_block) { int status; - status = nfs_access_get_cached_rcu(inode, cred, res); + status = nfs_access_get_cached_rcu(inode, cred, mask); if (status != 0) - status = nfs_access_get_cached_locked(inode, cred, res, + status = nfs_access_get_cached_locked(inode, cred, mask, may_block); return status; } EXPORT_SYMBOL_GPL(nfs_access_get_cached); -static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *set) +static void nfs_access_add_rbtree(struct inode *inode, + struct nfs_access_entry *set, + const struct cred *cred) { struct nfs_inode *nfsi = NFS_I(inode); struct rb_root *root_node = &nfsi->access_cache; @@ -2774,7 +2862,7 @@ static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry * while (*p != NULL) { parent = *p; entry = rb_entry(parent, struct nfs_access_entry, rb_node); - cmp = cred_fscmp(set->cred, entry->cred); + cmp = access_cmp(cred, entry); if (cmp < 0) p = &parent->rb_left; @@ -2796,13 +2884,16 @@ found: nfs_access_free_entry(entry); } -void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) +void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set, + const struct cred *cred) { struct nfs_access_entry *cache = kmalloc(sizeof(*cache), GFP_KERNEL); if (cache == NULL) return; RB_CLEAR_NODE(&cache->rb_node); - cache->cred = get_cred(set->cred); + cache->fsuid = cred->fsuid; + cache->fsgid = cred->fsgid; + cache->group_info = get_group_info(cred->group_info); cache->mask = set->mask; /* The above field assignments must be visible @@ -2810,7 +2901,7 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) * use rcu_assign_pointer, so just force the memory barrier. */ smp_wmb(); - nfs_access_add_rbtree(inode, cache); + nfs_access_add_rbtree(inode, cache, cred); /* Update accounting */ smp_mb__before_atomic(); @@ -2875,7 +2966,7 @@ static int nfs_do_access(struct inode *inode, const struct cred *cred, int mask) trace_nfs_access_enter(inode); - status = nfs_access_get_cached(inode, cred, &cache, may_block); + status = nfs_access_get_cached(inode, cred, &cache.mask, may_block); if (status == 0) goto out_cached; @@ -2895,8 +2986,7 @@ static int nfs_do_access(struct inode *inode, const struct cred *cred, int mask) cache.mask |= NFS_ACCESS_DELETE | NFS_ACCESS_LOOKUP; else cache.mask |= NFS_ACCESS_EXECUTE; - cache.cred = cred; - status = NFS_PROTO(inode)->access(inode, &cache); + status = NFS_PROTO(inode)->access(inode, &cache, cred); if (status != 0) { if (status == -ESTALE) { if (!S_ISDIR(inode->i_mode)) @@ -2906,7 +2996,7 @@ static int nfs_do_access(struct inode *inode, const struct cred *cred, int mask) } goto out; } - nfs_access_add_cache(inode, &cache); + nfs_access_add_cache(inode, &cache, cred); out_cached: cache_mask = nfs_access_calc_mask(cache.mask, inode->i_mode); if ((mask & ~cache_mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) != 0) diff --git a/fs/nfs/filelayout/filelayout.h b/fs/nfs/filelayout/filelayout.h index 79323b5dab0c..aed0748fd6ec 100644 --- a/fs/nfs/filelayout/filelayout.h +++ b/fs/nfs/filelayout/filelayout.h @@ -51,7 +51,7 @@ struct nfs4_file_layout_dsaddr { u32 stripe_count; u8 *stripe_indices; u32 ds_num; - struct nfs4_pnfs_ds *ds_list[1]; + struct nfs4_pnfs_ds *ds_list[]; }; struct nfs4_filelayout_segment { diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c index 86c3f7e69ec4..acf4b88889dc 100644 --- a/fs/nfs/filelayout/filelayoutdev.c +++ b/fs/nfs/filelayout/filelayoutdev.c @@ -136,9 +136,7 @@ nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, goto out_err_free_stripe_indices; } - dsaddr = kzalloc(sizeof(*dsaddr) + - (sizeof(struct nfs4_pnfs_ds *) * (num - 1)), - gfp_flags); + dsaddr = kzalloc(struct_size(dsaddr, ds_list, num), gfp_flags); if (!dsaddr) goto out_err_free_stripe_indices; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 12f6acb483bb..2de7c56a1fbe 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -373,6 +373,7 @@ extern unsigned long nfs_access_cache_count(struct shrinker *shrink, extern unsigned long nfs_access_cache_scan(struct shrinker *shrink, struct shrink_control *sc); struct dentry *nfs_lookup(struct inode *, struct dentry *, unsigned int); +void nfs_d_prune_case_insensitive_aliases(struct inode *inode); int nfs_create(struct user_namespace *, struct inode *, struct dentry *, umode_t, bool); int nfs_mkdir(struct user_namespace *, struct inode *, struct dentry *, diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 7100514d306b..1597eef40d54 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -220,7 +220,8 @@ static int nfs3_proc_lookupp(struct inode *inode, struct nfs_fh *fhandle, task_flags); } -static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry) +static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry, + const struct cred *cred) { struct nfs3_accessargs arg = { .fh = NFS_FH(inode), @@ -231,7 +232,7 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry) .rpc_proc = &nfs3_procedures[NFS3PROC_ACCESS], .rpc_argp = &arg, .rpc_resp = &res, - .rpc_cred = entry->cred, + .rpc_cred = cred, }; int status = -ENOMEM; diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 8b21ff1be717..32129446beca 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -46,7 +46,7 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep, { struct inode *inode = file_inode(filep); struct nfs_server *server = NFS_SERVER(inode); - u32 bitmask[3]; + u32 bitmask[NFS_BITMASK_SZ]; struct nfs42_falloc_args args = { .falloc_fh = NFS_FH(inode), .falloc_offset = offset, @@ -69,9 +69,8 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep, return status; } - memcpy(bitmask, server->cache_consistency_bitmask, sizeof(bitmask)); - if (server->attr_bitmask[1] & FATTR4_WORD1_SPACE_USED) - bitmask[1] |= FATTR4_WORD1_SPACE_USED; + nfs4_bitmask_set(bitmask, server->cache_consistency_bitmask, inode, + NFS_INO_INVALID_BLOCKS); res.falloc_fattr = nfs_alloc_fattr(); if (!res.falloc_fattr) @@ -1044,13 +1043,14 @@ static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f, struct inode *src_inode = file_inode(src_f); struct inode *dst_inode = file_inode(dst_f); struct nfs_server *server = NFS_SERVER(dst_inode); + __u32 dst_bitmask[NFS_BITMASK_SZ]; struct nfs42_clone_args args = { .src_fh = NFS_FH(src_inode), .dst_fh = NFS_FH(dst_inode), .src_offset = src_offset, .dst_offset = dst_offset, .count = count, - .dst_bitmask = server->cache_consistency_bitmask, + .dst_bitmask = dst_bitmask, }; struct nfs42_clone_res res = { .server = server, @@ -1079,6 +1079,9 @@ static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f, if (!res.dst_fattr) return -ENOMEM; + nfs4_bitmask_set(dst_bitmask, server->cache_consistency_bitmask, + dst_inode, NFS_INO_INVALID_BLOCKS); + status = nfs4_call_sync(server->client, server, msg, &args.seq_args, &res.seq_res, 0); trace_nfs4_clone(src_inode, dst_inode, &args, status); diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index ed5eaca6801e..84f39b6f1b1e 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -260,8 +260,8 @@ struct nfs4_state_maintenance_ops { }; struct nfs4_mig_recovery_ops { - int (*get_locations)(struct inode *, struct nfs4_fs_locations *, - struct page *, const struct cred *); + int (*get_locations)(struct nfs_server *, struct nfs_fh *, + struct nfs4_fs_locations *, struct page *, const struct cred *); int (*fsid_present)(struct inode *, const struct cred *); }; @@ -280,7 +280,8 @@ struct rpc_clnt *nfs4_negotiate_security(struct rpc_clnt *, struct inode *, int nfs4_submount(struct fs_context *, struct nfs_server *); int nfs4_replace_transport(struct nfs_server *server, const struct nfs4_fs_locations *locations); - +size_t nfs_parse_server_name(char *string, size_t len, struct sockaddr *sa, + size_t salen, struct net *net, int port); /* nfs4proc.c */ extern int nfs4_handle_exception(struct nfs_server *, int, struct nfs4_exception *); extern int nfs4_async_handle_error(struct rpc_task *task, @@ -302,8 +303,9 @@ extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait); extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); extern int nfs4_proc_fs_locations(struct rpc_clnt *, struct inode *, const struct qstr *, struct nfs4_fs_locations *, struct page *); -extern int nfs4_proc_get_locations(struct inode *, struct nfs4_fs_locations *, - struct page *page, const struct cred *); +extern int nfs4_proc_get_locations(struct nfs_server *, struct nfs_fh *, + struct nfs4_fs_locations *, + struct page *page, const struct cred *); extern int nfs4_proc_fsid_present(struct inode *, const struct cred *); extern struct rpc_clnt *nfs4_proc_lookup_mountpoint(struct inode *, struct dentry *, @@ -315,6 +317,8 @@ extern int nfs4_set_rw_stateid(nfs4_stateid *stateid, const struct nfs_open_context *ctx, const struct nfs_lock_context *l_ctx, fmode_t fmode); +extern void nfs4_bitmask_set(__u32 bitmask[], const __u32 src[], + struct inode *inode, unsigned long cache_validity); extern int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct inode *inode); extern int update_open_stateid(struct nfs4_state *state, diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index d8b5a250ca05..47a6cf892c95 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -1343,8 +1343,11 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname, } nfs_put_client(clp); - if (server->nfs_client->cl_hostname == NULL) + if (server->nfs_client->cl_hostname == NULL) { server->nfs_client->cl_hostname = kstrdup(hostname, GFP_KERNEL); + if (server->nfs_client->cl_hostname == NULL) + return -ENOMEM; + } nfs_server_insert_lists(server); return nfs_probe_server(server, NFS_FH(d_inode(server->super->s_root))); diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index 873342308dc0..3680c8da510c 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -164,16 +164,21 @@ static int nfs4_validate_fspath(struct dentry *dentry, return 0; } -static size_t nfs_parse_server_name(char *string, size_t len, - struct sockaddr *sa, size_t salen, struct net *net) +size_t nfs_parse_server_name(char *string, size_t len, struct sockaddr *sa, + size_t salen, struct net *net, int port) { ssize_t ret; ret = rpc_pton(net, string, len, sa, salen); if (ret == 0) { - ret = nfs_dns_resolve_name(net, string, len, sa, salen); - if (ret < 0) - ret = 0; + ret = rpc_uaddr2sockaddr(net, string, len, sa, salen); + if (ret == 0) { + ret = nfs_dns_resolve_name(net, string, len, sa, salen); + if (ret < 0) + ret = 0; + } + } else if (port) { + rpc_set_port(sa, port); } return ret; } @@ -328,7 +333,7 @@ static int try_location(struct fs_context *fc, nfs_parse_server_name(buf->data, buf->len, &ctx->nfs_server.address, sizeof(ctx->nfs_server._address), - fc->net_ns); + fc->net_ns, 0); if (ctx->nfs_server.addrlen == 0) continue; @@ -496,7 +501,7 @@ static int nfs4_try_replacing_one_location(struct nfs_server *server, continue; salen = nfs_parse_server_name(buf->data, buf->len, - sap, addr_bufsize, net); + sap, addr_bufsize, net, 0); if (salen == 0) continue; rpc_set_port(sap, NFS_PORT); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ee3bc79f6ca3..b18f31b2c9e7 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -108,10 +108,6 @@ static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *, static int nfs41_free_stateid(struct nfs_server *, const nfs4_stateid *, const struct cred *, bool); #endif -static void nfs4_bitmask_set(__u32 bitmask[NFS4_BITMASK_SZ], - const __u32 *src, struct inode *inode, - struct nfs_server *server, - struct nfs4_label *label); #ifdef CONFIG_NFS_V4_SECURITY_LABEL static inline struct nfs4_label * @@ -2653,9 +2649,8 @@ static int nfs4_opendata_access(const struct cred *cred, } else if ((fmode & FMODE_READ) && !opendata->file_created) mask = NFS4_ACCESS_READ; - cache.cred = cred; nfs_access_set_mask(&cache, opendata->o_res.access_result); - nfs_access_add_cache(state->inode, &cache); + nfs_access_add_cache(state->inode, &cache, cred); flags = NFS4_ACCESS_READ | NFS4_ACCESS_EXECUTE | NFS4_ACCESS_LOOKUP; if ((mask & ~cache.mask & flags) == 0) @@ -3670,7 +3665,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) if (!nfs4_have_delegation(inode, FMODE_READ)) { nfs4_bitmask_set(calldata->arg.bitmask_store, server->cache_consistency_bitmask, - inode, server, NULL); + inode, 0); calldata->arg.bitmask = calldata->arg.bitmask_store; } else calldata->arg.bitmask = NULL; @@ -3841,7 +3836,9 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f FATTR4_WORD0_FH_EXPIRE_TYPE | FATTR4_WORD0_LINK_SUPPORT | FATTR4_WORD0_SYMLINK_SUPPORT | - FATTR4_WORD0_ACLSUPPORT; + FATTR4_WORD0_ACLSUPPORT | + FATTR4_WORD0_CASE_INSENSITIVE | + FATTR4_WORD0_CASE_PRESERVING; if (minorversion) bitmask[2] = FATTR4_WORD2_SUPPATTR_EXCLCREAT; @@ -3870,10 +3867,16 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f server->caps |= NFS_CAP_HARDLINKS; if (res.has_symlinks != 0) server->caps |= NFS_CAP_SYMLINKS; + if (res.case_insensitive) + server->caps |= NFS_CAP_CASE_INSENSITIVE; + if (res.case_preserving) + server->caps |= NFS_CAP_CASE_PRESERVING; #ifdef CONFIG_NFS_V4_SECURITY_LABEL if (res.attr_bitmask[2] & FATTR4_WORD2_SECURITY_LABEL) server->caps |= NFS_CAP_SECURITY_LABEL; #endif + if (res.attr_bitmask[0] & FATTR4_WORD0_FS_LOCATIONS) + server->caps |= NFS_CAP_FS_LOCATIONS; if (!(res.attr_bitmask[0] & FATTR4_WORD0_FILEID)) server->fattr_valid &= ~NFS_ATTR_FATTR_FILEID; if (!(res.attr_bitmask[1] & FATTR4_WORD1_MODE)) @@ -3932,6 +3935,114 @@ int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) return err; } +static void test_fs_location_for_trunking(struct nfs4_fs_location *location, + struct nfs_client *clp, + struct nfs_server *server) +{ + int i; + + for (i = 0; i < location->nservers; i++) { + struct nfs4_string *srv_loc = &location->servers[i]; + struct sockaddr addr; + size_t addrlen; + struct xprt_create xprt_args = { + .ident = 0, + .net = clp->cl_net, + }; + struct nfs4_add_xprt_data xprtdata = { + .clp = clp, + }; + struct rpc_add_xprt_test rpcdata = { + .add_xprt_test = clp->cl_mvops->session_trunk, + .data = &xprtdata, + }; + char *servername = NULL; + + if (!srv_loc->len) + continue; + + addrlen = nfs_parse_server_name(srv_loc->data, srv_loc->len, + &addr, sizeof(addr), + clp->cl_net, server->port); + if (!addrlen) + return; + xprt_args.dstaddr = &addr; + xprt_args.addrlen = addrlen; + servername = kmalloc(srv_loc->len + 1, GFP_KERNEL); + if (!servername) + return; + memcpy(servername, srv_loc->data, srv_loc->len); + servername[srv_loc->len] = '\0'; + xprt_args.servername = servername; + + xprtdata.cred = nfs4_get_clid_cred(clp); + rpc_clnt_add_xprt(clp->cl_rpcclient, &xprt_args, + rpc_clnt_setup_test_and_add_xprt, + &rpcdata); + if (xprtdata.cred) + put_cred(xprtdata.cred); + kfree(servername); + } +} + +static int _nfs4_discover_trunking(struct nfs_server *server, + struct nfs_fh *fhandle) +{ + struct nfs4_fs_locations *locations = NULL; + struct page *page; + const struct cred *cred; + struct nfs_client *clp = server->nfs_client; + const struct nfs4_state_maintenance_ops *ops = + clp->cl_mvops->state_renewal_ops; + int status = -ENOMEM, i; + + cred = ops->get_state_renewal_cred(clp); + if (cred == NULL) { + cred = nfs4_get_clid_cred(clp); + if (cred == NULL) + return -ENOKEY; + } + + page = alloc_page(GFP_KERNEL); + locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL); + if (page == NULL || locations == NULL) + goto out; + + status = nfs4_proc_get_locations(server, fhandle, locations, page, + cred); + if (status) + goto out; + + for (i = 0; i < locations->nlocations; i++) + test_fs_location_for_trunking(&locations->locations[i], clp, + server); +out: + if (page) + __free_page(page); + kfree(locations); + return status; +} + +static int nfs4_discover_trunking(struct nfs_server *server, + struct nfs_fh *fhandle) +{ + struct nfs4_exception exception = { + .interruptible = true, + }; + struct nfs_client *clp = server->nfs_client; + int err = 0; + + if (!nfs4_has_session(clp)) + goto out; + do { + err = nfs4_handle_exception(server, + _nfs4_discover_trunking(server, fhandle), + &exception); + } while (exception.retry); +out: + return err; +} + static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *info) { @@ -4441,7 +4552,8 @@ static int nfs4_proc_lookupp(struct inode *inode, struct nfs_fh *fhandle, return err; } -static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry) +static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry, + const struct cred *cred) { struct nfs_server *server = NFS_SERVER(inode); struct nfs4_accessargs args = { @@ -4455,7 +4567,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ACCESS], .rpc_argp = &args, .rpc_resp = &res, - .rpc_cred = entry->cred, + .rpc_cred = cred, }; int status = 0; @@ -4475,14 +4587,15 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry return status; } -static int nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry) +static int nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry, + const struct cred *cred) { struct nfs4_exception exception = { .interruptible = true, }; int err; do { - err = _nfs4_proc_access(inode, entry); + err = _nfs4_proc_access(inode, entry, cred); trace_nfs4_access(inode, err); err = nfs4_handle_exception(NFS_SERVER(inode), err, &exception); @@ -4663,8 +4776,10 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, nfs_fattr_init(res->dir_attr); - if (inode) + if (inode) { nfs4_inode_return_delegation(inode); + nfs_d_prune_case_insensitive_aliases(inode); + } } static void nfs4_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data) @@ -4730,6 +4845,7 @@ static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir, return 0; if (task->tk_status == 0) { + nfs_d_prune_case_insensitive_aliases(d_inode(data->old_dentry)); if (new_dir != old_dir) { /* Note: If we moved a directory, nlink will change */ nfs4_update_changeattr(old_dir, &res->old_cinfo, @@ -5422,14 +5538,14 @@ bool nfs4_write_need_cache_consistency_data(struct nfs_pgio_header *hdr) return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0; } -static void nfs4_bitmask_set(__u32 bitmask[NFS4_BITMASK_SZ], const __u32 *src, - struct inode *inode, struct nfs_server *server, - struct nfs4_label *label) +void nfs4_bitmask_set(__u32 bitmask[], const __u32 src[], + struct inode *inode, unsigned long cache_validity) { - unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity); + struct nfs_server *server = NFS_SERVER(inode); unsigned int i; memcpy(bitmask, src, sizeof(*bitmask) * NFS4_BITMASK_SZ); + cache_validity |= READ_ONCE(NFS_I(inode)->cache_validity); if (cache_validity & NFS_INO_INVALID_CHANGE) bitmask[0] |= FATTR4_WORD0_CHANGE; @@ -5441,8 +5557,6 @@ static void nfs4_bitmask_set(__u32 bitmask[NFS4_BITMASK_SZ], const __u32 *src, bitmask[1] |= FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP; if (cache_validity & NFS_INO_INVALID_NLINK) bitmask[1] |= FATTR4_WORD1_NUMLINKS; - if (label && label->len && cache_validity & NFS_INO_INVALID_LABEL) - bitmask[2] |= FATTR4_WORD2_SECURITY_LABEL; if (cache_validity & NFS_INO_INVALID_CTIME) bitmask[1] |= FATTR4_WORD1_TIME_METADATA; if (cache_validity & NFS_INO_INVALID_MTIME) @@ -5469,7 +5583,7 @@ static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr, } else { nfs4_bitmask_set(hdr->args.bitmask_store, server->cache_consistency_bitmask, - hdr->inode, server, NULL); + hdr->inode, NFS_INO_INVALID_BLOCKS); hdr->args.bitmask = hdr->args.bitmask_store; } @@ -6507,8 +6621,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, data->args.fhandle = &data->fh; data->args.stateid = &data->stateid; nfs4_bitmask_set(data->args.bitmask_store, - server->cache_consistency_bitmask, inode, server, - NULL); + server->cache_consistency_bitmask, inode, 0); data->args.bitmask = data->args.bitmask_store; nfs_copy_fh(&data->fh, NFS_FH(inode)); nfs4_stateid_copy(&data->stateid, stateid); @@ -7611,7 +7724,7 @@ static int nfs4_xattr_set_nfs4_user(const struct xattr_handler *handler, const char *key, const void *buf, size_t buflen, int flags) { - struct nfs_access_entry cache; + u32 mask; int ret; if (!nfs_server_capable(inode, NFS_CAP_XATTR)) @@ -7626,8 +7739,8 @@ static int nfs4_xattr_set_nfs4_user(const struct xattr_handler *handler, * do a cached access check for the XA* flags to possibly avoid * doing an RPC and getting EACCES back. */ - if (!nfs_access_get_cached(inode, current_cred(), &cache, true)) { - if (!(cache.mask & NFS_ACCESS_XAWRITE)) + if (!nfs_access_get_cached(inode, current_cred(), &mask, true)) { + if (!(mask & NFS_ACCESS_XAWRITE)) return -EACCES; } @@ -7648,14 +7761,14 @@ static int nfs4_xattr_get_nfs4_user(const struct xattr_handler *handler, struct dentry *unused, struct inode *inode, const char *key, void *buf, size_t buflen) { - struct nfs_access_entry cache; + u32 mask; ssize_t ret; if (!nfs_server_capable(inode, NFS_CAP_XATTR)) return -EOPNOTSUPP; - if (!nfs_access_get_cached(inode, current_cred(), &cache, true)) { - if (!(cache.mask & NFS_ACCESS_XAREAD)) + if (!nfs_access_get_cached(inode, current_cred(), &mask, true)) { + if (!(mask & NFS_ACCESS_XAREAD)) return -EACCES; } @@ -7680,13 +7793,13 @@ nfs4_listxattr_nfs4_user(struct inode *inode, char *list, size_t list_len) ssize_t ret, size; char *buf; size_t buflen; - struct nfs_access_entry cache; + u32 mask; if (!nfs_server_capable(inode, NFS_CAP_XATTR)) return 0; - if (!nfs_access_get_cached(inode, current_cred(), &cache, true)) { - if (!(cache.mask & NFS_ACCESS_XALIST)) + if (!nfs_access_get_cached(inode, current_cred(), &mask, true)) { + if (!(mask & NFS_ACCESS_XALIST)) return 0; } @@ -7818,18 +7931,18 @@ int nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir, * appended to this compound to identify the client ID which is * performing recovery. */ -static int _nfs40_proc_get_locations(struct inode *inode, +static int _nfs40_proc_get_locations(struct nfs_server *server, + struct nfs_fh *fhandle, struct nfs4_fs_locations *locations, struct page *page, const struct cred *cred) { - struct nfs_server *server = NFS_SERVER(inode); struct rpc_clnt *clnt = server->client; u32 bitmask[2] = { [0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS, }; struct nfs4_fs_locations_arg args = { .clientid = server->nfs_client->cl_clientid, - .fh = NFS_FH(inode), + .fh = fhandle, .page = page, .bitmask = bitmask, .migration = 1, /* skip LOOKUP */ @@ -7875,17 +7988,17 @@ static int _nfs40_proc_get_locations(struct inode *inode, * When the client supports GETATTR(fs_locations_info), it can * be plumbed in here. */ -static int _nfs41_proc_get_locations(struct inode *inode, +static int _nfs41_proc_get_locations(struct nfs_server *server, + struct nfs_fh *fhandle, struct nfs4_fs_locations *locations, struct page *page, const struct cred *cred) { - struct nfs_server *server = NFS_SERVER(inode); struct rpc_clnt *clnt = server->client; u32 bitmask[2] = { [0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS, }; struct nfs4_fs_locations_arg args = { - .fh = NFS_FH(inode), + .fh = fhandle, .page = page, .bitmask = bitmask, .migration = 1, /* skip LOOKUP */ @@ -7934,11 +8047,11 @@ static int _nfs41_proc_get_locations(struct inode *inode, * -NFS4ERR_LEASE_MOVED is returned if the server still has leases * from this client that require migration recovery. */ -int nfs4_proc_get_locations(struct inode *inode, +int nfs4_proc_get_locations(struct nfs_server *server, + struct nfs_fh *fhandle, struct nfs4_fs_locations *locations, struct page *page, const struct cred *cred) { - struct nfs_server *server = NFS_SERVER(inode); struct nfs_client *clp = server->nfs_client; const struct nfs4_mig_recovery_ops *ops = clp->cl_mvops->mig_recovery_ops; @@ -7951,10 +8064,11 @@ int nfs4_proc_get_locations(struct inode *inode, (unsigned long long)server->fsid.major, (unsigned long long)server->fsid.minor, clp->cl_hostname); - nfs_display_fhandle(NFS_FH(inode), __func__); + nfs_display_fhandle(fhandle, __func__); do { - status = ops->get_locations(inode, locations, page, cred); + status = ops->get_locations(server, fhandle, locations, page, + cred); if (status != -NFS4ERR_DELAY) break; nfs4_handle_exception(server, status, &exception); @@ -10423,6 +10537,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .free_client = nfs4_free_client, .create_server = nfs4_create_server, .clone_server = nfs_clone_server, + .discover_trunking = nfs4_discover_trunking, }; static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = { diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index d88b779f9dd0..f5a62c0d999b 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -2098,7 +2098,8 @@ static int nfs4_try_migration(struct nfs_server *server, const struct cred *cred } inode = d_inode(server->super->s_root); - result = nfs4_proc_get_locations(inode, locations, page, cred); + result = nfs4_proc_get_locations(server, NFS_FH(inode), locations, + page, cred); if (result) { dprintk("<-- %s: failed to retrieve fs_locations: %d\n", __func__, result); @@ -2106,6 +2107,9 @@ static int nfs4_try_migration(struct nfs_server *server, const struct cred *cred } result = -NFS4ERR_NXIO; + if (!locations->nlocations) + goto out; + if (!(locations->fattr.valid & NFS_ATTR_FATTR_V4_LOCATIONS)) { dprintk("<-- %s: No fs_locations data, migration skipped\n", __func__); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 69862bf6db00..8e70b92df4cc 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -3533,6 +3533,42 @@ static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint return 0; } +static int decode_attr_case_insensitive(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) +{ + __be32 *p; + + *res = 0; + if (unlikely(bitmap[0] & (FATTR4_WORD0_CASE_INSENSITIVE - 1U))) + return -EIO; + if (likely(bitmap[0] & FATTR4_WORD0_CASE_INSENSITIVE)) { + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + return -EIO; + *res = be32_to_cpup(p); + bitmap[0] &= ~FATTR4_WORD0_CASE_INSENSITIVE; + } + dprintk("%s: case_insensitive=%s\n", __func__, *res == 0 ? "false" : "true"); + return 0; +} + +static int decode_attr_case_preserving(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) +{ + __be32 *p; + + *res = 0; + if (unlikely(bitmap[0] & (FATTR4_WORD0_CASE_PRESERVING - 1U))) + return -EIO; + if (likely(bitmap[0] & FATTR4_WORD0_CASE_PRESERVING)) { + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + return -EIO; + *res = be32_to_cpup(p); + bitmap[0] &= ~FATTR4_WORD0_CASE_PRESERVING; + } + dprintk("%s: case_preserving=%s\n", __func__, *res == 0 ? "false" : "true"); + return 0; +} + static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid) { __be32 *p; @@ -3696,8 +3732,6 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st if (unlikely(!p)) goto out_eio; n = be32_to_cpup(p); - if (n <= 0) - goto out_eio; for (res->nlocations = 0; res->nlocations < n; res->nlocations++) { u32 m; struct nfs4_fs_location *loc; @@ -4200,10 +4234,11 @@ static int decode_attr_security_label(struct xdr_stream *xdr, uint32_t *bitmap, } else printk(KERN_WARNING "%s: label too long (%u)!\n", __func__, len); + if (label && label->label) + dprintk("%s: label=%.*s, len=%d, PI=%d, LFS=%d\n", + __func__, label->len, (char *)label->label, + label->len, label->pi, label->lfs); } - if (label && label->label) - dprintk("%s: label=%s, len=%d, PI=%d, LFS=%d\n", __func__, - (char *)label->label, label->len, label->pi, label->lfs); return status; } @@ -4412,6 +4447,10 @@ static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_re goto xdr_error; if ((status = decode_attr_aclsupport(xdr, bitmap, &res->acl_bitmask)) != 0) goto xdr_error; + if ((status = decode_attr_case_insensitive(xdr, bitmap, &res->case_insensitive)) != 0) + goto xdr_error; + if ((status = decode_attr_case_preserving(xdr, bitmap, &res->case_preserving)) != 0) + goto xdr_error; if ((status = decode_attr_exclcreat_supported(xdr, bitmap, res->exclcreat_bitmask)) != 0) goto xdr_error; diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c index 8cb70755e3c9..a6f740366963 100644 --- a/fs/nfs/sysfs.c +++ b/fs/nfs/sysfs.c @@ -142,10 +142,11 @@ static struct attribute *nfs_netns_client_attrs[] = { &nfs_netns_client_id.attr, NULL, }; +ATTRIBUTE_GROUPS(nfs_netns_client); static struct kobj_type nfs_netns_client_type = { .release = nfs_netns_client_release, - .default_attrs = nfs_netns_client_attrs, + .default_groups = nfs_netns_client_groups, .sysfs_ops = &kobj_sysfs_ops, .namespace = nfs_netns_client_namespace, }; diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index b9f27fbcd768..68b020f2002b 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1247,7 +1247,8 @@ static void nfsdfs_remove_file(struct inode *dir, struct dentry *dentry) clear_ncl(d_inode(dentry)); dget(dentry); ret = simple_unlink(dir, dentry); - d_delete(dentry); + d_drop(dentry); + fsnotify_unlink(dir, dentry); dput(dentry); WARN_ON_ONCE(ret); } @@ -1338,8 +1339,8 @@ void nfsd_client_rmdir(struct dentry *dentry) dget(dentry); ret = simple_rmdir(dir, dentry); WARN_ON_ONCE(ret); + d_drop(dentry); fsnotify_rmdir(dir, dentry); - d_delete(dentry); dput(dentry); inode_unlock(dir); } diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 73b1615f9d96..1026f67b1d1e 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -158,7 +158,6 @@ static size_t fanotify_event_len(unsigned int info_mode, struct fanotify_event *event) { size_t event_len = FAN_EVENT_METADATA_LEN; - struct fanotify_info *info; int fh_len; int dot_len = 0; @@ -168,8 +167,6 @@ static size_t fanotify_event_len(unsigned int info_mode, if (fanotify_is_error_event(event->mask)) event_len += FANOTIFY_ERROR_INFO_LEN; - info = fanotify_event_info(event); - if (fanotify_event_has_any_dir_fh(event)) { event_len += fanotify_dir_name_info_len(event); } else if ((info_mode & FAN_REPORT_NAME) && diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index 731558a6f27d..dd77b7aaabf5 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -661,17 +661,6 @@ static struct ctl_table ocfs2_nm_table[] = { { } }; -static struct ctl_table ocfs2_mod_table[] = { - { - .procname = "nm", - .data = NULL, - .maxlen = 0, - .mode = 0555, - .child = ocfs2_nm_table - }, - { } -}; - static struct ctl_table_header *ocfs2_table_header; /* @@ -682,7 +671,7 @@ static int __init ocfs2_stack_glue_init(void) { strcpy(cluster_stack_name, OCFS2_STACK_PLUGIN_O2CB); - ocfs2_table_header = register_sysctl("fs/ocfs2", ocfs2_mod_table); + ocfs2_table_header = register_sysctl("fs/ocfs2/nm", ocfs2_nm_table); if (!ocfs2_table_header) { printk(KERN_ERR "ocfs2 stack glue: unable to register sysctl\n"); diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 1d6b7a50736b..ea8f6cd01f50 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -258,10 +258,6 @@ int udf_expand_file_adinicb(struct inode *inode) char *kaddr; struct udf_inode_info *iinfo = UDF_I(inode); int err; - struct writeback_control udf_wbc = { - .sync_mode = WB_SYNC_NONE, - .nr_to_write = 1, - }; WARN_ON_ONCE(!inode_is_locked(inode)); if (!iinfo->i_lenAlloc) { @@ -305,8 +301,10 @@ int udf_expand_file_adinicb(struct inode *inode) iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG; /* from now on we have normal address_space methods */ inode->i_data.a_ops = &udf_aops; + set_page_dirty(page); + unlock_page(page); up_write(&iinfo->i_data_sem); - err = inode->i_data.a_ops->writepage(page, &udf_wbc); + err = filemap_fdatawrite(inode->i_mapping); if (err) { /* Restore everything back so that we don't lose data... */ lock_page(page); @@ -317,6 +315,7 @@ int udf_expand_file_adinicb(struct inode *inode) unlock_page(page); iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB; inode->i_data.a_ops = &udf_adinicb_aops; + iinfo->i_lenAlloc = inode->i_size; up_write(&iinfo->i_data_sem); } put_page(page); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9c95df26fc26..f35aea98bc35 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1258,6 +1258,7 @@ unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors, void disk_end_io_acct(struct gendisk *disk, unsigned int op, unsigned long start_time); +void bio_start_io_acct_time(struct bio *bio, unsigned long start_time); unsigned long bio_start_io_acct(struct bio *bio); void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time, struct block_device *orig_bdev); diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index a26f37a27167..11efc45de66a 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -111,7 +111,7 @@ struct ethtool_link_ext_state_info { enum ethtool_link_ext_substate_bad_signal_integrity bad_signal_integrity; enum ethtool_link_ext_substate_cable_issue cable_issue; enum ethtool_link_ext_substate_module module; - u8 __link_ext_substate; + u32 __link_ext_substate; }; }; diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 3a2d7dc3c607..bb8467cd11ae 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -225,16 +225,53 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode, } /* + * fsnotify_delete - @dentry was unlinked and unhashed + * + * Caller must make sure that dentry->d_name is stable. + * + * Note: unlike fsnotify_unlink(), we have to pass also the unlinked inode + * as this may be called after d_delete() and old_dentry may be negative. + */ +static inline void fsnotify_delete(struct inode *dir, struct inode *inode, + struct dentry *dentry) +{ + __u32 mask = FS_DELETE; + + if (S_ISDIR(inode->i_mode)) + mask |= FS_ISDIR; + + fsnotify_name(mask, inode, FSNOTIFY_EVENT_INODE, dir, &dentry->d_name, + 0); +} + +/** + * d_delete_notify - delete a dentry and call fsnotify_delete() + * @dentry: The dentry to delete + * + * This helper is used to guaranty that the unlinked inode cannot be found + * by lookup of this name after fsnotify_delete() event has been delivered. + */ +static inline void d_delete_notify(struct inode *dir, struct dentry *dentry) +{ + struct inode *inode = d_inode(dentry); + + ihold(inode); + d_delete(dentry); + fsnotify_delete(dir, inode, dentry); + iput(inode); +} + +/* * fsnotify_unlink - 'name' was unlinked * * Caller must make sure that dentry->d_name is stable. */ static inline void fsnotify_unlink(struct inode *dir, struct dentry *dentry) { - /* Expected to be called before d_delete() */ - WARN_ON_ONCE(d_is_negative(dentry)); + if (WARN_ON_ONCE(d_is_negative(dentry))) + return; - fsnotify_dirent(dir, dentry, FS_DELETE); + fsnotify_delete(dir, d_inode(dentry), dentry); } /* @@ -258,10 +295,10 @@ static inline void fsnotify_mkdir(struct inode *dir, struct dentry *dentry) */ static inline void fsnotify_rmdir(struct inode *dir, struct dentry *dentry) { - /* Expected to be called before d_delete() */ - WARN_ON_ONCE(d_is_negative(dentry)); + if (WARN_ON_ONCE(d_is_negative(dentry))) + return; - fsnotify_dirent(dir, dentry, FS_DELETE | FS_ISDIR); + fsnotify_delete(dir, d_inode(dentry), dentry); } /* diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index a5a724c308d8..819ec92dc2a8 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -80,7 +80,7 @@ LSM_HOOK(int, 0, sb_clone_mnt_opts, const struct super_block *oldsb, unsigned long *set_kern_flags) LSM_HOOK(int, 0, move_mount, const struct path *from_path, const struct path *to_path) -LSM_HOOK(int, 0, dentry_init_security, struct dentry *dentry, +LSM_HOOK(int, -EOPNOTSUPP, dentry_init_security, struct dentry *dentry, int mode, const struct qstr *name, const char **xattr_name, void **ctx, u32 *ctxlen) LSM_HOOK(int, 0, dentry_create_files_as, struct dentry *dentry, int mode, diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3213c7227b59..e490b84732d1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2548,6 +2548,7 @@ struct packet_type { struct net_device *); bool (*id_match)(struct packet_type *ptype, struct sock *sk); + struct net *af_packet_net; void *af_packet_priv; struct list_head list; }; diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 00835bacd236..02aa49323d1d 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -61,7 +61,9 @@ struct nfs_access_entry { struct rb_node rb_node; struct list_head lru; - const struct cred * cred; + kuid_t fsuid; + kgid_t fsgid; + struct group_info *group_info; __u32 mask; struct rcu_head rcu_head; }; @@ -395,7 +397,7 @@ extern int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fa extern int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fattr *fattr); extern int nfs_getattr(struct user_namespace *, const struct path *, struct kstat *, u32, unsigned int); -extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *); +extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *, const struct cred *); extern void nfs_access_set_mask(struct nfs_access_entry *, u32); extern int nfs_permission(struct user_namespace *, struct inode *, int); extern int nfs_open(struct inode *, struct file *); @@ -532,8 +534,8 @@ extern int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fh, struct nfs_fattr *fattr); extern int nfs_may_open(struct inode *inode, const struct cred *cred, int openflags); extern void nfs_access_zap_cache(struct inode *inode); -extern int nfs_access_get_cached(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res, - bool may_block); +extern int nfs_access_get_cached(struct inode *inode, const struct cred *cred, + u32 *mask, bool may_block); /* * linux/fs/nfs/symlink.c diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 77b2dba27bbb..ca0959e51e81 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -266,6 +266,8 @@ struct nfs_server { #define NFS_CAP_ACLS (1U << 3) #define NFS_CAP_ATOMIC_OPEN (1U << 4) #define NFS_CAP_LGOPEN (1U << 5) +#define NFS_CAP_CASE_INSENSITIVE (1U << 6) +#define NFS_CAP_CASE_PRESERVING (1U << 7) #define NFS_CAP_POSIX_LOCK (1U << 14) #define NFS_CAP_UIDGID_NOMAP (1U << 15) #define NFS_CAP_STATEID_NFSV41 (1U << 16) @@ -282,5 +284,5 @@ struct nfs_server { #define NFS_CAP_COPY_NOTIFY (1U << 27) #define NFS_CAP_XATTR (1U << 28) #define NFS_CAP_READ_PLUS (1U << 29) - +#define NFS_CAP_FS_LOCATIONS (1U << 30) #endif diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 967a0098f0a9..728cb0c1f0b6 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1194,6 +1194,8 @@ struct nfs4_server_caps_res { u32 has_links; u32 has_symlinks; u32 fh_expire_type; + u32 case_insensitive; + u32 case_preserving; }; #define NFS4_PATHNAME_MAXCOMPONENTS 512 @@ -1737,7 +1739,7 @@ struct nfs_rpc_ops { struct nfs_fh *, struct nfs_fattr *); int (*lookupp) (struct inode *, struct nfs_fh *, struct nfs_fattr *); - int (*access) (struct inode *, struct nfs_access_entry *); + int (*access) (struct inode *, struct nfs_access_entry *, const struct cred *); int (*readlink)(struct inode *, struct page *, unsigned int, unsigned int); int (*create) (struct inode *, struct dentry *, @@ -1795,6 +1797,7 @@ struct nfs_rpc_ops { struct nfs_server *(*create_server)(struct fs_context *); struct nfs_server *(*clone_server)(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, rpc_authflavor_t); + int (*discover_trunking)(struct nfs_server *, struct nfs_fh *); }; /* diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 117f230bcdfd..733649184b27 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -693,18 +693,6 @@ struct perf_event { u64 total_time_running; u64 tstamp; - /* - * timestamp shadows the actual context timing but it can - * be safely used in NMI interrupt context. It reflects the - * context time as it was when the event was last scheduled in, - * or when ctx_sched_in failed to schedule the event because we - * run out of PMC. - * - * ctx_time already accounts for ctx->timestamp. Therefore to - * compute ctx_time for a sample, simply add perf_clock(). - */ - u64 shadow_ctx_time; - struct perf_event_attr attr; u16 header_size; u16 id_header_size; @@ -852,6 +840,7 @@ struct perf_event_context { */ u64 time; u64 timestamp; + u64 timeoffset; /* * These fields let us detect when two contexts have both @@ -934,6 +923,8 @@ struct bpf_perf_event_data_kern { struct perf_cgroup_info { u64 time; u64 timestamp; + u64 timeoffset; + int active; }; struct perf_cgroup { diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 7c7e627503d2..07481bb87d4e 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -86,4 +86,9 @@ extern struct pid_namespace *task_active_pid_ns(struct task_struct *tsk); void pidhash_init(void); void pid_idr_init(void); +static inline bool task_is_in_init_pid_ns(struct task_struct *tsk) +{ + return task_active_pid_ns(tsk) == &init_pid_ns; +} + #endif /* _LINUX_PID_NS_H */ diff --git a/include/linux/psi.h b/include/linux/psi.h index a70ca833c6d7..f8ce53bfdb2a 100644 --- a/include/linux/psi.h +++ b/include/linux/psi.h @@ -33,7 +33,7 @@ void cgroup_move_task(struct task_struct *p, struct css_set *to); struct psi_trigger *psi_trigger_create(struct psi_group *group, char *buf, size_t nbytes, enum psi_res res); -void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *t); +void psi_trigger_destroy(struct psi_trigger *t); __poll_t psi_trigger_poll(void **trigger_ptr, struct file *file, poll_table *wait); diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h index 516c0fe836fd..1a3cef26d129 100644 --- a/include/linux/psi_types.h +++ b/include/linux/psi_types.h @@ -141,9 +141,6 @@ struct psi_trigger { * events to one per window */ u64 last_event_time; - - /* Refcounting to prevent premature destruction */ - struct kref refcount; }; struct psi_group { diff --git a/include/linux/quota.h b/include/linux/quota.h index 18ebd39c9487..fd692b4a41d5 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -91,7 +91,7 @@ extern bool qid_valid(struct kqid qid); * * When there is no mapping defined for the user-namespace, type, * qid tuple an invalid kqid is returned. Callers are expected to - * test for and handle handle invalid kqids being returned. + * test for and handle invalid kqids being returned. * Invalid kqids may be tested for using qid_valid(). */ static inline struct kqid make_kqid(struct user_namespace *from, diff --git a/include/linux/sched.h b/include/linux/sched.h index 508b91d57470..f5b2be39a78c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -619,10 +619,6 @@ struct sched_dl_entity { * task has to wait for a replenishment to be performed at the * next firing of dl_timer. * - * @dl_boosted tells if we are boosted due to DI. If so we are - * outside bandwidth enforcement mechanism (but only until we - * exit the critical section); - * * @dl_yielded tells if task gave up the CPU before consuming * all its available runtime during the last job. * diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index bf11e1fbd69b..8a636e678902 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -318,7 +318,7 @@ enum skb_drop_reason { SKB_DROP_REASON_NO_SOCKET, SKB_DROP_REASON_PKT_TOO_SMALL, SKB_DROP_REASON_TCP_CSUM, - SKB_DROP_REASON_TCP_FILTER, + SKB_DROP_REASON_SOCKET_FILTER, SKB_DROP_REASON_UDP_CSUM, SKB_DROP_REASON_MAX, }; diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 5785d909c321..3e8ecdebe601 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -430,15 +430,7 @@ struct platform_hibernation_ops { #ifdef CONFIG_HIBERNATION /* kernel/power/snapshot.c */ -extern void __register_nosave_region(unsigned long b, unsigned long e, int km); -static inline void __init register_nosave_region(unsigned long b, unsigned long e) -{ - __register_nosave_region(b, e, 0); -} -static inline void __init register_nosave_region_late(unsigned long b, unsigned long e) -{ - __register_nosave_region(b, e, 1); -} +extern void register_nosave_region(unsigned long b, unsigned long e); extern int swsusp_page_is_forbidden(struct page *); extern void swsusp_set_page_free(struct page *); extern void swsusp_unset_page_free(struct page *); @@ -458,7 +450,6 @@ int pfn_is_nosave(unsigned long pfn); int hibernate_quiet_exec(int (*func)(void *data), void *data); #else /* CONFIG_HIBERNATION */ static inline void register_nosave_region(unsigned long b, unsigned long e) {} -static inline void register_nosave_region_late(unsigned long b, unsigned long e) {} static inline int swsusp_page_is_forbidden(struct page *p) { return 0; } static inline void swsusp_set_page_free(struct page *p) {} static inline void swsusp_unset_page_free(struct page *p) {} diff --git a/include/linux/usb/role.h b/include/linux/usb/role.h index 031f148ab373..b5deafd91f67 100644 --- a/include/linux/usb/role.h +++ b/include/linux/usb/role.h @@ -92,6 +92,12 @@ fwnode_usb_role_switch_get(struct fwnode_handle *node) static inline void usb_role_switch_put(struct usb_role_switch *sw) { } static inline struct usb_role_switch * +usb_role_switch_find_by_fwnode(const struct fwnode_handle *fwnode) +{ + return NULL; +} + +static inline struct usb_role_switch * usb_role_switch_register(struct device *parent, const struct usb_role_switch_desc *desc) { diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 78ea3e332688..e7ce719838b5 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -6,6 +6,8 @@ #define RTR_SOLICITATION_INTERVAL (4*HZ) #define RTR_SOLICITATION_MAX_INTERVAL (3600*HZ) /* 1 hour */ +#define MIN_VALID_LIFETIME (2*3600) /* 2 hours */ + #define TEMP_VALID_LIFETIME (7*86400) #define TEMP_PREFERRED_LIFETIME (86400) #define REGEN_MAX_RETRY (3) diff --git a/include/net/bonding.h b/include/net/bonding.h index f6ae3a4baea4..83cfd2d70247 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -346,7 +346,7 @@ static inline bool bond_uses_primary(struct bonding *bond) static inline struct net_device *bond_option_active_slave_get_rcu(struct bonding *bond) { - struct slave *slave = rcu_dereference(bond->curr_active_slave); + struct slave *slave = rcu_dereference_rtnl(bond->curr_active_slave); return bond_uses_primary(bond) && slave ? slave->dev : NULL; } diff --git a/include/net/ip.h b/include/net/ip.h index 81e23a102a0d..b51bae43b0dd 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -525,19 +525,18 @@ static inline void ip_select_ident_segs(struct net *net, struct sk_buff *skb, { struct iphdr *iph = ip_hdr(skb); + /* We had many attacks based on IPID, use the private + * generator as much as we can. + */ + if (sk && inet_sk(sk)->inet_daddr) { + iph->id = htons(inet_sk(sk)->inet_id); + inet_sk(sk)->inet_id += segs; + return; + } if ((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) { - /* This is only to work around buggy Windows95/2000 - * VJ compression implementations. If the ID field - * does not change, they drop every other packet in - * a TCP stream using header compression. - */ - if (sk && inet_sk(sk)->inet_daddr) { - iph->id = htons(inet_sk(sk)->inet_id); - inet_sk(sk)->inet_id += segs; - } else { - iph->id = 0; - } + iph->id = 0; } else { + /* Unfortunately we need the big hammer to get a suitable IPID */ __ip_select_ident(net, iph, segs); } } diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index a9a4ccc0cdb5..40ae8f1b18e5 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -282,7 +282,7 @@ static inline bool fib6_get_cookie_safe(const struct fib6_info *f6i, fn = rcu_dereference(f6i->fib6_node); if (fn) { - *cookie = fn->fn_sernum; + *cookie = READ_ONCE(fn->fn_sernum); /* pairs with smp_wmb() in __fib6_update_sernum_upto_root() */ smp_rmb(); status = true; diff --git a/include/net/route.h b/include/net/route.h index 4c858dcf1aa8..25404fc2b483 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -370,7 +370,7 @@ static inline struct neighbour *ip_neigh_gw4(struct net_device *dev, { struct neighbour *neigh; - neigh = __ipv4_neigh_lookup_noref(dev, daddr); + neigh = __ipv4_neigh_lookup_noref(dev, (__force u32)daddr); if (unlikely(!neigh)) neigh = __neigh_create(&arp_tbl, &daddr, dev, false); diff --git a/include/net/tcp.h b/include/net/tcp.h index 44e442bf23f9..b9fc978fb2ca 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1369,6 +1369,7 @@ static inline bool tcp_checksum_complete(struct sk_buff *skb) bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb); +#ifdef CONFIG_INET void __sk_defer_free_flush(struct sock *sk); static inline void sk_defer_free_flush(struct sock *sk) @@ -1377,6 +1378,9 @@ static inline void sk_defer_free_flush(struct sock *sk) return; __sk_defer_free_flush(sk); } +#else +static inline void sk_defer_free_flush(struct sock *sk) {} +#endif int tcp_filter(struct sock *sk, struct sk_buff *skb); void tcp_set_state(struct sock *sk, int state); diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index 3e042ca2cedb..a8a64b97504d 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -14,7 +14,7 @@ EM(SKB_DROP_REASON_NO_SOCKET, NO_SOCKET) \ EM(SKB_DROP_REASON_PKT_TOO_SMALL, PKT_TOO_SMALL) \ EM(SKB_DROP_REASON_TCP_CSUM, TCP_CSUM) \ - EM(SKB_DROP_REASON_TCP_FILTER, TCP_FILTER) \ + EM(SKB_DROP_REASON_SOCKET_FILTER, SOCKET_FILTER) \ EM(SKB_DROP_REASON_UDP_CSUM, UDP_CSUM) \ EMe(SKB_DROP_REASON_MAX, MAX) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 1e566ac4b812..29982d60b68a 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -794,6 +794,9 @@ RPC_SHOW_SOCKET RPC_SHOW_SOCK + +#include <trace/events/net_probe_common.h> + /* * Now redefine the EM() and EMe() macros to map the enums to the strings * that will be printed in the output. @@ -816,27 +819,32 @@ DECLARE_EVENT_CLASS(xs_socket_event, __field(unsigned int, socket_state) __field(unsigned int, sock_state) __field(unsigned long long, ino) - __string(dstaddr, - xprt->address_strings[RPC_DISPLAY_ADDR]) - __string(dstport, - xprt->address_strings[RPC_DISPLAY_PORT]) + __array(__u8, saddr, sizeof(struct sockaddr_in6)) + __array(__u8, daddr, sizeof(struct sockaddr_in6)) ), TP_fast_assign( struct inode *inode = SOCK_INODE(socket); + const struct sock *sk = socket->sk; + const struct inet_sock *inet = inet_sk(sk); + + memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); + memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); + + TP_STORE_ADDR_PORTS(__entry, inet, sk); + __entry->socket_state = socket->state; __entry->sock_state = socket->sk->sk_state; __entry->ino = (unsigned long long)inode->i_ino; - __assign_str(dstaddr, - xprt->address_strings[RPC_DISPLAY_ADDR]); - __assign_str(dstport, - xprt->address_strings[RPC_DISPLAY_PORT]); + ), TP_printk( - "socket:[%llu] dstaddr=%s/%s " + "socket:[%llu] srcaddr=%pISpc dstaddr=%pISpc " "state=%u (%s) sk_state=%u (%s)", - __entry->ino, __get_str(dstaddr), __get_str(dstport), + __entry->ino, + __entry->saddr, + __entry->daddr, __entry->socket_state, rpc_show_socket_state(__entry->socket_state), __entry->sock_state, @@ -866,29 +874,33 @@ DECLARE_EVENT_CLASS(xs_socket_event_done, __field(unsigned int, socket_state) __field(unsigned int, sock_state) __field(unsigned long long, ino) - __string(dstaddr, - xprt->address_strings[RPC_DISPLAY_ADDR]) - __string(dstport, - xprt->address_strings[RPC_DISPLAY_PORT]) + __array(__u8, saddr, sizeof(struct sockaddr_in6)) + __array(__u8, daddr, sizeof(struct sockaddr_in6)) ), TP_fast_assign( struct inode *inode = SOCK_INODE(socket); + const struct sock *sk = socket->sk; + const struct inet_sock *inet = inet_sk(sk); + + memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); + memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); + + TP_STORE_ADDR_PORTS(__entry, inet, sk); + __entry->socket_state = socket->state; __entry->sock_state = socket->sk->sk_state; __entry->ino = (unsigned long long)inode->i_ino; __entry->error = error; - __assign_str(dstaddr, - xprt->address_strings[RPC_DISPLAY_ADDR]); - __assign_str(dstport, - xprt->address_strings[RPC_DISPLAY_PORT]); ), TP_printk( - "error=%d socket:[%llu] dstaddr=%s/%s " + "error=%d socket:[%llu] srcaddr=%pISpc dstaddr=%pISpc " "state=%u (%s) sk_state=%u (%s)", __entry->error, - __entry->ino, __get_str(dstaddr), __get_str(dstport), + __entry->ino, + __entry->saddr, + __entry->daddr, __entry->socket_state, rpc_show_socket_state(__entry->socket_state), __entry->sock_state, @@ -953,7 +965,8 @@ TRACE_EVENT(rpc_socket_nospace, { BIT(XPRT_REMOVE), "REMOVE" }, \ { BIT(XPRT_CONGESTED), "CONGESTED" }, \ { BIT(XPRT_CWND_WAIT), "CWND_WAIT" }, \ - { BIT(XPRT_WRITE_SPACE), "WRITE_SPACE" }) + { BIT(XPRT_WRITE_SPACE), "WRITE_SPACE" }, \ + { BIT(XPRT_SND_IS_COOKIE), "SND_IS_COOKIE" }) DECLARE_EVENT_CLASS(rpc_xprt_lifetime_class, TP_PROTO( @@ -1150,8 +1163,11 @@ DECLARE_EVENT_CLASS(xprt_writelock_event, __entry->task_id = -1; __entry->client_id = -1; } - __entry->snd_task_id = xprt->snd_task ? - xprt->snd_task->tk_pid : -1; + if (xprt->snd_task && + !test_bit(XPRT_SND_IS_COOKIE, &xprt->state)) + __entry->snd_task_id = xprt->snd_task->tk_pid; + else + __entry->snd_task_id = -1; ), TP_printk(SUNRPC_TRACE_TASK_SPECIFIER @@ -1196,8 +1212,12 @@ DECLARE_EVENT_CLASS(xprt_cong_event, __entry->task_id = -1; __entry->client_id = -1; } - __entry->snd_task_id = xprt->snd_task ? - xprt->snd_task->tk_pid : -1; + if (xprt->snd_task && + !test_bit(XPRT_SND_IS_COOKIE, &xprt->state)) + __entry->snd_task_id = xprt->snd_task->tk_pid; + else + __entry->snd_task_id = -1; + __entry->cong = xprt->cong; __entry->cwnd = xprt->cwnd; __entry->wait = test_bit(XPRT_CWND_WAIT, &xprt->state); diff --git a/include/trace/perf.h b/include/trace/perf.h index ea4405de175a..5d48c46a3008 100644 --- a/include/trace/perf.h +++ b/include/trace/perf.h @@ -23,8 +23,9 @@ #undef __get_rel_dynamic_array #define __get_rel_dynamic_array(field) \ - ((void *)(&__entry->__rel_loc_##field) + \ - sizeof(__entry->__rel_loc_##field) + \ + ((void *)__entry + \ + offsetof(typeof(*__entry), __rel_loc_##field) + \ + sizeof(__entry->__rel_loc_##field) + \ (__entry->__rel_loc_##field & 0xffff)) #undef __get_rel_dynamic_array_len diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h index 8c6f7c433518..3d29919045af 100644 --- a/include/trace/trace_events.h +++ b/include/trace/trace_events.h @@ -128,7 +128,7 @@ TRACE_MAKE_SYSTEM_STR(); struct trace_event_raw_##name { \ struct trace_entry ent; \ tstruct \ - char __data[0]; \ + char __data[]; \ }; \ \ static struct trace_event_class event_class_##name; @@ -318,9 +318,10 @@ TRACE_MAKE_SYSTEM_STR(); #define __get_str(field) ((char *)__get_dynamic_array(field)) #undef __get_rel_dynamic_array -#define __get_rel_dynamic_array(field) \ - ((void *)(&__entry->__rel_loc_##field) + \ - sizeof(__entry->__rel_loc_##field) + \ +#define __get_rel_dynamic_array(field) \ + ((void *)__entry + \ + offsetof(typeof(*__entry), __rel_loc_##field) + \ + sizeof(__entry->__rel_loc_##field) + \ (__entry->__rel_loc_##field & 0xffff)) #undef __get_rel_dynamic_array_len diff --git a/include/uapi/linux/cyclades.h b/include/uapi/linux/cyclades.h new file mode 100644 index 000000000000..6225c5aebe06 --- /dev/null +++ b/include/uapi/linux/cyclades.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + +#ifndef _UAPI_LINUX_CYCLADES_H +#define _UAPI_LINUX_CYCLADES_H + +#warning "Support for features provided by this header has been removed" +#warning "Please consider updating your code" + +struct cyclades_monitor { + unsigned long int_count; + unsigned long char_count; + unsigned long char_max; + unsigned long char_last; +}; + +#define CYGETMON 0x435901 +#define CYGETTHRESH 0x435902 +#define CYSETTHRESH 0x435903 +#define CYGETDEFTHRESH 0x435904 +#define CYSETDEFTHRESH 0x435905 +#define CYGETTIMEOUT 0x435906 +#define CYSETTIMEOUT 0x435907 +#define CYGETDEFTIMEOUT 0x435908 +#define CYSETDEFTIMEOUT 0x435909 +#define CYSETRFLOW 0x43590a +#define CYGETRFLOW 0x43590b +#define CYSETRTSDTR_INV 0x43590c +#define CYGETRTSDTR_INV 0x43590d +#define CYZSETPOLLCYCLE 0x43590e +#define CYZGETPOLLCYCLE 0x43590f +#define CYGETCD1400VER 0x435910 +#define CYSETWAIT 0x435912 +#define CYGETWAIT 0x435913 + +#endif /* _UAPI_LINUX_CYCLADES_H */ diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 9563d294f181..b46bcdb0cab1 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1133,6 +1133,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206 #define KVM_CAP_VM_GPA_BITS 207 #define KVM_CAP_XSAVE2 208 +#define KVM_CAP_SYS_ATTRIBUTES 209 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 49e567209c6b..22c8ae94e4c1 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -472,13 +472,14 @@ BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task, void *, buf, u32, size, u64, flags) { struct pt_regs *regs; - long res; + long res = -EINVAL; if (!try_get_task_stack(task)) return -EFAULT; regs = task_pt_regs(task); - res = __bpf_get_stack(regs, task, NULL, buf, size, flags); + if (regs) + res = __bpf_get_stack(regs, task, NULL, buf, size, flags); put_task_stack(task); return res; diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index b31e1465868a..9d05c3ca2d5e 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -3643,6 +3643,12 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf, cgroup_get(cgrp); cgroup_kn_unlock(of->kn); + /* Allow only one trigger per file descriptor */ + if (ctx->psi.trigger) { + cgroup_put(cgrp); + return -EBUSY; + } + psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi; new = psi_trigger_create(psi, buf, nbytes, res); if (IS_ERR(new)) { @@ -3650,8 +3656,7 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf, return PTR_ERR(new); } - psi_trigger_replace(&ctx->psi.trigger, new); - + smp_store_release(&ctx->psi.trigger, new); cgroup_put(cgrp); return nbytes; @@ -3690,7 +3695,7 @@ static void cgroup_pressure_release(struct kernfs_open_file *of) { struct cgroup_file_ctx *ctx = of->priv; - psi_trigger_replace(&ctx->psi.trigger, NULL); + psi_trigger_destroy(ctx->psi.trigger); } bool cgroup_psi_enabled(void) diff --git a/kernel/events/core.c b/kernel/events/core.c index fc18664f49b0..479c9e672ec4 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -674,6 +674,23 @@ perf_event_set_state(struct perf_event *event, enum perf_event_state state) WRITE_ONCE(event->state, state); } +/* + * UP store-release, load-acquire + */ + +#define __store_release(ptr, val) \ +do { \ + barrier(); \ + WRITE_ONCE(*(ptr), (val)); \ +} while (0) + +#define __load_acquire(ptr) \ +({ \ + __unqual_scalar_typeof(*(ptr)) ___p = READ_ONCE(*(ptr)); \ + barrier(); \ + ___p; \ +}) + #ifdef CONFIG_CGROUP_PERF static inline bool @@ -719,34 +736,51 @@ static inline u64 perf_cgroup_event_time(struct perf_event *event) return t->time; } -static inline void __update_cgrp_time(struct perf_cgroup *cgrp) +static inline u64 perf_cgroup_event_time_now(struct perf_event *event, u64 now) { - struct perf_cgroup_info *info; - u64 now; - - now = perf_clock(); + struct perf_cgroup_info *t; - info = this_cpu_ptr(cgrp->info); + t = per_cpu_ptr(event->cgrp->info, event->cpu); + if (!__load_acquire(&t->active)) + return t->time; + now += READ_ONCE(t->timeoffset); + return now; +} - info->time += now - info->timestamp; +static inline void __update_cgrp_time(struct perf_cgroup_info *info, u64 now, bool adv) +{ + if (adv) + info->time += now - info->timestamp; info->timestamp = now; + /* + * see update_context_time() + */ + WRITE_ONCE(info->timeoffset, info->time - info->timestamp); } -static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx) +static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx, bool final) { struct perf_cgroup *cgrp = cpuctx->cgrp; struct cgroup_subsys_state *css; + struct perf_cgroup_info *info; if (cgrp) { + u64 now = perf_clock(); + for (css = &cgrp->css; css; css = css->parent) { cgrp = container_of(css, struct perf_cgroup, css); - __update_cgrp_time(cgrp); + info = this_cpu_ptr(cgrp->info); + + __update_cgrp_time(info, now, true); + if (final) + __store_release(&info->active, 0); } } } static inline void update_cgrp_time_from_event(struct perf_event *event) { + struct perf_cgroup_info *info; struct perf_cgroup *cgrp; /* @@ -760,8 +794,10 @@ static inline void update_cgrp_time_from_event(struct perf_event *event) /* * Do not update time when cgroup is not active */ - if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup)) - __update_cgrp_time(event->cgrp); + if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup)) { + info = this_cpu_ptr(event->cgrp->info); + __update_cgrp_time(info, perf_clock(), true); + } } static inline void @@ -785,7 +821,8 @@ perf_cgroup_set_timestamp(struct task_struct *task, for (css = &cgrp->css; css; css = css->parent) { cgrp = container_of(css, struct perf_cgroup, css); info = this_cpu_ptr(cgrp->info); - info->timestamp = ctx->timestamp; + __update_cgrp_time(info, ctx->timestamp, false); + __store_release(&info->active, 1); } } @@ -982,14 +1019,6 @@ out: } static inline void -perf_cgroup_set_shadow_time(struct perf_event *event, u64 now) -{ - struct perf_cgroup_info *t; - t = per_cpu_ptr(event->cgrp->info, event->cpu); - event->shadow_ctx_time = now - t->timestamp; -} - -static inline void perf_cgroup_event_enable(struct perf_event *event, struct perf_event_context *ctx) { struct perf_cpu_context *cpuctx; @@ -1066,7 +1095,8 @@ static inline void update_cgrp_time_from_event(struct perf_event *event) { } -static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx) +static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx, + bool final) { } @@ -1098,12 +1128,12 @@ perf_cgroup_switch(struct task_struct *task, struct task_struct *next) { } -static inline void -perf_cgroup_set_shadow_time(struct perf_event *event, u64 now) +static inline u64 perf_cgroup_event_time(struct perf_event *event) { + return 0; } -static inline u64 perf_cgroup_event_time(struct perf_event *event) +static inline u64 perf_cgroup_event_time_now(struct perf_event *event, u64 now) { return 0; } @@ -1525,22 +1555,59 @@ static void perf_unpin_context(struct perf_event_context *ctx) /* * Update the record of the current time in a context. */ -static void update_context_time(struct perf_event_context *ctx) +static void __update_context_time(struct perf_event_context *ctx, bool adv) { u64 now = perf_clock(); - ctx->time += now - ctx->timestamp; + if (adv) + ctx->time += now - ctx->timestamp; ctx->timestamp = now; + + /* + * The above: time' = time + (now - timestamp), can be re-arranged + * into: time` = now + (time - timestamp), which gives a single value + * offset to compute future time without locks on. + * + * See perf_event_time_now(), which can be used from NMI context where + * it's (obviously) not possible to acquire ctx->lock in order to read + * both the above values in a consistent manner. + */ + WRITE_ONCE(ctx->timeoffset, ctx->time - ctx->timestamp); +} + +static void update_context_time(struct perf_event_context *ctx) +{ + __update_context_time(ctx, true); } static u64 perf_event_time(struct perf_event *event) { struct perf_event_context *ctx = event->ctx; + if (unlikely(!ctx)) + return 0; + if (is_cgroup_event(event)) return perf_cgroup_event_time(event); - return ctx ? ctx->time : 0; + return ctx->time; +} + +static u64 perf_event_time_now(struct perf_event *event, u64 now) +{ + struct perf_event_context *ctx = event->ctx; + + if (unlikely(!ctx)) + return 0; + + if (is_cgroup_event(event)) + return perf_cgroup_event_time_now(event, now); + + if (!(__load_acquire(&ctx->is_active) & EVENT_TIME)) + return ctx->time; + + now += READ_ONCE(ctx->timeoffset); + return now; } static enum event_type_t get_event_type(struct perf_event *event) @@ -2350,7 +2417,7 @@ __perf_remove_from_context(struct perf_event *event, if (ctx->is_active & EVENT_TIME) { update_context_time(ctx); - update_cgrp_time_from_cpuctx(cpuctx); + update_cgrp_time_from_cpuctx(cpuctx, false); } event_sched_out(event, cpuctx, ctx); @@ -2361,6 +2428,9 @@ __perf_remove_from_context(struct perf_event *event, list_del_event(event, ctx); if (!ctx->nr_events && ctx->is_active) { + if (ctx == &cpuctx->ctx) + update_cgrp_time_from_cpuctx(cpuctx, true); + ctx->is_active = 0; ctx->rotate_necessary = 0; if (ctx->task) { @@ -2482,40 +2552,6 @@ void perf_event_disable_inatomic(struct perf_event *event) irq_work_queue(&event->pending); } -static void perf_set_shadow_time(struct perf_event *event, - struct perf_event_context *ctx) -{ - /* - * use the correct time source for the time snapshot - * - * We could get by without this by leveraging the - * fact that to get to this function, the caller - * has most likely already called update_context_time() - * and update_cgrp_time_xx() and thus both timestamp - * are identical (or very close). Given that tstamp is, - * already adjusted for cgroup, we could say that: - * tstamp - ctx->timestamp - * is equivalent to - * tstamp - cgrp->timestamp. - * - * Then, in perf_output_read(), the calculation would - * work with no changes because: - * - event is guaranteed scheduled in - * - no scheduled out in between - * - thus the timestamp would be the same - * - * But this is a bit hairy. - * - * So instead, we have an explicit cgroup call to remain - * within the time source all along. We believe it - * is cleaner and simpler to understand. - */ - if (is_cgroup_event(event)) - perf_cgroup_set_shadow_time(event, event->tstamp); - else - event->shadow_ctx_time = event->tstamp - ctx->timestamp; -} - #define MAX_INTERRUPTS (~0ULL) static void perf_log_throttle(struct perf_event *event, int enable); @@ -2556,8 +2592,6 @@ event_sched_in(struct perf_event *event, perf_pmu_disable(event->pmu); - perf_set_shadow_time(event, ctx); - perf_log_itrace_start(event); if (event->pmu->add(event, PERF_EF_START)) { @@ -3251,16 +3285,6 @@ static void ctx_sched_out(struct perf_event_context *ctx, return; } - ctx->is_active &= ~event_type; - if (!(ctx->is_active & EVENT_ALL)) - ctx->is_active = 0; - - if (ctx->task) { - WARN_ON_ONCE(cpuctx->task_ctx != ctx); - if (!ctx->is_active) - cpuctx->task_ctx = NULL; - } - /* * Always update time if it was set; not only when it changes. * Otherwise we can 'forget' to update time for any but the last @@ -3274,7 +3298,22 @@ static void ctx_sched_out(struct perf_event_context *ctx, if (is_active & EVENT_TIME) { /* update (and stop) ctx time */ update_context_time(ctx); - update_cgrp_time_from_cpuctx(cpuctx); + update_cgrp_time_from_cpuctx(cpuctx, ctx == &cpuctx->ctx); + /* + * CPU-release for the below ->is_active store, + * see __load_acquire() in perf_event_time_now() + */ + barrier(); + } + + ctx->is_active &= ~event_type; + if (!(ctx->is_active & EVENT_ALL)) + ctx->is_active = 0; + + if (ctx->task) { + WARN_ON_ONCE(cpuctx->task_ctx != ctx); + if (!ctx->is_active) + cpuctx->task_ctx = NULL; } is_active ^= ctx->is_active; /* changed bits */ @@ -3711,13 +3750,19 @@ static noinline int visit_groups_merge(struct perf_cpu_context *cpuctx, return 0; } +/* + * Because the userpage is strictly per-event (there is no concept of context, + * so there cannot be a context indirection), every userpage must be updated + * when context time starts :-( + * + * IOW, we must not miss EVENT_TIME edges. + */ static inline bool event_update_userpage(struct perf_event *event) { if (likely(!atomic_read(&event->mmap_count))) return false; perf_event_update_time(event); - perf_set_shadow_time(event, event->ctx); perf_event_update_userpage(event); return true; @@ -3801,13 +3846,23 @@ ctx_sched_in(struct perf_event_context *ctx, struct task_struct *task) { int is_active = ctx->is_active; - u64 now; lockdep_assert_held(&ctx->lock); if (likely(!ctx->nr_events)) return; + if (is_active ^ EVENT_TIME) { + /* start ctx time */ + __update_context_time(ctx, false); + perf_cgroup_set_timestamp(task, ctx); + /* + * CPU-release for the below ->is_active store, + * see __load_acquire() in perf_event_time_now() + */ + barrier(); + } + ctx->is_active |= (event_type | EVENT_TIME); if (ctx->task) { if (!is_active) @@ -3818,13 +3873,6 @@ ctx_sched_in(struct perf_event_context *ctx, is_active ^= ctx->is_active; /* changed bits */ - if (is_active & EVENT_TIME) { - /* start ctx time */ - now = perf_clock(); - ctx->timestamp = now; - perf_cgroup_set_timestamp(task, ctx); - } - /* * First go through the list and put on any pinned groups * in order to give them the best chance of going on. @@ -4418,6 +4466,18 @@ static inline u64 perf_event_count(struct perf_event *event) return local64_read(&event->count) + atomic64_read(&event->child_count); } +static void calc_timer_values(struct perf_event *event, + u64 *now, + u64 *enabled, + u64 *running) +{ + u64 ctx_time; + + *now = perf_clock(); + ctx_time = perf_event_time_now(event, *now); + __perf_update_times(event, ctx_time, enabled, running); +} + /* * NMI-safe method to read a local event, that is an event that * is: @@ -4477,10 +4537,9 @@ int perf_event_read_local(struct perf_event *event, u64 *value, *value = local64_read(&event->count); if (enabled || running) { - u64 now = event->shadow_ctx_time + perf_clock(); - u64 __enabled, __running; + u64 __enabled, __running, __now;; - __perf_update_times(event, now, &__enabled, &__running); + calc_timer_values(event, &__now, &__enabled, &__running); if (enabled) *enabled = __enabled; if (running) @@ -5802,18 +5861,6 @@ static int perf_event_index(struct perf_event *event) return event->pmu->event_idx(event); } -static void calc_timer_values(struct perf_event *event, - u64 *now, - u64 *enabled, - u64 *running) -{ - u64 ctx_time; - - *now = perf_clock(); - ctx_time = event->shadow_ctx_time + *now; - __perf_update_times(event, ctx_time, enabled, running); -} - static void perf_event_init_userpage(struct perf_event *event) { struct perf_event_mmap_page *userpg; @@ -6353,7 +6400,6 @@ accounting: ring_buffer_attach(event, rb); perf_event_update_time(event); - perf_set_shadow_time(event, event->ctx); perf_event_init_userpage(event); perf_event_update_userpage(event); } else { diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index f7a986078213..330d49937692 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -978,8 +978,7 @@ static void memory_bm_recycle(struct memory_bitmap *bm) * Register a range of page frames the contents of which should not be saved * during hibernation (to be used in the early initialization code). */ -void __init __register_nosave_region(unsigned long start_pfn, - unsigned long end_pfn, int use_kmalloc) +void __init register_nosave_region(unsigned long start_pfn, unsigned long end_pfn) { struct nosave_region *region; @@ -995,18 +994,12 @@ void __init __register_nosave_region(unsigned long start_pfn, goto Report; } } - if (use_kmalloc) { - /* During init, this shouldn't fail */ - region = kmalloc(sizeof(struct nosave_region), GFP_KERNEL); - BUG_ON(!region); - } else { - /* This allocation cannot fail */ - region = memblock_alloc(sizeof(struct nosave_region), - SMP_CACHE_BYTES); - if (!region) - panic("%s: Failed to allocate %zu bytes\n", __func__, - sizeof(struct nosave_region)); - } + /* This allocation cannot fail */ + region = memblock_alloc(sizeof(struct nosave_region), + SMP_CACHE_BYTES); + if (!region) + panic("%s: Failed to allocate %zu bytes\n", __func__, + sizeof(struct nosave_region)); region->start_pfn = start_pfn; region->end_pfn = end_pfn; list_add_tail(®ion->list, &nosave_regions); diff --git a/kernel/power/wakelock.c b/kernel/power/wakelock.c index 105df4dfc783..52571dcad768 100644 --- a/kernel/power/wakelock.c +++ b/kernel/power/wakelock.c @@ -39,23 +39,20 @@ ssize_t pm_show_wakelocks(char *buf, bool show_active) { struct rb_node *node; struct wakelock *wl; - char *str = buf; - char *end = buf + PAGE_SIZE; + int len = 0; mutex_lock(&wakelocks_lock); for (node = rb_first(&wakelocks_tree); node; node = rb_next(node)) { wl = rb_entry(node, struct wakelock, node); if (wl->ws->active == show_active) - str += scnprintf(str, end - str, "%s ", wl->name); + len += sysfs_emit_at(buf, len, "%s ", wl->name); } - if (str > buf) - str--; - str += scnprintf(str, end - str, "\n"); + len += sysfs_emit_at(buf, len, "\n"); mutex_unlock(&wakelocks_lock); - return (str - buf); + return len; } #if CONFIG_PM_WAKELOCKS_LIMIT > 0 diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 84f1d91604cc..d64f0b1d8cd3 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -123,7 +123,7 @@ static struct rcu_tasks rt_name = \ .call_func = call, \ .rtpcpu = &rt_name ## __percpu, \ .name = n, \ - .percpu_enqueue_shift = ilog2(CONFIG_NR_CPUS), \ + .percpu_enqueue_shift = ilog2(CONFIG_NR_CPUS) + 1, \ .percpu_enqueue_lim = 1, \ .percpu_dequeue_lim = 1, \ .barrier_q_mutex = __MUTEX_INITIALIZER(rt_name.barrier_q_mutex), \ @@ -216,6 +216,7 @@ static void cblist_init_generic(struct rcu_tasks *rtp) int cpu; unsigned long flags; int lim; + int shift; raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags); if (rcu_task_enqueue_lim < 0) { @@ -229,7 +230,10 @@ static void cblist_init_generic(struct rcu_tasks *rtp) if (lim > nr_cpu_ids) lim = nr_cpu_ids; - WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids / lim)); + shift = ilog2(nr_cpu_ids / lim); + if (((nr_cpu_ids - 1) >> shift) >= lim) + shift++; + WRITE_ONCE(rtp->percpu_enqueue_shift, shift); WRITE_ONCE(rtp->percpu_dequeue_lim, lim); smp_store_release(&rtp->percpu_enqueue_lim, lim); for_each_possible_cpu(cpu) { @@ -298,7 +302,7 @@ static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func, if (unlikely(needadjust)) { raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags); if (rtp->percpu_enqueue_lim != nr_cpu_ids) { - WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids)); + WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids) + 1); WRITE_ONCE(rtp->percpu_dequeue_lim, nr_cpu_ids); smp_store_release(&rtp->percpu_enqueue_lim, nr_cpu_ids); pr_info("Switching %s to per-CPU callback queuing.\n", rtp->name); @@ -413,7 +417,7 @@ static int rcu_tasks_need_gpcb(struct rcu_tasks *rtp) if (rcu_task_cb_adjust && ncbs <= rcu_task_collapse_lim) { raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags); if (rtp->percpu_enqueue_lim > 1) { - WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids)); + WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids) + 1); smp_store_release(&rtp->percpu_enqueue_lim, 1); rtp->percpu_dequeue_gpseq = get_state_synchronize_rcu(); pr_info("Starting switch %s to CPU-0 callback queuing.\n", rtp->name); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 2e4ae00e52d1..848eaa0efe0e 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5822,8 +5822,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) } if (schedstat_enabled() && rq->core->core_forceidle_count) { - if (cookie) - rq->core->core_forceidle_start = rq_clock(rq->core); + rq->core->core_forceidle_start = rq_clock(rq->core); rq->core->core_forceidle_occupation = occ; } @@ -8219,9 +8218,7 @@ int __cond_resched_lock(spinlock_t *lock) if (spin_needbreak(lock) || resched) { spin_unlock(lock); - if (resched) - preempt_schedule_common(); - else + if (!_cond_resched()) cpu_relax(); ret = 1; spin_lock(lock); @@ -8239,9 +8236,7 @@ int __cond_resched_rwlock_read(rwlock_t *lock) if (rwlock_needbreak(lock) || resched) { read_unlock(lock); - if (resched) - preempt_schedule_common(); - else + if (!_cond_resched()) cpu_relax(); ret = 1; read_lock(lock); @@ -8259,9 +8254,7 @@ int __cond_resched_rwlock_write(rwlock_t *lock) if (rwlock_needbreak(lock) || resched) { write_unlock(lock); - if (resched) - preempt_schedule_common(); - else + if (!_cond_resched()) cpu_relax(); ret = 1; write_lock(lock); diff --git a/kernel/sched/core_sched.c b/kernel/sched/core_sched.c index 1fb45672ec85..c8746a9a7ada 100644 --- a/kernel/sched/core_sched.c +++ b/kernel/sched/core_sched.c @@ -277,7 +277,7 @@ void __sched_core_account_forceidle(struct rq *rq) rq_i = cpu_rq(i); p = rq_i->core_pick ?: rq_i->curr; - if (!p->core_cookie) + if (p == rq_i->idle) continue; __schedstat_add(p->stats.core_forceidle_sum, delta); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 095b0aa378df..5146163bfabb 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3028,9 +3028,11 @@ enqueue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) static inline void dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { - u32 divider = get_pelt_divider(&se->avg); sub_positive(&cfs_rq->avg.load_avg, se->avg.load_avg); - cfs_rq->avg.load_sum = cfs_rq->avg.load_avg * divider; + sub_positive(&cfs_rq->avg.load_sum, se_weight(se) * se->avg.load_sum); + /* See update_cfs_rq_load_avg() */ + cfs_rq->avg.load_sum = max_t(u32, cfs_rq->avg.load_sum, + cfs_rq->avg.load_avg * PELT_MIN_DIVIDER); } #else static inline void @@ -3381,7 +3383,6 @@ void set_task_rq_fair(struct sched_entity *se, se->avg.last_update_time = n_last_update_time; } - /* * When on migration a sched_entity joins/leaves the PELT hierarchy, we need to * propagate its contribution. The key to this propagation is the invariant @@ -3449,15 +3450,14 @@ void set_task_rq_fair(struct sched_entity *se, * XXX: only do this for the part of runnable > running ? * */ - static inline void update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq) { - long delta = gcfs_rq->avg.util_avg - se->avg.util_avg; - u32 divider; + long delta_sum, delta_avg = gcfs_rq->avg.util_avg - se->avg.util_avg; + u32 new_sum, divider; /* Nothing to update */ - if (!delta) + if (!delta_avg) return; /* @@ -3466,23 +3466,30 @@ update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq */ divider = get_pelt_divider(&cfs_rq->avg); + /* Set new sched_entity's utilization */ se->avg.util_avg = gcfs_rq->avg.util_avg; - se->avg.util_sum = se->avg.util_avg * divider; + new_sum = se->avg.util_avg * divider; + delta_sum = (long)new_sum - (long)se->avg.util_sum; + se->avg.util_sum = new_sum; /* Update parent cfs_rq utilization */ - add_positive(&cfs_rq->avg.util_avg, delta); - cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * divider; + add_positive(&cfs_rq->avg.util_avg, delta_avg); + add_positive(&cfs_rq->avg.util_sum, delta_sum); + + /* See update_cfs_rq_load_avg() */ + cfs_rq->avg.util_sum = max_t(u32, cfs_rq->avg.util_sum, + cfs_rq->avg.util_avg * PELT_MIN_DIVIDER); } static inline void update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq) { - long delta = gcfs_rq->avg.runnable_avg - se->avg.runnable_avg; - u32 divider; + long delta_sum, delta_avg = gcfs_rq->avg.runnable_avg - se->avg.runnable_avg; + u32 new_sum, divider; /* Nothing to update */ - if (!delta) + if (!delta_avg) return; /* @@ -3493,19 +3500,25 @@ update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cf /* Set new sched_entity's runnable */ se->avg.runnable_avg = gcfs_rq->avg.runnable_avg; - se->avg.runnable_sum = se->avg.runnable_avg * divider; + new_sum = se->avg.runnable_avg * divider; + delta_sum = (long)new_sum - (long)se->avg.runnable_sum; + se->avg.runnable_sum = new_sum; /* Update parent cfs_rq runnable */ - add_positive(&cfs_rq->avg.runnable_avg, delta); - cfs_rq->avg.runnable_sum = cfs_rq->avg.runnable_avg * divider; + add_positive(&cfs_rq->avg.runnable_avg, delta_avg); + add_positive(&cfs_rq->avg.runnable_sum, delta_sum); + /* See update_cfs_rq_load_avg() */ + cfs_rq->avg.runnable_sum = max_t(u32, cfs_rq->avg.runnable_sum, + cfs_rq->avg.runnable_avg * PELT_MIN_DIVIDER); } static inline void update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq) { - long delta, running_sum, runnable_sum = gcfs_rq->prop_runnable_sum; + long delta_avg, running_sum, runnable_sum = gcfs_rq->prop_runnable_sum; unsigned long load_avg; u64 load_sum = 0; + s64 delta_sum; u32 divider; if (!runnable_sum) @@ -3532,7 +3545,7 @@ update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq * assuming all tasks are equally runnable. */ if (scale_load_down(gcfs_rq->load.weight)) { - load_sum = div_s64(gcfs_rq->avg.load_sum, + load_sum = div_u64(gcfs_rq->avg.load_sum, scale_load_down(gcfs_rq->load.weight)); } @@ -3549,19 +3562,22 @@ update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq running_sum = se->avg.util_sum >> SCHED_CAPACITY_SHIFT; runnable_sum = max(runnable_sum, running_sum); - load_sum = (s64)se_weight(se) * runnable_sum; - load_avg = div_s64(load_sum, divider); - - se->avg.load_sum = runnable_sum; + load_sum = se_weight(se) * runnable_sum; + load_avg = div_u64(load_sum, divider); - delta = load_avg - se->avg.load_avg; - if (!delta) + delta_avg = load_avg - se->avg.load_avg; + if (!delta_avg) return; - se->avg.load_avg = load_avg; + delta_sum = load_sum - (s64)se_weight(se) * se->avg.load_sum; - add_positive(&cfs_rq->avg.load_avg, delta); - cfs_rq->avg.load_sum = cfs_rq->avg.load_avg * divider; + se->avg.load_sum = runnable_sum; + se->avg.load_avg = load_avg; + add_positive(&cfs_rq->avg.load_avg, delta_avg); + add_positive(&cfs_rq->avg.load_sum, delta_sum); + /* See update_cfs_rq_load_avg() */ + cfs_rq->avg.load_sum = max_t(u32, cfs_rq->avg.load_sum, + cfs_rq->avg.load_avg * PELT_MIN_DIVIDER); } static inline void add_tg_cfs_propagate(struct cfs_rq *cfs_rq, long runnable_sum) @@ -3652,7 +3668,7 @@ static inline void add_tg_cfs_propagate(struct cfs_rq *cfs_rq, long runnable_sum * * cfs_rq->avg is used for task_h_load() and update_cfs_share() for example. * - * Returns true if the load decayed or we removed load. + * Return: true if the load decayed or we removed load. * * Since both these conditions indicate a changed cfs_rq->avg.load we should * call update_tg_load_avg() when this function returns true. @@ -3677,15 +3693,32 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq) r = removed_load; sub_positive(&sa->load_avg, r); - sa->load_sum = sa->load_avg * divider; + sub_positive(&sa->load_sum, r * divider); + /* See sa->util_sum below */ + sa->load_sum = max_t(u32, sa->load_sum, sa->load_avg * PELT_MIN_DIVIDER); r = removed_util; sub_positive(&sa->util_avg, r); - sa->util_sum = sa->util_avg * divider; + sub_positive(&sa->util_sum, r * divider); + /* + * Because of rounding, se->util_sum might ends up being +1 more than + * cfs->util_sum. Although this is not a problem by itself, detaching + * a lot of tasks with the rounding problem between 2 updates of + * util_avg (~1ms) can make cfs->util_sum becoming null whereas + * cfs_util_avg is not. + * Check that util_sum is still above its lower bound for the new + * util_avg. Given that period_contrib might have moved since the last + * sync, we are only sure that util_sum must be above or equal to + * util_avg * minimum possible divider + */ + sa->util_sum = max_t(u32, sa->util_sum, sa->util_avg * PELT_MIN_DIVIDER); r = removed_runnable; sub_positive(&sa->runnable_avg, r); - sa->runnable_sum = sa->runnable_avg * divider; + sub_positive(&sa->runnable_sum, r * divider); + /* See sa->util_sum above */ + sa->runnable_sum = max_t(u32, sa->runnable_sum, + sa->runnable_avg * PELT_MIN_DIVIDER); /* * removed_runnable is the unweighted version of removed_load so we @@ -3772,17 +3805,18 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s */ static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { - /* - * cfs_rq->avg.period_contrib can be used for both cfs_rq and se. - * See ___update_load_avg() for details. - */ - u32 divider = get_pelt_divider(&cfs_rq->avg); - dequeue_load_avg(cfs_rq, se); sub_positive(&cfs_rq->avg.util_avg, se->avg.util_avg); - cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * divider; + sub_positive(&cfs_rq->avg.util_sum, se->avg.util_sum); + /* See update_cfs_rq_load_avg() */ + cfs_rq->avg.util_sum = max_t(u32, cfs_rq->avg.util_sum, + cfs_rq->avg.util_avg * PELT_MIN_DIVIDER); + sub_positive(&cfs_rq->avg.runnable_avg, se->avg.runnable_avg); - cfs_rq->avg.runnable_sum = cfs_rq->avg.runnable_avg * divider; + sub_positive(&cfs_rq->avg.runnable_sum, se->avg.runnable_sum); + /* See update_cfs_rq_load_avg() */ + cfs_rq->avg.runnable_sum = max_t(u32, cfs_rq->avg.runnable_sum, + cfs_rq->avg.runnable_avg * PELT_MIN_DIVIDER); add_tg_cfs_propagate(cfs_rq, -se->avg.load_sum); @@ -8539,6 +8573,8 @@ group_type group_classify(unsigned int imbalance_pct, * * If @sg does not have SMT siblings, only pull tasks if all of the SMT siblings * of @dst_cpu are idle and @sg has lower priority. + * + * Return: true if @dst_cpu can pull tasks, false otherwise. */ static bool asym_smt_can_pull_tasks(int dst_cpu, struct sd_lb_stats *sds, struct sg_lb_stats *sgs, @@ -8614,6 +8650,7 @@ sched_asym(struct lb_env *env, struct sd_lb_stats *sds, struct sg_lb_stats *sgs /** * update_sg_lb_stats - Update sched_group's statistics for load balancing. * @env: The load balancing environment. + * @sds: Load-balancing data with statistics of the local group. * @group: sched_group whose statistics are to be updated. * @sgs: variable to hold the statistics for this group. * @sg_status: Holds flag indicating the status of the sched_group @@ -9421,12 +9458,11 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s /** * find_busiest_group - Returns the busiest group within the sched_domain * if there is an imbalance. + * @env: The load balancing environment. * * Also calculates the amount of runnable load which should be moved * to restore balance. * - * @env: The load balancing environment. - * * Return: - The busiest group if imbalance exists. */ static struct sched_group *find_busiest_group(struct lb_env *env) diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h index e06071bf3472..c336f5f481bc 100644 --- a/kernel/sched/pelt.h +++ b/kernel/sched/pelt.h @@ -37,9 +37,11 @@ update_irq_load_avg(struct rq *rq, u64 running) } #endif +#define PELT_MIN_DIVIDER (LOAD_AVG_MAX - 1024) + static inline u32 get_pelt_divider(struct sched_avg *avg) { - return LOAD_AVG_MAX - 1024 + avg->period_contrib; + return PELT_MIN_DIVIDER + avg->period_contrib; } static inline void cfs_se_util_change(struct sched_avg *avg) diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index a679613a7cb7..c137c4d6983e 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -1162,7 +1162,6 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group, t->event = 0; t->last_event_time = 0; init_waitqueue_head(&t->event_wait); - kref_init(&t->refcount); mutex_lock(&group->trigger_lock); @@ -1191,15 +1190,19 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group, return t; } -static void psi_trigger_destroy(struct kref *ref) +void psi_trigger_destroy(struct psi_trigger *t) { - struct psi_trigger *t = container_of(ref, struct psi_trigger, refcount); - struct psi_group *group = t->group; + struct psi_group *group; struct task_struct *task_to_destroy = NULL; - if (static_branch_likely(&psi_disabled)) + /* + * We do not check psi_disabled since it might have been disabled after + * the trigger got created. + */ + if (!t) return; + group = t->group; /* * Wakeup waiters to stop polling. Can happen if cgroup is deleted * from under a polling process. @@ -1235,9 +1238,9 @@ static void psi_trigger_destroy(struct kref *ref) mutex_unlock(&group->trigger_lock); /* - * Wait for both *trigger_ptr from psi_trigger_replace and - * poll_task RCUs to complete their read-side critical sections - * before destroying the trigger and optionally the poll_task + * Wait for psi_schedule_poll_work RCU to complete its read-side + * critical section before destroying the trigger and optionally the + * poll_task. */ synchronize_rcu(); /* @@ -1254,18 +1257,6 @@ static void psi_trigger_destroy(struct kref *ref) kfree(t); } -void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *new) -{ - struct psi_trigger *old = *trigger_ptr; - - if (static_branch_likely(&psi_disabled)) - return; - - rcu_assign_pointer(*trigger_ptr, new); - if (old) - kref_put(&old->refcount, psi_trigger_destroy); -} - __poll_t psi_trigger_poll(void **trigger_ptr, struct file *file, poll_table *wait) { @@ -1275,24 +1266,15 @@ __poll_t psi_trigger_poll(void **trigger_ptr, if (static_branch_likely(&psi_disabled)) return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI; - rcu_read_lock(); - - t = rcu_dereference(*(void __rcu __force **)trigger_ptr); - if (!t) { - rcu_read_unlock(); + t = smp_load_acquire(trigger_ptr); + if (!t) return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI; - } - kref_get(&t->refcount); - - rcu_read_unlock(); poll_wait(file, &t->event_wait, wait); if (cmpxchg(&t->event, 1, 0) == 1) ret |= EPOLLPRI; - kref_put(&t->refcount, psi_trigger_destroy); - return ret; } @@ -1316,14 +1298,24 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf, buf[buf_size - 1] = '\0'; - new = psi_trigger_create(&psi_system, buf, nbytes, res); - if (IS_ERR(new)) - return PTR_ERR(new); - seq = file->private_data; + /* Take seq->lock to protect seq->private from concurrent writes */ mutex_lock(&seq->lock); - psi_trigger_replace(&seq->private, new); + + /* Allow only one trigger per file descriptor */ + if (seq->private) { + mutex_unlock(&seq->lock); + return -EBUSY; + } + + new = psi_trigger_create(&psi_system, buf, nbytes, res); + if (IS_ERR(new)) { + mutex_unlock(&seq->lock); + return PTR_ERR(new); + } + + smp_store_release(&seq->private, new); mutex_unlock(&seq->lock); return nbytes; @@ -1358,7 +1350,7 @@ static int psi_fop_release(struct inode *inode, struct file *file) { struct seq_file *seq = file->private_data; - psi_trigger_replace(&seq->private, NULL); + psi_trigger_destroy(seq->private); return single_release(inode, file); } diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 752ed89a293b..a5eb5e7fd624 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -70,10 +70,16 @@ config HAVE_C_RECORDMCOUNT help C version of recordmcount available? +config HAVE_BUILDTIME_MCOUNT_SORT + bool + help + An architecture selects this if it sorts the mcount_loc section + at build time. + config BUILDTIME_MCOUNT_SORT bool default y - depends on BUILDTIME_TABLE_SORT && !S390 + depends on HAVE_BUILDTIME_MCOUNT_SORT && DYNAMIC_FTRACE help Sort the mcount_loc section at build time. diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a569a0cb81ee..c860f582b078 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -7740,7 +7740,8 @@ static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr) err = kzalloc(sizeof(*err), GFP_KERNEL); if (!err) err = ERR_PTR(-ENOMEM); - tr->n_err_log_entries++; + else + tr->n_err_log_entries++; return err; } diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 5e6a988a8a51..ada87bfb5bb8 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -2503,6 +2503,8 @@ static struct hist_field *parse_unary(struct hist_trigger_data *hist_data, (HIST_FIELD_FL_TIMESTAMP | HIST_FIELD_FL_TIMESTAMP_USECS); expr->fn = hist_field_unary_minus; expr->operands[0] = operand1; + expr->size = operand1->size; + expr->is_signed = operand1->is_signed; expr->operator = FIELD_OP_UNARY_MINUS; expr->name = expr_str(expr, 0); expr->type = kstrdup_const(operand1->type, GFP_KERNEL); @@ -2719,6 +2721,7 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data, /* The operand sizes should be the same, so just pick one */ expr->size = operand1->size; + expr->is_signed = operand1->is_signed; expr->operator = field_op; expr->type = kstrdup_const(operand1->type, GFP_KERNEL); @@ -3935,6 +3938,7 @@ static int trace_action_create(struct hist_trigger_data *hist_data, var_ref_idx = find_var_ref_idx(hist_data, var_ref); if (WARN_ON(var_ref_idx < 0)) { + kfree(p); ret = var_ref_idx; goto err; } @@ -6163,7 +6167,9 @@ static int event_hist_trigger_parse(struct event_command *cmd_ops, lockdep_assert_held(&event_mutex); - if (glob && strlen(glob)) { + WARN_ON(!glob); + + if (strlen(glob)) { hist_err_clear(); last_cmd_set(file, param); } @@ -6196,7 +6202,7 @@ static int event_hist_trigger_parse(struct event_command *cmd_ops, continue; } break; - } while (p); + } while (1); if (!p) param = NULL; diff --git a/kernel/ucount.c b/kernel/ucount.c index 7b32c356ebc5..65b597431c86 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -190,6 +190,7 @@ struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid) kfree(new); } else { hlist_add_head(&new->node, hashent); + get_user_ns(new->ns); spin_unlock_irq(&ucounts_lock); return new; } @@ -210,6 +211,7 @@ void put_ucounts(struct ucounts *ucounts) if (atomic_dec_and_lock_irqsave(&ucounts->count, &ucounts_lock, flags)) { hlist_del_init(&ucounts->node); spin_unlock_irqrestore(&ucounts_lock, flags); + put_user_ns(ucounts->ns); kfree(ucounts); } } diff --git a/lib/sbitmap.c b/lib/sbitmap.c index 6220fa67fb7e..09d293c30fd2 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -488,9 +488,13 @@ void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq, unsigned int users) { unsigned int wake_batch; + unsigned int min_batch; + unsigned int depth = (sbq->sb.depth + users - 1) / users; - wake_batch = clamp_val((sbq->sb.depth + users - 1) / - users, 4, SBQ_WAKE_BATCH); + min_batch = sbq->sb.depth >= (4 * SBQ_WAIT_QUEUES) ? 4 : 1; + + wake_batch = clamp_val(depth / SBQ_WAIT_QUEUES, + min_batch, SBQ_WAKE_BATCH); __sbitmap_queue_update_wake_batch(sbq, wake_batch); } EXPORT_SYMBOL_GPL(sbitmap_queue_recalculate_wake_batch); diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 84ba456a78cc..1402d5ca242d 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -560,10 +560,10 @@ static bool __allowed_ingress(const struct net_bridge *br, !br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) { if (*state == BR_STATE_FORWARDING) { *state = br_vlan_get_pvid_state(vg); - return br_vlan_state_allowed(*state, true); - } else { - return true; + if (!br_vlan_state_allowed(*state, true)) + goto drop; } + return true; } } v = br_vlan_find(vg, *vid); @@ -2020,7 +2020,8 @@ static int br_vlan_rtm_dump(struct sk_buff *skb, struct netlink_callback *cb) goto out_err; } err = br_vlan_dump_dev(dev, skb, cb, dump_flags); - if (err && err != -EMSGSIZE) + /* if the dump completed without an error we return 0 here */ + if (err != -EMSGSIZE) goto out_err; } else { for_each_netdev_rcu(net, dev) { diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c index d8b9dbabd4a4..88cc0ad7d386 100644 --- a/net/core/net-procfs.c +++ b/net/core/net-procfs.c @@ -190,12 +190,23 @@ static const struct seq_operations softnet_seq_ops = { .show = softnet_seq_show, }; -static void *ptype_get_idx(loff_t pos) +static void *ptype_get_idx(struct seq_file *seq, loff_t pos) { + struct list_head *ptype_list = NULL; struct packet_type *pt = NULL; + struct net_device *dev; loff_t i = 0; int t; + for_each_netdev_rcu(seq_file_net(seq), dev) { + ptype_list = &dev->ptype_all; + list_for_each_entry_rcu(pt, ptype_list, list) { + if (i == pos) + return pt; + ++i; + } + } + list_for_each_entry_rcu(pt, &ptype_all, list) { if (i == pos) return pt; @@ -216,22 +227,40 @@ static void *ptype_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { rcu_read_lock(); - return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN; + return *pos ? ptype_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; } static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos) { + struct net_device *dev; struct packet_type *pt; struct list_head *nxt; int hash; ++*pos; if (v == SEQ_START_TOKEN) - return ptype_get_idx(0); + return ptype_get_idx(seq, 0); pt = v; nxt = pt->list.next; + if (pt->dev) { + if (nxt != &pt->dev->ptype_all) + goto found; + + dev = pt->dev; + for_each_netdev_continue_rcu(seq_file_net(seq), dev) { + if (!list_empty(&dev->ptype_all)) { + nxt = dev->ptype_all.next; + goto found; + } + } + + nxt = ptype_all.next; + goto ptype_all; + } + if (pt->type == htons(ETH_P_ALL)) { +ptype_all: if (nxt != &ptype_all) goto found; hash = 0; @@ -260,7 +289,8 @@ static int ptype_seq_show(struct seq_file *seq, void *v) if (v == SEQ_START_TOKEN) seq_puts(seq, "Type Device Function\n"); - else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) { + else if ((!pt->af_packet_net || net_eq(pt->af_packet_net, seq_file_net(seq))) && + (!pt->dev || net_eq(dev_net(pt->dev), seq_file_net(seq)))) { if (pt->type == htons(ETH_P_ALL)) seq_puts(seq, "ALL "); else diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 57c1d8431386..139cec29ed06 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -162,12 +162,19 @@ int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk, iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr); iph->saddr = saddr; iph->protocol = sk->sk_protocol; - if (ip_dont_fragment(sk, &rt->dst)) { + /* Do not bother generating IPID for small packets (eg SYNACK) */ + if (skb->len <= IPV4_MIN_MTU || ip_dont_fragment(sk, &rt->dst)) { iph->frag_off = htons(IP_DF); iph->id = 0; } else { iph->frag_off = 0; - __ip_select_ident(net, iph, 1); + /* TCP packets here are SYNACK with fat IPv4/TCP options. + * Avoid using the hashed IP ident generator. + */ + if (sk->sk_protocol == IPPROTO_TCP) + iph->id = (__force __be16)prandom_u32(); + else + __ip_select_ident(net, iph, 1); } if (opt && opt->opt.optlen) { @@ -825,15 +832,24 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, /* Everything is OK. Generate! */ ip_fraglist_init(skb, iph, hlen, &iter); - if (iter.frag) - ip_options_fragment(iter.frag); - for (;;) { /* Prepare header of the next frame, * before previous one went down. */ if (iter.frag) { + bool first_frag = (iter.offset == 0); + IPCB(iter.frag)->flags = IPCB(skb)->flags; ip_fraglist_prepare(skb, &iter); + if (first_frag && IPCB(skb)->opt.optlen) { + /* ipcb->opt is not populated for frags + * coming from __ip_make_skb(), + * ip_options_fragment() needs optlen + */ + IPCB(iter.frag)->opt.optlen = + IPCB(skb)->opt.optlen; + ip_options_fragment(iter.frag); + ip_send_check(iter.iph); + } } skb->tstamp = tstamp; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 0e56df3a45e2..bcf7bc71cb56 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -220,7 +220,8 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident) continue; } - if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) + if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif && + sk->sk_bound_dev_if != inet_sdif(skb)) continue; sock_hold(sk); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 9eb5fc247868..9f97b9cbf7b3 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -722,6 +722,7 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) int ret = -EINVAL; int chk_addr_ret; + lock_sock(sk); if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in)) goto out; @@ -741,7 +742,9 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->inet_saddr = 0; /* Use device */ sk_dst_reset(sk); ret = 0; -out: return ret; +out: + release_sock(sk); + return ret; } /* diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 3b75836db19b..78e81465f5f3 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -842,6 +842,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, } release_sock(sk); + sk_defer_free_flush(sk); if (spliced) return spliced; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index b53476e78c84..fec656f5a39e 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2095,7 +2095,7 @@ process: nf_reset_ct(skb); if (tcp_filter(sk, skb)) { - drop_reason = SKB_DROP_REASON_TCP_FILTER; + drop_reason = SKB_DROP_REASON_SOCKET_FILTER; goto discard_and_relse; } th = (const struct tcphdr *)skb->data; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 3eee17790a82..f927c199a93c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2589,7 +2589,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, __u32 valid_lft, u32 prefered_lft) { struct inet6_ifaddr *ifp = ipv6_get_ifaddr(net, addr, dev, 1); - int create = 0; + int create = 0, update_lft = 0; if (!ifp && valid_lft) { int max_addresses = in6_dev->cnf.max_addresses; @@ -2633,19 +2633,32 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, unsigned long now; u32 stored_lft; - /* Update lifetime (RFC4862 5.5.3 e) - * We deviate from RFC4862 by honoring all Valid Lifetimes to - * improve the reaction of SLAAC to renumbering events - * (draft-gont-6man-slaac-renum-06, Section 4.2) - */ + /* update lifetime (RFC2462 5.5.3 e) */ spin_lock_bh(&ifp->lock); now = jiffies; if (ifp->valid_lft > (now - ifp->tstamp) / HZ) stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ; else stored_lft = 0; - if (!create && stored_lft) { + const u32 minimum_lft = min_t(u32, + stored_lft, MIN_VALID_LIFETIME); + valid_lft = max(valid_lft, minimum_lft); + + /* RFC4862 Section 5.5.3e: + * "Note that the preferred lifetime of the + * corresponding address is always reset to + * the Preferred Lifetime in the received + * Prefix Information option, regardless of + * whether the valid lifetime is also reset or + * ignored." + * + * So we should always update prefered_lft here. + */ + update_lft = 1; + } + + if (update_lft) { ifp->valid_lft = valid_lft; ifp->prefered_lft = prefered_lft; ifp->tstamp = now; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 463c37dea449..413f66781e50 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -112,7 +112,7 @@ void fib6_update_sernum(struct net *net, struct fib6_info *f6i) fn = rcu_dereference_protected(f6i->fib6_node, lockdep_is_held(&f6i->fib6_table->tb6_lock)); if (fn) - fn->fn_sernum = fib6_new_sernum(net); + WRITE_ONCE(fn->fn_sernum, fib6_new_sernum(net)); } /* @@ -590,12 +590,13 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb, spin_unlock_bh(&table->tb6_lock); if (res > 0) { cb->args[4] = 1; - cb->args[5] = w->root->fn_sernum; + cb->args[5] = READ_ONCE(w->root->fn_sernum); } } else { - if (cb->args[5] != w->root->fn_sernum) { + int sernum = READ_ONCE(w->root->fn_sernum); + if (cb->args[5] != sernum) { /* Begin at the root if the tree changed */ - cb->args[5] = w->root->fn_sernum; + cb->args[5] = sernum; w->state = FWS_INIT; w->node = w->root; w->skip = w->count; @@ -1345,7 +1346,7 @@ static void __fib6_update_sernum_upto_root(struct fib6_info *rt, /* paired with smp_rmb() in fib6_get_cookie_safe() */ smp_wmb(); while (fn) { - fn->fn_sernum = sernum; + WRITE_ONCE(fn->fn_sernum, sernum); fn = rcu_dereference_protected(fn->parent, lockdep_is_held(&rt->fib6_table->tb6_lock)); } @@ -2174,8 +2175,8 @@ static int fib6_clean_node(struct fib6_walker *w) }; if (c->sernum != FIB6_NO_SERNUM_CHANGE && - w->node->fn_sernum != c->sernum) - w->node->fn_sernum = c->sernum; + READ_ONCE(w->node->fn_sernum) != c->sernum) + WRITE_ONCE(w->node->fn_sernum, c->sernum); if (!c->func) { WARN_ON_ONCE(c->sernum == FIB6_NO_SERNUM_CHANGE); @@ -2543,7 +2544,7 @@ static void ipv6_route_seq_setup_walk(struct ipv6_route_iter *iter, iter->w.state = FWS_INIT; iter->w.node = iter->w.root; iter->w.args = iter; - iter->sernum = iter->w.root->fn_sernum; + iter->sernum = READ_ONCE(iter->w.root->fn_sernum); INIT_LIST_HEAD(&iter->w.lh); fib6_walker_link(net, &iter->w); } @@ -2571,8 +2572,10 @@ static struct fib6_table *ipv6_route_seq_next_table(struct fib6_table *tbl, static void ipv6_route_check_sernum(struct ipv6_route_iter *iter) { - if (iter->sernum != iter->w.root->fn_sernum) { - iter->sernum = iter->w.root->fn_sernum; + int sernum = READ_ONCE(iter->w.root->fn_sernum); + + if (iter->sernum != sernum) { + iter->sernum = sernum; iter->w.state = FWS_INIT; iter->w.node = iter->w.root; WARN_ON(iter->w.skip); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index fe786df4f849..97ade833f58c 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1036,14 +1036,14 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t, if (unlikely(!ipv6_chk_addr_and_flags(net, laddr, ldev, false, 0, IFA_F_TENTATIVE))) - pr_warn("%s xmit: Local address not yet configured!\n", - p->name); + pr_warn_ratelimited("%s xmit: Local address not yet configured!\n", + p->name); else if (!(p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) && !ipv6_addr_is_multicast(raddr) && unlikely(ipv6_chk_addr_and_flags(net, raddr, ldev, true, 0, IFA_F_TENTATIVE))) - pr_warn("%s xmit: Routing loop! Remote address found on this node!\n", - p->name); + pr_warn_ratelimited("%s xmit: Routing loop! Remote address found on this node!\n", + p->name); else ret = 1; rcu_read_unlock(); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index e6de94203c13..f4884cda13b9 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2802,7 +2802,7 @@ static void ip6_link_failure(struct sk_buff *skb) if (from) { fn = rcu_dereference(from->fib6_node); if (fn && (rt->rt6i_flags & RTF_DEFAULT)) - fn->fn_sernum = -1; + WRITE_ONCE(fn->fn_sernum, -1); } } rcu_read_unlock(); diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 75af1f701e1d..782b1d452269 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -478,6 +478,20 @@ __lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id) return NULL; } +static struct mptcp_pm_addr_entry * +__lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info, + bool lookup_by_id) +{ + struct mptcp_pm_addr_entry *entry; + + list_for_each_entry(entry, &pernet->local_addr_list, list) { + if ((!lookup_by_id && addresses_equal(&entry->addr, info, true)) || + (lookup_by_id && entry->addr.id == info->id)) + return entry; + } + return NULL; +} + static int lookup_id_by_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *addr) { @@ -777,7 +791,7 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, removed = true; __MPTCP_INC_STATS(sock_net(sk), rm_type); } - __set_bit(rm_list->ids[1], msk->pm.id_avail_bitmap); + __set_bit(rm_list->ids[i], msk->pm.id_avail_bitmap); if (!removed) continue; @@ -1763,18 +1777,21 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info) return -EOPNOTSUPP; } - list_for_each_entry(entry, &pernet->local_addr_list, list) { - if ((!lookup_by_id && addresses_equal(&entry->addr, &addr.addr, true)) || - (lookup_by_id && entry->addr.id == addr.addr.id)) { - mptcp_nl_addr_backup(net, &entry->addr, bkup); - - if (bkup) - entry->flags |= MPTCP_PM_ADDR_FLAG_BACKUP; - else - entry->flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP; - } + spin_lock_bh(&pernet->lock); + entry = __lookup_addr(pernet, &addr.addr, lookup_by_id); + if (!entry) { + spin_unlock_bh(&pernet->lock); + return -EINVAL; } + if (bkup) + entry->flags |= MPTCP_PM_ADDR_FLAG_BACKUP; + else + entry->flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP; + addr = *entry; + spin_unlock_bh(&pernet->lock); + + mptcp_nl_addr_backup(net, &addr.addr, bkup); return 0; } diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 0e6b42c76ea0..85317ce38e3f 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -408,7 +408,7 @@ DECLARE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions); struct mptcp_subflow_context { struct list_head node;/* conn_list of subflows */ - char reset_start[0]; + struct_group(reset, unsigned long avg_pacing_rate; /* protected by msk socket lock */ u64 local_key; @@ -458,7 +458,7 @@ struct mptcp_subflow_context { long delegated_status; - char reset_end[0]; + ); struct list_head delegated_node; /* link into delegated_action, protected by local BH */ @@ -494,7 +494,7 @@ mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow) static inline void mptcp_subflow_ctx_reset(struct mptcp_subflow_context *subflow) { - memset(subflow->reset_start, 0, subflow->reset_end - subflow->reset_start); + memset(&subflow->reset, 0, sizeof(subflow->reset)); subflow->request_mptcp = 1; } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 894a325d39f2..d6aa5b47031e 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1924,15 +1924,17 @@ repeat: pr_debug("nf_conntrack_in: Can't track with proto module\n"); nf_ct_put(ct); skb->_nfct = 0; - NF_CT_STAT_INC_ATOMIC(state->net, invalid); - if (ret == -NF_DROP) - NF_CT_STAT_INC_ATOMIC(state->net, drop); /* Special case: TCP tracker reports an attempt to reopen a * closed/aborted connection. We have to go back and create a * fresh conntrack. */ if (ret == -NF_REPEAT) goto repeat; + + NF_CT_STAT_INC_ATOMIC(state->net, invalid); + if (ret == -NF_DROP) + NF_CT_STAT_INC_ATOMIC(state->net, drop); + ret = -ret; goto out; } diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c index 7f19ee259609..55415f011943 100644 --- a/net/netfilter/nf_conntrack_netbios_ns.c +++ b/net/netfilter/nf_conntrack_netbios_ns.c @@ -20,13 +20,14 @@ #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_expect.h> +#define HELPER_NAME "netbios-ns" #define NMBD_PORT 137 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_DESCRIPTION("NetBIOS name service broadcast connection tracking helper"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ip_conntrack_netbios_ns"); -MODULE_ALIAS_NFCT_HELPER("netbios_ns"); +MODULE_ALIAS_NFCT_HELPER(HELPER_NAME); static unsigned int timeout __read_mostly = 3; module_param(timeout, uint, 0400); @@ -44,7 +45,7 @@ static int netbios_ns_help(struct sk_buff *skb, unsigned int protoff, } static struct nf_conntrack_helper helper __read_mostly = { - .name = "netbios-ns", + .name = HELPER_NAME, .tuple.src.l3num = NFPROTO_IPV4, .tuple.src.u.udp.port = cpu_to_be16(NMBD_PORT), .tuple.dst.protonum = IPPROTO_UDP, diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 77938b1042f3..cf454f8ca2b0 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -8264,14 +8264,12 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha void *data, *data_boundary; struct nft_rule_dp *prule; struct nft_rule *rule; - int i; /* already handled or inactive chain? */ if (chain->blob_next || !nft_is_active_next(net, chain)) return 0; rule = list_entry(&chain->rules, struct nft_rule, list); - i = 0; data_size = 0; list_for_each_entry_continue(rule, &chain->rules, list) { @@ -8301,7 +8299,7 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha return -ENOMEM; size = 0; - track.last = last; + track.last = nft_expr_last(rule); nft_rule_for_each_expr(expr, last, rule) { track.cur = expr; diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c index 7d00a1452b1d..3362417ebfdb 100644 --- a/net/netfilter/nft_connlimit.c +++ b/net/netfilter/nft_connlimit.c @@ -62,6 +62,7 @@ static int nft_connlimit_do_init(const struct nft_ctx *ctx, { bool invert = false; u32 flags, limit; + int err; if (!tb[NFTA_CONNLIMIT_COUNT]) return -EINVAL; @@ -84,7 +85,15 @@ static int nft_connlimit_do_init(const struct nft_ctx *ctx, priv->limit = limit; priv->invert = invert; - return nf_ct_netns_get(ctx->net, ctx->family); + err = nf_ct_netns_get(ctx->net, ctx->family); + if (err < 0) + goto err_netns; + + return 0; +err_netns: + kfree(priv->list); + + return err; } static void nft_connlimit_do_destroy(const struct nft_ctx *ctx, diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 5bd409ab4cc2..85ea7ddb48db 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1774,6 +1774,7 @@ static int fanout_add(struct sock *sk, struct fanout_args *args) match->prot_hook.dev = po->prot_hook.dev; match->prot_hook.func = packet_rcv_fanout; match->prot_hook.af_packet_priv = match; + match->prot_hook.af_packet_net = read_pnet(&match->net); match->prot_hook.id_match = match_fanout_group; match->max_num_members = args->max_num_members; list_add(&match->list, &fanout_list); @@ -3353,6 +3354,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, po->prot_hook.func = packet_rcv_spkt; po->prot_hook.af_packet_priv = sk; + po->prot_hook.af_packet_net = sock_net(sk); if (proto) { po->prot_hook.type = proto; diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 6be2672a65ea..df864e692267 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -157,7 +157,7 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call) static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) { struct sk_buff *skb; - unsigned long resend_at, rto_j; + unsigned long resend_at; rxrpc_seq_t cursor, seq, top; ktime_t now, max_age, oldest, ack_ts; int ix; @@ -165,10 +165,8 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) _enter("{%d,%d}", call->tx_hard_ack, call->tx_top); - rto_j = call->peer->rto_j; - now = ktime_get_real(); - max_age = ktime_sub(now, jiffies_to_usecs(rto_j)); + max_age = ktime_sub(now, jiffies_to_usecs(call->peer->rto_j)); spin_lock_bh(&call->lock); @@ -213,7 +211,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) } resend_at = nsecs_to_jiffies(ktime_to_ns(ktime_sub(now, oldest))); - resend_at += jiffies + rto_j; + resend_at += jiffies + rxrpc_get_rto_backoff(call->peer, retrans); WRITE_ONCE(call->resend_at, resend_at); if (unacked) diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 10f2bf2e9068..a45c83f22236 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -468,7 +468,7 @@ done: if (call->peer->rtt_count > 1) { unsigned long nowj = jiffies, ack_lost_at; - ack_lost_at = rxrpc_get_rto_backoff(call->peer, retrans); + ack_lost_at = rxrpc_get_rto_backoff(call->peer, false); ack_lost_at += nowj; WRITE_ONCE(call->ack_lost_at, ack_lost_at); rxrpc_reduce_call_timer(call, ack_lost_at, nowj, diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 2cb496c84878..179825a3b2fd 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1204,7 +1204,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev, err = -ENOENT; if (!ops) { - NL_SET_ERR_MSG(extack, "Specified qdisc not found"); + NL_SET_ERR_MSG(extack, "Specified qdisc kind is unknown"); goto err_out; } diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 9267922ea9c3..23a9d6242429 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1810,6 +1810,26 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (!hopt->rate.rate || !hopt->ceil.rate) goto failure; + if (q->offload) { + /* Options not supported by the offload. */ + if (hopt->rate.overhead || hopt->ceil.overhead) { + NL_SET_ERR_MSG(extack, "HTB offload doesn't support the overhead parameter"); + goto failure; + } + if (hopt->rate.mpu || hopt->ceil.mpu) { + NL_SET_ERR_MSG(extack, "HTB offload doesn't support the mpu parameter"); + goto failure; + } + if (hopt->quantum) { + NL_SET_ERR_MSG(extack, "HTB offload doesn't support the quantum parameter"); + goto failure; + } + if (hopt->prio) { + NL_SET_ERR_MSG(extack, "HTB offload doesn't support the prio parameter"); + goto failure; + } + } + /* Keeping backward compatible with rate_table based iproute2 tc */ if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE) qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB], diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 961854e56736..d5ea62b82bb8 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -566,12 +566,17 @@ static void smc_stat_fallback(struct smc_sock *smc) mutex_unlock(&net->smc.mutex_fback_rsn); } -static void smc_switch_to_fallback(struct smc_sock *smc, int reason_code) +static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code) { wait_queue_head_t *smc_wait = sk_sleep(&smc->sk); - wait_queue_head_t *clc_wait = sk_sleep(smc->clcsock->sk); + wait_queue_head_t *clc_wait; unsigned long flags; + mutex_lock(&smc->clcsock_release_lock); + if (!smc->clcsock) { + mutex_unlock(&smc->clcsock_release_lock); + return -EBADF; + } smc->use_fallback = true; smc->fallback_rsn = reason_code; smc_stat_fallback(smc); @@ -586,18 +591,30 @@ static void smc_switch_to_fallback(struct smc_sock *smc, int reason_code) * smc socket->wq, which should be removed * to clcsocket->wq during the fallback. */ + clc_wait = sk_sleep(smc->clcsock->sk); spin_lock_irqsave(&smc_wait->lock, flags); spin_lock_nested(&clc_wait->lock, SINGLE_DEPTH_NESTING); list_splice_init(&smc_wait->head, &clc_wait->head); spin_unlock(&clc_wait->lock); spin_unlock_irqrestore(&smc_wait->lock, flags); } + mutex_unlock(&smc->clcsock_release_lock); + return 0; } /* fall back during connect */ static int smc_connect_fallback(struct smc_sock *smc, int reason_code) { - smc_switch_to_fallback(smc, reason_code); + struct net *net = sock_net(&smc->sk); + int rc = 0; + + rc = smc_switch_to_fallback(smc, reason_code); + if (rc) { /* fallback fails */ + this_cpu_inc(net->smc.smc_stats->clnt_hshake_err_cnt); + if (smc->sk.sk_state == SMC_INIT) + sock_put(&smc->sk); /* passive closing */ + return rc; + } smc_copy_sock_settings_to_clc(smc); smc->connect_nonblock = 0; if (smc->sk.sk_state == SMC_INIT) @@ -1518,11 +1535,12 @@ static void smc_listen_decline(struct smc_sock *new_smc, int reason_code, { /* RDMA setup failed, switch back to TCP */ smc_conn_abort(new_smc, local_first); - if (reason_code < 0) { /* error, no fallback possible */ + if (reason_code < 0 || + smc_switch_to_fallback(new_smc, reason_code)) { + /* error, no fallback possible */ smc_listen_out_err(new_smc); return; } - smc_switch_to_fallback(new_smc, reason_code); if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) { if (smc_clc_send_decline(new_smc, reason_code, version) < 0) { smc_listen_out_err(new_smc); @@ -1964,8 +1982,11 @@ static void smc_listen_work(struct work_struct *work) /* check if peer is smc capable */ if (!tcp_sk(newclcsock->sk)->syn_smc) { - smc_switch_to_fallback(new_smc, SMC_CLC_DECL_PEERNOSMC); - smc_listen_out_connected(new_smc); + rc = smc_switch_to_fallback(new_smc, SMC_CLC_DECL_PEERNOSMC); + if (rc) + smc_listen_out_err(new_smc); + else + smc_listen_out_connected(new_smc); return; } @@ -2254,7 +2275,9 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) if (msg->msg_flags & MSG_FASTOPEN) { if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) { - smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP); + rc = smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP); + if (rc) + goto out; } else { rc = -EINVAL; goto out; @@ -2447,6 +2470,11 @@ static int smc_setsockopt(struct socket *sock, int level, int optname, /* generic setsockopts reaching us here always apply to the * CLC socket */ + mutex_lock(&smc->clcsock_release_lock); + if (!smc->clcsock) { + mutex_unlock(&smc->clcsock_release_lock); + return -EBADF; + } if (unlikely(!smc->clcsock->ops->setsockopt)) rc = -EOPNOTSUPP; else @@ -2456,6 +2484,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname, sk->sk_err = smc->clcsock->sk->sk_err; sk_error_report(sk); } + mutex_unlock(&smc->clcsock_release_lock); if (optlen < sizeof(int)) return -EINVAL; @@ -2472,7 +2501,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname, case TCP_FASTOPEN_NO_COOKIE: /* option not supported by SMC */ if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) { - smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP); + rc = smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP); } else { rc = -EINVAL; } @@ -2515,13 +2544,23 @@ static int smc_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { struct smc_sock *smc; + int rc; smc = smc_sk(sock->sk); + mutex_lock(&smc->clcsock_release_lock); + if (!smc->clcsock) { + mutex_unlock(&smc->clcsock_release_lock); + return -EBADF; + } /* socket options apply to the CLC socket */ - if (unlikely(!smc->clcsock->ops->getsockopt)) + if (unlikely(!smc->clcsock->ops->getsockopt)) { + mutex_unlock(&smc->clcsock_release_lock); return -EOPNOTSUPP; - return smc->clcsock->ops->getsockopt(smc->clcsock, level, optname, - optval, optlen); + } + rc = smc->clcsock->ops->getsockopt(smc->clcsock, level, optname, + optval, optlen); + mutex_unlock(&smc->clcsock_release_lock); + return rc; } static int smc_ioctl(struct socket *sock, unsigned int cmd, diff --git a/net/sunrpc/auth_gss/gss_generic_token.c b/net/sunrpc/auth_gss/gss_generic_token.c index fe97f3106536..4a4082bb22ad 100644 --- a/net/sunrpc/auth_gss/gss_generic_token.c +++ b/net/sunrpc/auth_gss/gss_generic_token.c @@ -222,10 +222,8 @@ g_verify_token_header(struct xdr_netobj *mech, int *body_size, if (ret) return ret; - if (!ret) { - *buf_in = buf; - *body_size = toksize; - } + *buf_in = buf; + *body_size = toksize; return ret; } diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index a312ea2bc440..c83fe618767c 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2900,7 +2900,7 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt, unsigned long connect_timeout; unsigned long reconnect_timeout; unsigned char resvport, reuseport; - int ret = 0; + int ret = 0, ident; rcu_read_lock(); xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch)); @@ -2914,8 +2914,11 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt, reuseport = xprt->reuseport; connect_timeout = xprt->connect_timeout; reconnect_timeout = xprt->max_reconnect_timeout; + ident = xprt->xprt_class->ident; rcu_read_unlock(); + if (!xprtargs->ident) + xprtargs->ident = ident; xprt = xprt_create_transport(xprtargs); if (IS_ERR(xprt)) { ret = PTR_ERR(xprt); diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index ee5336d73fdd..35588f0afa86 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -600,9 +600,9 @@ static int __rpc_rmdir(struct inode *dir, struct dentry *dentry) dget(dentry); ret = simple_rmdir(dir, dentry); + d_drop(dentry); if (!ret) fsnotify_rmdir(dir, dentry); - d_delete(dentry); dput(dentry); return ret; } @@ -613,9 +613,9 @@ static int __rpc_unlink(struct inode *dir, struct dentry *dentry) dget(dentry); ret = simple_unlink(dir, dentry); + d_drop(dentry); if (!ret) fsnotify_unlink(dir, dentry); - d_delete(dentry); dput(dentry); return ret; } diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c index 2766dd21935b..b64a0286b182 100644 --- a/net/sunrpc/sysfs.c +++ b/net/sunrpc/sysfs.c @@ -295,8 +295,10 @@ static ssize_t rpc_sysfs_xprt_state_change(struct kobject *kobj, online = 1; else if (!strncmp(buf, "remove", 6)) remove = 1; - else - return -EINVAL; + else { + count = -EINVAL; + goto out_put; + } if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE)) { count = -EINTR; @@ -307,25 +309,28 @@ static ssize_t rpc_sysfs_xprt_state_change(struct kobject *kobj, goto release_tasks; } if (offline) { - set_bit(XPRT_OFFLINE, &xprt->state); - spin_lock(&xps->xps_lock); - xps->xps_nactive--; - spin_unlock(&xps->xps_lock); + if (!test_and_set_bit(XPRT_OFFLINE, &xprt->state)) { + spin_lock(&xps->xps_lock); + xps->xps_nactive--; + spin_unlock(&xps->xps_lock); + } } else if (online) { - clear_bit(XPRT_OFFLINE, &xprt->state); - spin_lock(&xps->xps_lock); - xps->xps_nactive++; - spin_unlock(&xps->xps_lock); + if (test_and_clear_bit(XPRT_OFFLINE, &xprt->state)) { + spin_lock(&xps->xps_lock); + xps->xps_nactive++; + spin_unlock(&xps->xps_lock); + } } else if (remove) { if (test_bit(XPRT_OFFLINE, &xprt->state)) { - set_bit(XPRT_REMOVE, &xprt->state); - xprt_force_disconnect(xprt); - if (test_bit(XPRT_CONNECTED, &xprt->state)) { - if (!xprt->sending.qlen && - !xprt->pending.qlen && - !xprt->backlog.qlen && - !atomic_long_read(&xprt->queuelen)) - rpc_xprt_switch_remove_xprt(xps, xprt); + if (!test_and_set_bit(XPRT_REMOVE, &xprt->state)) { + xprt_force_disconnect(xprt); + if (test_bit(XPRT_CONNECTED, &xprt->state)) { + if (!xprt->sending.qlen && + !xprt->pending.qlen && + !xprt->backlog.qlen && + !atomic_long_read(&xprt->queuelen)) + rpc_xprt_switch_remove_xprt(xps, xprt); + } } } else { count = -EINVAL; @@ -422,6 +427,7 @@ static struct attribute *rpc_sysfs_xprt_attrs[] = { &rpc_sysfs_xprt_change_state.attr, NULL, }; +ATTRIBUTE_GROUPS(rpc_sysfs_xprt); static struct kobj_attribute rpc_sysfs_xprt_switch_info = __ATTR(xprt_switch_info, 0444, rpc_sysfs_xprt_switch_info_show, NULL); @@ -430,6 +436,7 @@ static struct attribute *rpc_sysfs_xprt_switch_attrs[] = { &rpc_sysfs_xprt_switch_info.attr, NULL, }; +ATTRIBUTE_GROUPS(rpc_sysfs_xprt_switch); static struct kobj_type rpc_sysfs_client_type = { .release = rpc_sysfs_client_release, @@ -439,14 +446,14 @@ static struct kobj_type rpc_sysfs_client_type = { static struct kobj_type rpc_sysfs_xprt_switch_type = { .release = rpc_sysfs_xprt_switch_release, - .default_attrs = rpc_sysfs_xprt_switch_attrs, + .default_groups = rpc_sysfs_xprt_switch_groups, .sysfs_ops = &kobj_sysfs_ops, .namespace = rpc_sysfs_xprt_switch_namespace, }; static struct kobj_type rpc_sysfs_xprt_type = { .release = rpc_sysfs_xprt_release, - .default_attrs = rpc_sysfs_xprt_attrs, + .default_groups = rpc_sysfs_xprt_groups, .sysfs_ops = &kobj_sysfs_ops, .namespace = rpc_sysfs_xprt_namespace, }; diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index 17f174d6ea3b..faba7136dd9a 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -13,10 +13,6 @@ #include "xprt_rdma.h" #include <trace/events/rpcrdma.h> -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) -# define RPCDBG_FACILITY RPCDBG_TRANS -#endif - #undef RPCRDMA_BACKCHANNEL_DEBUG /** diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index ff699307e820..515dd7a66a04 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -45,10 +45,6 @@ #include "xprt_rdma.h" #include <trace/events/rpcrdma.h> -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) -# define RPCDBG_FACILITY RPCDBG_TRANS -#endif - static void frwr_cid_init(struct rpcrdma_ep *ep, struct rpcrdma_mr *mr) { diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 8035a983c8ce..281ddb87ac8d 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -54,10 +54,6 @@ #include "xprt_rdma.h" #include <trace/events/rpcrdma.h> -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) -# define RPCDBG_FACILITY RPCDBG_TRANS -#endif - /* Returns size of largest RPC-over-RDMA header in a Call message * * The largest Call header contains a full-size Read list and a diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 16e5696314a4..42e375dbdadb 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -60,10 +60,6 @@ #include "xprt_rdma.h" #include <trace/events/rpcrdma.h> -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) -# define RPCDBG_FACILITY RPCDBG_TRANS -#endif - /* * tunables */ diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 3d3673ba9e1e..f172d1298013 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -63,17 +63,6 @@ #include "xprt_rdma.h" #include <trace/events/rpcrdma.h> -/* - * Globals/Macros - */ - -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) -# define RPCDBG_FACILITY RPCDBG_TRANS -#endif - -/* - * internal functions - */ static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt); static void rpcrdma_sendctxs_destroy(struct rpcrdma_xprt *r_xprt); static void rpcrdma_sendctx_put_locked(struct rpcrdma_xprt *r_xprt, @@ -274,8 +263,6 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) ep->re_connect_status = -ENETUNREACH; goto wake_connect_worker; case RDMA_CM_EVENT_REJECTED: - dprintk("rpcrdma: connection to %pISpc rejected: %s\n", - sap, rdma_reject_msg(id, event->status)); ep->re_connect_status = -ECONNREFUSED; if (event->status == IB_CM_REJ_STALE_CONN) ep->re_connect_status = -ENOTCONN; @@ -291,8 +278,6 @@ disconnected: break; } - dprintk("RPC: %s: %pISpc on %s/frwr: %s\n", __func__, sap, - ep->re_id->device->name, rdma_event_msg(event->event)); return 0; } @@ -419,14 +404,6 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) ep->re_attr.qp_type = IB_QPT_RC; ep->re_attr.port_num = ~0; - dprintk("RPC: %s: requested max: dtos: send %d recv %d; " - "iovs: send %d recv %d\n", - __func__, - ep->re_attr.cap.max_send_wr, - ep->re_attr.cap.max_recv_wr, - ep->re_attr.cap.max_send_sge, - ep->re_attr.cap.max_recv_sge); - ep->re_send_batch = ep->re_max_requests >> 3; ep->re_send_count = ep->re_send_batch; init_waitqueue_head(&ep->re_connect_wait); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index d8ee06a9650a..69b6ee5a5fd1 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1910,7 +1910,7 @@ static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task) struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); int ret; - if (RPC_IS_ASYNC(task)) { + if (RPC_IS_ASYNC(task)) { /* * We want the AF_LOCAL connect to be resolved in the * filesystem namespace of the process making the rpc diff --git a/scripts/Makefile b/scripts/Makefile index ecd3acacd0ec..ce5aa9030b74 100644 --- a/scripts/Makefile +++ b/scripts/Makefile @@ -25,7 +25,7 @@ HOSTCFLAGS_sorttable.o += -I$(srctree)/tools/arch/x86/include HOSTCFLAGS_sorttable.o += -DUNWINDER_ORC_ENABLED endif -ifdef CONFIG_DYNAMIC_FTRACE +ifdef CONFIG_BUILDTIME_MCOUNT_SORT HOSTCFLAGS_sorttable.o += -DMCOUNT_SORT_ENABLED endif diff --git a/security/security.c b/security/security.c index 3d4eb474f35b..22261d79f333 100644 --- a/security/security.c +++ b/security/security.c @@ -1048,8 +1048,19 @@ int security_dentry_init_security(struct dentry *dentry, int mode, const char **xattr_name, void **ctx, u32 *ctxlen) { - return call_int_hook(dentry_init_security, -EOPNOTSUPP, dentry, mode, - name, xattr_name, ctx, ctxlen); + struct security_hook_list *hp; + int rc; + + /* + * Only one module will provide a security context. + */ + hlist_for_each_entry(hp, &security_hook_heads.dentry_init_security, list) { + rc = hp->hook.dentry_init_security(dentry, mode, name, + xattr_name, ctx, ctxlen); + if (rc != LSM_RET_DEFAULT(dentry_init_security)) + return rc; + } + return LSM_RET_DEFAULT(dentry_init_security); } EXPORT_SYMBOL(security_dentry_init_security); diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 2da3316bb559..bf6e96011dfe 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -452,6 +452,9 @@ struct kvm_sync_regs { #define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001 +/* attributes for system fd (group 0) */ +#define KVM_X86_XCOMP_GUEST_SUPP 0 + struct kvm_vmx_nested_state_data { __u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; __u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; diff --git a/tools/bpf/runqslower/runqslower.bpf.c b/tools/bpf/runqslower/runqslower.bpf.c index ab9353f2fd46..9a5c1f008fe6 100644 --- a/tools/bpf/runqslower/runqslower.bpf.c +++ b/tools/bpf/runqslower/runqslower.bpf.c @@ -68,7 +68,7 @@ int handle__sched_switch(u64 *ctx) */ struct task_struct *prev = (struct task_struct *)ctx[1]; struct task_struct *next = (struct task_struct *)ctx[2]; - struct event event = {}; + struct runq_event event = {}; u64 *tsp, delta_us; long state; u32 pid; diff --git a/tools/bpf/runqslower/runqslower.c b/tools/bpf/runqslower/runqslower.c index 2414cc764461..d78f4148597f 100644 --- a/tools/bpf/runqslower/runqslower.c +++ b/tools/bpf/runqslower/runqslower.c @@ -100,7 +100,7 @@ static int bump_memlock_rlimit(void) void handle_event(void *ctx, int cpu, void *data, __u32 data_sz) { - const struct event *e = data; + const struct runq_event *e = data; struct tm *tm; char ts[32]; time_t t; diff --git a/tools/bpf/runqslower/runqslower.h b/tools/bpf/runqslower/runqslower.h index 9db225425e5f..4f70f07200c2 100644 --- a/tools/bpf/runqslower/runqslower.h +++ b/tools/bpf/runqslower/runqslower.h @@ -4,7 +4,7 @@ #define TASK_COMM_LEN 16 -struct event { +struct runq_event { char task[TASK_COMM_LEN]; __u64 delta_us; pid_t pid; diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 9563d294f181..b46bcdb0cab1 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -1133,6 +1133,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206 #define KVM_CAP_VM_GPA_BITS 207 #define KVM_CAP_XSAVE2 208 +#define KVM_CAP_SYS_ATTRIBUTES 209 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c index af798b9d232c..a3c1e67441f9 100644 --- a/tools/testing/selftests/arm64/fp/sve-ptrace.c +++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c @@ -261,7 +261,7 @@ static void ptrace_sve_fpsimd(pid_t child, const struct vec_type *type) } ksft_test_result((sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD, - "Set FPSIMD registers via %s\n", type->name); + "Got FPSIMD registers via %s\n", type->name); if ((sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_FPSIMD) goto out; @@ -557,7 +557,14 @@ static int do_parent(pid_t child) } /* prctl() flags */ - ptrace_set_get_inherit(child, &vec_types[i]); + if (getauxval(vec_types[i].hwcap_type) & vec_types[i].hwcap) { + ptrace_set_get_inherit(child, &vec_types[i]); + } else { + ksft_test_result_skip("%s SVE_PT_VL_INHERIT set\n", + vec_types[i].name); + ksft_test_result_skip("%s SVE_PT_VL_INHERIT cleared\n", + vec_types[i].name); + } /* Step through every possible VQ */ for (vq = SVE_VQ_MIN; vq <= SVE_VQ_MAX; vq++) { diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 81ebf99d6ff0..0e4926bc9a58 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -85,6 +85,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_pi_mmio_test TEST_GEN_PROGS_x86_64 += x86_64/sev_migrate_tests TEST_GEN_PROGS_x86_64 += x86_64/amx_test +TEST_GEN_PROGS_x86_64 += access_tracking_perf_test TEST_GEN_PROGS_x86_64 += demand_paging_test TEST_GEN_PROGS_x86_64 += dirty_log_test TEST_GEN_PROGS_x86_64 += dirty_log_perf_test diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h index 66775de26952..4ed6aa049a91 100644 --- a/tools/testing/selftests/kvm/include/kvm_util_base.h +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h @@ -345,7 +345,6 @@ struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus, * guest_code - The vCPU's entry point */ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code); -void vm_xsave_req_perm(void); bool vm_is_unrestricted_guest(struct kvm_vm *vm); diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 423d8a61bd2e..8a470da7b71a 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -458,6 +458,7 @@ uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2, struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void); void vcpu_set_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid); struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid); +void vm_xsave_req_perm(int bit); enum x86_page_size { X86_PAGE_SIZE_4K = 0, diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 8c53f96ab7fe..d8cf851ab119 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -393,13 +393,6 @@ struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus, struct kvm_vm *vm; int i; -#ifdef __x86_64__ - /* - * Permission needs to be requested before KVM_SET_CPUID2. - */ - vm_xsave_req_perm(); -#endif - /* Force slot0 memory size not small than DEFAULT_GUEST_PHY_PAGES */ if (slot0_mem_pages < DEFAULT_GUEST_PHY_PAGES) slot0_mem_pages = DEFAULT_GUEST_PHY_PAGES; diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 5f9d7e91dc69..9f000dfb5594 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -665,16 +665,31 @@ static bool is_xfd_supported(void) return !!(eax & CPUID_XFD_BIT); } -void vm_xsave_req_perm(void) +void vm_xsave_req_perm(int bit) { - unsigned long bitmask; + int kvm_fd; + u64 bitmask; long rc; + struct kvm_device_attr attr = { + .group = 0, + .attr = KVM_X86_XCOMP_GUEST_SUPP, + .addr = (unsigned long) &bitmask + }; + + kvm_fd = open_kvm_dev_path_or_exit(); + rc = ioctl(kvm_fd, KVM_GET_DEVICE_ATTR, &attr); + close(kvm_fd); + if (rc == -1 && (errno == ENXIO || errno == EINVAL)) + exit(KSFT_SKIP); + TEST_ASSERT(rc == 0, "KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) error: %ld", rc); + if (!(bitmask & (1ULL << bit))) + exit(KSFT_SKIP); if (!is_xfd_supported()) - return; + exit(KSFT_SKIP); + + rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, bit); - rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, - XSTATE_XTILE_DATA_BIT); /* * The older kernel version(<5.15) can't support * ARCH_REQ_XCOMP_GUEST_PERM and directly return. @@ -684,7 +699,7 @@ void vm_xsave_req_perm(void) rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask); TEST_ASSERT(rc == 0, "prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc); - TEST_ASSERT(bitmask & XFEATURE_XTILE_MASK, + TEST_ASSERT(bitmask & (1ULL << bit), "prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure bitmask=0x%lx", bitmask); } diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86_64/amx_test.c index 523c1e99ed64..52a3ef6629e8 100644 --- a/tools/testing/selftests/kvm/x86_64/amx_test.c +++ b/tools/testing/selftests/kvm/x86_64/amx_test.c @@ -329,6 +329,8 @@ int main(int argc, char *argv[]) u32 amx_offset; int stage, ret; + vm_xsave_req_perm(XSTATE_XTILE_DATA_BIT); + /* Create VM */ vm = vm_create_default(VCPU_ID, 0, guest_code); diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c index 2da8eb8e2d96..a626d40fdb48 100644 --- a/tools/testing/selftests/kvm/x86_64/smm_test.c +++ b/tools/testing/selftests/kvm/x86_64/smm_test.c @@ -105,7 +105,6 @@ static void guest_code(void *arg) if (cpu_has_svm()) { run_guest(svm->vmcb, svm->vmcb_gpa); - svm->vmcb->save.rip += 3; run_guest(svm->vmcb, svm->vmcb_gpa); } else { vmlaunch(); diff --git a/tools/testing/selftests/net/ioam6_parser.c b/tools/testing/selftests/net/ioam6_parser.c index 8f6997d35816..d9d1d4190126 100644 --- a/tools/testing/selftests/net/ioam6_parser.c +++ b/tools/testing/selftests/net/ioam6_parser.c @@ -240,11 +240,8 @@ static int check_ioam6_data(__u8 **p, struct ioam6_trace_hdr *ioam6h, *p += sizeof(__u32); } - if (ioam6h->type.bit6) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) - return 1; + if (ioam6h->type.bit6) *p += sizeof(__u32); - } if (ioam6h->type.bit7) { if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 27d0eb9afdca..b8bdbec0cf69 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -75,6 +75,7 @@ init() # let $ns2 reach any $ns1 address from any interface ip -net "$ns2" route add default via 10.0.$i.1 dev ns2eth$i metric 10$i + ip -net "$ns2" route add default via dead:beef:$i::1 dev ns2eth$i metric 10$i done } @@ -1476,7 +1477,7 @@ ipv6_tests() reset ip netns exec $ns1 ./pm_nl_ctl limits 0 1 ip netns exec $ns2 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 flags subflow + ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 dev ns2eth3 flags subflow run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow chk_join_nr "single subflow IPv6" 1 1 1 @@ -1511,7 +1512,7 @@ ipv6_tests() ip netns exec $ns1 ./pm_nl_ctl limits 0 2 ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal ip netns exec $ns2 ./pm_nl_ctl limits 1 2 - ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 flags subflow + ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 dev ns2eth3 flags subflow run_tests $ns1 $ns2 dead:beef:1::1 0 -1 -1 slow chk_join_nr "remove subflow and signal IPv6" 2 2 2 chk_add_nr 1 1 diff --git a/tools/tracing/Makefile b/tools/tracing/Makefile index 87e0ec48e2e7..95e485f12d97 100644 --- a/tools/tracing/Makefile +++ b/tools/tracing/Makefile @@ -1,11 +1,11 @@ # SPDX-License-Identifier: GPL-2.0 include ../scripts/Makefile.include -all: latency +all: latency rtla -clean: latency_clean +clean: latency_clean rtla_clean -install: latency_install +install: latency_install rtla_install latency: $(call descend,latency) @@ -16,4 +16,14 @@ latency_install: latency_clean: $(call descend,latency,clean) -.PHONY: all install clean latency latency_install latency_clean +rtla: + $(call descend,rtla) + +rtla_install: + $(call descend,rtla,install) + +rtla_clean: + $(call descend,rtla,clean) + +.PHONY: all install clean latency latency_install latency_clean \ + rtla rtla_install rtla_clean diff --git a/tools/tracing/rtla/Makefile b/tools/tracing/rtla/Makefile index 2d52ff0bff7d..7c39728d08de 100644 --- a/tools/tracing/rtla/Makefile +++ b/tools/tracing/rtla/Makefile @@ -59,7 +59,7 @@ endif .PHONY: all all: rtla -rtla: $(OBJ) doc +rtla: $(OBJ) $(CC) -o rtla $(LDFLAGS) $(OBJ) $(LIBS) static: $(OBJ) diff --git a/usr/include/Makefile b/usr/include/Makefile index 7be7468c177b..83822c33e9e7 100644 --- a/usr/include/Makefile +++ b/usr/include/Makefile @@ -28,6 +28,7 @@ no-header-test += linux/am437x-vpfe.h no-header-test += linux/android/binder.h no-header-test += linux/android/binderfs.h no-header-test += linux/coda.h +no-header-test += linux/cyclades.h no-header-test += linux/errqueue.h no-header-test += linux/fsmap.h no-header-test += linux/hdlc/ioctl.h diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 2ad013b8bde9..59b1dd4a549e 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -463,8 +463,8 @@ bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) idx = srcu_read_lock(&kvm->irq_srcu); gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); if (gsi != -1) - hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, - link) + hlist_for_each_entry_srcu(kian, &kvm->irq_ack_notifier_list, + link, srcu_read_lock_held(&kvm->irq_srcu)) if (kian->gsi == gsi) { srcu_read_unlock(&kvm->irq_srcu, idx); return true; @@ -480,8 +480,8 @@ void kvm_notify_acked_gsi(struct kvm *kvm, int gsi) { struct kvm_irq_ack_notifier *kian; - hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, - link) + hlist_for_each_entry_srcu(kian, &kvm->irq_ack_notifier_list, + link, srcu_read_lock_held(&kvm->irq_srcu)) if (kian->gsi == gsi) kian->irq_acked(kian); } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 9a20f2299386..58d31da8a2f7 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2248,7 +2248,6 @@ struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn return NULL; } -EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_memslot); bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) { @@ -2463,9 +2462,8 @@ static int kvm_try_get_pfn(kvm_pfn_t pfn) } static int hva_to_pfn_remapped(struct vm_area_struct *vma, - unsigned long addr, bool *async, - bool write_fault, bool *writable, - kvm_pfn_t *p_pfn) + unsigned long addr, bool write_fault, + bool *writable, kvm_pfn_t *p_pfn) { kvm_pfn_t pfn; pte_t *ptep; @@ -2575,7 +2573,7 @@ retry: if (vma == NULL) pfn = KVM_PFN_ERR_FAULT; else if (vma->vm_flags & (VM_IO | VM_PFNMAP)) { - r = hva_to_pfn_remapped(vma, addr, async, write_fault, writable, &pfn); + r = hva_to_pfn_remapped(vma, addr, write_fault, writable, &pfn); if (r == -EAGAIN) goto retry; if (r < 0) |