diff options
176 files changed, 2070 insertions, 1025 deletions
diff --git a/Documentation/devicetree/bindings/crypto/qcom,inline-crypto-engine.yaml b/Documentation/devicetree/bindings/crypto/qcom,inline-crypto-engine.yaml index 876bf90ed96e..ccb6b8dd8e11 100644 --- a/Documentation/devicetree/bindings/crypto/qcom,inline-crypto-engine.yaml +++ b/Documentation/devicetree/bindings/crypto/qcom,inline-crypto-engine.yaml @@ -30,6 +30,16 @@ properties: maxItems: 1 clocks: + minItems: 1 + maxItems: 2 + + clock-names: + minItems: 1 + items: + - const: core + - const: iface + + power-domains: maxItems: 1 operating-points-v2: true @@ -44,6 +54,25 @@ required: additionalProperties: false +allOf: + - if: + properties: + compatible: + contains: + enum: + - qcom,eliza-inline-crypto-engine + - qcom,milos-inline-crypto-engine + + then: + required: + - power-domains + - clock-names + properties: + clocks: + minItems: 2 + clock-names: + minItems: 2 + examples: - | #include <dt-bindings/clock/qcom,sm8550-gcc.h> @@ -52,7 +81,11 @@ examples: compatible = "qcom,sm8550-inline-crypto-engine", "qcom,inline-crypto-engine"; reg = <0x01d88000 0x8000>; - clocks = <&gcc GCC_UFS_PHY_ICE_CORE_CLK>; + clocks = <&gcc GCC_UFS_PHY_ICE_CORE_CLK>, + <&gcc GCC_UFS_PHY_AHB_CLK>; + clock-names = "core", + "iface"; + power-domains = <&gcc UFS_PHY_GDSC>; operating-points-v2 = <&ice_opp_table>; diff --git a/Documentation/devicetree/bindings/net/eswin,eic7700-eth.yaml b/Documentation/devicetree/bindings/net/eswin,eic7700-eth.yaml index b66ae6300faf..65882ff79d8d 100644 --- a/Documentation/devicetree/bindings/net/eswin,eic7700-eth.yaml +++ b/Documentation/devicetree/bindings/net/eswin,eic7700-eth.yaml @@ -84,7 +84,8 @@ properties: This reference is provided for background information only. $ref: /schemas/types.yaml#/definitions/phandle-array items: - - items: + - minItems: 4 + items: - description: Phandle to HSP(High-Speed Peripheral) device - description: Offset of phy control register for internal or external clock selection diff --git a/MAINTAINERS b/MAINTAINERS index 9ec290e38b44..e035a3be797c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6535,7 +6535,7 @@ F: include/linux/blk-cgroup.h CONTROL GROUP - CPUSET M: Waiman Long <longman@redhat.com> -R: Chen Ridong <chenridong@huaweicloud.com> +R: Ridong Chen <ridong.chen@linux.dev> L: cgroups@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git diff --git a/arch/arm/boot/dts/gemini/gemini-sl93512r.dts b/arch/arm/boot/dts/gemini/gemini-sl93512r.dts index 4992ec276de9..341dec9b636a 100644 --- a/arch/arm/boot/dts/gemini/gemini-sl93512r.dts +++ b/arch/arm/boot/dts/gemini/gemini-sl93512r.dts @@ -146,7 +146,7 @@ partitions { compatible = "redboot-fis"; /* Eraseblock at 0xfe0000 */ - fis-index-block = <0x1fc>; + fis-index-block = <0x7f>; }; }; diff --git a/arch/arm/boot/dts/gemini/gemini-sq201.dts b/arch/arm/boot/dts/gemini/gemini-sq201.dts index f8c6f6e5cdea..bfd1e8581ad6 100644 --- a/arch/arm/boot/dts/gemini/gemini-sq201.dts +++ b/arch/arm/boot/dts/gemini/gemini-sq201.dts @@ -134,7 +134,7 @@ partitions { compatible = "redboot-fis"; /* Eraseblock at 0xfe0000 */ - fis-index-block = <0x1fc>; + fis-index-block = <0x7f>; }; }; diff --git a/arch/arm/boot/dts/microchip/sam9x7.dtsi b/arch/arm/boot/dts/microchip/sam9x7.dtsi index d242d7a934d0..c680a5033b6b 100644 --- a/arch/arm/boot/dts/microchip/sam9x7.dtsi +++ b/arch/arm/boot/dts/microchip/sam9x7.dtsi @@ -990,9 +990,9 @@ <62 IRQ_TYPE_LEVEL_HIGH 3>, /* Queue 3 */ <63 IRQ_TYPE_LEVEL_HIGH 3>, /* Queue 4 */ <64 IRQ_TYPE_LEVEL_HIGH 3>; /* Queue 5 */ - clocks = <&pmc PMC_TYPE_PERIPHERAL 24>, <&pmc PMC_TYPE_PERIPHERAL 24>, <&pmc PMC_TYPE_GCK 24>, <&pmc PMC_TYPE_GCK 67>; - clock-names = "hclk", "pclk", "tx_clk", "tsu_clk"; - assigned-clocks = <&pmc PMC_TYPE_GCK 67>; + clocks = <&pmc PMC_TYPE_PERIPHERAL 24>, <&pmc PMC_TYPE_PERIPHERAL 24>, <&pmc PMC_TYPE_GCK 24>; + clock-names = "hclk", "pclk", "tsu_clk"; + assigned-clocks = <&pmc PMC_TYPE_GCK 24>; assigned-clock-rates = <266666666>; status = "disabled"; }; diff --git a/arch/arm/mach-socfpga/platsmp.c b/arch/arm/mach-socfpga/platsmp.c index 201191cf68f3..349e6c54518e 100644 --- a/arch/arm/mach-socfpga/platsmp.c +++ b/arch/arm/mach-socfpga/platsmp.c @@ -78,6 +78,7 @@ static void __init socfpga_smp_prepare_cpus(unsigned int max_cpus) } socfpga_scu_base_addr = of_iomap(np, 0); + of_node_put(np); if (!socfpga_scu_base_addr) return; scu_enable(socfpga_scu_base_addr); diff --git a/arch/arm64/boot/dts/qcom/eliza.dtsi b/arch/arm64/boot/dts/qcom/eliza.dtsi index 4a7a0ac40ce6..7e97361a5dc5 100644 --- a/arch/arm64/boot/dts/qcom/eliza.dtsi +++ b/arch/arm64/boot/dts/qcom/eliza.dtsi @@ -843,7 +843,11 @@ "qcom,inline-crypto-engine"; reg = <0x0 0x01d88000 0x0 0x18000>; - clocks = <&gcc GCC_UFS_PHY_ICE_CORE_CLK>; + clocks = <&gcc GCC_UFS_PHY_ICE_CORE_CLK>, + <&gcc GCC_UFS_PHY_AHB_CLK>; + clock-names = "core", + "iface"; + power-domains = <&gcc GCC_UFS_PHY_GDSC>; }; tcsr_mutex: hwlock@1f40000 { diff --git a/arch/arm64/boot/dts/qcom/glymur.dtsi b/arch/arm64/boot/dts/qcom/glymur.dtsi index f23cf81ddb77..82436984485d 100644 --- a/arch/arm64/boot/dts/qcom/glymur.dtsi +++ b/arch/arm64/boot/dts/qcom/glymur.dtsi @@ -2314,11 +2314,9 @@ clocks = <&gcc GCC_USB3_MP_PHY_AUX_CLK>, <&tcsr TCSR_USB3_0_CLKREF_EN>, - <&rpmhcc RPMH_CXO_CLK>, <&gcc GCC_USB3_MP_PHY_COM_AUX_CLK>, <&gcc GCC_USB3_MP_PHY_PIPE_0_CLK>; clock-names = "aux", - "clkref", "ref", "com_aux", "pipe"; @@ -2343,11 +2341,9 @@ clocks = <&gcc GCC_USB3_MP_PHY_AUX_CLK>, <&tcsr TCSR_USB3_1_CLKREF_EN>, - <&rpmhcc RPMH_CXO_CLK>, <&gcc GCC_USB3_MP_PHY_COM_AUX_CLK>, <&gcc GCC_USB3_MP_PHY_PIPE_1_CLK>; clock-names = "aux", - "clkref", "ref", "com_aux", "pipe"; @@ -2482,15 +2478,13 @@ reg = <0x0 0x00fde000 0x0 0x8000>; clocks = <&gcc GCC_USB3_SEC_PHY_AUX_CLK>, - <&rpmhcc RPMH_CXO_CLK>, + <&tcsr TCSR_USB4_1_CLKREF_EN>, <&gcc GCC_USB3_SEC_PHY_COM_AUX_CLK>, - <&gcc GCC_USB3_SEC_PHY_PIPE_CLK>, - <&tcsr TCSR_USB4_1_CLKREF_EN>; + <&gcc GCC_USB3_SEC_PHY_PIPE_CLK>; clock-names = "aux", "ref", "com_aux", - "usb3_pipe", - "clkref"; + "usb3_pipe"; power-domains = <&gcc GCC_USB_1_PHY_GDSC>; @@ -3750,15 +3744,13 @@ reg = <0x0 0x088e1000 0x0 0x8000>; clocks = <&gcc GCC_USB3_TERT_PHY_AUX_CLK>, - <&rpmhcc RPMH_CXO_CLK>, + <&tcsr TCSR_USB4_2_CLKREF_EN>, <&gcc GCC_USB3_TERT_PHY_COM_AUX_CLK>, - <&gcc GCC_USB3_TERT_PHY_PIPE_CLK>, - <&tcsr TCSR_USB4_2_CLKREF_EN>; + <&gcc GCC_USB3_TERT_PHY_PIPE_CLK>; clock-names = "aux", "ref", "com_aux", - "usb3_pipe", - "clkref"; + "usb3_pipe"; power-domains = <&gcc GCC_USB_2_PHY_GDSC>; diff --git a/arch/arm64/boot/dts/qcom/milos.dtsi b/arch/arm64/boot/dts/qcom/milos.dtsi index 4a64a98a434b..a6e463f3885d 100644 --- a/arch/arm64/boot/dts/qcom/milos.dtsi +++ b/arch/arm64/boot/dts/qcom/milos.dtsi @@ -1275,7 +1275,11 @@ "qcom,inline-crypto-engine"; reg = <0x0 0x01d88000 0x0 0x18000>; - clocks = <&gcc GCC_UFS_PHY_ICE_CORE_CLK>; + clocks = <&gcc GCC_UFS_PHY_ICE_CORE_CLK>, + <&gcc GCC_UFS_PHY_AHB_CLK>; + clock-names = "core", + "iface"; + power-domains = <&gcc UFS_PHY_GDSC>; }; tcsr_mutex: hwlock@1f40000 { diff --git a/arch/arm64/boot/dts/qcom/x1-dell-thena.dtsi b/arch/arm64/boot/dts/qcom/x1-dell-thena.dtsi index 0d9a324cc6cc..db291730130c 100644 --- a/arch/arm64/boot/dts/qcom/x1-dell-thena.dtsi +++ b/arch/arm64/boot/dts/qcom/x1-dell-thena.dtsi @@ -982,12 +982,6 @@ status = "okay"; }; -&i2c20 { - clock-frequency = <400000>; - - status = "okay"; -}; - &lpass_tlmm { spkr_01_sd_n_active: spkr-01-sd-n-active-state { pins = "gpio12"; @@ -1308,6 +1302,7 @@ &tlmm { gpio-reserved-ranges = <44 4>, /* SPI11 (TPM) */ <76 4>, /* SPI19 (TZ Protected) */ + <80 2>, /* I2C20 (Battery SMBus) */ <238 1>; /* UFS Reset */ cam_rgb_default: cam-rgb-default-state { diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index d905a0777f93..96ce783f24e7 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -260,6 +260,7 @@ CONFIG_PCI_ENDPOINT=y CONFIG_PCI_ENDPOINT_CONFIGFS=y CONFIG_PCI_EPF_TEST=m CONFIG_PCI_PWRCTRL_GENERIC=m +CONFIG_POWER_SEQUENCING_PCIE_M2=m CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_FW_LOADER_USER_HELPER=y diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h index dc2957662ff2..cdf3e8422ea1 100644 --- a/arch/arm64/include/asm/kvm_nested.h +++ b/arch/arm64/include/asm/kvm_nested.h @@ -132,7 +132,7 @@ static inline bool kvm_s2_trans_exec_el0(struct kvm *kvm, struct kvm_s2_trans *t u8 xn = FIELD_GET(KVM_PTE_LEAF_ATTR_HI_S2_XN, trans->desc); if (!kvm_has_xnx(kvm)) - xn &= FIELD_PREP(KVM_PTE_LEAF_ATTR_HI_S2_XN, 0b10); + xn &= 0b10; switch (xn) { case 0b00: @@ -148,7 +148,7 @@ static inline bool kvm_s2_trans_exec_el1(struct kvm *kvm, struct kvm_s2_trans *t u8 xn = FIELD_GET(KVM_PTE_LEAF_ATTR_HI_S2_XN, trans->desc); if (!kvm_has_xnx(kvm)) - xn &= FIELD_PREP(KVM_PTE_LEAF_ATTR_HI_S2_XN, 0b10); + xn &= 0b10; switch (xn) { case 0b00: diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index 9f8f0ae8e86e..889c2c15d7bd 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -1569,7 +1569,8 @@ int __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) /* Do the stage-2 translation */ ipa = (par & GENMASK_ULL(47, 12)) | (vaddr & GENMASK_ULL(11, 0)); out.esr = 0; - ret = kvm_walk_nested_s2(vcpu, ipa, &out); + scoped_guard(srcu, &vcpu->kvm->srcu) + ret = kvm_walk_nested_s2(vcpu, ipa, &out); if (ret < 0) return ret; @@ -1665,7 +1666,8 @@ int __kvm_find_s1_desc_level(struct kvm_vcpu *vcpu, u64 va, u64 ipa, int *level) } /* Walk the guest's PT, looking for a match along the way */ - ret = walk_s1(vcpu, &wi, &wr, va); + scoped_guard(srcu, &vcpu->kvm->srcu) + ret = walk_s1(vcpu, &wi, &wr, va); switch (ret) { case -EINTR: /* We interrupted the walk on a match, return the level */ diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 320cd45d49c5..e9b36a3b27bb 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -181,6 +181,8 @@ static inline void __deactivate_cptr_traps_vhe(struct kvm_vcpu *vcpu) val |= CPACR_EL1_ZEN; if (cpus_have_final_cap(ARM64_SME)) val |= CPACR_EL1_SMEN; + if (cpus_have_final_cap(ARM64_HAS_S1POE)) + val |= CPACR_EL1_E0POE; write_sysreg(val, cpacr_el1); } diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 0c1defa5fb0f..91a7dfad6686 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -925,7 +925,9 @@ static bool stage2_pte_cacheable(struct kvm_pgtable *pgt, kvm_pte_t pte) static bool stage2_pte_executable(kvm_pte_t pte) { - return kvm_pte_valid(pte) && !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN); + enum kvm_pgtable_prot prot = kvm_pgtable_stage2_pte_prot(pte); + + return prot & (KVM_PGTABLE_PROT_UX | KVM_PGTABLE_PROT_PX); } static u64 stage2_map_walker_phys_addr(const struct kvm_pgtable_visit_ctx *ctx, diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 38f672e94087..6f7bc9a9992e 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -89,21 +89,28 @@ int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu) * again, and there is no reason to affect the whole VM for this. */ num_mmus = atomic_read(&kvm->online_vcpus) * S2_MMU_PER_VCPU; - tmp = kvrealloc(kvm->arch.nested_mmus, - size_mul(sizeof(*kvm->arch.nested_mmus), num_mmus), - GFP_KERNEL_ACCOUNT | __GFP_ZERO); - if (!tmp) - return -ENOMEM; - swap(kvm->arch.nested_mmus, tmp); + if (num_mmus > kvm->arch.nested_mmus_size) { + tmp = kvcalloc(num_mmus, sizeof(*tmp), GFP_KERNEL_ACCOUNT); + if (!tmp) + return -ENOMEM; - /* - * If we went through a realocation, adjust the MMU back-pointers in - * the previously initialised kvm_pgtable structures. - */ - if (kvm->arch.nested_mmus != tmp) - for (int i = 0; i < kvm->arch.nested_mmus_size; i++) - kvm->arch.nested_mmus[i].pgt->mmu = &kvm->arch.nested_mmus[i]; + write_lock(&kvm->mmu_lock); + + if (kvm->arch.nested_mmus_size) { + memcpy(tmp, kvm->arch.nested_mmus, + size_mul(sizeof(*tmp), kvm->arch.nested_mmus_size)); + + for (int i = 0; i < kvm->arch.nested_mmus_size; i++) + tmp[i].pgt->mmu = &tmp[i]; + } + + swap(kvm->arch.nested_mmus, tmp); + + write_unlock(&kvm->mmu_lock); + + kvfree(tmp); + } for (int i = kvm->arch.nested_mmus_size; !ret && i < num_mmus; i++) ret = init_nested_s2_mmu(kvm, &kvm->arch.nested_mmus[i]); diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index 1d7e5d560af4..1e3706ac3b8e 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -597,8 +597,10 @@ static void vgic_its_invalidate_cache(struct vgic_its *its) unsigned long idx; xa_for_each(&its->translation_cache, idx, irq) { - xa_erase(&its->translation_cache, idx); - vgic_put_irq(kvm, irq); + /* Only the context that erases the entry drops its cache ref. */ + irq = xa_erase(&its->translation_cache, idx); + if (irq) + vgic_put_irq(kvm, irq); } } diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index c28f9a7d0bd8..730c90b4a876 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -56,6 +56,10 @@ CONFIG_EXPOLINE_AUTO=y CONFIG_CHSC_SCH=y CONFIG_VFIO_CCW=m CONFIG_VFIO_AP=m +CONFIG_VFIO_DEVICE_CDEV=y +CONFIG_IOMMUFD_DRIVER=y +CONFIG_IOMMUFD_DRIVER_CORE=y +CONFIG_IOMMUFD=y CONFIG_CMM=m CONFIG_APPLDATA_BASE=y CONFIG_S390_HYPFS_FS=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index d89c988f33ea..dd5fc1426c88 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -54,6 +54,10 @@ CONFIG_EXPOLINE_AUTO=y CONFIG_CHSC_SCH=y CONFIG_VFIO_CCW=m CONFIG_VFIO_AP=m +CONFIG_VFIO_DEVICE_CDEV=y +CONFIG_IOMMUFD_DRIVER=y +CONFIG_IOMMUFD_DRIVER_CORE=y +CONFIG_IOMMUFD=y CONFIG_CMM=m CONFIG_APPLDATA_BASE=y CONFIG_S390_HYPFS_FS=y diff --git a/arch/s390/include/asm/bug.h b/arch/s390/include/asm/bug.h index 59017fd3d935..50a270edb020 100644 --- a/arch/s390/include/asm/bug.h +++ b/arch/s390/include/asm/bug.h @@ -12,12 +12,11 @@ #if defined(CONFIG_BUG) && defined(CONFIG_CC_HAS_ASM_IMMEDIATE_STRINGS) #ifdef CONFIG_DEBUG_BUGVERBOSE -#define __BUG_ENTRY_VERBOSE(format, file, line) \ - " .long " format " - . # bug_entry::format\n" \ +#define __BUG_ENTRY_VERBOSE(file, line) \ " .long " file " - . # bug_entry::file\n" \ " .short " line " # bug_entry::line\n" #else -#define __BUG_ENTRY_VERBOSE(format, file, line) +#define __BUG_ENTRY_VERBOSE(file, line) #endif #ifdef CONFIG_DEBUG_BUGVERBOSE_DETAILED @@ -28,9 +27,10 @@ #define __BUG_ENTRY(format, file, line, flags, size) \ " .section __bug_table,\"aw\"\n" \ - "1: .long 0b - . # bug_entry::bug_addr\n" \ - __BUG_ENTRY_VERBOSE(format, file, line) \ - " .short "flags" # bug_entry::flags\n" \ + "1: .long 0b - . # bug_entry::bug_addr\n"\ + " .long " format " - . # bug_entry::format\n" \ + __BUG_ENTRY_VERBOSE(file, line) \ + " .short "flags" # bug_entry::flags\n" \ " .org 1b+"size"\n" \ " .previous" diff --git a/arch/s390/include/asm/gmap_helpers.h b/arch/s390/include/asm/gmap_helpers.h index 2d3ae421077e..d2b616604a46 100644 --- a/arch/s390/include/asm/gmap_helpers.h +++ b/arch/s390/include/asm/gmap_helpers.h @@ -12,5 +12,6 @@ void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr); void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end); int gmap_helper_disable_cow_sharing(void); void gmap_helper_try_set_pte_unused(struct mm_struct *mm, unsigned long vmaddr); +pte_t *try_get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl); #endif /* _ASM_S390_GMAP_HELPERS_H */ diff --git a/arch/s390/include/asm/linkage.h b/arch/s390/include/asm/linkage.h index df3fb7d8227b..1b3ac553a642 100644 --- a/arch/s390/include/asm/linkage.h +++ b/arch/s390/include/asm/linkage.h @@ -7,4 +7,6 @@ #define __ALIGN .balign CONFIG_FUNCTION_ALIGNMENT, 0x07 #define __ALIGN_STR __stringify(__ALIGN) +#define _THIS_IP_ ({ unsigned long __ip; asm volatile("larl %0, ." : "=d" (__ip)); __ip; }) + #endif diff --git a/arch/s390/kvm/faultin.c b/arch/s390/kvm/faultin.c index ddf0ca71f374..fee80047bd94 100644 --- a/arch/s390/kvm/faultin.c +++ b/arch/s390/kvm/faultin.c @@ -36,7 +36,8 @@ int kvm_s390_faultin_gfn(struct kvm_vcpu *vcpu, struct kvm *kvm, struct guest_fa struct kvm_s390_mmu_cache *mc = NULL; struct kvm_memory_slot *slot; unsigned long inv_seq; - int foll, rc = 0; + int rc = -EAGAIN; + int foll; foll = f->write_attempt ? FOLL_WRITE : 0; foll |= f->attempt_pfault ? FOLL_NOWAIT : 0; @@ -53,7 +54,14 @@ int kvm_s390_faultin_gfn(struct kvm_vcpu *vcpu, struct kvm *kvm, struct guest_fa return 0; } - while (1) { + if (!mc) { + local_mc = kvm_s390_new_mmu_cache(); + if (!local_mc) + return -ENOMEM; + mc = local_mc; + } + + while (rc == -EAGAIN) { f->valid = false; inv_seq = kvm->mmu_invalidate_seq; /* Pairs with the smp_wmb() in kvm_mmu_invalidate_end(). */ @@ -93,14 +101,7 @@ int kvm_s390_faultin_gfn(struct kvm_vcpu *vcpu, struct kvm *kvm, struct guest_fa if (is_error_pfn(f->pfn)) return -EFAULT; - if (!mc) { - local_mc = kvm_s390_new_mmu_cache(); - if (!local_mc) - return -ENOMEM; - mc = local_mc; - } - - /* Loop, will automatically release the faulted page. */ + /* Loop, release the faulted page. */ if (mmu_invalidate_retry_gfn_unsafe(kvm, inv_seq, f->gfn)) { kvm_release_faultin_page(kvm, f->page, true, false); continue; @@ -110,20 +111,19 @@ int kvm_s390_faultin_gfn(struct kvm_vcpu *vcpu, struct kvm *kvm, struct guest_fa if (!mmu_invalidate_retry_gfn(kvm, inv_seq, f->gfn)) { f->valid = true; rc = gmap_link(mc, kvm->arch.gmap, f, slot); - kvm_release_faultin_page(kvm, f->page, !!rc, f->write_attempt); - f->page = NULL; } + kvm_release_faultin_page(kvm, f->page, !!rc, f->write_attempt); } - kvm_release_faultin_page(kvm, f->page, true, false); if (rc == -ENOMEM) { rc = kvm_s390_mmu_cache_topup(mc); if (rc) return rc; - } else if (rc != -EAGAIN) { - return rc; + rc = -EAGAIN; } } + + return rc; } int kvm_s390_get_guest_page(struct kvm *kvm, struct guest_fault *f, gfn_t gfn, bool w) diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 4f8d5592c9a9..20e28b183c1a 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -1466,15 +1466,17 @@ static int _do_shadow_crste(struct gmap *sg, gpa_t raddr, union crste *host, uni struct guest_fault *f, bool p) { union crste newcrste, oldcrste; - gfn_t gfn; + unsigned long mask; + gfn_t r_gfn; int rc; lockdep_assert_held(&sg->kvm->mmu_lock); lockdep_assert_held(&sg->parent->children_lock); - gfn = f->gfn & (is_pmd(*table) ? _SEGMENT_FR_MASK : _REGION3_FR_MASK); + mask = is_pmd(*table) ? _SEGMENT_FR_MASK : _REGION3_FR_MASK; + r_gfn = gpa_to_gfn(raddr) & mask; scoped_guard(spinlock, &sg->host_to_rmap_lock) - rc = gmap_insert_rmap(sg, gfn, gpa_to_gfn(raddr), host->h.tt); + rc = gmap_insert_rmap(sg, f->gfn & mask, r_gfn, host->h.tt); if (rc) return rc; @@ -1497,8 +1499,7 @@ static int _do_shadow_crste(struct gmap *sg, gpa_t raddr, union crste *host, uni return -EAGAIN; newcrste = _crste_fc1(f->pfn, oldcrste.h.tt, 0, !p); - gfn = gpa_to_gfn(raddr); - while (!dat_crstep_xchg_atomic(table, READ_ONCE(*table), newcrste, gfn, sg->asce)) + while (!dat_crstep_xchg_atomic(table, READ_ONCE(*table), newcrste, r_gfn, sg->asce)) ; return 0; } diff --git a/arch/s390/kvm/gmap.c b/arch/s390/kvm/gmap.c index 957126ab991c..52d55ddea8d4 100644 --- a/arch/s390/kvm/gmap.c +++ b/arch/s390/kvm/gmap.c @@ -395,15 +395,28 @@ static long _gmap_unmap_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct struct gmap_unmap_priv *priv = walk->priv; struct folio *folio = NULL; union crste old = *crstep; + bool ok; if (!old.h.fc) return 0; if (old.s.fc1.pr && test_bit(GMAP_FLAG_EXPORT_ON_UNMAP, &priv->gmap->flags)) folio = phys_to_folio(crste_origin_large(old)); - /* No races should happen because kvm->mmu_lock is held in write mode */ - KVM_BUG_ON(!gmap_crstep_xchg_atomic(priv->gmap, crstep, old, _CRSTE_EMPTY(old.h.tt), gfn), - priv->gmap->kvm); + /* + * No races should happen because kvm->mmu_lock is held in write mode, + * but the unmap operation could have triggered an unshadow, which + * causes gmap_crstep_xchg_atomic() to return false and clear the + * vsie_notif bit. Allow the operation to fail once, if the old crste + * had the vsie_notif bit set. A second failure is not allowed, for + * the reasons above. + */ + ok = gmap_crstep_xchg_atomic(priv->gmap, crstep, old, _CRSTE_EMPTY(old.h.tt), gfn); + if (!ok) { + KVM_BUG_ON(!old.s.fc1.vsie_notif, priv->gmap->kvm); + old.s.fc1.vsie_notif = 0; + ok = gmap_crstep_xchg_atomic(priv->gmap, crstep, old, _CRSTE_EMPTY(old.h.tt), gfn); + KVM_BUG_ON(!ok, priv->gmap->kvm); + } if (folio) uv_convert_from_secure_folio(folio); diff --git a/arch/s390/kvm/gmap.h b/arch/s390/kvm/gmap.h index 742e42a31744..5374f21aaf8d 100644 --- a/arch/s390/kvm/gmap.h +++ b/arch/s390/kvm/gmap.h @@ -273,11 +273,14 @@ static inline bool __must_check _gmap_crstep_xchg_atomic(struct gmap *gmap, unio gmap_unmap_prefix(gmap, gfn, gfn + align); } if (crste_leaf(oldcrste) && crste_needs_unshadow(oldcrste, newcrste)) { + newcrste = oldcrste; newcrste.s.fc1.vsie_notif = 0; if (needs_lock) gmap_handle_vsie_unshadow_event(gmap, gfn); else _gmap_handle_vsie_unshadow_event(gmap, gfn); + dat_crstep_xchg_atomic(crstep, oldcrste, newcrste, gfn, gmap->asce); + return false; } if (!oldcrste.s.fc1.d && newcrste.s.fc1.d && !newcrste.s.fc1.s) SetPageDirty(phys_to_page(crste_origin_large(newcrste))); diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index e09960c2e6ed..ffb20a64d328 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -999,7 +999,10 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att break; } case KVM_S390_VM_MEM_LIMIT_SIZE: { + struct kvm_memslots *slots; + struct kvm_memory_slot *ms; unsigned long new_limit; + int bkt; if (kvm_is_ucontrol(kvm)) return -EINVAL; @@ -1007,6 +1010,9 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att if (get_user(new_limit, (u64 __user *)attr->addr)) return -EFAULT; + guard(mutex)(&kvm->lock); + + new_limit = ALIGN(new_limit, HPAGE_SIZE); if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT && new_limit > kvm->arch.mem_limit) return -E2BIG; @@ -1014,12 +1020,27 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att if (!new_limit) return -EINVAL; - ret = -EBUSY; - if (!kvm->created_vcpus) - ret = gmap_set_limit(kvm->arch.gmap, gpa_to_gfn(new_limit)); + if (kvm->created_vcpus) + return -EBUSY; + + ret = 0; + scoped_guard(mutex, &kvm->slots_lock) { + slots = kvm_memslots(kvm); + if (slots && !kvm_memslots_empty(slots)) { + kvm_for_each_memslot(ms, bkt, slots) { + if (gpa_to_gfn(new_limit) < ms->base_gfn + ms->npages) { + ret = -EBUSY; + break; + } + } + } + if (!ret) + ret = gmap_set_limit(kvm->arch.gmap, gpa_to_gfn(new_limit)); + } + if (ret) + break; VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit); - VM_EVENT(kvm, 3, "New guest asce: 0x%p", - (void *)kvm->arch.gmap->asce.val); + VM_EVENT(kvm, 3, "New guest asce: 0x%p", (void *)kvm->arch.gmap->asce.val); break; } default: @@ -5672,6 +5693,8 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, return -EINVAL; if ((new->base_gfn + new->npages) * PAGE_SIZE > kvm->arch.mem_limit) return -EINVAL; + if (!asce_contains_gfn(kvm->arch.gmap->asce, new->base_gfn + new->npages - 1)) + return -EINVAL; } if (!kvm->arch.migration_mode) diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index cc0553da14cb..447ec7ed423d 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -1188,6 +1188,7 @@ static void _essa_clear_cbrl(struct kvm_vcpu *vcpu, unsigned long *cbrl, int len union crste *crstep; union pgste pgste; union pte *ptep; + hva_t hva; int i; lockdep_assert_held(&vcpu->kvm->mmu_lock); @@ -1199,8 +1200,11 @@ static void _essa_clear_cbrl(struct kvm_vcpu *vcpu, unsigned long *cbrl, int len if (!ptep || ptep->s.pr) continue; pgste = pgste_get_lock(ptep); - if (pgste.usage == PGSTE_GPS_USAGE_UNUSED || pgste.zero) - gmap_helper_zap_one_page(vcpu->kvm->mm, cbrl[i]); + if (pgste.usage == PGSTE_GPS_USAGE_UNUSED || pgste.zero) { + hva = gpa_to_hva(vcpu->kvm, cbrl[i]); + if (!kvm_is_error_hva(hva)) + gmap_helper_zap_one_page(vcpu->kvm->mm, hva); + } pgste_set_unlock(ptep, pgste); } } diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c index c2dafd812a3b..4b865e75351c 100644 --- a/arch/s390/kvm/pv.c +++ b/arch/s390/kvm/pv.c @@ -17,6 +17,7 @@ #include <linux/pagewalk.h> #include <linux/sched/mm.h> #include <linux/mmu_notifier.h> +#include <asm/gmap_helpers.h> #include "kvm-s390.h" #include "dat.h" #include "gaccess.h" @@ -73,6 +74,7 @@ static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_str struct pv_make_secure { void *uvcb; struct folio *folio; + struct kvm *kvm; int rc; bool needs_export; }; @@ -103,9 +105,21 @@ static void _kvm_s390_pv_make_secure(struct guest_fault *f) { struct pv_make_secure *priv = f->priv; struct folio *folio; + spinlock_t *ptl; /* pte lock from try_get_locked_pte() */ + pte_t *ptep; folio = pfn_folio(f->pfn); priv->rc = -EAGAIN; + + if (!mmap_read_trylock(priv->kvm->mm)) + return; + + ptep = try_get_locked_pte(priv->kvm->mm, gfn_to_hva(priv->kvm, f->gfn), &ptl); + if (IS_ERR_VALUE(ptep)) { + priv->rc = PTR_ERR(ptep); + goto out; + } + if (folio_trylock(folio)) { priv->rc = __kvm_s390_pv_make_secure(f, folio); if (priv->rc == -E2BIG || priv->rc == -EBUSY) { @@ -114,6 +128,11 @@ static void _kvm_s390_pv_make_secure(struct guest_fault *f) } folio_unlock(folio); } + + if (ptep) + pte_unmap_unlock(ptep, ptl); +out: + mmap_read_unlock(priv->kvm->mm); } /** @@ -127,7 +146,7 @@ static void _kvm_s390_pv_make_secure(struct guest_fault *f) */ int kvm_s390_pv_make_secure(struct kvm *kvm, unsigned long gaddr, void *uvcb) { - struct pv_make_secure priv = { .uvcb = uvcb }; + struct pv_make_secure priv = { .uvcb = uvcb, .kvm = kvm, }; struct guest_fault f = { .write_attempt = true, .gfn = gpa_to_gfn(gaddr), diff --git a/arch/s390/mm/gmap_helpers.c b/arch/s390/mm/gmap_helpers.c index f8789ffcc05c..1cfe4724fbe2 100644 --- a/arch/s390/mm/gmap_helpers.c +++ b/arch/s390/mm/gmap_helpers.c @@ -17,22 +17,68 @@ #include <asm/gmap_helpers.h> /** - * ptep_zap_softleaf_entry() - discard a software leaf entry. + * try_get_locked_pte() - like get_locked_pte(), but atomic and with trylock * @mm: the mm - * @entry: the software leaf entry that needs to be zapped + * @vmaddr: the userspace virtual address whose pte is to be found + * @ptl: will be set to the pointer to the lock used to lock the pte in case + * of success. * - * Discards the given software leaf entry. If the leaf entry was an actual - * swap entry (and not a migration entry, for example), the actual swapped - * page is also discarded from swap. + * This function returns the pointer to the pte corresponding to @addr in @mm, + * similarly to get_locked_pte(). Unlike get_locked_pte(), no attempt is made + * to allocate missing page tables. If a missing or large entry is found, the + * function will return NULL. If the ptl lock is contended, %-EAGAIN is + * returned. + * + * In case of success, *@ptl will point to the locked pte lock for the returned + * pte, like get_locked_pte() does. + * + * Context: mmap_lock or vma lock for read or for write needs to be held. + * Return: + * * %NULL if the pte cannot be reached. + * * %-EAGAIN if the pte can be reached, but cannot be locked. + * * the pointer to the pte corresponding to @addr in @mm, if it can be reached + * and locked. */ -static void ptep_zap_softleaf_entry(struct mm_struct *mm, softleaf_t entry) +pte_t *try_get_locked_pte(struct mm_struct *mm, unsigned long vmaddr, spinlock_t **ptl) { - if (softleaf_is_swap(entry)) - dec_mm_counter(mm, MM_SWAPENTS); - else if (softleaf_is_migration(entry)) - dec_mm_counter(mm, mm_counter(softleaf_to_folio(entry))); - swap_put_entries_direct(entry, 1); + pmd_t *pmdp, pmd, pmdval; + pud_t *pudp, pud; + p4d_t *p4dp, p4d; + pgd_t *pgdp, pgd; + pte_t *ptep; + + pgdp = pgd_offset(mm, vmaddr); + pgd = pgdp_get(pgdp); + if (pgd_none(pgd) || !pgd_present(pgd)) + return NULL; + p4dp = p4d_offset(pgdp, vmaddr); + p4d = p4dp_get(p4dp); + if (p4d_none(p4d) || !p4d_present(p4d)) + return NULL; + pudp = pud_offset(p4dp, vmaddr); + pud = pudp_get(pudp); + if (pud_none(pud) || pud_leaf(pud) || !pud_present(pud)) + return NULL; + pmdp = pmd_offset(pudp, vmaddr); + pmd = pmdp_get_lockless(pmdp); + if (pmd_none(pmd) || pmd_leaf(pmd) || !pmd_present(pmd)) + return NULL; + ptep = pte_offset_map_rw_nolock(mm, pmdp, vmaddr, &pmdval, ptl); + if (!ptep) + return NULL; + + if (spin_trylock(*ptl)) { + if (unlikely(!pmd_same(pmdval, pmdp_get_lockless(pmdp)))) { + pte_unmap_unlock(ptep, *ptl); + return ERR_PTR(-EAGAIN); + } + return ptep; + } + + pte_unmap(ptep); + return ERR_PTR(-EAGAIN); } +EXPORT_SYMBOL_GPL(try_get_locked_pte); /** * gmap_helper_zap_one_page() - discard a page if it was swapped. @@ -46,7 +92,8 @@ static void ptep_zap_softleaf_entry(struct mm_struct *mm, softleaf_t entry) void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr) { struct vm_area_struct *vma; - spinlock_t *ptl; + spinlock_t *ptl; /* Lock for the host (userspace) page table */ + softleaf_t sl; pte_t *ptep; mmap_assert_locked(mm); @@ -57,11 +104,13 @@ void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr) return; /* Get pointer to the page table entry */ - ptep = get_locked_pte(mm, vmaddr, &ptl); - if (unlikely(!ptep)) + ptep = try_get_locked_pte(mm, vmaddr, &ptl); + if (IS_ERR_OR_NULL(ptep)) return; - if (pte_swap(*ptep)) { - ptep_zap_softleaf_entry(mm, softleaf_from_pte(*ptep)); + sl = softleaf_from_pte(*ptep); + if (pte_swap(*ptep) && softleaf_is_swap(sl)) { + dec_mm_counter(mm, MM_SWAPENTS); + swap_put_entries_direct(sl, 1); pte_clear(mm, vmaddr, ptep); } pte_unmap_unlock(ptep, ptl); @@ -113,37 +162,9 @@ EXPORT_SYMBOL_GPL(gmap_helper_discard); */ void gmap_helper_try_set_pte_unused(struct mm_struct *mm, unsigned long vmaddr) { - pmd_t *pmdp, pmd, pmdval; - pud_t *pudp, pud; - p4d_t *p4dp, p4d; - pgd_t *pgdp, pgd; spinlock_t *ptl; /* Lock for the host (userspace) page table */ pte_t *ptep; - pgdp = pgd_offset(mm, vmaddr); - pgd = pgdp_get(pgdp); - if (pgd_none(pgd) || !pgd_present(pgd)) - return; - - p4dp = p4d_offset(pgdp, vmaddr); - p4d = p4dp_get(p4dp); - if (p4d_none(p4d) || !p4d_present(p4d)) - return; - - pudp = pud_offset(p4dp, vmaddr); - pud = pudp_get(pudp); - if (pud_none(pud) || pud_leaf(pud) || !pud_present(pud)) - return; - - pmdp = pmd_offset(pudp, vmaddr); - pmd = pmdp_get_lockless(pmdp); - if (pmd_none(pmd) || pmd_leaf(pmd) || !pmd_present(pmd)) - return; - - ptep = pte_offset_map_rw_nolock(mm, pmdp, vmaddr, &pmdval, &ptl); - if (!ptep) - return; - /* * Several paths exists that takes the ptl lock and then call the * mmu_notifier, which takes the mmu_lock. The unmap path, instead, @@ -156,21 +177,12 @@ void gmap_helper_try_set_pte_unused(struct mm_struct *mm, unsigned long vmaddr) * If the lock is contended the bit is not set and the deadlock is * avoided. */ - if (spin_trylock(ptl)) { - /* - * Make sure the pte we are touching is still the correct - * one. In theory this check should not be needed, but - * better safe than sorry. - * Disabling interrupts or holding the mmap lock is enough to - * guarantee that no concurrent updates to the page tables - * are possible. - */ - if (likely(pmd_same(pmdval, pmdp_get_lockless(pmdp)))) - __atomic64_or(_PAGE_UNUSED, (long *)ptep); - spin_unlock(ptl); - } + ptep = try_get_locked_pte(mm, vmaddr, &ptl); + if (IS_ERR_OR_NULL(ptep)) + return; - pte_unmap(ptep); + __atomic64_or(_PAGE_UNUSED, (long *)ptep); + pte_unmap_unlock(ptep, ptl); } EXPORT_SYMBOL_GPL(gmap_helper_try_set_pte_unused); diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 62b5befe0eed..6c6a6d663e29 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -3313,37 +3313,6 @@ void sev_guest_memory_reclaimed(struct kvm *kvm) sev_writeback_caches(kvm); } -void sev_free_vcpu(struct kvm_vcpu *vcpu) -{ - struct vcpu_svm *svm; - - if (!is_sev_es_guest(vcpu)) - return; - - svm = to_svm(vcpu); - - /* - * If it's an SNP guest, then the VMSA was marked in the RMP table as - * a guest-owned page. Transition the page to hypervisor state before - * releasing it back to the system. - */ - if (is_sev_snp_guest(vcpu)) { - u64 pfn = __pa(svm->sev_es.vmsa) >> PAGE_SHIFT; - - if (kvm_rmp_make_shared(vcpu->kvm, pfn, PG_LEVEL_4K)) - goto skip_vmsa_free; - } - - if (vcpu->arch.guest_state_protected) - sev_flush_encrypted_page(vcpu, svm->sev_es.vmsa); - - __free_page(virt_to_page(svm->sev_es.vmsa)); - -skip_vmsa_free: - if (svm->sev_es.ghcb_sa_free) - kvfree(svm->sev_es.ghcb_sa); -} - static void dump_ghcb(struct vcpu_svm *svm) { struct vmcb_control_area *control = &svm->vmcb->control; @@ -3583,6 +3552,20 @@ vmgexit_err: return 1; } +static void __sev_es_unmap_ghcb(struct vcpu_svm *svm) +{ + if (svm->sev_es.ghcb_sa_free) { + kvfree(svm->sev_es.ghcb_sa); + svm->sev_es.ghcb_sa = NULL; + svm->sev_es.ghcb_sa_free = false; + } + + if (svm->sev_es.ghcb) { + kvm_vcpu_unmap(&svm->vcpu, &svm->sev_es.ghcb_map); + svm->sev_es.ghcb = NULL; + } +} + void sev_es_unmap_ghcb(struct vcpu_svm *svm) { /* Clear any indication that the vCPU is in a type of AP Reset Hold */ @@ -3591,31 +3574,51 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm) if (!svm->sev_es.ghcb) return; - if (svm->sev_es.ghcb_sa_free) { - /* - * The scratch area lives outside the GHCB, so there is a - * buffer that, depending on the operation performed, may - * need to be synced, then freed. - */ - if (svm->sev_es.ghcb_sa_sync) { - kvm_write_guest(svm->vcpu.kvm, - svm->sev_es.sw_scratch, - svm->sev_es.ghcb_sa, - svm->sev_es.ghcb_sa_len); - svm->sev_es.ghcb_sa_sync = false; - } - - kvfree(svm->sev_es.ghcb_sa); - svm->sev_es.ghcb_sa = NULL; - svm->sev_es.ghcb_sa_free = false; + /* + * If the scratch area lives outside the GHCB, there's a buffer that, + * depending on the operation performed, may need to be synced. + */ + if (svm->sev_es.ghcb_sa_sync) { + kvm_write_guest(svm->vcpu.kvm, svm->sev_es.sw_scratch, + svm->sev_es.ghcb_sa, svm->sev_es.ghcb_sa_len); + svm->sev_es.ghcb_sa_sync = false; } trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, svm->sev_es.ghcb); sev_es_sync_to_ghcb(svm); - kvm_vcpu_unmap(&svm->vcpu, &svm->sev_es.ghcb_map); - svm->sev_es.ghcb = NULL; + __sev_es_unmap_ghcb(svm); +} + +void sev_free_vcpu(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm; + + if (!is_sev_es_guest(vcpu)) + return; + + svm = to_svm(vcpu); + + /* + * If it's an SNP guest, then the VMSA was marked in the RMP table as + * a guest-owned page. Transition the page to hypervisor state before + * releasing it back to the system. + */ + if (is_sev_snp_guest(vcpu)) { + u64 pfn = __pa(svm->sev_es.vmsa) >> PAGE_SHIFT; + + if (kvm_rmp_make_shared(vcpu->kvm, pfn, PG_LEVEL_4K)) + goto skip_vmsa_free; + } + + if (vcpu->arch.guest_state_protected) + sev_flush_encrypted_page(vcpu, svm->sev_es.vmsa); + + __free_page(virt_to_page(svm->sev_es.vmsa)); + +skip_vmsa_free: + __sev_es_unmap_ghcb(svm); } int pre_sev_run(struct vcpu_svm *svm, int cpu) @@ -3685,6 +3688,8 @@ static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 min_len) goto e_scratch; } + WARN_ON_ONCE(svm->sev_es.ghcb_sa_sync || svm->sev_es.ghcb_sa_free); + if ((scratch_gpa_beg & PAGE_MASK) == control->ghcb_gpa) { /* Scratch area begins within GHCB */ ghcb_scratch_beg = control->ghcb_gpa + @@ -3706,6 +3711,8 @@ static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 min_len) scratch_va = (void *)svm->sev_es.ghcb; scratch_va += (scratch_gpa_beg - control->ghcb_gpa); + svm->sev_es.ghcb_sa_sync = false; + svm->sev_es.ghcb_sa_free = false; svm->sev_es.ghcb_sa_len = ghcb_scratch_end - scratch_gpa_beg; } else { /* GHCB v2 requires the scratch area to be within the GHCB. */ @@ -3841,13 +3848,11 @@ struct psc_buffer { struct psc_entry entries[]; } __packed; -static int snp_begin_psc(struct vcpu_svm *svm); +static int snp_do_psc(struct vcpu_svm *svm); static void snp_complete_psc(struct vcpu_svm *svm, u64 psc_ret) { - svm->sev_es.psc_inflight = 0; - svm->sev_es.psc_idx = 0; - svm->sev_es.psc_2m = false; + memset(&svm->sev_es.psc, 0, sizeof(svm->sev_es.psc)); /* * PSC requests always get a "no action" response in SW_EXITINFO1, with @@ -3860,9 +3865,8 @@ static void snp_complete_psc(struct vcpu_svm *svm, u64 psc_ret) static void __snp_complete_one_psc(struct vcpu_svm *svm) { - struct psc_buffer *psc = svm->sev_es.ghcb_sa; - struct psc_entry *entries = psc->entries; - struct psc_hdr *hdr = &psc->hdr; + struct vcpu_sev_es_state *sev_es = &svm->sev_es; + struct psc_buffer *guest_psc = sev_es->ghcb_sa; __u16 idx; /* @@ -3870,14 +3874,15 @@ static void __snp_complete_one_psc(struct vcpu_svm *svm) * corresponding entries in the guest's PSC buffer and zero out the * count of in-flight PSC entries. */ - for (idx = svm->sev_es.psc_idx; svm->sev_es.psc_inflight; - svm->sev_es.psc_inflight--, idx++) { - struct psc_entry entry = READ_ONCE(entries[idx]); + for (idx = sev_es->psc.cur_idx; sev_es->psc.batch_size; + sev_es->psc.batch_size--, idx++) { + struct psc_entry entry = READ_ONCE(guest_psc->entries[idx]); - entries[idx].cur_page = entry.pagesize ? 512 : 1; + guest_psc->entries[idx].cur_page = entry.pagesize ? 512 : 1; } - hdr->cur_entry = idx; + sev_es->psc.cur_idx = idx; + guest_psc->hdr.cur_entry = idx; } static int snp_complete_one_psc(struct kvm_vcpu *vcpu) @@ -3892,63 +3897,30 @@ static int snp_complete_one_psc(struct kvm_vcpu *vcpu) __snp_complete_one_psc(svm); /* Handle the next range (if any). */ - return snp_begin_psc(svm); + return snp_do_psc(svm); } -static int snp_begin_psc(struct vcpu_svm *svm) +static int snp_do_psc(struct vcpu_svm *svm) { struct vcpu_sev_es_state *sev_es = &svm->sev_es; - struct psc_buffer *psc = sev_es->ghcb_sa; - struct psc_entry *entries = psc->entries; + struct psc_buffer *guest_psc = sev_es->ghcb_sa; struct kvm_vcpu *vcpu = &svm->vcpu; - struct psc_hdr *hdr = &psc->hdr; struct psc_entry entry_start; - u16 idx, idx_start, idx_end, max_nr_entries; int npages; bool huge; u64 gfn; - - if (!user_exit_on_hypercall(vcpu->kvm, KVM_HC_MAP_GPA_RANGE)) { - snp_complete_psc(svm, VMGEXIT_PSC_ERROR_GENERIC); - return 1; - } - - /* - * GHCB v2 requires the scratch area to reside within the GHCB itself, - * and PSC requests are only supported for GHCB v2+. Thus it should be - * impossible to exceed the max PSC entry count (which is derived from - * the size of the shared GHCB buffer). - */ - max_nr_entries = (sev_es->ghcb_sa_len - sizeof(struct psc_hdr)) / - sizeof(struct psc_entry); - if (WARN_ON_ONCE(max_nr_entries > VMGEXIT_PSC_MAX_COUNT)) { - snp_complete_psc(svm, VMGEXIT_PSC_ERROR_GENERIC); - return 1; - } + u16 idx; next_range: /* There should be no other PSCs in-flight at this point. */ - if (WARN_ON_ONCE(svm->sev_es.psc_inflight)) { + if (WARN_ON_ONCE(svm->sev_es.psc.batch_size)) { snp_complete_psc(svm, VMGEXIT_PSC_ERROR_GENERIC); return 1; } - /* - * The PSC descriptor buffer can be modified by a misbehaved guest after - * validation, so take care to only use validated copies of values used - * for things like array indexing. - */ - idx_start = READ_ONCE(hdr->cur_entry); - idx_end = READ_ONCE(hdr->end_entry); - - if (idx_end >= max_nr_entries) { - snp_complete_psc(svm, VMGEXIT_PSC_ERROR_INVALID_HDR); - return 1; - } - /* Find the start of the next range which needs processing. */ - for (idx = idx_start; idx <= idx_end; idx++, hdr->cur_entry++) { - entry_start = READ_ONCE(entries[idx]); + for (idx = sev_es->psc.cur_idx; idx <= sev_es->psc.end_idx; idx++) { + entry_start = READ_ONCE(guest_psc->entries[idx]); gfn = entry_start.gfn; huge = entry_start.pagesize; @@ -3974,32 +3946,40 @@ next_range: if (npages) break; + + /* + * Increment the guest-visible index to communicate the current + * entry back to the guest, e.g. in case of failure. No need + * for READ_ONCE() as KVM doesn't consume the field, i.e. a + * misbehaving guest can only break itself. + */ + guest_psc->hdr.cur_entry++; } - if (idx > idx_end) { + if (idx > sev_es->psc.end_idx) { /* Nothing more to process. */ snp_complete_psc(svm, 0); return 1; } - svm->sev_es.psc_2m = huge; - svm->sev_es.psc_idx = idx; - svm->sev_es.psc_inflight = 1; + sev_es->psc.is_2m = huge; + sev_es->psc.cur_idx = idx; + sev_es->psc.batch_size = 1; /* * Find all subsequent PSC entries that contain adjacent GPA * ranges/operations and can be combined into a single * KVM_HC_MAP_GPA_RANGE exit. */ - while (++idx <= idx_end) { - struct psc_entry entry = READ_ONCE(entries[idx]); + while (++idx <= sev_es->psc.end_idx) { + struct psc_entry entry = READ_ONCE(guest_psc->entries[idx]); if (entry.operation != entry_start.operation || entry.gfn != entry_start.gfn + npages || entry.cur_page || !!entry.pagesize != huge) break; - svm->sev_es.psc_inflight++; + sev_es->psc.batch_size++; npages += huge ? 512 : 1; } @@ -4041,6 +4021,46 @@ next_range: BUG(); } +static int snp_begin_psc(struct vcpu_svm *svm) +{ + struct vcpu_sev_es_state *sev_es = &svm->sev_es; + struct psc_buffer *guest_psc = sev_es->ghcb_sa; + u16 max_nr_entries; + + if (!user_exit_on_hypercall(svm->vcpu.kvm, KVM_HC_MAP_GPA_RANGE)) { + snp_complete_psc(svm, VMGEXIT_PSC_ERROR_GENERIC); + return 1; + } + + /* + * GHCB v2 requires the scratch area to reside within the GHCB itself, + * and PSC requests are only supported for GHCB v2+. Thus it should be + * impossible to exceed the max PSC entry count (which is derived from + * the size of the shared GHCB buffer). + */ + max_nr_entries = (sev_es->ghcb_sa_len - sizeof(struct psc_hdr)) / + sizeof(struct psc_entry); + if (WARN_ON_ONCE(max_nr_entries > VMGEXIT_PSC_MAX_COUNT)) { + snp_complete_psc(svm, VMGEXIT_PSC_ERROR_GENERIC); + return 1; + } + + /* + * The PSC descriptor buffer can be modified by a misbehaved guest after + * validation, so take care to only use validated copies of values used + * for things like array indexing. + */ + sev_es->psc.cur_idx = READ_ONCE(guest_psc->hdr.cur_entry); + sev_es->psc.end_idx = READ_ONCE(guest_psc->hdr.end_entry); + + if (sev_es->psc.end_idx >= max_nr_entries) { + snp_complete_psc(svm, VMGEXIT_PSC_ERROR_INVALID_HDR); + return 1; + } + + return snp_do_psc(svm); +} + /* * Invoked as part of svm_vcpu_reset() processing of an init event. */ diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index a10668d17a16..5137416be593 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -257,9 +257,12 @@ struct vcpu_sev_es_state { bool ghcb_sa_free; /* SNP Page-State-Change buffer entries currently being processed */ - u16 psc_idx; - u16 psc_inflight; - bool psc_2m; + struct { + u16 cur_idx; + u16 end_idx; + u16 batch_size; + bool is_2m; + } psc; u64 ghcb_registered_gpa; diff --git a/drivers/auxdisplay/Kconfig b/drivers/auxdisplay/Kconfig index bedc6133f970..1ea7c039160c 100644 --- a/drivers/auxdisplay/Kconfig +++ b/drivers/auxdisplay/Kconfig @@ -327,7 +327,7 @@ config PANEL_CHANGE_MESSAGE say 'N' and keep the default message with the version. config PANEL_BOOT_MESSAGE - depends on PANEL_CHANGE_MESSAGE="y" + depends on PANEL_CHANGE_MESSAGE string "New initialization message" default "" help diff --git a/drivers/auxdisplay/line-display.c b/drivers/auxdisplay/line-display.c index fb6d9294140d..915eb5cd96b2 100644 --- a/drivers/auxdisplay/line-display.c +++ b/drivers/auxdisplay/line-display.c @@ -173,7 +173,7 @@ static int linedisp_display(struct linedisp *linedisp, const char *msg, count = strlen(msg); /* if the string ends with a newline, trim it */ - if (msg[count - 1] == '\n') + if (count && msg[count - 1] == '\n') count--; if (!count) { diff --git a/drivers/auxdisplay/max6959.c b/drivers/auxdisplay/max6959.c index 6bbc8d48fb1b..3bdef099a225 100644 --- a/drivers/auxdisplay/max6959.c +++ b/drivers/auxdisplay/max6959.c @@ -86,10 +86,7 @@ static const struct linedisp_ops max6959_linedisp_ops = { static int max6959_enable(struct max6959_priv *priv, bool enable) { - u8 mask = REG_CONFIGURATION_S_BIT; - u8 value = enable ? mask : 0; - - return regmap_update_bits(priv->regmap, REG_CONFIGURATION, mask, value); + return regmap_assign_bits(priv->regmap, REG_CONFIGURATION, REG_CONFIGURATION_S_BIT, enable); } static void max6959_power_off(void *priv) diff --git a/drivers/firmware/samsung/exynos-acpm-dvfs.c b/drivers/firmware/samsung/exynos-acpm-dvfs.c index 06bdf62dea1f..fdea7aa24ca0 100644 --- a/drivers/firmware/samsung/exynos-acpm-dvfs.c +++ b/drivers/firmware/samsung/exynos-acpm-dvfs.c @@ -31,6 +31,9 @@ static void acpm_dvfs_set_xfer(struct acpm_xfer *xfer, u32 *cmd, size_t cmdlen, if (response) { xfer->rxcnt = cmdlen; xfer->rxd = cmd; + } else { + xfer->rxcnt = 0; + xfer->rxd = NULL; } } diff --git a/drivers/firmware/samsung/exynos-acpm.c b/drivers/firmware/samsung/exynos-acpm.c index 16c46ed60837..19db3674a28f 100644 --- a/drivers/firmware/samsung/exynos-acpm.c +++ b/drivers/firmware/samsung/exynos-acpm.c @@ -7,11 +7,12 @@ #include <linux/bitfield.h> #include <linux/bitmap.h> -#include <linux/bits.h> +#include <linux/bitops.h> #include <linux/cleanup.h> #include <linux/container_of.h> #include <linux/delay.h> #include <linux/device.h> +#include <linux/find.h> #include <linux/firmware/samsung/exynos-acpm-protocol.h> #include <linux/io.h> #include <linux/iopoll.h> @@ -104,12 +105,15 @@ struct acpm_queue { * * @cmd: pointer to where the data shall be saved. * @n_cmd: number of 32-bit commands. - * @response: true if the client expects the RX data. + * @rxcnt: expected length of the response in 32-bit words. + * @completed: flag indicating if the firmware response has been fully + * processed. */ struct acpm_rx_data { u32 *cmd; size_t n_cmd; - bool response; + size_t rxcnt; + bool completed; }; #define ACPM_SEQNUM_MAX 64 @@ -199,31 +203,33 @@ static void acpm_get_saved_rx(struct acpm_chan *achan, const struct acpm_rx_data *rx_data = &achan->rx_data[tx_seqnum - 1]; u32 rx_seqnum; - if (!rx_data->response) + if (!rx_data->rxcnt) return; rx_seqnum = FIELD_GET(ACPM_PROTOCOL_SEQNUM, rx_data->cmd[0]); - if (rx_seqnum == tx_seqnum) { + if (rx_seqnum == tx_seqnum) memcpy(xfer->rxd, rx_data->cmd, xfer->rxcnt * sizeof(*xfer->rxd)); - clear_bit(rx_seqnum - 1, achan->bitmap_seqnum); - } } /** * acpm_get_rx() - get response from RX queue. * @achan: ACPM channel info. * @xfer: reference to the transfer to get response for. + * @native_match: pointer to a boolean set to true if the thread natively + * processed its own sequence number during this call. * * Return: 0 on success, -errno otherwise. */ -static int acpm_get_rx(struct acpm_chan *achan, const struct acpm_xfer *xfer) +static int acpm_get_rx(struct acpm_chan *achan, const struct acpm_xfer *xfer, + bool *native_match) { u32 rx_front, rx_seqnum, tx_seqnum, seqnum; const void __iomem *base, *addr; struct acpm_rx_data *rx_data; u32 i, val, mlen; - bool rx_set = false; + + *native_match = false; guard(mutex)(&achan->rx_lock); @@ -232,10 +238,8 @@ static int acpm_get_rx(struct acpm_chan *achan, const struct acpm_xfer *xfer) tx_seqnum = FIELD_GET(ACPM_PROTOCOL_SEQNUM, xfer->txd[0]); - if (i == rx_front) { - acpm_get_saved_rx(achan, xfer, tx_seqnum); + if (i == rx_front) return 0; - } base = achan->rx.base; mlen = achan->mlen; @@ -256,11 +260,16 @@ static int acpm_get_rx(struct acpm_chan *achan, const struct acpm_xfer *xfer) seqnum = rx_seqnum - 1; rx_data = &achan->rx_data[seqnum]; - if (rx_data->response) { + if (rx_data->rxcnt) { if (rx_seqnum == tx_seqnum) { __ioread32_copy(xfer->rxd, addr, xfer->rxcnt); - rx_set = true; - clear_bit(seqnum, achan->bitmap_seqnum); + /* + * Signal completion to the polling thread. + * Pairs with smp_load_acquire() in polling + * loop. + */ + smp_store_release(&rx_data->completed, true); + *native_match = true; } else { /* * The RX data corresponds to another request. @@ -268,10 +277,23 @@ static int acpm_get_rx(struct acpm_chan *achan, const struct acpm_xfer *xfer) * clear yet the bitmap. It will be cleared * after the response is copied to the request. */ - __ioread32_copy(rx_data->cmd, addr, xfer->rxcnt); + __ioread32_copy(rx_data->cmd, addr, + rx_data->rxcnt); + /* + * Signal completion to the polling thread. + * Pairs with smp_load_acquire() in polling + * loop. + */ + smp_store_release(&rx_data->completed, true); } } else { - clear_bit(seqnum, achan->bitmap_seqnum); + /* + * Signal completion to the polling thread. + * Pairs with smp_load_acquire() in polling loop. + */ + smp_store_release(&rx_data->completed, true); + if (rx_seqnum == tx_seqnum) + *native_match = true; } i = (i + 1) % achan->qlen; @@ -280,13 +302,6 @@ static int acpm_get_rx(struct acpm_chan *achan, const struct acpm_xfer *xfer) /* We saved all responses, mark RX empty. */ writel(rx_front, achan->rx.rear); - /* - * If the response was not in this iteration of the queue, check if the - * RX data was previously saved. - */ - if (!rx_set) - acpm_get_saved_rx(achan, xfer, tx_seqnum); - return 0; } @@ -301,6 +316,7 @@ static int acpm_dequeue_by_polling(struct acpm_chan *achan, const struct acpm_xfer *xfer) { struct device *dev = achan->acpm->dev; + bool native_match; ktime_t timeout; u32 seqnum; int ret; @@ -309,12 +325,25 @@ static int acpm_dequeue_by_polling(struct acpm_chan *achan, timeout = ktime_add_us(ktime_get(), ACPM_POLL_TIMEOUT_US); do { - ret = acpm_get_rx(achan, xfer); + ret = acpm_get_rx(achan, xfer, &native_match); if (ret) return ret; - if (!test_bit(seqnum - 1, achan->bitmap_seqnum)) + /* + * Safely check if our specific transaction has been processed. + * smp_load_acquire prevents the CPU from speculatively + * executing subsequent instructions before the transaction is + * synchronized. + */ + if (smp_load_acquire(&achan->rx_data[seqnum - 1].completed)) { + /* Retrieve payload if another thread cached it for us */ + if (!native_match) + acpm_get_saved_rx(achan, xfer, seqnum); + + /* Relinquish ownership of the sequence slot */ + clear_bit_unlock(seqnum - 1, achan->bitmap_seqnum); return 0; + } /* Determined experimentally. */ udelay(20); @@ -362,29 +391,48 @@ static int acpm_wait_for_queue_slots(struct acpm_chan *achan, u32 next_tx_front) * TX queue. * @achan: ACPM channel info. * @xfer: reference to the transfer being prepared. + * + * Return: 0 on success, -errno otherwise. */ -static void acpm_prepare_xfer(struct acpm_chan *achan, - const struct acpm_xfer *xfer) +static int acpm_prepare_xfer(struct acpm_chan *achan, + const struct acpm_xfer *xfer) { struct acpm_rx_data *rx_data; u32 *txd = (u32 *)xfer->txd; + unsigned long size = ACPM_SEQNUM_MAX - 1; + unsigned long bit = achan->seqnum; + + bit = find_next_zero_bit(achan->bitmap_seqnum, size, bit); + if (bit >= size) { + bit = find_first_zero_bit(achan->bitmap_seqnum, size); + if (bit >= size) { + dev_err_ratelimited(achan->acpm->dev, + "ACPM sequence number pool exhausted\n"); + return -EBUSY; + } + } - /* Prevent chan->seqnum from being re-used */ - do { - if (++achan->seqnum == ACPM_SEQNUM_MAX) - achan->seqnum = 1; - } while (test_bit(achan->seqnum - 1, achan->bitmap_seqnum)); + /* + * Execute the atomic set to formally claim the bit and establish + * LKMM Acquire semantics against the RX thread's clear_bit_unlock(). + * A loop is unnecessary because allocations are strictly serialized + * by tx_lock. + */ + if (WARN_ON_ONCE(test_and_set_bit_lock(bit, achan->bitmap_seqnum))) + return -EIO; + /* Flag the index based on seqnum. (seqnum: 1~63, bitmap: 0~62) */ + achan->seqnum = bit + 1; txd[0] |= FIELD_PREP(ACPM_PROTOCOL_SEQNUM, achan->seqnum); /* Clear data for upcoming responses */ - rx_data = &achan->rx_data[achan->seqnum - 1]; + rx_data = &achan->rx_data[bit]; + rx_data->completed = false; memset(rx_data->cmd, 0, sizeof(*rx_data->cmd) * rx_data->n_cmd); - if (xfer->rxd) - rx_data->response = true; + /* zero means no response expected */ + rx_data->rxcnt = xfer->rxcnt; - /* Flag the index based on seqnum. (seqnum: 1~63, bitmap: 0~62) */ - set_bit(achan->seqnum - 1, achan->bitmap_seqnum); + return 0; } /** @@ -444,7 +492,9 @@ int acpm_do_xfer(struct acpm_handle *handle, const struct acpm_xfer *xfer) if (ret) return ret; - acpm_prepare_xfer(achan, xfer); + ret = acpm_prepare_xfer(achan, xfer); + if (ret) + return ret; /* Write TX command. */ __iowrite32_copy(achan->tx.base + achan->mlen * tx_front, @@ -526,10 +576,11 @@ static int acpm_achan_alloc_cmds(struct acpm_chan *achan) /** * acpm_free_mbox_chans() - free mailbox channels. - * @acpm: pointer to driver data. + * @data: pointer to driver data. */ -static void acpm_free_mbox_chans(struct acpm_info *acpm) +static void acpm_free_mbox_chans(void *data) { + struct acpm_info *acpm = data; int i; for (i = 0; i < acpm->num_chans; i++) @@ -557,6 +608,10 @@ static int acpm_channels_init(struct acpm_info *acpm) if (!acpm->chans) return -ENOMEM; + ret = devm_add_action_or_reset(dev, acpm_free_mbox_chans, acpm); + if (ret) + return dev_err_probe(dev, ret, "Failed to add mbox free action.\n"); + chans_shmem = acpm->sram_base + readl(&shmem->chans); for (i = 0; i < acpm->num_chans; i++) { @@ -578,10 +633,8 @@ static int acpm_channels_init(struct acpm_info *acpm) cl->dev = dev; achan->chan = mbox_request_channel(cl, 0); - if (IS_ERR(achan->chan)) { - acpm_free_mbox_chans(acpm); + if (IS_ERR(achan->chan)) return PTR_ERR(achan->chan); - } } return 0; diff --git a/drivers/md/dm-cache-policy-smq.c b/drivers/md/dm-cache-policy-smq.c index dd77a93fd68d..1ae304c2f573 100644 --- a/drivers/md/dm-cache-policy-smq.c +++ b/drivers/md/dm-cache-policy-smq.c @@ -1590,18 +1590,22 @@ static int smq_invalidate_mapping(struct dm_cache_policy *p, dm_cblock_t cblock) struct smq_policy *mq = to_smq_policy(p); struct entry *e = get_entry(&mq->cache_alloc, from_cblock(cblock)); unsigned long flags; - - if (!e->allocated) - return -ENODATA; + int r = 0; spin_lock_irqsave(&mq->lock, flags); + if (!e->allocated) { + r = -ENODATA; + goto out; + } // FIXME: what if this block has pending background work? del_queue(mq, e); h_remove(&mq->table, e); free_entry(&mq->cache_alloc, e); + +out: spin_unlock_irqrestore(&mq->lock, flags); - return 0; + return r; } static uint32_t smq_get_hint(struct dm_cache_policy *p, dm_cblock_t cblock) diff --git a/drivers/memory/atmel-ebi.c b/drivers/memory/atmel-ebi.c index 8db970da9af9..1e8e8aba2542 100644 --- a/drivers/memory/atmel-ebi.c +++ b/drivers/memory/atmel-ebi.c @@ -628,10 +628,11 @@ static __maybe_unused int atmel_ebi_resume(struct device *dev) static SIMPLE_DEV_PM_OPS(atmel_ebi_pm_ops, NULL, atmel_ebi_resume); static struct platform_driver atmel_ebi_driver = { + .probe = atmel_ebi_probe, .driver = { .name = "atmel-ebi", .of_match_table = atmel_ebi_id_table, .pm = &atmel_ebi_pm_ops, }, }; -builtin_platform_driver_probe(atmel_ebi_driver, atmel_ebi_probe); +builtin_platform_driver(atmel_ebi_driver); diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 8846550a8892..05444ecf3909 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -1371,7 +1371,9 @@ static void mmc_select_driver_type(struct mmc_card *card) card->drive_strength = drive_strength; - if (drv_type) + if (fixed_drv_type >= 0 && drive_strength) + mmc_set_driver_type(card->host, drive_strength); + else if (drv_type) mmc_set_driver_type(card->host, drv_type); } diff --git a/drivers/mmc/host/dw_mmc-rockchip.c b/drivers/mmc/host/dw_mmc-rockchip.c index c6eece4ec3fd..75c82ff20f17 100644 --- a/drivers/mmc/host/dw_mmc-rockchip.c +++ b/drivers/mmc/host/dw_mmc-rockchip.c @@ -441,6 +441,22 @@ static int dw_mci_common_parse_dt(struct dw_mci *host) return 0; } +static int dw_mci_rk2928_parse_dt(struct dw_mci *host) +{ + struct dw_mci_rockchip_priv_data *priv; + int err; + + err = dw_mci_common_parse_dt(host); + if (err) + return err; + + priv = host->priv; + + priv->internal_phase = false; + + return 0; +} + static int dw_mci_rk3288_parse_dt(struct dw_mci *host) { struct dw_mci_rockchip_priv_data *priv; @@ -514,6 +530,7 @@ static int dw_mci_rockchip_init(struct dw_mci *host) static const struct dw_mci_drv_data rk2928_drv_data = { .init = dw_mci_rockchip_init, + .parse_dt = dw_mci_rk2928_parse_dt, }; static const struct dw_mci_drv_data rk3288_drv_data = { diff --git a/drivers/mmc/host/litex_mmc.c b/drivers/mmc/host/litex_mmc.c index d2f19c2dc673..3655542ca998 100644 --- a/drivers/mmc/host/litex_mmc.c +++ b/drivers/mmc/host/litex_mmc.c @@ -16,6 +16,7 @@ #include <linux/interrupt.h> #include <linux/iopoll.h> #include <linux/litex.h> +#include <linux/math.h> #include <linux/mod_devicetable.h> #include <linux/module.h> #include <linux/platform_device.h> @@ -68,6 +69,9 @@ #define SD_SLEEP_US 5 #define SD_TIMEOUT_US 20000 +#define SD_INIT_DELAY_US 1000 +#define SD_INIT_CLK_HZ 400000 + #define SDIRQ_CARD_DETECT 1 #define SDIRQ_SD_TO_MEM_DONE 2 #define SDIRQ_MEM_TO_SD_DONE 4 @@ -436,11 +440,10 @@ static void litex_mmc_setclk(struct litex_mmc_host *host, unsigned int freq) struct device *dev = mmc_dev(host->mmc); u32 div; - div = freq ? host->ref_clk / freq : 256U; - div = roundup_pow_of_two(div); + div = freq ? DIV_ROUND_UP(host->ref_clk, freq) : 256U; div = clamp(div, 2U, 256U); dev_dbg(dev, "sd_clk_freq=%d: set to %d via div=%d\n", - freq, host->ref_clk / div, div); + freq, host->ref_clk / ((div + 1) & ~1U), div); litex_write16(host->sdphy + LITEX_PHY_CLOCKERDIV, div); host->sd_clk = freq; } @@ -450,6 +453,17 @@ static void litex_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) struct litex_mmc_host *host = mmc_priv(mmc); /* + * The SD specification requires at least 74 idle clocks before CMD0. + * These dummy cycles is generated by writing LITEX_PHY_INITIALIZE. + */ + if (ios->chip_select == MMC_CS_HIGH) { + litex_mmc_setclk(host, SD_INIT_CLK_HZ); + litex_write8(host->sdphy + LITEX_PHY_INITIALIZE, 1); + fsleep(SD_INIT_DELAY_US); + return; + } + + /* * NOTE: Ignore any ios->bus_width updates; they occur right after * the mmc core sends its own acmd6 bus-width change notification, * which is redundant since we snoop on the command flow and inject diff --git a/drivers/mmc/host/renesas_sdhi_internal_dmac.c b/drivers/mmc/host/renesas_sdhi_internal_dmac.c index f6ebb7bc7ede..838248bf8dd6 100644 --- a/drivers/mmc/host/renesas_sdhi_internal_dmac.c +++ b/drivers/mmc/host/renesas_sdhi_internal_dmac.c @@ -279,6 +279,7 @@ static const struct renesas_sdhi_of_data_with_quirks of_rza2_compatible = { static const struct of_device_id renesas_sdhi_internal_dmac_of_match[] = { { .compatible = "renesas,sdhi-r7s9210", .data = &of_rza2_compatible, }, { .compatible = "renesas,sdhi-mmc-r8a77470", .data = &of_rcar_gen3_compatible, }, + { .compatible = "renesas,sdhi-r8a774e1", .data = &of_r8a7795_compatible, }, { .compatible = "renesas,sdhi-r8a7795", .data = &of_r8a7795_compatible, }, { .compatible = "renesas,sdhi-r8a77961", .data = &of_r8a77961_compatible, }, { .compatible = "renesas,sdhi-r8a77965", .data = &of_r8a77965_compatible, }, diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c index 633462c0be5f..0882ce74e0c9 100644 --- a/drivers/mmc/host/sdhci-msm.c +++ b/drivers/mmc/host/sdhci-msm.c @@ -1918,14 +1918,14 @@ static int sdhci_msm_ice_init(struct sdhci_msm_host *msm_host, return 0; ice = devm_of_qcom_ice_get(dev); - if (ice == ERR_PTR(-EOPNOTSUPP)) { + if (IS_ERR(ice)) { + if (ice != ERR_PTR(-EOPNOTSUPP)) + return PTR_ERR(ice); + dev_warn(dev, "Disabling inline encryption support\n"); - ice = NULL; + return 0; } - if (IS_ERR_OR_NULL(ice)) - return PTR_ERR_OR_ZERO(ice); - msm_host->ice = ice; /* Initialize the blk_crypto_profile */ diff --git a/drivers/mmc/host/sdhci-of-dwcmshc.c b/drivers/mmc/host/sdhci-of-dwcmshc.c index 0b2158a7e409..b9ecd91f44ad 100644 --- a/drivers/mmc/host/sdhci-of-dwcmshc.c +++ b/drivers/mmc/host/sdhci-of-dwcmshc.c @@ -277,6 +277,7 @@ #define PHY_DELAY_CODE_MAX 0x7f #define PHY_DELAY_CODE_EMMC 0x17 #define PHY_DELAY_CODE_SD 0x55 +#define PHY_DELAY_CODE_SDIO 0x29 struct rk35xx_priv { struct reset_control *reset; @@ -1433,10 +1434,7 @@ static void sdhci_eic7700_set_clock(struct sdhci_host *host, unsigned int clock) clk_set_rate(pltfm_host->clk, clock); clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL); - clk |= SDHCI_CLOCK_INT_EN; - sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL); - - dwcmshc_enable_card_clk(host); + sdhci_enable_clk(host, clk); } static void sdhci_eic7700_config_phy_delay(struct sdhci_host *host, int delay) @@ -1497,7 +1495,7 @@ static void sdhci_eic7700_config_phy(struct sdhci_host *host) static void sdhci_eic7700_reset(struct sdhci_host *host, u8 mask) { - sdhci_reset(host, mask); + dwcmshc_reset(host, mask); /* after reset all, the phy's config will be clear */ if (mask == SDHCI_RESET_ALL) @@ -1594,18 +1592,17 @@ static int sdhci_eic7700_phase_code_tuning(struct sdhci_host *host, u32 opcode) { struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct dwcmshc_priv *priv = sdhci_pltfm_priv(pltfm_host); - u32 sd_caps = MMC_CAP2_NO_MMC | MMC_CAP2_NO_SDIO; + u32 emmc_caps = MMC_CAP2_NO_SD | MMC_CAP2_NO_SDIO; int phase_code = -1; int code_range = -1; - bool is_sd = false; int code_min = -1; int code_max = -1; int cmd_error = 0; + bool is_emmc; int ret = 0; int i = 0; - if ((host->mmc->caps2 & sd_caps) == sd_caps) - is_sd = true; + is_emmc = (host->mmc->caps2 & emmc_caps) == emmc_caps; for (i = 0; i <= MAX_PHASE_CODE; i++) { /* Centered Phase code */ @@ -1614,8 +1611,8 @@ static int sdhci_eic7700_phase_code_tuning(struct sdhci_host *host, u32 opcode) host->ops->reset(host, SDHCI_RESET_CMD | SDHCI_RESET_DATA); if (ret) { - /* SD specific range tracking */ - if (is_sd && code_min != -1 && code_max != -1) { + /* SD/SDIO specific range tracking */ + if (!is_emmc && code_min != -1 && code_max != -1) { if (code_max - code_min > code_range) { code_range = code_max - code_min; phase_code = (code_min + code_max) / 2; @@ -1626,17 +1623,17 @@ static int sdhci_eic7700_phase_code_tuning(struct sdhci_host *host, u32 opcode) code_max = -1; } /* EMMC breaks after first valid range */ - if (!is_sd && code_min != -1 && code_max != -1) + if (is_emmc && code_min != -1 && code_max != -1) break; } else { /* Track valid phase code range */ if (code_min == -1) { code_min = i; - if (!is_sd) + if (is_emmc) continue; } code_max = i; - if (is_sd && i == MAX_PHASE_CODE) { + if (!is_emmc && i == MAX_PHASE_CODE) { if (code_max - code_min > code_range) { code_range = code_max - code_min; phase_code = (code_min + code_max) / 2; @@ -1646,19 +1643,19 @@ static int sdhci_eic7700_phase_code_tuning(struct sdhci_host *host, u32 opcode) } /* Handle tuning failure case */ - if ((is_sd && phase_code == -1) || - (!is_sd && code_min == -1 && code_max == -1)) { + if ((!is_emmc && phase_code == -1) || + (is_emmc && code_min == -1 && code_max == -1)) { pr_err("%s: phase code tuning failed!\n", mmc_hostname(host->mmc)); sdhci_writew(host, 0, priv->vendor_specific_area1 + DWCMSHC_AT_STAT); return -EIO; } - if (!is_sd) + if (is_emmc) phase_code = (code_min + code_max) / 2; sdhci_writew(host, phase_code, priv->vendor_specific_area1 + DWCMSHC_AT_STAT); - /* SD specific final verification */ - if (is_sd) { + /* SD/SDIO specific final verification */ + if (!is_emmc) { ret = mmc_send_tuning(host->mmc, opcode, &cmd_error); host->ops->reset(host, SDHCI_RESET_CMD | SDHCI_RESET_DATA); if (ret) { @@ -1756,9 +1753,9 @@ static void sdhci_eic7700_set_uhs_signaling(struct sdhci_host *host, unsigned in static void sdhci_eic7700_set_uhs_wrapper(struct sdhci_host *host, unsigned int timing) { - u32 sd_caps = MMC_CAP2_NO_MMC | MMC_CAP2_NO_SDIO; + u32 emmc_caps = MMC_CAP2_NO_SD | MMC_CAP2_NO_SDIO; - if ((host->mmc->caps2 & sd_caps) == sd_caps) + if ((host->mmc->caps2 & emmc_caps) != emmc_caps) sdhci_set_uhs_signaling(host, timing); else sdhci_eic7700_set_uhs_signaling(host, timing); @@ -1767,6 +1764,7 @@ static void sdhci_eic7700_set_uhs_wrapper(struct sdhci_host *host, unsigned int static int eic7700_init(struct device *dev, struct sdhci_host *host, struct dwcmshc_priv *dwc_priv) { u32 emmc_caps = MMC_CAP2_NO_SD | MMC_CAP2_NO_SDIO; + u32 sd_caps = MMC_CAP2_NO_MMC | MMC_CAP2_NO_SDIO; unsigned int val, hsp_int_status, hsp_pwr_ctrl; static const char * const clk_ids[] = {"axi"}; struct of_phandle_args args; @@ -1821,8 +1819,10 @@ static int eic7700_init(struct device *dev, struct sdhci_host *host, struct dwcm if ((host->mmc->caps2 & emmc_caps) == emmc_caps) dwc_priv->delay_line = PHY_DELAY_CODE_EMMC; - else + else if ((host->mmc->caps2 & sd_caps) == sd_caps) dwc_priv->delay_line = PHY_DELAY_CODE_SD; + else + dwc_priv->delay_line = PHY_DELAY_CODE_SDIO; if (!of_property_read_u32(dev->of_node, "eswin,drive-impedance-ohms", &val)) priv->drive_impedance = eic7700_convert_drive_impedance_ohm(dev, val); diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 605be55f8d2d..e3bf901b10aa 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -3836,6 +3836,7 @@ int sdhci_resume_host(struct sdhci_host *host) host->pwr = 0; host->clock = 0; host->reinit_uhs = true; + mmc->ops->start_signal_voltage_switch(mmc, &mmc->ios); mmc->ops->set_ios(mmc, &mmc->ios); } else { sdhci_init(host, (mmc->pm_flags & MMC_PM_KEEP_POWER)); diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index f0aa7d2f2171..985ef66dc333 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -1386,8 +1386,8 @@ static void ad_churn_machine(struct port *port) { if (port->sm_vars & AD_PORT_CHURNED) { port->sm_vars &= ~AD_PORT_CHURNED; - port->sm_churn_actor_state = AD_CHURN_MONITOR; - port->sm_churn_partner_state = AD_CHURN_MONITOR; + WRITE_ONCE(port->sm_churn_actor_state, AD_CHURN_MONITOR); + WRITE_ONCE(port->sm_churn_partner_state, AD_CHURN_MONITOR); port->sm_churn_actor_timer_counter = __ad_timer_to_ticks(AD_ACTOR_CHURN_TIMER, 0); port->sm_churn_partner_timer_counter = @@ -1398,20 +1398,22 @@ static void ad_churn_machine(struct port *port) !(--port->sm_churn_actor_timer_counter) && port->sm_churn_actor_state == AD_CHURN_MONITOR) { if (port->actor_oper_port_state & LACP_STATE_SYNCHRONIZATION) { - port->sm_churn_actor_state = AD_NO_CHURN; + WRITE_ONCE(port->sm_churn_actor_state, AD_NO_CHURN); } else { - port->churn_actor_count++; - port->sm_churn_actor_state = AD_CHURN; + WRITE_ONCE(port->churn_actor_count, + port->churn_actor_count + 1); + WRITE_ONCE(port->sm_churn_actor_state, AD_CHURN); } } if (port->sm_churn_partner_timer_counter && !(--port->sm_churn_partner_timer_counter) && port->sm_churn_partner_state == AD_CHURN_MONITOR) { if (port->partner_oper.port_state & LACP_STATE_SYNCHRONIZATION) { - port->sm_churn_partner_state = AD_NO_CHURN; + WRITE_ONCE(port->sm_churn_partner_state, AD_NO_CHURN); } else { - port->churn_partner_count++; - port->sm_churn_partner_state = AD_CHURN; + WRITE_ONCE(port->churn_partner_count, + port->churn_partner_count + 1); + WRITE_ONCE(port->sm_churn_partner_state, AD_CHURN); } } } diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 82e779f7916b..8e75453ce0ef 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4621,11 +4621,11 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd slave_dev = __dev_get_by_name(net, ifr->ifr_slave); - slave_dbg(bond_dev, slave_dev, "slave_dev=%p:\n", slave_dev); - if (!slave_dev) return -ENODEV; + slave_dbg(bond_dev, slave_dev, "slave_dev=%p:\n", slave_dev); + switch (cmd) { case SIOCBONDENSLAVE: res = bond_enslave(bond_dev, slave_dev, NULL); diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c index c7d3e0602c83..90365d3f7ebf 100644 --- a/drivers/net/bonding/bond_netlink.c +++ b/drivers/net/bonding/bond_netlink.c @@ -82,10 +82,10 @@ static int bond_fill_slave_info(struct sk_buff *skb, goto nla_put_failure_rcu; if (nla_put_u8(skb, IFLA_BOND_SLAVE_AD_CHURN_ACTOR_STATE, - ad_port->sm_churn_actor_state)) + READ_ONCE(ad_port->sm_churn_actor_state))) goto nla_put_failure_rcu; if (nla_put_u8(skb, IFLA_BOND_SLAVE_AD_CHURN_PARTNER_STATE, - ad_port->sm_churn_partner_state)) + READ_ONCE(ad_port->sm_churn_partner_state))) goto nla_put_failure_rcu; } rcu_read_unlock(); diff --git a/drivers/net/bonding/bond_procfs.c b/drivers/net/bonding/bond_procfs.c index 3714aab1a3d9..3607b62f9b63 100644 --- a/drivers/net/bonding/bond_procfs.c +++ b/drivers/net/bonding/bond_procfs.c @@ -221,13 +221,13 @@ static void bond_info_show_slave(struct seq_file *seq, seq_printf(seq, "Aggregator ID: %d\n", agg->aggregator_identifier); seq_printf(seq, "Actor Churn State: %s\n", - bond_3ad_churn_desc(port->sm_churn_actor_state)); + bond_3ad_churn_desc(READ_ONCE(port->sm_churn_actor_state))); seq_printf(seq, "Partner Churn State: %s\n", - bond_3ad_churn_desc(port->sm_churn_partner_state)); + bond_3ad_churn_desc(READ_ONCE(port->sm_churn_partner_state))); seq_printf(seq, "Actor Churned Count: %d\n", - port->churn_actor_count); + READ_ONCE(port->churn_actor_count)); seq_printf(seq, "Partner Churned Count: %d\n", - port->churn_partner_count); + READ_ONCE(port->churn_partner_count)); if (capable(CAP_NET_ADMIN)) { seq_puts(seq, "details actor lacp pdu:\n"); diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index cecd66251dba..eab6a98d62b9 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -2936,7 +2936,7 @@ static void airoha_metadata_dst_free(struct airoha_gdm_port *port) if (!port->dsa_meta[i]) continue; - metadata_dst_free(port->dsa_meta[i]); + dst_release(&port->dsa_meta[i]->dst); } } diff --git a/drivers/net/ethernet/amd/pcnet32.c b/drivers/net/ethernet/amd/pcnet32.c index 911808ab13a7..4f3076d4ea34 100644 --- a/drivers/net/ethernet/amd/pcnet32.c +++ b/drivers/net/ethernet/amd/pcnet32.c @@ -1407,8 +1407,10 @@ static int pcnet32_poll(struct napi_struct *napi, int budget) pcnet32_restart(dev, CSR0_START); netif_wake_queue(dev); } + spin_unlock_irqrestore(&lp->lock, flags); if (work_done < budget && napi_complete_done(napi, work_done)) { + spin_lock_irqsave(&lp->lock, flags); /* clear interrupt masks */ val = lp->a->read_csr(ioaddr, CSR3); val &= 0x00ff; @@ -1416,9 +1418,9 @@ static int pcnet32_poll(struct napi_struct *napi, int budget) /* Set interrupt enable. */ lp->a->write_csr(ioaddr, CSR0, CSR0_INTEN); + spin_unlock_irqrestore(&lp->lock, flags); } - spin_unlock_irqrestore(&lp->lock, flags); return work_done; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 008c34cff7b4..35e1f8f663c7 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -14388,13 +14388,28 @@ static void bnxt_unlock_sp(struct bnxt *bp) netdev_unlock(bp->dev); } +/* Same as bnxt_lock_sp() with additional rtnl_lock */ +static void bnxt_rtnl_lock_sp(struct bnxt *bp) +{ + clear_bit(BNXT_STATE_IN_SP_TASK, &bp->state); + rtnl_lock(); + netdev_lock(bp->dev); +} + +static void bnxt_rtnl_unlock_sp(struct bnxt *bp) +{ + set_bit(BNXT_STATE_IN_SP_TASK, &bp->state); + netdev_unlock(bp->dev); + rtnl_unlock(); +} + /* Only called from bnxt_sp_task() */ static void bnxt_reset(struct bnxt *bp, bool silent) { - bnxt_lock_sp(bp); + bnxt_rtnl_lock_sp(bp); if (test_bit(BNXT_STATE_OPEN, &bp->state)) bnxt_reset_task(bp, silent); - bnxt_unlock_sp(bp); + bnxt_rtnl_unlock_sp(bp); } /* Only called from bnxt_sp_task() */ @@ -14402,9 +14417,9 @@ static void bnxt_rx_ring_reset(struct bnxt *bp) { int i; - bnxt_lock_sp(bp); + bnxt_rtnl_lock_sp(bp); if (!test_bit(BNXT_STATE_OPEN, &bp->state)) { - bnxt_unlock_sp(bp); + bnxt_rtnl_unlock_sp(bp); return; } /* Disable and flush TPA before resetting the RX ring */ @@ -14443,7 +14458,7 @@ static void bnxt_rx_ring_reset(struct bnxt *bp) } if (bp->flags & BNXT_FLAG_TPA) bnxt_set_tpa(bp, true); - bnxt_unlock_sp(bp); + bnxt_rtnl_unlock_sp(bp); } static void bnxt_fw_fatal_close(struct bnxt *bp) @@ -15358,15 +15373,17 @@ static void bnxt_fw_reset_task(struct work_struct *work) bp->fw_reset_state = BNXT_FW_RESET_STATE_OPENING; fallthrough; case BNXT_FW_RESET_STATE_OPENING: - while (!netdev_trylock(bp->dev)) { + while (!rtnl_trylock()) { bnxt_queue_fw_reset_work(bp, HZ / 10); return; } + netdev_lock(bp->dev); rc = bnxt_open(bp->dev); if (rc) { netdev_err(bp->dev, "bnxt_open() failed during FW reset\n"); bnxt_fw_reset_abort(bp, rc); netdev_unlock(bp->dev); + rtnl_unlock(); goto ulp_start; } @@ -15386,6 +15403,7 @@ static void bnxt_fw_reset_task(struct work_struct *work) bnxt_dl_health_fw_status_update(bp, true); } netdev_unlock(bp->dev); + rtnl_unlock(); bnxt_ulp_start(bp); bnxt_reenable_sriov(bp); netdev_lock(bp->dev); @@ -16379,7 +16397,7 @@ err_reset: rc); napi_enable_locked(&bnapi->napi); bnxt_db_nq_arm(bp, &cpr->cp_db, cpr->cp_raw_cons); - bnxt_reset_task(bp, true); + netif_close(dev); return rc; } @@ -17230,6 +17248,7 @@ static int bnxt_resume(struct device *device) struct bnxt *bp = netdev_priv(dev); int rc = 0; + rtnl_lock(); netdev_lock(dev); rc = pci_enable_device(bp->pdev); if (rc) { @@ -17274,6 +17293,7 @@ static int bnxt_resume(struct device *device) resume_exit: netdev_unlock(bp->dev); + rtnl_unlock(); if (!rc) { bnxt_ulp_start(bp); bnxt_reenable_sriov(bp); @@ -17445,6 +17465,7 @@ static void bnxt_io_resume(struct pci_dev *pdev) int err; netdev_info(bp->dev, "PCI Slot Resume\n"); + rtnl_lock(); netdev_lock(netdev); err = bnxt_hwrm_func_qcaps(bp); @@ -17462,6 +17483,7 @@ static void bnxt_io_resume(struct pci_dev *pdev) netif_device_attach(netdev); netdev_unlock(netdev); + rtnl_unlock(); if (!err) { bnxt_ulp_start(bp); bnxt_reenable_sriov(bp); diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index f89aa94ce020..6ebde65d7f1b 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -5594,6 +5594,7 @@ static int fec_resume(struct device *dev) if (fep->rpm_active) pm_runtime_force_resume(dev); + pinctrl_pm_select_default_state(&fep->pdev->dev); ret = fec_enet_clk_enable(ndev, true); if (ret) { rtnl_unlock(); @@ -5610,8 +5611,6 @@ static int fec_resume(struct device *dev) val &= ~(FEC_ECR_MAGICEN | FEC_ECR_SLEEP); writel(val, fep->hwp + FEC_ECNTRL); fep->wol_flag &= ~FEC_WOL_FLAG_SLEEP_ON; - } else { - pinctrl_pm_select_default_state(&fep->pdev->dev); } fec_restart(ndev); netif_tx_lock_bh(ndev); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index de3fbd3d15d6..65397daae4c2 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -1145,6 +1145,7 @@ int rvu_cpt_lf_teardown(struct rvu *rvu, u16 pcifunc, int blkaddr, int lf, int slot); int rvu_cpt_ctx_flush(struct rvu *rvu, u16 pcifunc); int rvu_cpt_init(struct rvu *rvu); +u32 rvu_get_cpt_chan_mask(struct rvu *rvu); #define NDC_AF_BANK_MASK GENMASK_ULL(7, 0) #define NDC_AF_BANK_LINE_MASK GENMASK_ULL(31, 16) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 607d0cf1a778..d301a3f0f87a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -701,6 +701,19 @@ void npc_set_mcam_action(struct rvu *rvu, struct npc_mcam *mcam, return rvu_write64(rvu, blkaddr, reg, cfg); } +u32 rvu_get_cpt_chan_mask(struct rvu *rvu) +{ + /* For cn10k the upper two bits of the channel number are + * cpt channel number. with masking out these bits in the + * mcam entry, same entry used for NIX will allow packets + * received from cpt for parsing. + */ + if (!is_rvu_otx2(rvu)) + return NIX_CHAN_CPT_X2P_MASK; + else + return 0xFFFu; +} + void rvu_npc_install_ucast_entry(struct rvu *rvu, u16 pcifunc, int nixlf, u64 chan, u8 *mac_addr) { @@ -750,7 +763,7 @@ void rvu_npc_install_ucast_entry(struct rvu *rvu, u16 pcifunc, eth_broadcast_addr((u8 *)&req.mask.dmac); req.features = BIT_ULL(NPC_DMAC); req.channel = chan; - req.chan_mask = 0xFFFU; + req.chan_mask = rvu_get_cpt_chan_mask(rvu); req.intf = pfvf->nix_rx_intf; req.op = action.op; req.hdr.pcifunc = 0; /* AF is requester */ @@ -845,11 +858,7 @@ void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc, * mcam entry, same entry used for NIX will allow packets * received from cpt for parsing. */ - if (!is_rvu_otx2(rvu)) { - req.chan_mask = NIX_CHAN_CPT_X2P_MASK; - } else { - req.chan_mask = 0xFFFU; - } + req.chan_mask = rvu_get_cpt_chan_mask(rvu); if (chan_cnt > 1) { if (!is_power_of_2(chan_cnt)) { @@ -1053,16 +1062,7 @@ void rvu_npc_install_allmulti_entry(struct rvu *rvu, u16 pcifunc, int nixlf, ether_addr_copy(req.mask.dmac, mac_addr); req.features = BIT_ULL(NPC_DMAC); - /* For cn10k the upper two bits of the channel number are - * cpt channel number. with masking out these bits in the - * mcam entry, same entry used for NIX will allow packets - * received from cpt for parsing. - */ - if (!is_rvu_otx2(rvu)) - req.chan_mask = NIX_CHAN_CPT_X2P_MASK; - else - req.chan_mask = 0xFFFU; - + req.chan_mask = rvu_get_cpt_chan_mask(rvu); req.channel = chan; req.intf = pfvf->nix_rx_intf; req.entry = index; @@ -2192,8 +2192,8 @@ int npc_mcam_rsrcs_init(struct rvu *rvu, int blkaddr) goto free_entry_cntr_map; /* Alloc memory for saving target device of mcam rule */ - mcam->entry2target_pffunc = kmalloc_array(mcam->total_entries, - sizeof(u16), GFP_KERNEL); + mcam->entry2target_pffunc = kcalloc(mcam->total_entries, + sizeof(u16), GFP_KERNEL); if (!mcam->entry2target_pffunc) goto free_cntr_refcnt; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c index 6ae9cdcb608b..34f1e066707b 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c @@ -1820,7 +1820,7 @@ process_flow: /* ignore chan_mask in case pf func is not AF, revisit later */ if (!is_pffunc_af(req->hdr.pcifunc)) - req->chan_mask = 0xFFF; + req->chan_mask = rvu_get_cpt_chan_mask(rvu); err = npc_check_unsupported_flows(rvu, req->features, req->intf); if (err) { diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index ee623476e5ff..f9fbf0c17648 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -3473,7 +3473,7 @@ static void otx2_ndc_sync(struct otx2_nic *pf) req->nix_lf_rx_sync = 1; req->npa_lf_sync = 1; - if (!otx2_sync_mbox_msg(mbox)) + if (otx2_sync_mbox_msg(mbox)) dev_err(pf->dev, "NDC sync operation failed\n"); mutex_unlock(&mbox->lock); diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 8d225bc9f063..7d771168b990 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -4491,7 +4491,7 @@ static int mtk_free_dev(struct mtk_eth *eth) for (i = 0; i < ARRAY_SIZE(eth->dsa_meta); i++) { if (!eth->dsa_meta[i]) break; - metadata_dst_free(eth->dsa_meta[i]); + dst_release(ð->dsa_meta[i]->dst); } return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index c89417c1a1f9..e2895972cc82 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -1002,12 +1002,13 @@ static void cmd_work_handler(struct work_struct *work) ent->callback(-EBUSY, ent->context); mlx5_free_cmd_msg(dev, ent->out); free_msg(dev, ent->in); + complete(&ent->slotted); cmd_ent_put(ent); } else { ent->ret = -EBUSY; complete(&ent->done); + complete(&ent->slotted); } - complete(&ent->slotted); return; } alloc_ret = cmd_alloc_index(cmd, ent); @@ -1017,13 +1018,14 @@ static void cmd_work_handler(struct work_struct *work) ent->callback(-EAGAIN, ent->context); mlx5_free_cmd_msg(dev, ent->out); free_msg(dev, ent->in); + complete(&ent->slotted); cmd_ent_put(ent); } else { ent->ret = -EAGAIN; complete(&ent->done); + complete(&ent->slotted); } up(&cmd->vars.sem); - complete(&ent->slotted); return; } } else { diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index f3332417162e..ffac22883e49 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -1219,6 +1219,36 @@ static void lan743x_mac_set_address(struct lan743x_adapter *adapter, "MAC address set to %pM\n", addr); } +static void lan743x_mac_rx_enable_fse(struct lan743x_adapter *adapter) +{ + u32 mac_rx; + bool rxen; + + mac_rx = lan743x_csr_read(adapter, MAC_RX); + if (mac_rx & MAC_RX_FSE_) + return; + + rxen = mac_rx & MAC_RX_RXEN_; + if (rxen) { + mac_rx &= ~MAC_RX_RXEN_; + lan743x_csr_write(adapter, MAC_RX, mac_rx); + lan743x_csr_wait_for_bit(adapter, MAC_RX, MAC_RX_RXD_, + 1, 1000, 20000, 100); + } + + /* Per AN2948, hardware prevents modification of the FSE bit while the + * MAC receiver is enabled (RXEN bit set). Use separate register write + * to assert the FSE bit before enabling the RXEN bit in MAC_RX + */ + mac_rx |= MAC_RX_FSE_; + lan743x_csr_write(adapter, MAC_RX, mac_rx); + + if (rxen) { + mac_rx |= MAC_RX_RXEN_; + lan743x_csr_write(adapter, MAC_RX, mac_rx); + } +} + static int lan743x_mac_init(struct lan743x_adapter *adapter) { bool mac_address_valid = true; @@ -1258,6 +1288,8 @@ static int lan743x_mac_init(struct lan743x_adapter *adapter) lan743x_mac_set_address(adapter, adapter->mac_address); eth_hw_addr_set(netdev, adapter->mac_address); + lan743x_mac_rx_enable_fse(adapter); + return 0; } diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h index 160d94a7cee6..1573c8f9c993 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.h +++ b/drivers/net/ethernet/microchip/lan743x_main.h @@ -182,6 +182,7 @@ #define MAC_RX (0x104) #define MAC_RX_MAX_SIZE_SHIFT_ (16) #define MAC_RX_MAX_SIZE_MASK_ (0x3FFF0000) +#define MAC_RX_FSE_ BIT(2) #define MAC_RX_RXD_ BIT(1) #define MAC_RX_RXEN_ BIT(0) diff --git a/drivers/net/ethernet/realtek/rtase/rtase_main.c b/drivers/net/ethernet/realtek/rtase/rtase_main.c index ef13109c49cf..55105d34bc79 100644 --- a/drivers/net/ethernet/realtek/rtase/rtase_main.c +++ b/drivers/net/ethernet/realtek/rtase/rtase_main.c @@ -239,6 +239,8 @@ static void rtase_tx_clear(struct rtase_private *tp) rtase_tx_clear_range(ring, ring->dirty_idx, RTASE_NUM_DESC); ring->cur_idx = 0; ring->dirty_idx = 0; + + netdev_tx_reset_subqueue(tp->dev, i); } } @@ -1563,8 +1565,9 @@ static void rtase_dump_tally_counter(const struct rtase_private *tp) rtase_w32(tp, RTASE_DTCCR0, cmd); rtase_w32(tp, RTASE_DTCCR0, cmd | RTASE_COUNTER_DUMP); - err = read_poll_timeout(rtase_r32, val, !(val & RTASE_COUNTER_DUMP), - 10, 250, false, tp, RTASE_DTCCR0); + err = read_poll_timeout_atomic(rtase_r32, val, + !(val & RTASE_COUNTER_DUMP), + 10, 250, false, tp, RTASE_DTCCR0); if (err == -ETIMEDOUT) netdev_err(tp->dev, "error occurred in dump tally counter\n"); diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index c6563367d382..715180c3a1b3 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -632,7 +632,7 @@ static int geneve_post_decap_hint(const struct sock *sk, struct sk_buff *skb, uh = udp_hdr(skb); uh->len = htons(skb->len - gro_hint->nested_tp_offset); if (uh->check) { - len = skb->len - gro_hint->nested_nh_offset; + len = skb->len - gro_hint->nested_tp_offset; skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM; if (gro_hint->nested_is_v6) uh->check = ~udp_v6_check(len, &ipv6h->saddr, diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index bd970f753beb..b94b9c433a21 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -822,6 +822,7 @@ static int sfp_i2c_configure(struct sfp *sfp, struct i2c_adapter *i2c) return -EINVAL; } + sfp->i2c_block_size = sfp->i2c_max_block_size; return 0; } diff --git a/drivers/net/vxlan/vxlan_vnifilter.c b/drivers/net/vxlan/vxlan_vnifilter.c index 2042369379ff..3e76f4e21094 100644 --- a/drivers/net/vxlan/vxlan_vnifilter.c +++ b/drivers/net/vxlan/vxlan_vnifilter.c @@ -661,7 +661,7 @@ static int vxlan_vni_update(struct vxlan_dev *vxlan, if (ret) return ret; - if (changed) + if (*changed) vxlan_vnifilter_notify(vxlan, vninode, RTM_NEWTUNNEL); return 0; @@ -759,8 +759,7 @@ static int vxlan_vni_add(struct vxlan_dev *vxlan, err = vxlan_vni_update_group(vxlan, vninode, group, true, &changed, extack); - if (changed) - vxlan_vnifilter_notify(vxlan, vninode, RTM_NEWTUNNEL); + vxlan_vnifilter_notify(vxlan, vninode, RTM_NEWTUNNEL); return err; } diff --git a/drivers/net/wireless/intel/iwlwifi/mld/ap.c b/drivers/net/wireless/intel/iwlwifi/mld/ap.c index 5c59acc8c4c5..6598d9333333 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/ap.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/ap.c @@ -9,7 +9,6 @@ #include "ap.h" #include "hcmd.h" #include "tx.h" -#include "power.h" #include "key.h" #include "phy.h" #include "iwl-utils.h" @@ -273,9 +272,6 @@ int iwl_mld_start_ap_ibss(struct ieee80211_hw *hw, struct ieee80211_chanctx_conf *ctx; int ret; - if (vif->type == NL80211_IFTYPE_AP) - iwl_mld_send_ap_tx_power_constraint_cmd(mld, vif, link); - ret = iwl_mld_update_beacon_template(mld, vif, link); if (ret) return ret; diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c index da6fd7471568..3c8daddc0bcb 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c @@ -1150,6 +1150,13 @@ int iwl_mld_assign_vif_chanctx(struct ieee80211_hw *hw, if (iwl_mld_can_activate_link(mld, vif, link)) { iwl_mld_tlc_update_phy(mld, vif, link); + /* FW requires AP_TX_POWER_CONSTRAINTS_CMD before link + * activation for AP and after link activation for STA, + * for an unknown reason. + */ + if (vif->type == NL80211_IFTYPE_AP) + iwl_mld_send_ap_tx_power_constraint_cmd(mld, vif, link); + ret = iwl_mld_activate_link(mld, link); if (ret) goto err; diff --git a/drivers/net/wireless/intel/iwlwifi/mld/power.c b/drivers/net/wireless/intel/iwlwifi/mld/power.c index 49b0d9f8f865..266fe16bb95d 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/power.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/power.c @@ -366,7 +366,7 @@ iwl_mld_send_ap_tx_power_constraint_cmd(struct iwl_mld *mld, lockdep_assert_wiphy(mld->wiphy); - if (!mld_link->active) + if (!mld_link->active && vif->type != NL80211_IFTYPE_AP) return; if (link->chanreq.oper.chan->band != NL80211_BAND_6GHZ) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index f05df3a3300e..6e507d6dcdd2 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* - * Copyright (C) 2012-2014, 2018-2025 Intel Corporation + * Copyright (C) 2012-2014, 2018-2026 Intel Corporation * Copyright (C) 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2016-2017 Intel Deutschland GmbH */ @@ -459,9 +459,14 @@ static void iwl_mvm_phy_filter_init(struct iwl_mvm *mvm, static void iwl_mvm_uats_init(struct iwl_mvm *mvm) { + struct iwl_mcc_allowed_ap_type_cmd_v1 *cmd __free(kfree) = NULL; int cmd_id = WIDE_ID(REGULATORY_AND_NVM_GROUP, MCC_ALLOWED_AP_TYPE_CMD); - struct iwl_mcc_allowed_ap_type_cmd_v1 cmd = {}; + struct iwl_host_cmd hcmd = { + .id = cmd_id, + .len[0] = sizeof(*cmd), + .dataflags[0] = IWL_HCMD_DFL_NOCOPY, + }; u8 cmd_ver; int ret; @@ -485,14 +490,25 @@ static void iwl_mvm_uats_init(struct iwl_mvm *mvm) if (!mvm->fwrt.ap_type_cmd_valid) return; + /* Since we free the command immediately after iwl_mvm_send_cmd, we + * must send this command in SYNC mode. + */ + lockdep_assert_held(&mvm->mutex); + + cmd = kzalloc_obj(*cmd); + if (!cmd) + return; + BUILD_BUG_ON(sizeof(mvm->fwrt.ap_type_cmd.mcc_to_ap_type_map) != - sizeof(cmd.mcc_to_ap_type_map)); + sizeof(cmd->mcc_to_ap_type_map)); - memcpy(cmd.mcc_to_ap_type_map, + memcpy(cmd->mcc_to_ap_type_map, mvm->fwrt.ap_type_cmd.mcc_to_ap_type_map, sizeof(mvm->fwrt.ap_type_cmd.mcc_to_ap_type_map)); - ret = iwl_mvm_send_cmd_pdu(mvm, cmd_id, 0, sizeof(cmd), &cmd); + hcmd.data[0] = cmd; + + ret = iwl_mvm_send_cmd(mvm, &hcmd); if (ret < 0) IWL_ERR(mvm, "failed to send MCC_ALLOWED_AP_TYPE_CMD (%d)\n", ret); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index ae177477b201..384bed95835d 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -1416,6 +1416,12 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_rf_cfg *cfg, fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_FW_RESET_HANDSHAKE); + /* Those firmware versions claim to support the fw_reset_handshake + * but they are buggy. + */ + if (IWL_UCODE_MAJOR(mvm->fw->ucode_ver) <= 77) + trans->conf.fw_reset_handshake = false; + trans->conf.queue_alloc_cmd_ver = iwl_fw_lookup_cmd_ver(mvm->fw, WIDE_ID(DATA_PATH_GROUP, diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index dc99e7ac4726..eb3c5a6dd088 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -1225,33 +1225,41 @@ static int _iwl_pci_resume(struct device *device, bool restore) if (!trans->op_mode) return 0; - /* - * Scratch value was altered, this means the device was powered off, we - * need to reset it completely. - * Note: MAC (bits 0:7) will be cleared upon suspend even with wowlan, - * but not bits [15:8]. So if we have bits set in lower word, assume - * the device is alive. - * Alternatively, if the scratch value is 0xFFFFFFFF, then we no longer - * have access to the device and consider it powered off. - * For older devices, just try silently to grab the NIC. - */ - if (trans->mac_cfg->device_family >= IWL_DEVICE_FAMILY_BZ) { - u32 scratch = iwl_read32(trans, CSR_FUNC_SCRATCH); - - if (!(scratch & CSR_FUNC_SCRATCH_POWER_OFF_MASK) || - scratch == ~0U) - device_was_powered_off = true; - } else { + if (test_bit(STATUS_DEVICE_ENABLED, &trans->status)) { /* - * bh are re-enabled by iwl_trans_pcie_release_nic_access, - * so re-enable them if _iwl_trans_pcie_grab_nic_access fails. + * Scratch value was altered, this means the device was powered + * off, we need to reset it completely. + * Note: MAC (bits 0:7) will be cleared upon suspend even with + * wowlan, but not bits [15:8]. So if we have bits set in lower + * word, assume the device is alive. + * Alternatively, if the scratch value is 0xFFFFFFFF, then we + * no longer have access to the device and consider it powered + * off. + * For older devices, just try silently to grab the NIC. */ - local_bh_disable(); - if (_iwl_trans_pcie_grab_nic_access(trans, true)) { - iwl_trans_pcie_release_nic_access(trans); + if (trans->mac_cfg->device_family >= IWL_DEVICE_FAMILY_BZ) { + u32 scratch = iwl_read32(trans, CSR_FUNC_SCRATCH); + + if (!(scratch & CSR_FUNC_SCRATCH_POWER_OFF_MASK) || + scratch == ~0U) { + IWL_DEBUG_WOWLAN(trans, + "Scratch 0x%08x indicates device was powered off\n", + scratch); + device_was_powered_off = true; + } } else { - device_was_powered_off = true; - local_bh_enable(); + /* + * bh are re-enabled by iwl_trans_pcie_release_nic_access, + * so re-enable them if _iwl_trans_pcie_grab_nic_access + * fails. + */ + local_bh_disable(); + if (_iwl_trans_pcie_grab_nic_access(trans, true)) { + iwl_trans_pcie_release_nic_access(trans); + } else { + device_was_powered_off = true; + local_bh_enable(); + } } } diff --git a/drivers/ptp/ptp_vclock.c b/drivers/ptp/ptp_vclock.c index 915a4f6defc9..84cb527f59cc 100644 --- a/drivers/ptp/ptp_vclock.c +++ b/drivers/ptp/ptp_vclock.c @@ -19,6 +19,8 @@ static DEFINE_SPINLOCK(vclock_hash_lock); static DEFINE_READ_MOSTLY_HASHTABLE(vclock_hash, 8); +DEFINE_STATIC_SRCU(vclock_srcu); + static void ptp_vclock_hash_add(struct ptp_vclock *vclock) { spin_lock(&vclock_hash_lock); @@ -37,7 +39,7 @@ static void ptp_vclock_hash_del(struct ptp_vclock *vclock) spin_unlock(&vclock_hash_lock); - synchronize_rcu(); + synchronize_srcu(&vclock_srcu); } static int ptp_vclock_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) @@ -276,14 +278,16 @@ ktime_t ptp_convert_timestamp(const ktime_t *hwtstamp, int vclock_index) { unsigned int hash = vclock_index % HASH_SIZE(vclock_hash); struct ptp_vclock *vclock; - u64 ns; u64 vclock_ns = 0; + int srcu_idx; + u64 ns; ns = ktime_to_ns(*hwtstamp); - rcu_read_lock(); + srcu_idx = srcu_read_lock(&vclock_srcu); - hlist_for_each_entry_rcu(vclock, &vclock_hash[hash], vclock_hash_node) { + hlist_for_each_entry_srcu(vclock, &vclock_hash[hash], vclock_hash_node, + srcu_read_lock_held(&vclock_srcu)) { if (vclock->clock->index != vclock_index) continue; @@ -294,7 +298,7 @@ ktime_t ptp_convert_timestamp(const ktime_t *hwtstamp, int vclock_index) break; } - rcu_read_unlock(); + srcu_read_unlock(&vclock_srcu, srcu_idx); return ns_to_ktime(vclock_ns); } diff --git a/drivers/soc/imx/soc-imx8m.c b/drivers/soc/imx/soc-imx8m.c index 77763a107edb..fc080e56f50d 100644 --- a/drivers/soc/imx/soc-imx8m.c +++ b/drivers/soc/imx/soc-imx8m.c @@ -247,7 +247,7 @@ static int imx8m_soc_probe(struct platform_device *pdev) if (ret) return ret; - data = device_get_match_data(dev); + data = of_machine_get_match_data(imx8_soc_match); if (data) { soc_dev_attr->soc_id = data->name; ret = imx8m_soc_prepare(pdev, data->ocotp_compatible); diff --git a/drivers/soc/qcom/ice.c b/drivers/soc/qcom/ice.c index b203bc685cad..5f20108aa03e 100644 --- a/drivers/soc/qcom/ice.c +++ b/drivers/soc/qcom/ice.c @@ -16,6 +16,7 @@ #include <linux/of.h> #include <linux/of_platform.h> #include <linux/platform_device.h> +#include <linux/xarray.h> #include <linux/firmware/qcom/qcom_scm.h> @@ -108,11 +109,15 @@ struct qcom_ice { void __iomem *base; struct clk *core_clk; + struct clk *iface_clk; bool use_hwkm; bool hwkm_init_complete; u8 hwkm_version; }; +static DEFINE_XARRAY(ice_handles); +static DEFINE_MUTEX(ice_mutex); + static bool qcom_ice_check_supported(struct qcom_ice *ice) { u32 regval = qcom_ice_readl(ice, QCOM_ICE_REG_VERSION); @@ -312,8 +317,13 @@ int qcom_ice_resume(struct qcom_ice *ice) err = clk_prepare_enable(ice->core_clk); if (err) { - dev_err(dev, "failed to enable core clock (%d)\n", - err); + dev_err(dev, "Failed to enable core clock: %d\n", err); + return err; + } + + err = clk_prepare_enable(ice->iface_clk); + if (err) { + dev_err(dev, "Failed to enable iface clock: %d\n", err); return err; } qcom_ice_hwkm_init(ice); @@ -323,6 +333,7 @@ EXPORT_SYMBOL_GPL(qcom_ice_resume); int qcom_ice_suspend(struct qcom_ice *ice) { + clk_disable_unprepare(ice->iface_clk); clk_disable_unprepare(ice->core_clk); ice->hwkm_init_complete = false; @@ -559,7 +570,7 @@ static struct qcom_ice *qcom_ice_create(struct device *dev, if (!qcom_scm_ice_available()) { dev_warn(dev, "ICE SCM interface not found\n"); - return NULL; + return ERR_PTR(-EOPNOTSUPP); } engine = devm_kzalloc(dev, sizeof(*engine), GFP_KERNEL); @@ -580,10 +591,16 @@ static struct qcom_ice *qcom_ice_create(struct device *dev, if (!engine->core_clk) engine->core_clk = devm_clk_get_optional_enabled(dev, "ice"); if (!engine->core_clk) + engine->core_clk = devm_clk_get_optional_enabled(dev, "core"); + if (!engine->core_clk) engine->core_clk = devm_clk_get_enabled(dev, NULL); if (IS_ERR(engine->core_clk)) return ERR_CAST(engine->core_clk); + engine->iface_clk = devm_clk_get_optional_enabled(dev, "iface"); + if (IS_ERR(engine->iface_clk)) + return ERR_CAST(engine->iface_clk); + if (!qcom_ice_check_supported(engine)) return ERR_PTR(-EOPNOTSUPP); @@ -631,6 +648,8 @@ static struct qcom_ice *of_qcom_ice_get(struct device *dev) return qcom_ice_create(&pdev->dev, base); } + guard(mutex)(&ice_mutex); + /* * If the consumer node does not provider an 'ice' reg range * (legacy DT binding), then it must at least provide a phandle @@ -639,20 +658,21 @@ static struct qcom_ice *of_qcom_ice_get(struct device *dev) struct device_node *node __free(device_node) = of_parse_phandle(dev->of_node, "qcom,ice", 0); if (!node) - return NULL; + return ERR_PTR(-EOPNOTSUPP); pdev = of_find_device_by_node(node); if (!pdev) { dev_err(dev, "Cannot find device node %s\n", node->name); - return ERR_PTR(-EPROBE_DEFER); + return ERR_PTR(-ENODEV); } - ice = platform_get_drvdata(pdev); - if (!ice) { - dev_err(dev, "Cannot get ice instance from %s\n", - dev_name(&pdev->dev)); + ice = xa_load(&ice_handles, pdev->dev.of_node->phandle); + if (IS_ERR_OR_NULL(ice)) { platform_device_put(pdev); - return ERR_PTR(-EPROBE_DEFER); + if (!ice) + return ERR_PTR(-EPROBE_DEFER); + else + return ice; } link = device_link_add(dev, &pdev->dev, DL_FLAG_AUTOREMOVE_SUPPLIER); @@ -691,8 +711,7 @@ static void devm_of_qcom_ice_put(struct device *dev, void *res) * phandle via 'qcom,ice' property to an ICE DT, the ICE instance will already * be created and so this function will return that instead. * - * Return: ICE pointer on success, NULL if there is no ICE data provided by the - * consumer or ERR_PTR() on error. + * Return: ICE pointer on success, ERR_PTR() on error. */ struct qcom_ice *devm_of_qcom_ice_get(struct device *dev) { @@ -703,7 +722,7 @@ struct qcom_ice *devm_of_qcom_ice_get(struct device *dev) return ERR_PTR(-ENOMEM); ice = of_qcom_ice_get(dev); - if (!IS_ERR_OR_NULL(ice)) { + if (!IS_ERR(ice)) { *dr = ice; devres_add(dev, dr); } else { @@ -716,24 +735,40 @@ EXPORT_SYMBOL_GPL(devm_of_qcom_ice_get); static int qcom_ice_probe(struct platform_device *pdev) { + unsigned long phandle = pdev->dev.of_node->phandle; struct qcom_ice *engine; void __iomem *base; + guard(mutex)(&ice_mutex); + base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(base)) { dev_warn(&pdev->dev, "ICE registers not found\n"); + /* Store the error pointer for devm_of_qcom_ice_get() */ + xa_store(&ice_handles, phandle, (__force void *)base, GFP_KERNEL); return PTR_ERR(base); } engine = qcom_ice_create(&pdev->dev, base); - if (IS_ERR(engine)) + if (IS_ERR(engine)) { + /* Store the error pointer for devm_of_qcom_ice_get() */ + xa_store(&ice_handles, phandle, engine, GFP_KERNEL); return PTR_ERR(engine); + } - platform_set_drvdata(pdev, engine); + xa_store(&ice_handles, phandle, engine, GFP_KERNEL); return 0; } +static void qcom_ice_remove(struct platform_device *pdev) +{ + unsigned long phandle = pdev->dev.of_node->phandle; + + guard(mutex)(&ice_mutex); + xa_store(&ice_handles, phandle, NULL, GFP_KERNEL); +} + static const struct of_device_id qcom_ice_of_match_table[] = { { .compatible = "qcom,inline-crypto-engine" }, { }, @@ -742,6 +777,7 @@ MODULE_DEVICE_TABLE(of, qcom_ice_of_match_table); static struct platform_driver qcom_ice_driver = { .probe = qcom_ice_probe, + .remove = qcom_ice_remove, .driver = { .name = "qcom-ice", .of_match_table = qcom_ice_of_match_table, diff --git a/drivers/tee/optee/supp.c b/drivers/tee/optee/supp.c index a3d11b1f90fa..06747e90c230 100644 --- a/drivers/tee/optee/supp.c +++ b/drivers/tee/optee/supp.c @@ -10,7 +10,11 @@ struct optee_supp_req { struct list_head link; + int id; + bool in_queue; + bool processed; + u32 func; u32 ret; size_t num_params; @@ -19,6 +23,9 @@ struct optee_supp_req { struct completion c; }; +/* It is temporary request used for revoked pending request in supp->idr. */ +#define INVALID_REQ_PTR ((struct optee_supp_req *)ERR_PTR(-EBADF)) + void optee_supp_init(struct optee_supp *supp) { memset(supp, 0, sizeof(*supp)); @@ -39,21 +46,23 @@ void optee_supp_release(struct optee_supp *supp) { int id; struct optee_supp_req *req; - struct optee_supp_req *req_tmp; mutex_lock(&supp->mutex); - /* Abort all request retrieved by supplicant */ + /* Abort all request */ idr_for_each_entry(&supp->idr, req, id) { idr_remove(&supp->idr, id); - req->ret = TEEC_ERROR_COMMUNICATION; - complete(&req->c); - } + /* Skip if request was already marked invalid */ + if (IS_ERR(req)) + continue; - /* Abort all queued requests */ - list_for_each_entry_safe(req, req_tmp, &supp->reqs, link) { - list_del(&req->link); - req->in_queue = false; + /* For queued requests where supplicant has not seen it */ + if (req->in_queue) { + list_del(&req->link); + req->in_queue = false; + } + + req->processed = true; req->ret = TEEC_ERROR_COMMUNICATION; complete(&req->c); } @@ -100,8 +109,16 @@ u32 optee_supp_thrd_req(struct tee_context *ctx, u32 func, size_t num_params, /* Insert the request in the request list */ mutex_lock(&supp->mutex); + req->id = idr_alloc(&supp->idr, req, 1, 0, GFP_KERNEL); + if (req->id < 0) { + mutex_unlock(&supp->mutex); + kfree(req); + return TEEC_ERROR_OUT_OF_MEMORY; + } + list_add_tail(&req->link, &supp->reqs); req->in_queue = true; + req->processed = false; mutex_unlock(&supp->mutex); /* Tell an eventual waiter there's a new request */ @@ -117,21 +134,43 @@ u32 optee_supp_thrd_req(struct tee_context *ctx, u32 func, size_t num_params, if (wait_for_completion_killable(&req->c)) { mutex_lock(&supp->mutex); if (req->in_queue) { + /* Supplicant has not seen this request yet. */ + idr_remove(&supp->idr, req->id); list_del(&req->link); req->in_queue = false; + + ret = TEEC_ERROR_COMMUNICATION; + } else if (req->processed) { + /* + * Supplicant has processed this request. Ignore the + * kill signal for now and submit the result. req is not + * in supp->reqs (removed by supp_pop_entry()) nor in + * supp->idr (removed by supp_pop_req()). + */ + ret = req->ret; + } else { + /* + * Supplicant is in the middle of processing this + * request. Replace req with INVALID_REQ_PTR so that + * the ID remains busy, causing optee_supp_send() to + * fail on the next call to supp_pop_req() with this ID. + */ + idr_replace(&supp->idr, INVALID_REQ_PTR, req->id); + ret = TEEC_ERROR_COMMUNICATION; } + mutex_unlock(&supp->mutex); - req->ret = TEEC_ERROR_COMMUNICATION; + } else { + ret = req->ret; } - ret = req->ret; kfree(req); return ret; } static struct optee_supp_req *supp_pop_entry(struct optee_supp *supp, - int num_params, int *id) + int num_params) { struct optee_supp_req *req; @@ -153,10 +192,6 @@ static struct optee_supp_req *supp_pop_entry(struct optee_supp *supp, return ERR_PTR(-EINVAL); } - *id = idr_alloc(&supp->idr, req, 1, 0, GFP_KERNEL); - if (*id < 0) - return ERR_PTR(-ENOMEM); - list_del(&req->link); req->in_queue = false; @@ -214,7 +249,6 @@ int optee_supp_recv(struct tee_context *ctx, u32 *func, u32 *num_params, struct optee *optee = tee_get_drvdata(teedev); struct optee_supp *supp = &optee->supp; struct optee_supp_req *req = NULL; - int id; size_t num_meta; int rc; @@ -224,15 +258,11 @@ int optee_supp_recv(struct tee_context *ctx, u32 *func, u32 *num_params, while (true) { mutex_lock(&supp->mutex); - req = supp_pop_entry(supp, *num_params - num_meta, &id); + req = supp_pop_entry(supp, *num_params - num_meta); + if (req) + break; /* Keep mutex held. */ mutex_unlock(&supp->mutex); - if (req) { - if (IS_ERR(req)) - return PTR_ERR(req); - break; - } - /* * If we didn't get a request we'll block in * wait_for_completion() to avoid needless spinning. @@ -245,6 +275,13 @@ int optee_supp_recv(struct tee_context *ctx, u32 *func, u32 *num_params, return -ERESTARTSYS; } + /* supp->mutex held and req != NULL. */ + + if (IS_ERR(req)) { + mutex_unlock(&supp->mutex); + return PTR_ERR(req); + } + if (num_meta) { /* * tee-supplicant support meta parameters -> requsts can be @@ -252,13 +289,11 @@ int optee_supp_recv(struct tee_context *ctx, u32 *func, u32 *num_params, */ param->attr = TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT | TEE_IOCTL_PARAM_ATTR_META; - param->u.value.a = id; + param->u.value.a = req->id; param->u.value.b = 0; param->u.value.c = 0; } else { - mutex_lock(&supp->mutex); - supp->req_id = id; - mutex_unlock(&supp->mutex); + supp->req_id = req->id; } *func = req->func; @@ -266,6 +301,7 @@ int optee_supp_recv(struct tee_context *ctx, u32 *func, u32 *num_params, memcpy(param + num_meta, req->param, sizeof(struct tee_param) * req->num_params); + mutex_unlock(&supp->mutex); return 0; } @@ -297,12 +333,17 @@ static struct optee_supp_req *supp_pop_req(struct optee_supp *supp, if (!req) return ERR_PTR(-ENOENT); + /* optee_supp_thrd_req() already returned to optee. */ + if (IS_ERR(req)) + goto failed_req; + if ((num_params - nm) != req->num_params) return ERR_PTR(-EINVAL); + *num_meta = nm; +failed_req: idr_remove(&supp->idr, id); supp->req_id = -1; - *num_meta = nm; return req; } @@ -328,10 +369,9 @@ int optee_supp_send(struct tee_context *ctx, u32 ret, u32 num_params, mutex_lock(&supp->mutex); req = supp_pop_req(supp, num_params, param, &num_meta); - mutex_unlock(&supp->mutex); - if (IS_ERR(req)) { - /* Something is wrong, let supplicant restart. */ + mutex_unlock(&supp->mutex); + /* Something is wrong, let supplicant handel it. */ return PTR_ERR(req); } @@ -355,9 +395,10 @@ int optee_supp_send(struct tee_context *ctx, u32 ret, u32 num_params, } } req->ret = ret; - + req->processed = true; /* Let the requesting thread continue */ complete(&req->c); + mutex_unlock(&supp->mutex); return 0; } diff --git a/drivers/tee/qcomtee/core.c b/drivers/tee/qcomtee/core.c index b1cb50e434f0..60fe3b5776e3 100644 --- a/drivers/tee/qcomtee/core.c +++ b/drivers/tee/qcomtee/core.c @@ -306,8 +306,10 @@ int qcomtee_object_user_init(struct qcomtee_object *object, break; case QCOMTEE_OBJECT_TYPE_CB: object->ops = ops; - if (!object->ops->dispatch) - return -EINVAL; + if (!object->ops->dispatch) { + ret = -EINVAL; + break; + } /* If failed, "no-name". */ object->name = kvasprintf_const(GFP_KERNEL, fmt, ap); diff --git a/drivers/tee/tee_core.c b/drivers/tee/tee_core.c index ef9642d72672..1aac50c7c1de 100644 --- a/drivers/tee/tee_core.c +++ b/drivers/tee/tee_core.c @@ -530,11 +530,24 @@ static int params_to_user(struct tee_ioctl_param __user *uparams, return 0; } +static void free_params(struct tee_param *params, size_t num_params) +{ + size_t n; + + if (!params) + return; + + for (n = 0; n < num_params; n++) + if (tee_param_is_memref(params + n) && params[n].u.memref.shm) + tee_shm_put(params[n].u.memref.shm); + + kfree(params); +} + static int tee_ioctl_open_session(struct tee_context *ctx, struct tee_ioctl_buf_data __user *ubuf) { int rc; - size_t n; struct tee_ioctl_buf_data buf; struct tee_ioctl_open_session_arg __user *uarg; struct tee_ioctl_open_session_arg arg; @@ -595,16 +608,7 @@ out: */ if (rc && have_session && ctx->teedev->desc->ops->close_session) ctx->teedev->desc->ops->close_session(ctx, arg.session); - - if (params) { - /* Decrease ref count for all valid shared memory pointers */ - for (n = 0; n < arg.num_params; n++) - if (tee_param_is_memref(params + n) && - params[n].u.memref.shm) - tee_shm_put(params[n].u.memref.shm); - kfree(params); - } - + free_params(params, arg.num_params); return rc; } @@ -612,7 +616,6 @@ static int tee_ioctl_invoke(struct tee_context *ctx, struct tee_ioctl_buf_data __user *ubuf) { int rc; - size_t n; struct tee_ioctl_buf_data buf; struct tee_ioctl_invoke_arg __user *uarg; struct tee_ioctl_invoke_arg arg; @@ -657,14 +660,7 @@ static int tee_ioctl_invoke(struct tee_context *ctx, } rc = params_to_user(uparams, arg.num_params, params); out: - if (params) { - /* Decrease ref count for all valid shared memory pointers */ - for (n = 0; n < arg.num_params; n++) - if (tee_param_is_memref(params + n) && - params[n].u.memref.shm) - tee_shm_put(params[n].u.memref.shm); - kfree(params); - } + free_params(params, arg.num_params); return rc; } @@ -672,7 +668,6 @@ static int tee_ioctl_object_invoke(struct tee_context *ctx, struct tee_ioctl_buf_data __user *ubuf) { int rc; - size_t n; struct tee_ioctl_buf_data buf; struct tee_ioctl_object_invoke_arg __user *uarg; struct tee_ioctl_object_invoke_arg arg; @@ -716,14 +711,7 @@ static int tee_ioctl_object_invoke(struct tee_context *ctx, } rc = params_to_user(uparams, arg.num_params, params); out: - if (params) { - /* Decrease ref count for all valid shared memory pointers */ - for (n = 0; n < arg.num_params; n++) - if (tee_param_is_memref(params + n) && - params[n].u.memref.shm) - tee_shm_put(params[n].u.memref.shm); - kfree(params); - } + free_params(params, arg.num_params); return rc; } @@ -846,9 +834,15 @@ static int tee_ioctl_supp_recv(struct tee_context *ctx, return -ENOMEM; rc = params_from_user(ctx, params, num_params, uarg->params); - if (rc) - goto out; + if (rc) { + free_params(params, num_params); + return rc; + } + /* + * supp_recv() may consume and replace the supplied parameters, so the + * final cleanup cannot use free_params() like the other ioctl paths. + */ rc = ctx->teedev->desc->ops->supp_recv(ctx, &func, &num_params, params); if (rc) goto out; diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c index e9ea9f80cfd9..6742b3579c86 100644 --- a/drivers/tee/tee_shm.c +++ b/drivers/tee/tee_shm.c @@ -435,7 +435,7 @@ register_shm_helper(struct tee_context *ctx, struct iov_iter *iter, u32 flags, num_pages = iov_iter_npages(iter, INT_MAX); if (!num_pages) { ret = ERR_PTR(-ENOMEM); - goto err_ctx_put; + goto err_free_shm; } shm->pages = kzalloc_objs(*shm->pages, num_pages); diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c index bc037db46624..9c0973a7ffc3 100644 --- a/drivers/ufs/host/ufs-qcom.c +++ b/drivers/ufs/host/ufs-qcom.c @@ -177,14 +177,14 @@ static int ufs_qcom_ice_init(struct ufs_qcom_host *host) int i; ice = devm_of_qcom_ice_get(dev); - if (ice == ERR_PTR(-EOPNOTSUPP)) { + if (IS_ERR(ice)) { + if (ice != ERR_PTR(-EOPNOTSUPP)) + return PTR_ERR(ice); + dev_warn(dev, "Disabling inline encryption support\n"); - ice = NULL; + return 0; } - if (IS_ERR_OR_NULL(ice)) - return PTR_ERR_OR_ZERO(ice); - host->ice = ice; /* Initialize the blk_crypto_profile */ diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 27ab7bd844ec..c6240dccbb0f 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -1455,6 +1455,9 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io, if (atomic_add_return(bios, &io->pending_bios)) return; if (z_erofs_in_atomic()) { + /* See `sync_decompress` in sysfs-fs-erofs for more details */ + if (sbi->sync_decompress == EROFS_SYNC_DECOMPRESS_AUTO) + sbi->sync_decompress = EROFS_SYNC_DECOMPRESS_FORCE_ON; #ifdef CONFIG_EROFS_FS_PCPU_KTHREAD struct kthread_worker *worker; @@ -1471,9 +1474,6 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io, #else queue_work(z_erofs_workqueue, &io->u.work); #endif - /* See `sync_decompress` in sysfs-fs-erofs for more details */ - if (sbi->sync_decompress == EROFS_SYNC_DECOMPRESS_AUTO) - sbi->sync_decompress = EROFS_SYNC_DECOMPRESS_FORCE_ON; return; } gfp_flag = memalloc_noio_save(); diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index a72db36096ca..e1a02a2c8406 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c @@ -716,7 +716,7 @@ static int z_erofs_map_sanity_check(struct inode *inode, } if (map->m_algorithmformat < Z_EROFS_COMPRESSION_MAX) { - if (sbi->available_compr_algs ^ BIT(map->m_algorithmformat)) { + if (!(sbi->available_compr_algs & BIT(map->m_algorithmformat))) { erofs_err(inode->i_sb, "inconsistent algorithmtype %u for nid %llu", map->m_algorithmformat, EROFS_I(inode)->nid); return -EFSCORRUPTED; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 3134bb17f3e3..d7c399763ad9 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -927,7 +927,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) } if (nfs_write_need_commit(hdr)) { struct nfs_open_context *ctx = - hdr->req->wb_lock_context->open_context; + req->wb_lock_context->open_context; /* Reset wb_nio, since the write was successful. */ req->wb_nio = 0; diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index 0f5c18520eff..b193dde4810d 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -711,11 +711,16 @@ out: */ static int smb2_oplock_break_noti(struct oplock_info *opinfo) { - struct ksmbd_conn *conn = opinfo->conn; + struct ksmbd_conn *conn; struct oplock_break_info *br_info; int ret = 0; - struct ksmbd_work *work = ksmbd_alloc_work_struct(); + struct ksmbd_work *work; + + conn = READ_ONCE(opinfo->conn); + if (!conn) + return 0; + work = ksmbd_alloc_work_struct(); if (!work) return -ENOMEM; @@ -815,11 +820,15 @@ out: */ static int smb2_lease_break_noti(struct oplock_info *opinfo) { - struct ksmbd_conn *conn = opinfo->conn; + struct ksmbd_conn *conn; struct ksmbd_work *work; struct lease_break_info *br_info; struct lease *lease = opinfo->o_lease; + conn = READ_ONCE(opinfo->conn); + if (!conn) + return 0; + work = ksmbd_alloc_work_struct(); if (!work) return -ENOMEM; diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 620bcfbbfd92..3eb3b1711acb 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -7322,6 +7322,17 @@ int smb2_cancel(struct ksmbd_work *work) le64_to_cpu(hdr->Id.AsyncId)) continue; + /* + * A cancelled deferred byte-range lock frees its + * file_lock and takes the smb2_lock() early-exit that + * skips release_async_work(), so the work stays on + * conn->async_requests with a live cancel_fn pointing + * at the freed file_lock. Re-firing it on a second + * SMB2_CANCEL is a use-after-free. + */ + if (iter->state == KSMBD_WORK_CANCELLED) + break; + ksmbd_debug(SMB, "smb2 with AsyncId %llu cancelled command = 0x%x\n", le64_to_cpu(hdr->Id.AsyncId), diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c index 4d2d33df6231..ba3355a6057a 100644 --- a/fs/smb/server/vfs_cache.c +++ b/fs/smb/server/vfs_cache.c @@ -1390,19 +1390,19 @@ int ksmbd_reopen_durable_fd(struct ksmbd_work *work, struct ksmbd_file *fp) struct ksmbd_lock *smb_lock; unsigned int old_f_state; + write_lock(&global_ft.lock); if (!fp->is_durable || fp->conn || fp->tcon) { + write_unlock(&global_ft.lock); pr_err("Invalid durable fd [%p:%p]\n", fp->conn, fp->tcon); return -EBADF; } if (has_file_id(fp->volatile_id)) { + write_unlock(&global_ft.lock); pr_err("Still in use durable fd: %llu\n", fp->volatile_id); return -EBADF; } - old_f_state = fp->f_state; - fp->f_state = FP_NEW; - /* * Initialize fp's connection binding before publishing fp into the * session's file table. If __open_id() is ordered first, a @@ -1413,11 +1413,17 @@ int ksmbd_reopen_durable_fd(struct ksmbd_work *work, struct ksmbd_file *fp) */ fp->conn = ksmbd_conn_get(conn); fp->tcon = work->tcon; + write_unlock(&global_ft.lock); + + old_f_state = fp->f_state; + fp->f_state = FP_NEW; __open_id(&work->sess->file_table, fp, OPEN_ID_TYPE_VOLATILE_ID); if (!has_file_id(fp->volatile_id)) { + write_lock(&global_ft.lock); fp->conn = NULL; fp->tcon = NULL; + write_unlock(&global_ft.lock); ksmbd_conn_put(conn); fp->f_state = old_f_state; return -EBADF; diff --git a/fs/xfs/scrub/cow_repair.c b/fs/xfs/scrub/cow_repair.c index bffc4666ce60..c25716fc4fee 100644 --- a/fs/xfs/scrub/cow_repair.c +++ b/fs/xfs/scrub/cow_repair.c @@ -300,18 +300,15 @@ xrep_cow_find_bad( * on the debugging knob, replace everything in the CoW fork. */ if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD) || - XFS_TEST_ERROR(sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR)) { + XFS_TEST_ERROR(sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR)) error = xrep_cow_mark_file_range(xc, xc->irec.br_startblock, xc->irec.br_blockcount); - if (error) - return error; - } out_sa: xchk_ag_free(sc, &sc->sa); out_pag: xfs_perag_put(pag); - return 0; + return error; } /* @@ -385,12 +382,9 @@ xrep_cow_find_bad_rt( * CoW fork and then scan for staging extents in the refcountbt. */ if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD) || - XFS_TEST_ERROR(sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR)) { + XFS_TEST_ERROR(sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR)) error = xrep_cow_mark_file_range(xc, xc->irec.br_startblock, xc->irec.br_blockcount); - if (error) - goto out_rtg; - } out_sr: xchk_rtgroup_btcur_free(&sc->sr); diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 46e234863644..96af6b62ce39 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -409,6 +409,26 @@ xfs_ioc_ag_geometry( return 0; } +static void +xfs_rtgroup_report_write_pointer( + struct xfs_rtgroup *rtg, + struct xfs_rtgroup_geometry *rgeo) +{ + xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP); + if (rtg->rtg_open_zone) { + rgeo->rg_writepointer = rtg->rtg_open_zone->oz_allocated; + } else { + xfs_rgblock_t highest_rgbno = xfs_rtrmap_highest_rgbno(rtg); + + if (highest_rgbno == NULLRGBLOCK) + rgeo->rg_writepointer = 0; + else + rgeo->rg_writepointer = highest_rgbno + 1; + } + xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_RMAP); + rgeo->rg_flags |= XFS_RTGROUP_GEOM_WRITEPOINTER; +} + STATIC int xfs_ioc_rtgroup_geometry( struct xfs_mount *mp, @@ -416,7 +436,6 @@ xfs_ioc_rtgroup_geometry( { struct xfs_rtgroup *rtg; struct xfs_rtgroup_geometry rgeo; - xfs_rgblock_t highest_rgbno; int error; if (copy_from_user(&rgeo, arg, sizeof(rgeo))) @@ -433,28 +452,16 @@ xfs_ioc_rtgroup_geometry( return -EINVAL; error = xfs_rtgroup_get_geometry(rtg, &rgeo); - xfs_rtgroup_put(rtg); if (error) - return error; - - if (xfs_has_zoned(mp)) { - xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP); - if (rtg->rtg_open_zone) { - rgeo.rg_writepointer = rtg->rtg_open_zone->oz_allocated; - } else { - highest_rgbno = xfs_rtrmap_highest_rgbno(rtg); - if (highest_rgbno == NULLRGBLOCK) - rgeo.rg_writepointer = 0; - else - rgeo.rg_writepointer = highest_rgbno + 1; - } - xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_RMAP); - rgeo.rg_flags |= XFS_RTGROUP_GEOM_WRITEPOINTER; - } + goto out_put_rtg; + if (xfs_has_zoned(mp)) + xfs_rtgroup_report_write_pointer(rtg, &rgeo); if (copy_to_user(arg, &rgeo, sizeof(rgeo))) - return -EFAULT; - return 0; + error = -EFAULT; +out_put_rtg: + xfs_rtgroup_put(rtg); + return error; } /* diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index b24195f570cd..7aa51826b1ca 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1149,9 +1149,12 @@ xfs_mountfs( * blocks. */ error = xfs_fs_reserve_ag_blocks(mp); - if (error && error == -ENOSPC) + if (error) { + if (error != -ENOSPC) + goto out_rtunmount; xfs_warn(mp, - "ENOSPC reserving per-AG metadata pool, log recovery may fail."); +"ENOSPC reserving per-AG metadata pool, log recovery may fail."); + } error = xfs_log_mount_finish(mp); xfs_fs_unreserve_ag_blocks(mp); if (error) { diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c index 221e55887a2a..d92993367ab6 100644 --- a/fs/xfs/xfs_pnfs.c +++ b/fs/xfs/xfs_pnfs.c @@ -118,7 +118,6 @@ xfs_fs_map_blocks( struct xfs_bmbt_irec imap; xfs_fileoff_t offset_fsb, end_fsb; loff_t limit; - int bmapi_flags = XFS_BMAPI_ENTIRE; int nimaps = 1; uint lock_flags; int error = 0; @@ -172,14 +171,18 @@ xfs_fs_map_blocks( offset_fsb = XFS_B_TO_FSBT(mp, offset); lock_flags = xfs_ilock_data_map_shared(ip); + /* request mappings for the specified range only */ error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, - &imap, &nimaps, bmapi_flags); + &imap, &nimaps, 0); + if (error) { + xfs_iunlock(ip, lock_flags); + goto out_unlock; + } seq = xfs_iomap_inode_sequence(ip, 0); ASSERT(!nimaps || imap.br_startblock != DELAYSTARTBLOCK); - if (!error && write && - (!nimaps || imap.br_startblock == HOLESTARTBLOCK)) { + if (write && (!nimaps || imap.br_startblock == HOLESTARTBLOCK)) { if (offset + length > XFS_ISIZE(ip)) end_fsb = xfs_iomap_eof_align_last_fsb(ip, end_fsb); else if (nimaps && imap.br_startblock == HOLESTARTBLOCK) diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index db23a0f231d6..251dec48f0e3 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -949,16 +949,16 @@ xfs_reflink_end_cow( * repeatedly cycles the ILOCK to allocate one transaction per remapped * extent. * - * If we're being called by writeback then the pages will still - * have PageWriteback set, which prevents races with reflink remapping - * and truncate. Reflink remapping prevents races with writeback by - * taking the iolock and mmaplock before flushing the pages and - * remapping, which means there won't be any further writeback or page - * cache dirtying until the reflink completes. + * If we're being called by writeback then the folios will still + * have the writeback flag set, which prevents races with reflink + * remapping and truncate. Reflink remapping prevents races with + * writeback by taking the iolock and mmaplock before flushing + * the folios and remapping, which means there won't be any further + * writeback or page cache dirtying until the reflink completes. * * We should never have two threads issuing writeback for the same file * region. There are also have post-eof checks in the writeback - * preparation code so that we don't bother writing out pages that are + * preparation code so that we don't bother writing out folios that are * about to be truncated. * * If we're being called as part of directio write completion, the dio diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c index c8a1d5c0332c..f03211e4354a 100644 --- a/fs/xfs/xfs_zone_gc.c +++ b/fs/xfs/xfs_zone_gc.c @@ -400,7 +400,7 @@ retry: /* * If the inode was already deleted, skip over it. */ - if (error == -ENOENT) { + if (error == -ENOENT || error == -EINVAL) { iter->rec_idx++; goto retry; } diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 5957bc25efa8..2abaf99321e9 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -153,8 +153,6 @@ long hugetlb_unreserve_pages(struct inode *inode, long start, long end, long freed); bool folio_isolate_hugetlb(struct folio *folio, struct list_head *list); int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison); -int get_huge_page_for_hwpoison(unsigned long pfn, int flags, - bool *migratable_cleared); void folio_putback_hugetlb(struct folio *folio); void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int reason); void hugetlb_fix_reserve_counts(struct inode *inode); @@ -421,12 +419,6 @@ static inline int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, return 0; } -static inline int get_huge_page_for_hwpoison(unsigned long pfn, int flags, - bool *migratable_cleared) -{ - return 0; -} - static inline void folio_putback_hugetlb(struct folio *folio) { } diff --git a/include/linux/mm.h b/include/linux/mm.h index 06bbe9eba636..fc2acedf0b76 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -4975,8 +4975,6 @@ extern int soft_offline_page(unsigned long pfn, int flags); */ extern const struct attribute_group memory_failure_attr_group; extern void memory_failure_queue(unsigned long pfn, int flags); -extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags, - bool *migratable_cleared); void num_poisoned_pages_inc(unsigned long pfn); void num_poisoned_pages_sub(unsigned long pfn, long i); #else @@ -4984,12 +4982,6 @@ static inline void memory_failure_queue(unsigned long pfn, int flags) { } -static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags, - bool *migratable_cleared) -{ - return 0; -} - static inline void num_poisoned_pages_inc(unsigned long pfn) { } diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 763eea4d80d8..2d2b9f8cdda4 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -20,6 +20,7 @@ #include <linux/rcupdate_trace.h> #include <linux/tracepoint-defs.h> #include <linux/static_call.h> +#include <linux/cfi.h> struct module; struct tracepoint; @@ -389,6 +390,13 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) void __probestub_##_name(void *__data, proto) \ { \ } \ + /* \ + * Annotate the probestub 'CFI_NOSEAL' to stop objtool from \ + * requesting the kernel remove the ENDBR, because the only \ + * references to the function are in the __tracepoint section, \ + * that objtool doesn't scan. \ + */ \ + CFI_NOSEAL(__probestub_##_name); \ DEFINE_STATIC_CALL(tp_func_##_name, __traceiter_##_name); \ DEFINE_RUST_DO_TRACE(_name, TP_PROTO(proto), TP_ARGS(args)) diff --git a/include/net/act_api.h b/include/net/act_api.h index d11b79107930..fd2967ee08f7 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -45,6 +45,7 @@ struct tc_action { struct tc_cookie __rcu *user_cookie; struct tcf_chain __rcu *goto_chain; u32 tcfa_flags; + struct rcu_head tcfa_rcu; u8 hw_stats; u8 used_hw_stats; bool used_hw_stats_valid; diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 5172afee5494..e0a1f2293679 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -33,6 +33,7 @@ /* L2CAP defaults */ #define L2CAP_DEFAULT_MTU 672 #define L2CAP_DEFAULT_MIN_MTU 48 +#define L2CAP_SIG_MTU 48 /* BR/EDR signaling MTU */ #define L2CAP_DEFAULT_FLUSH_TO 0xFFFF #define L2CAP_EFS_DEFAULT_FLUSH_TO 0xFFFFFFFF #define L2CAP_DEFAULT_TX_WINDOW 63 diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index a02e569813d2..e517eaaa177b 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -1824,8 +1824,7 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler); int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler); int ip_vs_bind_scheduler(struct ip_vs_service *svc, struct ip_vs_scheduler *scheduler); -void ip_vs_unbind_scheduler(struct ip_vs_service *svc, - struct ip_vs_scheduler *sched); +void ip_vs_unbind_scheduler(struct ip_vs_service *svc); struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name); void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler); struct ip_vs_conn * diff --git a/include/net/mptcp.h b/include/net/mptcp.h index f7263fe2a2e4..ee70f597a4de 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -27,7 +27,9 @@ struct mptcp_ext { u32 subflow_seq; u16 data_len; __sum16 csum; - u8 use_map:1, + + struct_group(flags, + u8 use_map:1, dsn64:1, data_fin:1, use_ack:1, @@ -35,9 +37,10 @@ struct mptcp_ext { mpc_map:1, frozen:1, reset_transient:1; - u8 reset_reason:4, + u8 reset_reason:4, csum_reqd:1, infinite_map:1; + ); /* end of flags group */ }; #define MPTCPOPT_HMAC_LEN 20 diff --git a/include/net/tc_act/tc_pedit.h b/include/net/tc_act/tc_pedit.h index f58ee15cd858..cb7b82f2cbc7 100644 --- a/include/net/tc_act/tc_pedit.h +++ b/include/net/tc_act/tc_pedit.h @@ -15,7 +15,6 @@ struct tcf_pedit_parms { struct tc_pedit_key *tcfp_keys; struct tcf_pedit_key_ex *tcfp_keys_ex; int action; - u32 tcfp_off_max_hint; unsigned char tcfp_nkeys; unsigned char tcfp_flags; struct rcu_head rcu; diff --git a/include/uapi/linux/tee.h b/include/uapi/linux/tee.h index cab5cadca8ef..5203977ed35d 100644 --- a/include/uapi/linux/tee.h +++ b/include/uapi/linux/tee.h @@ -470,6 +470,7 @@ struct tee_ioctl_object_invoke_arg { __u32 op; __u32 ret; __u32 num_params; + __u32 :32; /* num_params tells the actual number of element in params */ struct tee_ioctl_param params[]; }; diff --git a/io_uring/net.c b/io_uring/net.c index 8df15b639358..ee848eb65ec9 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -842,7 +842,8 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) } /* bits to clear in old and inherit in new cflags on bundle retry */ -#define CQE_F_MASK (IORING_CQE_F_SOCK_NONEMPTY|IORING_CQE_F_MORE) +#define CQE_F_MASK (IORING_CQE_F_SOCK_NONEMPTY|IORING_CQE_F_MORE|\ + IORING_CQE_F_BUF_MORE) /* * Finishes io_recv and io_recvmsg. diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 5c33ab20cc20..c9e14fda3d6f 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -1811,9 +1811,9 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd, * Compute add/delete mask to/from effective_cpus * * For valid partition: - * addmask = exclusive_cpus & ~newmask + * addmask = effective_xcpus & ~newmask * & parent->effective_xcpus - * delmask = newmask & ~exclusive_cpus + * delmask = newmask & ~effective_xcpus * & parent->effective_xcpus * * For invalid partition: @@ -1825,11 +1825,11 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd, deleting = cpumask_and(tmp->delmask, newmask, parent->effective_xcpus); } else { - cpumask_andnot(tmp->addmask, xcpus, newmask); + cpumask_andnot(tmp->addmask, cs->effective_xcpus, newmask); adding = cpumask_and(tmp->addmask, tmp->addmask, parent->effective_xcpus); - cpumask_andnot(tmp->delmask, newmask, xcpus); + cpumask_andnot(tmp->delmask, newmask, cs->effective_xcpus); deleting = cpumask_and(tmp->delmask, tmp->delmask, parent->effective_xcpus); } @@ -1868,7 +1868,7 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd, part_error = PERR_NOCPUS; deleting = false; adding = cpumask_and(tmp->addmask, - xcpus, parent->effective_xcpus); + cs->effective_xcpus, parent->effective_xcpus); } } else { /* @@ -1890,7 +1890,8 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd, part_error = PERR_NOCPUS; if (is_partition_valid(cs)) adding = cpumask_and(tmp->addmask, - xcpus, parent->effective_xcpus); + cs->effective_xcpus, + parent->effective_xcpus); } else if (is_partition_invalid(cs) && !cpumask_empty(xcpus) && cpumask_subset(xcpus, parent->effective_xcpus)) { struct cgroup_subsys_state *css; diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 65631e577ee9..5d2d19473a82 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -4402,11 +4402,13 @@ void scx_cgroup_move_task(struct task_struct *p) return; /* - * @p must have ops.cgroup_prep_move() called on it and thus - * cgrp_moving_from set. + * scx_cgroup_can_attach() sets cgrp_moving_from only when the task's + * cgroup changes. Migration keys off css rather than cgroup identity, + * so it can hand an unchanged-cgroup task here with cgrp_moving_from + * NULL. Nothing to report to the BPF scheduler then, so skip it and + * keep prep_move and move paired. */ - if (SCX_HAS_OP(sch, cgroup_move) && - !WARN_ON_ONCE(!p->scx.cgrp_moving_from)) + if (SCX_HAS_OP(sch, cgroup_move) && p->scx.cgrp_moving_from) SCX_CALL_OP_TASK(sch, cgroup_move, task_rq(p), p, p->scx.cgrp_moving_from, tg_cgrp(task_group(p))); diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index e0d3a0da26af..44c22d4e7881 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -962,8 +962,6 @@ static int parse_probe_vars(char *orig_arg, const struct fetch_type *t, code->op = FETCH_OP_COMM; return 0; } - /* backward compatibility */ - ctx->offset = 0; goto inval; } @@ -188,10 +188,13 @@ cleanup: /* Expose all pages to the buddy, they are useless for CMA. */ if (!test_bit(CMA_RESERVE_PAGES_ON_ERROR, &cma->flags)) { - for (r = 0; r < allocrange; r++) { + for (r = 0; r < cma->nranges; r++) { + unsigned long start_pfn; + cmr = &cma->ranges[r]; + start_pfn = r <= allocrange ? early_pfn[r] : cmr->early_pfn; end_pfn = cmr->base_pfn + cmr->count; - for (pfn = early_pfn[r]; pfn < end_pfn; pfn++) + for (pfn = start_pfn; pfn < end_pfn; pfn++) free_reserved_page(pfn_to_page(pfn)); } } diff --git a/mm/cma_debug.c b/mm/cma_debug.c index 5ae38f5abbcc..523ba4a0f9f7 100644 --- a/mm/cma_debug.c +++ b/mm/cma_debug.c @@ -205,7 +205,8 @@ static int __init cma_debugfs_init(void) cma_debugfs_root = debugfs_create_dir("cma", NULL); for (i = 0; i < cma_area_count; i++) - cma_debugfs_add_one(&cma_areas[i], cma_debugfs_root); + if (test_bit(CMA_ACTIVATED, &cma_areas[i].flags)) + cma_debugfs_add_one(&cma_areas[i], cma_debugfs_root); return 0; } diff --git a/mm/damon/ops-common.c b/mm/damon/ops-common.c index 8c6d613425c1..c3e4c871b0bb 100644 --- a/mm/damon/ops-common.c +++ b/mm/damon/ops-common.c @@ -32,9 +32,9 @@ struct folio *damon_get_folio(unsigned long pfn) return NULL; folio = page_folio(page); - if (!folio_test_lru(folio) || !folio_try_get(folio)) + if (!folio_try_get(folio)) return NULL; - if (unlikely(page_folio(page) != folio || !folio_test_lru(folio))) { + if (unlikely(page_folio(page) != folio) || !folio_test_lru(folio)) { folio_put(folio); folio = NULL; } diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 970e077019b7..653f2dc03403 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -3015,9 +3015,9 @@ static void __split_huge_pud_locked(struct vm_area_struct *vma, pud_t *pud, if (!folio_test_referenced(folio) && pud_young(old_pud)) folio_set_referenced(folio); folio_remove_rmap_pud(folio, page, vma); - folio_put(folio); add_mm_counter(vma->vm_mm, mm_counter_file(folio), -HPAGE_PUD_NR); + folio_put(folio); } void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud, @@ -3133,7 +3133,9 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, if (!folio_test_referenced(folio) && pmd_young(old_pmd)) folio_set_referenced(folio); folio_remove_rmap_pmd(folio, page, vma); + add_mm_counter(mm, mm_counter_file(folio), -HPAGE_PMD_NR); folio_put(folio); + return; } add_mm_counter(mm, mm_counter_file(folio), -HPAGE_PMD_NR); return; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 4b80b167cc9c..c921287489de 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -118,6 +118,9 @@ static int hugetlb_acct_memory(struct hstate *h, long delta); static void hugetlb_vma_lock_free(struct vm_area_struct *vma); static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma); static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma); +static int __huge_pmd_unshare(struct mmu_gather *tlb, + struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, + bool check_locks); static void hugetlb_unshare_pmds(struct vm_area_struct *vma, unsigned long start, unsigned long end, bool take_locks); static struct resv_map *vma_resv_map(struct vm_area_struct *vma); @@ -4974,6 +4977,7 @@ again: addr, dst_vma); folio_put(pte_folio); if (ret) { + restore_reserve_on_error(h, dst_vma, addr, new_folio); folio_put(new_folio); break; } @@ -6270,6 +6274,7 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte, folio_put(*foliop); *foliop = NULL; if (ret) { + restore_reserve_on_error(h, dst_vma, dst_addr, folio); folio_put(folio); goto out; } @@ -6891,6 +6896,31 @@ out: return pte; } +static int __huge_pmd_unshare(struct mmu_gather *tlb, + struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, + bool check_locks) +{ + unsigned long sz = huge_page_size(hstate_vma(vma)); + struct mm_struct *mm = vma->vm_mm; + pgd_t *pgd = pgd_offset(mm, addr); + p4d_t *p4d = p4d_offset(pgd, addr); + pud_t *pud = pud_offset(p4d, addr); + + if (sz != PMD_SIZE) + return 0; + if (!ptdesc_pmd_is_shared(virt_to_ptdesc(ptep))) + return 0; + i_mmap_assert_write_locked(vma->vm_file->f_mapping); + if (check_locks) + hugetlb_vma_assert_locked(vma); + pud_clear(pud); + + tlb_unshare_pmd_ptdesc(tlb, virt_to_ptdesc(ptep), addr); + + mm_dec_nr_pmds(mm); + return 1; +} + /** * huge_pmd_unshare - Unmap a pmd table if it is shared by multiple users * @tlb: the current mmu_gather. @@ -6910,24 +6940,7 @@ out: int huge_pmd_unshare(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { - unsigned long sz = huge_page_size(hstate_vma(vma)); - struct mm_struct *mm = vma->vm_mm; - pgd_t *pgd = pgd_offset(mm, addr); - p4d_t *p4d = p4d_offset(pgd, addr); - pud_t *pud = pud_offset(p4d, addr); - - if (sz != PMD_SIZE) - return 0; - if (!ptdesc_pmd_is_shared(virt_to_ptdesc(ptep))) - return 0; - i_mmap_assert_write_locked(vma->vm_file->f_mapping); - hugetlb_vma_assert_locked(vma); - pud_clear(pud); - - tlb_unshare_pmd_ptdesc(tlb, virt_to_ptdesc(ptep), addr); - - mm_dec_nr_pmds(mm); - return 1; + return __huge_pmd_unshare(tlb, vma, addr, ptep, /*check_locks=*/true); } /* @@ -6961,6 +6974,13 @@ pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma, return NULL; } +static int __huge_pmd_unshare(struct mmu_gather *tlb, + struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, + bool check_locks) +{ + return 0; +} + int huge_pmd_unshare(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { @@ -7141,17 +7161,6 @@ int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison return ret; } -int get_huge_page_for_hwpoison(unsigned long pfn, int flags, - bool *migratable_cleared) -{ - int ret; - - spin_lock_irq(&hugetlb_lock); - ret = __get_huge_page_for_hwpoison(pfn, flags, migratable_cleared); - spin_unlock_irq(&hugetlb_lock); - return ret; -} - /** * folio_putback_hugetlb - unisolate a hugetlb folio * @folio: the isolated hugetlb folio @@ -7269,7 +7278,7 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma, if (!ptep) continue; ptl = huge_pte_lock(h, mm, ptep); - huge_pmd_unshare(&tlb, vma, address, ptep); + __huge_pmd_unshare(&tlb, vma, address, ptep, take_locks); spin_unlock(ptl); } huge_pmd_unshare_flush(&tlb, vma); diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c index 4a077d231d3a..133b46dfb09f 100644 --- a/mm/hugetlb_vmemmap.c +++ b/mm/hugetlb_vmemmap.c @@ -207,6 +207,8 @@ static void vmemmap_remap_pte(pte_t *pte, unsigned long addr, /* Remapping the head page requires r/w */ if (unlikely(walk->nr_walked == 0 && walk->vmemmap_head)) { + VM_WARN_ON_ONCE(!PageHead((const struct page *)addr)); + list_del(&walk->vmemmap_head->lru); /* @@ -218,6 +220,8 @@ static void vmemmap_remap_pte(pte_t *pte, unsigned long addr, entry = mk_pte(walk->vmemmap_head, PAGE_KERNEL); } else { + VM_WARN_ON_ONCE(!PageTail((const struct page *)addr)); + /* * Remap the tail pages as read-only to catch illegal write * operation to the tail pages. @@ -232,33 +236,28 @@ static void vmemmap_remap_pte(pte_t *pte, unsigned long addr, static void vmemmap_restore_pte(pte_t *pte, unsigned long addr, struct vmemmap_remap_walk *walk) { - struct page *page; - struct page *from, *to; - - page = list_first_entry(walk->vmemmap_pages, struct page, lru); - list_del(&page->lru); + struct page *src = pte_page(ptep_get(pte)), *dst; /* - * Initialize tail pages in the newly allocated vmemmap page. - * - * There is folio-scope metadata that is encoded in the first few - * tail pages. - * - * Use the value last tail page in the page with the head page - * to initialize the rest of tail pages. + * When rolling back vmemmap_remap_free(), keep the copied head page + * mapping and restore only PTEs currently pointing at the shared tail + * page. */ - from = compound_head((struct page *)addr) + - PAGE_SIZE / sizeof(struct page) - 1; - to = page_to_virt(page); - for (int i = 0; i < PAGE_SIZE / sizeof(struct page); i++, to++) - *to = *from; + if (walk->vmemmap_tail && walk->vmemmap_tail != src) + return; + + VM_WARN_ON_ONCE(PageHead((const struct page *)addr)); + + dst = list_first_entry(walk->vmemmap_pages, struct page, lru); + list_del(&dst->lru); + copy_page(page_to_virt(dst), page_to_virt(src)); /* * Makes sure that preceding stores to the page contents become visible * before the set_pte_at() write. */ smp_wmb(); - set_pte_at(&init_mm, addr, pte, mk_pte(page, PAGE_KERNEL)); + set_pte_at(&init_mm, addr, pte, mk_pte(dst, PAGE_KERNEL)); } /** @@ -324,6 +323,7 @@ static int vmemmap_remap_free(unsigned long start, unsigned long end, */ walk = (struct vmemmap_remap_walk) { .remap_pte = vmemmap_restore_pte, + .vmemmap_tail = vmemmap_tail, .vmemmap_pages = vmemmap_pages, .flags = 0, }; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 177732fef010..1a4fd2504bcd 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2011,6 +2011,7 @@ struct memcg_stock_pcp { struct work_struct work; unsigned long flags; + uint8_t drain_idx; }; static DEFINE_PER_CPU_ALIGNED(struct memcg_stock_pcp, memcg_stock) = { @@ -2194,7 +2195,9 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages) if (!success) { i = empty_slot; if (i == -1) { - i = get_random_u32_below(NR_MEMCG_STOCK); + i = stock->drain_idx++; + if (stock->drain_idx == NR_MEMCG_STOCK) + stock->drain_idx = 0; drain_stock(stock, i); } css_get(&memcg->css); diff --git a/mm/memory-failure.c b/mm/memory-failure.c index ee42d4361309..d47aef256a32 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1966,20 +1966,19 @@ void folio_clear_hugetlb_hwpoison(struct folio *folio) folio_free_raw_hwp(folio, true); } -/* - * Called from hugetlb code with hugetlb_lock held. - */ -int __get_huge_page_for_hwpoison(unsigned long pfn, int flags, +static int get_huge_page_for_hwpoison(unsigned long pfn, int flags, bool *migratable_cleared) { struct page *page = pfn_to_page(pfn); - struct folio *folio = page_folio(page); + struct folio *folio; bool count_increased = false; int ret, rc; + spin_lock_irq(&hugetlb_lock); + folio = page_folio(page); if (!folio_test_hugetlb(folio)) { ret = MF_HUGETLB_NON_HUGEPAGE; - goto out; + goto out_unlock; } else if (flags & MF_COUNT_INCREASED) { ret = MF_HUGETLB_IN_USED; count_increased = true; @@ -1995,13 +1994,13 @@ int __get_huge_page_for_hwpoison(unsigned long pfn, int flags, } else { ret = MF_HUGETLB_RETRY; if (!(flags & MF_NO_RETRY)) - goto out; + goto out_unlock; } rc = hugetlb_update_hwpoison(folio, page); if (rc >= MF_HUGETLB_FOLIO_PRE_POISONED) { ret = rc; - goto out; + goto out_unlock; } /* @@ -2013,8 +2012,10 @@ int __get_huge_page_for_hwpoison(unsigned long pfn, int flags, *migratable_cleared = true; } + spin_unlock_irq(&hugetlb_lock); return ret; -out: +out_unlock: + spin_unlock_irq(&hugetlb_lock); if (count_increased) folio_put(folio); return ret; diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 180bad42fc79..80cc8be5725f 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -14,6 +14,8 @@ #include <linux/userfaultfd_k.h> #include <linux/mmu_notifier.h> #include <linux/hugetlb.h> +#include <linux/file.h> +#include <linux/cleanup.h> #include <asm/tlbflush.h> #include <asm/tlb.h> #include "internal.h" @@ -66,7 +68,7 @@ static const struct vm_uffd_ops *vma_uffd_ops(struct vm_area_struct *vma) { if (vma_is_anonymous(vma)) return &anon_uffd_ops; - return vma->vm_ops ? vma->vm_ops->uffd_ops : NULL; + return vma->vm_ops->uffd_ops; } static __always_inline @@ -443,16 +445,80 @@ static int mfill_copy_folio_locked(struct folio *folio, unsigned long src_addr) return ret; } -static int mfill_copy_folio_retry(struct mfill_state *state, +#define MFILL_RETRY_STATE_VMA_FLAGS \ + append_vma_flags(__VMA_UFFD_FLAGS, VMA_SHARED_BIT) + +/* + * VMA state saved before dropping the locks in mfill_copy_folio_retry(). + * Used to detect VMA replacement or incompatible changes after reacquiring the + * locks. + */ +struct mfill_retry_state { + const struct vm_uffd_ops *ops; + struct file *file; + vma_flags_t flags; + pgoff_t pgoff; +}; + +static void mfill_retry_state_save(struct mfill_retry_state *s, + struct vm_area_struct *vma) +{ + s->flags = vma_flags_and_mask(&vma->flags, MFILL_RETRY_STATE_VMA_FLAGS); + s->ops = vma_uffd_ops(vma); + s->pgoff = vma->vm_pgoff; + + if (vma->vm_file) + s->file = get_file(vma->vm_file); +} + +static bool mfill_retry_state_changed(struct mfill_retry_state *state, + struct vm_area_struct *vma) +{ + vma_flags_t flags = vma_flags_and_mask(&vma->flags, + MFILL_RETRY_STATE_VMA_FLAGS); + + /* Have any UFFD flags (missing, WP, minor) changed? */ + if (!vma_flags_same_pair(&state->flags, &flags)) + return true; + + /* VMA type or effective uffd_ops changed while the lock was dropped */ + if (state->ops != vma_uffd_ops(vma)) + return true; + + /* VMA was anonymous before; changed only if it no longer is */ + if (!state->file) + return !vma_is_anonymous(vma); + + /* VMA was file backed, but file, inode or offset has changed */ + if (!vma->vm_file || vma->vm_file->f_inode != state->file->f_inode || + state->file != vma->vm_file || vma->vm_pgoff != state->pgoff) + return true; + + return false; +} + +static void mfill_retry_state_put(struct mfill_retry_state *s) +{ + if (s->file) + fput(s->file); +} + +DEFINE_FREE(retry_put, struct mfill_retry_state *, + if (_T) mfill_retry_state_put(_T)); + +static int mfill_copy_folio_retry(struct mfill_state *mfill_state, struct folio *folio) { - const struct vm_uffd_ops *orig_ops = vma_uffd_ops(state->vma); - unsigned long src_addr = state->src_addr; + struct mfill_retry_state retry_state = { 0 }; + struct mfill_retry_state *for_free __free(retry_put) = &retry_state; + unsigned long src_addr = mfill_state->src_addr; void *kaddr; int err; + mfill_retry_state_save(&retry_state, mfill_state->vma); + /* retry copying with mm_lock dropped */ - mfill_put_vma(state); + mfill_put_vma(mfill_state); kaddr = kmap_local_folio(folio, 0); err = copy_from_user(kaddr, (const void __user *) src_addr, PAGE_SIZE); @@ -463,19 +529,14 @@ static int mfill_copy_folio_retry(struct mfill_state *state, flush_dcache_folio(folio); /* reget VMA and PMD, they could change underneath us */ - err = mfill_get_vma(state); + err = mfill_get_vma(mfill_state); if (err) return err; - /* - * The VMA type may have changed while the lock was dropped - * (e.g. replaced with a hugetlb mapping), making the caller's - * ops pointer stale. - */ - if (vma_uffd_ops(state->vma) != orig_ops) + if (mfill_retry_state_changed(&retry_state, mfill_state->vma)) return -EAGAIN; - err = mfill_establish_pmd(state); + err = mfill_establish_pmd(mfill_state); if (err) return err; @@ -491,6 +552,11 @@ static int __mfill_atomic_pte(struct mfill_state *state, struct folio *folio; int ret; + if (!ops) { + VM_WARN_ONCE(1, "UFFDIO_COPY for unsupported VMA"); + return -EOPNOTSUPP; + } + folio = ops->alloc_folio(state->vma, state->dst_addr); if (!folio) return -ENOMEM; diff --git a/net/6lowpan/iphc.c b/net/6lowpan/iphc.c index e116d308a8df..37eaff3f7b69 100644 --- a/net/6lowpan/iphc.c +++ b/net/6lowpan/iphc.c @@ -1086,12 +1086,12 @@ static u8 lowpan_iphc_mcast_ctx_addr_compress(u8 **hc_ptr, const struct lowpan_iphc_ctx *ctx, const struct in6_addr *ipaddr) { - u8 data[6]; + u8 data[6] = {}; /* flags/scope, reserved (RIID) */ memcpy(data, &ipaddr->s6_addr[1], 2); /* group ID */ - memcpy(&data[1], &ipaddr->s6_addr[11], 4); + memcpy(&data[2], &ipaddr->s6_addr[12], 4); lowpan_push_hc_data(hc_ptr, data, 6); return LOWPAN_IPHC_DAM_00; diff --git a/net/802/garp.c b/net/802/garp.c index 6f563b6797d9..c7a39f298ad6 100644 --- a/net/802/garp.c +++ b/net/802/garp.c @@ -453,7 +453,7 @@ static int garp_pdu_parse_attr(struct garp_applicant *app, struct sk_buff *skb, if (!pskb_may_pull(skb, ga->len)) return -1; skb_pull(skb, ga->len); - dlen = sizeof(*ga) - ga->len; + dlen = ga->len - sizeof(*ga); if (attrtype > app->app->maxattr) return 0; diff --git a/net/802/mrp.c b/net/802/mrp.c index ff0e80574e6b..160a3b14569c 100644 --- a/net/802/mrp.c +++ b/net/802/mrp.c @@ -703,6 +703,12 @@ static int mrp_pdu_parse_vecattr(struct mrp_applicant *app, valen = be16_to_cpu(get_unaligned(&mrp_cb(skb)->vah->lenflags) & MRP_VECATTR_HDR_LEN_MASK); + /* If valen is 0, only a LeaveAllEvent is present; FirstValue and + * Vector fields are absent per IEEE 802.1ak. + */ + if (valen == 0) + return 0; + /* The VectorAttribute structure in a PDU carries event information * about one or more attributes having consecutive values. Only the * value for the first attribute is contained in the structure. So @@ -753,6 +759,9 @@ static int mrp_pdu_parse_vecattr(struct mrp_applicant *app, vaevents %= __MRP_VECATTR_EVENT_MAX; vaevent = vaevents; mrp_pdu_parse_vecattr_event(app, skb, vaevent); + valen--; + mrp_attrvalue_inc(mrp_cb(skb)->attrvalue, + mrp_cb(skb)->mh->attrlen); } return 0; } diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index 30493ea3c010..078fb7a6efa5 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -393,7 +393,7 @@ static void aarp_purge(void) */ static struct aarp_entry *aarp_alloc(void) { - struct aarp_entry *a = kmalloc_obj(*a, GFP_ATOMIC); + struct aarp_entry *a = kzalloc_obj(*a, GFP_ATOMIC); if (!a) return NULL; diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index 0de5df690bd0..5c5f53ff30e8 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -206,14 +206,11 @@ static int bnep_ctrl_set_mcfilter(struct bnep_session *s, u8 *data, int len) return 0; } -static int bnep_rx_control(struct bnep_session *s, void *data, int len) +static int bnep_rx_control_cmd(struct bnep_session *s, u8 cmd, void *data, + int len) { - u8 cmd = *(u8 *)data; int err = 0; - data++; - len--; - switch (cmd) { case BNEP_CMD_NOT_UNDERSTOOD: case BNEP_SETUP_CONN_RSP: @@ -254,6 +251,14 @@ static int bnep_rx_control(struct bnep_session *s, void *data, int len) return err; } +static int bnep_rx_control(struct bnep_session *s, void *data, int len) +{ + if (len < 1) + return -EILSEQ; + + return bnep_rx_control_cmd(s, *(u8 *)data, data + 1, len - 1); +} + static int bnep_rx_extension(struct bnep_session *s, struct sk_buff *skb) { struct bnep_ext_hdr *h; @@ -299,19 +304,26 @@ static int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb) { struct net_device *dev = s->dev; struct sk_buff *nskb; + u8 *data; u8 type, ctrl_type; dev->stats.rx_bytes += skb->len; - type = *(u8 *) skb->data; - skb_pull(skb, 1); - ctrl_type = *(u8 *)skb->data; + data = skb_pull_data(skb, sizeof(type)); + if (!data) + goto badframe; + type = *data; if ((type & BNEP_TYPE_MASK) >= sizeof(__bnep_rx_hlen)) goto badframe; if ((type & BNEP_TYPE_MASK) == BNEP_CONTROL) { - if (bnep_rx_control(s, skb->data, skb->len) < 0) { + data = skb_pull_data(skb, sizeof(ctrl_type)); + if (!data) + goto badframe; + ctrl_type = *data; + + if (bnep_rx_control_cmd(s, ctrl_type, skb->data, skb->len) < 0) { dev->stats.tx_errors++; kfree_skb(skb); return 0; @@ -324,24 +336,27 @@ static int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb) /* Verify and pull ctrl message since it's already processed */ switch (ctrl_type) { - case BNEP_SETUP_CONN_REQ: - /* Pull: ctrl type (1 b), len (1 b), data (len bytes) */ - if (!skb_pull(skb, 2 + *(u8 *)(skb->data + 1) * 2)) + case BNEP_SETUP_CONN_REQ: { + u8 uuid_size; + + /* Pull uuid_size and the dst/src service UUIDs. */ + data = skb_pull_data(skb, sizeof(uuid_size)); + if (!data) + goto badframe; + uuid_size = *data; + if (!skb_pull(skb, uuid_size + uuid_size)) goto badframe; break; + } case BNEP_FILTER_MULTI_ADDR_SET: - case BNEP_FILTER_NET_TYPE_SET: { - u8 *hdr; - - /* Pull ctrl type (1 b) + len (2 b) */ - hdr = skb_pull_data(skb, 3); - if (!hdr) + case BNEP_FILTER_NET_TYPE_SET: + /* Pull: len (2 b), data (len bytes) */ + data = skb_pull_data(skb, sizeof(u16)); + if (!data) goto badframe; - /* Pull data (len bytes); length is big-endian */ - if (!skb_pull(skb, get_unaligned_be16(&hdr[1]))) + if (!skb_pull(skb, get_unaligned_be16(data))) goto badframe; break; - } default: kfree_skb(skb); return 0; diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index aeccd8084cba..df23245d6ccd 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -1725,6 +1725,11 @@ static int hci_adv_bcast_annoucement(struct hci_dev *hdev, struct adv_info *adv) /* Generate Broadcast ID */ get_random_bytes(bid, sizeof(bid)); len = eir_append_service_data(ad, 0, 0x1852, bid, sizeof(bid)); + if (adv->adv_data_len > sizeof(ad) - len) { + bt_dev_err(hdev, "No room for Broadcast Announcement"); + return -EINVAL; + } + memcpy(ad + len, adv->adv_data, adv->adv_data_len); hci_set_adv_instance_data(hdev, adv->instance, len + adv->adv_data_len, ad, 0, NULL); diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 041ce9adc378..8957ce7c21b7 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -83,10 +83,12 @@ static void bt_host_release(struct device *dev) { struct hci_dev *hdev = to_hci_dev(dev); - if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) + if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) { hci_release_dev(hdev); - else + } else { + cleanup_srcu_struct(&hdev->srcu); kfree(hdev); + } module_put(THIS_MODULE); } diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index 876649556d3c..3abd8111dda8 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -337,12 +337,20 @@ static int iso_connect_bis(struct sock *sk) struct iso_conn *conn; struct hci_conn *hcon; struct hci_dev *hdev; + bdaddr_t src, dst; + u8 src_type, bc_sid; int err; - BT_DBG("%pMR (SID 0x%2.2x)", &iso_pi(sk)->src, iso_pi(sk)->bc_sid); + lock_sock(sk); + bacpy(&src, &iso_pi(sk)->src); + bacpy(&dst, &iso_pi(sk)->dst); + src_type = iso_pi(sk)->src_type; + bc_sid = iso_pi(sk)->bc_sid; + release_sock(sk); + + BT_DBG("%pMR (SID 0x%2.2x)", &src, bc_sid); - hdev = hci_get_route(&iso_pi(sk)->dst, &iso_pi(sk)->src, - iso_pi(sk)->src_type); + hdev = hci_get_route(&dst, &src, src_type); if (!hdev) return -EHOSTUNREACH; @@ -430,12 +438,19 @@ static int iso_connect_cis(struct sock *sk) struct iso_conn *conn; struct hci_conn *hcon; struct hci_dev *hdev; + bdaddr_t src, dst; + u8 src_type; int err; - BT_DBG("%pMR -> %pMR", &iso_pi(sk)->src, &iso_pi(sk)->dst); + lock_sock(sk); + bacpy(&src, &iso_pi(sk)->src); + bacpy(&dst, &iso_pi(sk)->dst); + src_type = iso_pi(sk)->src_type; + release_sock(sk); - hdev = hci_get_route(&iso_pi(sk)->dst, &iso_pi(sk)->src, - iso_pi(sk)->src_type); + BT_DBG("%pMR -> %pMR", &src, &dst); + + hdev = hci_get_route(&dst, &src, src_type); if (!hdev) return -EHOSTUNREACH; @@ -1082,7 +1097,7 @@ static int iso_sock_rebind_bc(struct sock *sk, struct sockaddr_iso *sa, * ordering. */ release_sock(sk); - hci_dev_lock(bis->hdev); + hci_dev_lock(hdev); lock_sock(sk); if (!iso_pi(sk)->conn || iso_pi(sk)->conn->hcon != bis) { @@ -1212,18 +1227,25 @@ static int iso_sock_connect(struct socket *sock, struct sockaddr_unsized *addr, static int iso_listen_bis(struct sock *sk) { - struct hci_dev *hdev; - int err = 0; struct iso_conn *conn; struct hci_conn *hcon; + struct hci_dev *hdev; + bdaddr_t src, dst; + u8 src_type, bc_sid; + int err = 0; + + lock_sock(sk); + bacpy(&src, &iso_pi(sk)->src); + bacpy(&dst, &iso_pi(sk)->dst); + src_type = iso_pi(sk)->src_type; + bc_sid = iso_pi(sk)->bc_sid; + release_sock(sk); - BT_DBG("%pMR -> %pMR (SID 0x%2.2x)", &iso_pi(sk)->src, - &iso_pi(sk)->dst, iso_pi(sk)->bc_sid); + BT_DBG("%pMR -> %pMR (SID 0x%2.2x)", &src, &dst, bc_sid); write_lock(&iso_sk_list.lock); - if (__iso_get_sock_listen_by_sid(&iso_pi(sk)->src, &iso_pi(sk)->dst, - iso_pi(sk)->bc_sid)) + if (__iso_get_sock_listen_by_sid(&src, &dst, bc_sid)) err = -EADDRINUSE; write_unlock(&iso_sk_list.lock); @@ -1231,8 +1253,7 @@ static int iso_listen_bis(struct sock *sk) if (err) return err; - hdev = hci_get_route(&iso_pi(sk)->dst, &iso_pi(sk)->src, - iso_pi(sk)->src_type); + hdev = hci_get_route(&dst, &src, src_type); if (!hdev) return -EHOSTUNREACH; @@ -1568,9 +1589,16 @@ static void iso_conn_big_sync(struct sock *sk) { int err; struct hci_dev *hdev; + bdaddr_t src, dst; + u8 src_type; - hdev = hci_get_route(&iso_pi(sk)->dst, &iso_pi(sk)->src, - iso_pi(sk)->src_type); + lock_sock(sk); + bacpy(&src, &iso_pi(sk)->src); + bacpy(&dst, &iso_pi(sk)->dst); + src_type = iso_pi(sk)->src_type; + release_sock(sk); + + hdev = hci_get_route(&dst, &src, src_type); if (!hdev) return; @@ -1595,6 +1623,7 @@ static void iso_conn_big_sync(struct sock *sk) release_sock(sk); hci_dev_unlock(hdev); + hci_dev_put(hdev); } static int iso_sock_recvmsg(struct socket *sock, struct msghdr *msg, diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 45b175399e8d..c4ccfbda9d78 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -5643,6 +5643,15 @@ static inline void l2cap_sig_send_rej(struct l2cap_conn *conn, u16 ident) l2cap_send_cmd(conn, ident, L2CAP_COMMAND_REJ, sizeof(rej), &rej); } +static inline void l2cap_sig_send_mtu_rej(struct l2cap_conn *conn, u8 ident) +{ + struct l2cap_cmd_rej_mtu rej; + + rej.reason = cpu_to_le16(L2CAP_REJ_MTU_EXCEEDED); + rej.max_mtu = cpu_to_le16(L2CAP_SIG_MTU); + l2cap_send_cmd(conn, ident, L2CAP_COMMAND_REJ, sizeof(rej), &rej); +} + static inline void l2cap_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb) { @@ -5655,6 +5664,43 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn, if (hcon->type != ACL_LINK) goto drop; + /* + * Bluetooth Core v5.4, Vol 3, Part A, Section 4: the BR/EDR + * signaling channel has a fixed signaling MTU (MTUsig) whose + * minimum and default is 48 octets. Section 4.1 says that on + * an MTUExceeded command reject the identifier "shall match + * the first request command in the L2CAP packet" and that + * packets containing only response commands "shall be + * silently discarded". + * + * Linux intentionally deviates from that prescription: + * + * 1. Silently discarding desynchronizes the peer. The + * remote stack never learns its responses were dropped, + * so any state machine waiting on a paired response + * stalls until its own timer fires. + * + * 2. Locating "the first request command" requires walking + * command headers past MTUsig, i.e. processing bytes + * from a packet we have already decided is too large to + * process. + * + * Reject every over-MTUsig signaling packet with one + * L2CAP_REJ_MTU_EXCEEDED command reject. The reject's + * reason field is what tells the peer that the whole packet + * was discarded; the identifier value is informational, so + * we use the identifier from the first command header, a + * single fixed-offset byte read. + */ + if (skb->len > L2CAP_SIG_MTU) { + u8 ident = skb->data[1]; + + BT_DBG("signaling packet exceeds MTU: %u > %u", + skb->len, L2CAP_SIG_MTU); + l2cap_sig_send_mtu_rej(conn, ident); + goto drop; + } + while (skb->len >= L2CAP_CMD_HDR_SIZE) { u16 len; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index de5bd6b637b2..f4aa814a0397 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -8638,6 +8638,12 @@ static bool tlv_data_is_valid(struct hci_dev *hdev, u32 adv_flags, u8 *data, if (!cur_len) continue; + /* If the current field length would exceed the total data + * length, then it's invalid. + */ + if (i + cur_len >= len) + return false; + if (data[i + 1] == EIR_FLAGS && (!is_adv_data || flags_managed(adv_flags))) return false; @@ -8654,12 +8660,6 @@ static bool tlv_data_is_valid(struct hci_dev *hdev, u32 adv_flags, u8 *data, if (data[i + 1] == EIR_APPEARANCE && appearance_managed(adv_flags)) return false; - - /* If the current field length would exceed the total data - * length, then it's invalid. - */ - if (i + cur_len >= len) - return false; } return true; @@ -9114,8 +9114,9 @@ static int add_ext_adv_data(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG("%s", hdev->name); - expected_len = struct_size(cp, data, cp->adv_data_len + cp->scan_rsp_len); - if (expected_len != data_len) + expected_len = struct_size(cp, data, cp->adv_data_len + + cp->scan_rsp_len); + if (expected_len > data_len) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_EXT_ADV_DATA, MGMT_STATUS_INVALID_PARAMS); diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index d11bd5337d57..364b9381c2dc 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -1431,10 +1431,15 @@ static int rfcomm_apply_pn(struct rfcomm_dlc *d, int cr, struct rfcomm_pn *pn) static int rfcomm_recv_pn(struct rfcomm_session *s, int cr, struct sk_buff *skb) { - struct rfcomm_pn *pn = (void *) skb->data; + struct rfcomm_pn *pn; struct rfcomm_dlc *d; - u8 dlci = pn->dlci; + u8 dlci; + + pn = skb_pull_data(skb, sizeof(*pn)); + if (!pn) + return -EILSEQ; + dlci = pn->dlci; BT_DBG("session %p state %ld dlci %d", s, s->state, dlci); if (!dlci) @@ -1483,8 +1488,8 @@ static int rfcomm_recv_pn(struct rfcomm_session *s, int cr, struct sk_buff *skb) static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_buff *skb) { - struct rfcomm_rpn *rpn = (void *) skb->data; - u8 dlci = __get_dlci(rpn->dlci); + struct rfcomm_rpn *rpn; + u8 dlci; u8 bit_rate = 0; u8 data_bits = 0; @@ -1495,15 +1500,16 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_ u8 xoff_char = 0; u16 rpn_mask = RFCOMM_RPN_PM_ALL; - BT_DBG("dlci %d cr %d len 0x%x bitr 0x%x line 0x%x flow 0x%x xonc 0x%x xoffc 0x%x pm 0x%x", - dlci, cr, len, rpn->bit_rate, rpn->line_settings, rpn->flow_ctrl, - rpn->xon_char, rpn->xoff_char, rpn->param_mask); + if (len == 1) { + rpn = skb_pull_data(skb, 1); + if (!rpn) + return -EILSEQ; - if (!cr) - return 0; + dlci = __get_dlci(rpn->dlci); + + if (!cr) + return 0; - if (len == 1) { - /* This is a request, return default (according to ETSI TS 07.10) settings */ bit_rate = RFCOMM_RPN_BR_9600; data_bits = RFCOMM_RPN_DATA_8; stop_bits = RFCOMM_RPN_STOP_1; @@ -1514,6 +1520,19 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_ goto rpn_out; } + rpn = skb_pull_data(skb, sizeof(*rpn)); + if (!rpn) + return -EILSEQ; + + dlci = __get_dlci(rpn->dlci); + + BT_DBG("dlci %d cr %d len 0x%x bitr 0x%x line 0x%x flow 0x%x xonc 0x%x xoffc 0x%x pm 0x%x", + dlci, cr, len, rpn->bit_rate, rpn->line_settings, rpn->flow_ctrl, + rpn->xon_char, rpn->xoff_char, rpn->param_mask); + + if (!cr) + return 0; + /* Check for sane values, ignore/accept bit_rate, 8 bits, 1 stop bit, * no parity, no flow control lines, normal XON/XOFF chars */ @@ -1589,9 +1608,14 @@ rpn_out: static int rfcomm_recv_rls(struct rfcomm_session *s, int cr, struct sk_buff *skb) { - struct rfcomm_rls *rls = (void *) skb->data; - u8 dlci = __get_dlci(rls->dlci); + struct rfcomm_rls *rls; + u8 dlci; + rls = skb_pull_data(skb, sizeof(*rls)); + if (!rls) + return -EILSEQ; + + dlci = __get_dlci(rls->dlci); BT_DBG("dlci %d cr %d status 0x%x", dlci, cr, rls->status); if (!cr) @@ -1608,10 +1632,15 @@ static int rfcomm_recv_rls(struct rfcomm_session *s, int cr, struct sk_buff *skb static int rfcomm_recv_msc(struct rfcomm_session *s, int cr, struct sk_buff *skb) { - struct rfcomm_msc *msc = (void *) skb->data; + struct rfcomm_msc *msc; struct rfcomm_dlc *d; - u8 dlci = __get_dlci(msc->dlci); + u8 dlci; + + msc = skb_pull_data(skb, sizeof(*msc)); + if (!msc) + return -EILSEQ; + dlci = __get_dlci(msc->dlci); BT_DBG("dlci %d cr %d v24 0x%x", dlci, cr, msc->v24_sig); d = rfcomm_dlc_get(s, dlci); @@ -1644,17 +1673,19 @@ static int rfcomm_recv_msc(struct rfcomm_session *s, int cr, struct sk_buff *skb static int rfcomm_recv_mcc(struct rfcomm_session *s, struct sk_buff *skb) { - struct rfcomm_mcc *mcc = (void *) skb->data; + struct rfcomm_mcc *mcc; u8 type, cr, len; + mcc = skb_pull_data(skb, sizeof(*mcc)); + if (!mcc) + return -EILSEQ; + cr = __test_cr(mcc->type); type = __get_mcc_type(mcc->type); len = __get_mcc_len(mcc->len); BT_DBG("%p type 0x%x cr %d", s, type, cr); - skb_pull(skb, 2); - switch (type) { case RFCOMM_PN: rfcomm_recv_pn(s, cr, skb); diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index bd7d959c6e9e..805ed5d28ed6 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -122,7 +122,7 @@ static struct sock *__rfcomm_get_listen_sock_by_addr(u8 channel, bdaddr_t *src) } /* Find socket with channel and source bdaddr. - * Returns closest match. + * Returns closest match with an extra reference held. */ static struct sock *rfcomm_get_sock_by_channel(int state, u8 channel, bdaddr_t *src) { @@ -136,15 +136,25 @@ static struct sock *rfcomm_get_sock_by_channel(int state, u8 channel, bdaddr_t * if (rfcomm_pi(sk)->channel == channel) { /* Exact match. */ - if (!bacmp(&rfcomm_pi(sk)->src, src)) + if (!bacmp(&rfcomm_pi(sk)->src, src)) { + sock_hold(sk); break; + } /* Closest match */ - if (!bacmp(&rfcomm_pi(sk)->src, BDADDR_ANY)) + if (!bacmp(&rfcomm_pi(sk)->src, BDADDR_ANY)) { + if (sk1) + sock_put(sk1); + sk1 = sk; + sock_hold(sk1); + } } } + if (sk && sk1) + sock_put(sk1); + read_unlock(&rfcomm_sk_list.lock); return sk ? sk : sk1; @@ -941,6 +951,7 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc * { struct sock *sk, *parent; bdaddr_t src, dst; + bool defer_setup = false; int result = 0; BT_DBG("session %p channel %d", s, channel); @@ -954,6 +965,11 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc * lock_sock(parent); + if (parent->sk_state != BT_LISTEN) + goto done; + + defer_setup = test_bit(BT_SK_DEFER_SETUP, &bt_sk(parent)->flags); + /* Check for backlog size */ if (sk_acceptq_is_full(parent)) { BT_DBG("backlog full %d", parent->sk_ack_backlog); @@ -981,9 +997,11 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc * done: release_sock(parent); - if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(parent)->flags)) + if (defer_setup) parent->sk_state_change(parent); + sock_put(parent); + return result; } diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index f1799c6a6f87..140869e5b2df 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -312,11 +312,21 @@ static int sco_connect(struct sock *sk) struct sco_conn *conn; struct hci_conn *hcon; struct hci_dev *hdev; + bdaddr_t src, dst; + struct bt_codec codec; + __u16 setting; int err, type; - BT_DBG("%pMR -> %pMR", &sco_pi(sk)->src, &sco_pi(sk)->dst); + lock_sock(sk); + bacpy(&src, &sco_pi(sk)->src); + bacpy(&dst, &sco_pi(sk)->dst); + setting = sco_pi(sk)->setting; + codec = sco_pi(sk)->codec; + release_sock(sk); + + BT_DBG("%pMR -> %pMR", &src, &dst); - hdev = hci_get_route(&sco_pi(sk)->dst, &sco_pi(sk)->src, BDADDR_BREDR); + hdev = hci_get_route(&dst, &src, BDADDR_BREDR); if (!hdev) return -EHOSTUNREACH; @@ -327,7 +337,7 @@ static int sco_connect(struct sock *sk) else type = SCO_LINK; - switch (sco_pi(sk)->setting & SCO_AIRMODE_MASK) { + switch (setting & SCO_AIRMODE_MASK) { case SCO_AIRMODE_TRANSP: if (!lmp_transp_capable(hdev) || !lmp_esco_capable(hdev)) { err = -EOPNOTSUPP; @@ -336,8 +346,8 @@ static int sco_connect(struct sock *sk) break; } - hcon = hci_connect_sco(hdev, type, &sco_pi(sk)->dst, - sco_pi(sk)->setting, &sco_pi(sk)->codec, + hcon = hci_connect_sco(hdev, type, &dst, + setting, &codec, READ_ONCE(sk->sk_sndtimeo)); if (IS_ERR(hcon)) { err = PTR_ERR(hcon); diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c index 7dfbcdfc30e5..c9e229af0366 100644 --- a/net/bridge/netfilter/ebt_snat.c +++ b/net/bridge/netfilter/ebt_snat.c @@ -31,6 +31,9 @@ ebt_snat_tg(struct sk_buff *skb, const struct xt_action_param *par) const struct arphdr *ap; struct arphdr _ah; + if (skb_ensure_writable(skb, sizeof(_ah) + ETH_ALEN)) + return EBT_DROP; + ap = skb_header_pointer(skb, 0, sizeof(_ah), &_ah); if (ap == NULL) return EBT_DROP; diff --git a/net/core/sock.c b/net/core/sock.c index b37b664b6eb9..d097025c116a 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2676,8 +2676,12 @@ void sock_wfree(struct sk_buff *skb) int old; if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) { + void (*sk_write_space)(struct sock *sk); + + sk_write_space = READ_ONCE(sk->sk_write_space); + if (sock_flag(sk, SOCK_RCU_FREE) && - sk->sk_write_space == sock_def_write_space) { + sk_write_space == sock_def_write_space) { rcu_read_lock(); free = __refcount_sub_and_test(len, &sk->sk_wmem_alloc, &old); @@ -2693,7 +2697,7 @@ void sock_wfree(struct sk_buff *skb) * after sk_write_space() call */ WARN_ON(refcount_sub_and_test(len - 1, &sk->sk_wmem_alloc)); - sk->sk_write_space(sk); + sk_write_space(sk); len = 1; } /* diff --git a/net/devlink/core.c b/net/devlink/core.c index eeb6a71f5f56..fe9f6a0a67d5 100644 --- a/net/devlink/core.c +++ b/net/devlink/core.c @@ -518,6 +518,8 @@ void devlink_free(struct devlink *devlink) { ASSERT_DEVLINK_NOT_REGISTERED(devlink); + devlink_rel_put(devlink); + WARN_ON(!list_empty(&devlink->trap_policer_list)); WARN_ON(!list_empty(&devlink->trap_group_list)); WARN_ON(!list_empty(&devlink->trap_list)); diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index b514e43766ef..a28dfd8490c5 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -35,10 +35,8 @@ bool hsr_addr_is_self(struct hsr_priv *hsr, unsigned char *addr) rcu_read_lock(); sn = rcu_dereference(hsr->self_node); - if (!sn) { - WARN_ONCE(1, "HSR: No self node\n"); + if (!sn) goto out; - } if (ether_addr_equal(addr, sn->macaddress_A) || ether_addr_equal(addr, sn->macaddress_B)) diff --git a/net/ieee802154/6lowpan/tx.c b/net/ieee802154/6lowpan/tx.c index 0c07662b44c0..4df76ff50699 100644 --- a/net/ieee802154/6lowpan/tx.c +++ b/net/ieee802154/6lowpan/tx.c @@ -255,6 +255,11 @@ netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *ldev) pr_debug("package xmit\n"); + if (skb->protocol != htons(ETH_P_IPV6)) { + kfree_skb(skb); + return NET_XMIT_DROP; + } + WARN_ON_ONCE(skb->len > IPV6_MIN_MTU); /* We must take a copy of the skb before we modify/replace the ipv6 diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index dbcd37dfdc15..5b934ce8d98a 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -1148,6 +1148,9 @@ static bool reqsk_queue_hash_req(struct request_sock *req) /* The timer needs to be setup after a successful insertion. */ req->timeout = tcp_timeout_init((struct sock *)req); timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED); + + preempt_disable_nested(); + mod_timer(&req->rsk_timer, jiffies + req->timeout); /* before letting lookups find us, make sure all req fields @@ -1155,6 +1158,9 @@ static bool reqsk_queue_hash_req(struct request_sock *req) */ smp_wmb(); refcount_set(&req->rsk_refcnt, 2 + 1); + + preempt_enable_nested(); + return true; } diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index be8815ce3ac2..09d745112c15 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -530,6 +530,10 @@ int ip_options_get(struct net *net, struct ip_options_rcu **optp, kfree(opt); return -EINVAL; } + if (opt->opt.srr && !ns_capable(net->user_ns, CAP_NET_RAW)) { + kfree(opt); + return -EPERM; + } kfree(*optp); *optp = opt; return 0; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 0ac2bf4f8759..70f6cbd4ef73 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2011,6 +2011,14 @@ try_again: } WARN_ON_ONCE(!skb_set_owner_sk_safe(skb, sk)); + + /* + * skb->dev still aliases the UDP rx dev_scratch (its charge was freed + * on dequeue above); a sockmap verdict program may deref it via + * bpf_sk_lookup_*(), so clear it -> bpf_skc_lookup() uses skb->sk + */ + skb->dev = NULL; + return recv_actor(sk, skb); } diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 67a42e01dfc3..be6dac8a8566 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -243,16 +243,16 @@ static void ipv6_add_acaddr_hash(struct net *net, struct ifacaddr6 *aca) { unsigned int hash = inet6_acaddr_hash(net, &aca->aca_addr); - spin_lock(&acaddr_hash_lock); + spin_lock_bh(&acaddr_hash_lock); hlist_add_head_rcu(&aca->aca_addr_lst, &inet6_acaddr_lst[hash]); - spin_unlock(&acaddr_hash_lock); + spin_unlock_bh(&acaddr_hash_lock); } static void ipv6_del_acaddr_hash(struct ifacaddr6 *aca) { - spin_lock(&acaddr_hash_lock); + spin_lock_bh(&acaddr_hash_lock); hlist_del_init_rcu(&aca->aca_addr_lst); - spin_unlock(&acaddr_hash_lock); + spin_unlock_bh(&acaddr_hash_lock); } static void aca_get(struct ifacaddr6 *aca) @@ -371,10 +371,10 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr) aca->aca_next = idev->ac_list; rcu_assign_pointer(idev->ac_list, aca); - write_unlock_bh(&idev->lock); - ipv6_add_acaddr_hash(net, aca); + write_unlock_bh(&idev->lock); + ip6_ins_rt(net, f6i); addrconf_join_solict(idev->dev, &aca->aca_addr); @@ -649,8 +649,8 @@ void ipv6_anycast_cleanup(void) { int i; - spin_lock(&acaddr_hash_lock); + spin_lock_bh(&acaddr_hash_lock); for (i = 0; i < IN6_ADDR_HSIZE; i++) WARN_ON(!hlist_empty(&inet6_acaddr_lst[i])); - spin_unlock(&acaddr_hash_lock); + spin_unlock_bh(&acaddr_hash_lock); } diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 3330adcf26db..d9b855d5191b 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1424,9 +1424,9 @@ out: static void __mld_query_work(struct sk_buff *skb) { struct mld2_query *mlh2 = NULL; - const struct in6_addr *group; unsigned long max_delay; struct inet6_dev *idev; + struct in6_addr group; struct ifmcaddr6 *ma; struct mld_msg *mld; int group_type; @@ -1458,8 +1458,8 @@ static void __mld_query_work(struct sk_buff *skb) goto kfree_skb; mld = (struct mld_msg *)icmp6_hdr(skb); - group = &mld->mld_mca; - group_type = ipv6_addr_type(group); + group = mld->mld_mca; + group_type = ipv6_addr_type(&group); if (group_type != IPV6_ADDR_ANY && !(group_type&IPV6_ADDR_MULTICAST)) @@ -1509,7 +1509,7 @@ static void __mld_query_work(struct sk_buff *skb) } } else { for_each_mc_mclock(idev, ma) { - if (!ipv6_addr_equal(group, &ma->mca_addr)) + if (!ipv6_addr_equal(&group, &ma->mca_addr)) continue; if (ma->mca_flags & MAF_TIMER_RUNNING) { /* gsquery <- gsquery && mark */ diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index c0a0075e2590..2dbe44715df3 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -191,6 +191,9 @@ static bool nft_fib6_info_nh_uses_dev(struct fib6_info *rt, if (nft_fib6_info_nh_dev_match(nh_dev, dev)) return true; + + if (!READ_ONCE(rt->fib6_nsiblings)) + return false; } return false; diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 99d6582f41de..e0b1915be1a6 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1045,64 +1045,76 @@ static int pppol2tp_ioctl(struct socket *sock, unsigned int cmd, { struct pppol2tp_ioc_stats stats; struct l2tp_session *session; + int err = 0; + + session = pppol2tp_sock_to_session(sock->sk); + /* Validate session presence and magic integrity ONLY for commands + * that belong to L2TP and require a valid session. + */ switch (cmd) { case PPPIOCGMRU: case PPPIOCGFLAGS: - session = sock->sk->sk_user_data; + case PPPIOCSMRU: + case PPPIOCSFLAGS: + case PPPIOCGL2TPSTATS: if (!session) return -ENOTCONN; - if (WARN_ON(session->magic != L2TP_SESSION_MAGIC)) + if (session->magic != L2TP_SESSION_MAGIC) { + l2tp_session_put(session); return -EBADF; + } + break; + default: + break; + } + switch (cmd) { + case PPPIOCGMRU: + case PPPIOCGFLAGS: /* Not defined for tunnels */ - if (!session->session_id && !session->peer_session_id) - return -ENOSYS; + if (!session->session_id && !session->peer_session_id) { + err = -ENOSYS; + break; + } - if (put_user(0, (int __user *)arg)) - return -EFAULT; + if (put_user(0, (int __user *)arg)) { + err = -EFAULT; + break; + } break; case PPPIOCSMRU: case PPPIOCSFLAGS: - session = sock->sk->sk_user_data; - if (!session) - return -ENOTCONN; - - if (WARN_ON(session->magic != L2TP_SESSION_MAGIC)) - return -EBADF; - /* Not defined for tunnels */ - if (!session->session_id && !session->peer_session_id) - return -ENOSYS; + if (!session->session_id && !session->peer_session_id) { + err = -ENOSYS; + break; + } - if (!access_ok((int __user *)arg, sizeof(int))) - return -EFAULT; + if (!access_ok((int __user *)arg, sizeof(int))) { + err = -EFAULT; + break; + } break; case PPPIOCGL2TPSTATS: - session = sock->sk->sk_user_data; - if (!session) - return -ENOTCONN; - - if (WARN_ON(session->magic != L2TP_SESSION_MAGIC)) - return -EBADF; - /* Session 0 represents the parent tunnel */ if (!session->session_id && !session->peer_session_id) { u32 session_id; - int err; if (copy_from_user(&stats, (void __user *)arg, - sizeof(stats))) - return -EFAULT; + sizeof(stats))) { + err = -EFAULT; + break; + } session_id = stats.session_id; err = pppol2tp_tunnel_copy_stats(&stats, session->tunnel); if (err < 0) - return err; + break; stats.session_id = session_id; } else { @@ -1112,15 +1124,21 @@ static int pppol2tp_ioctl(struct socket *sock, unsigned int cmd, stats.tunnel_id = session->tunnel->tunnel_id; stats.using_ipsec = l2tp_tunnel_uses_xfrm(session->tunnel); - if (copy_to_user((void __user *)arg, &stats, sizeof(stats))) - return -EFAULT; + if (copy_to_user((void __user *)arg, &stats, sizeof(stats))) { + err = -EFAULT; + break; + } break; default: - return -ENOIOCTLCMD; + err = -ENOIOCTLCMD; + break; } - return 0; + if (session) + l2tp_session_put(session); + + return err; } /***************************************************************************** diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index b487d2330f25..ea7f63e1fc17 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2181,7 +2181,9 @@ bool ieee80211_parse_tx_radiotap(struct sk_buff *skb, case IEEE80211_RADIOTAP_ANTENNA: /* this can appear multiple times, keep a bitmap */ - info->control.antennas |= BIT(*iterator.this_arg); + /* control.antennas is only a 2-bit bitmap */ + if (*iterator.this_arg < 2) + info->control.antennas |= BIT(*iterator.this_arg); break; case IEEE80211_RADIOTAP_DATA_RETRIES: diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 8a1c5698983c..b3ea7854818f 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -566,12 +566,17 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct mptcp_sock *msk = mptcp_sk(subflow->conn); + struct tcp_sock *tp = tcp_sk(sk); unsigned int dss_size = 0; struct mptcp_ext *mpext; unsigned int ack_size; bool ret = false; - u64 ack_seq; + /* Zero `use_ack` and `use_map` flags with one shot. */ + BUILD_BUG_ON(sizeof_field(struct mptcp_ext, flags) != sizeof(u16)); + BUILD_BUG_ON(!IS_ALIGNED(offsetof(struct mptcp_ext, flags), + sizeof(u16))); + *(u16 *)&opts->ext_copy.flags = 0; opts->csum_reqd = READ_ONCE(msk->csum_enabled); mpext = skb ? mptcp_get_ext(skb) : NULL; @@ -595,20 +600,16 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, /* passive sockets msk will set the 'can_ack' after accept(), even * if the first subflow may have the already the remote key handy */ - opts->ext_copy.use_ack = 0; if (!READ_ONCE(msk->can_ack)) { *size = ALIGN(dss_size, 4); return ret; } - ack_seq = READ_ONCE(msk->ack_seq); if (READ_ONCE(msk->use_64bit_ack)) { ack_size = TCPOLEN_MPTCP_DSS_ACK64; - opts->ext_copy.data_ack = ack_seq; opts->ext_copy.ack64 = 1; } else { ack_size = TCPOLEN_MPTCP_DSS_ACK32; - opts->ext_copy.data_ack32 = (uint32_t)ack_seq; opts->ext_copy.ack64 = 0; } opts->ext_copy.use_ack = 1; @@ -618,6 +619,12 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, if (dss_size == 0) ack_size += TCPOLEN_MPTCP_DSS_BASE; + /* The caller is __tcp_transmit_skb(), and will compute the new rcv + * wnd soon: ensure that the window can shrink. + */ + if (skb) + tp->rcv_wnd = tp->rcv_nxt - tp->rcv_wup; + dss_size += ack_size; *size = ALIGN(dss_size, 4); @@ -658,7 +665,6 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff * { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct mptcp_sock *msk = mptcp_sk(subflow->conn); - bool drop_other_suboptions = false; unsigned int opt_size = *size; struct mptcp_addr_info addr; bool echo; @@ -669,36 +675,20 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff * */ if (!mptcp_pm_should_add_signal(msk) || (opts->suboptions & (OPTION_MPTCP_MPJ_ACK | OPTION_MPTCP_MPC_ACK)) || - !mptcp_pm_add_addr_signal(msk, skb, opt_size, remaining, &addr, - &echo, &drop_other_suboptions)) + !skb || !skb_is_tcp_pure_ack(skb) || + !mptcp_pm_add_addr_signal(msk, opt_size, remaining, &addr, &echo)) return false; - /* - * Later on, mptcp_write_options() will enforce mutually exclusion with - * DSS, bail out if such option is set and we can't drop it. - */ - if (drop_other_suboptions) - remaining += opt_size; - else if (opts->suboptions & OPTION_MPTCP_DSS) - return false; + remaining += opt_size; len = mptcp_add_addr_len(addr.family, echo, !!addr.port); if (remaining < len) return false; *size = len; - if (drop_other_suboptions) { - pr_debug("drop other suboptions\n"); - opts->suboptions = 0; - - /* note that e.g. DSS could have written into the memory - * aliased by ahmac, we must reset the field here - * to avoid appending the hmac even for ADD_ADDR echo - * options - */ - opts->ahmac = 0; - *size -= opt_size; - } + pr_debug("drop other suboptions\n"); + opts->suboptions = 0; + *size -= opt_size; opts->addr = addr; opts->suboptions |= OPTION_MPTCP_ADD_ADDR; if (!echo) { @@ -708,6 +698,7 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff * &opts->addr); } else { MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADDTX); + opts->ahmac = 0; } pr_debug("addr_id=%d, ahmac=%llu, echo=%d, port=%d\n", opts->addr.id, opts->ahmac, echo, ntohs(opts->addr.port)); @@ -1297,19 +1288,14 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) return true; } -static void mptcp_set_rwin(struct tcp_sock *tp, struct tcphdr *th) +static u64 mptcp_set_rwin(struct mptcp_sock *msk, struct tcp_sock *tp, + struct tcphdr *th, u64 ack_seq) { const struct sock *ssk = (const struct sock *)tp; - struct mptcp_subflow_context *subflow; - u64 ack_seq, rcv_wnd_old, rcv_wnd_new; - struct mptcp_sock *msk; + u64 rcv_wnd_old, rcv_wnd_new; u32 new_win; u64 win; - subflow = mptcp_subflow_ctx(ssk); - msk = mptcp_sk(subflow->conn); - - ack_seq = READ_ONCE(msk->ack_seq); rcv_wnd_new = ack_seq + tp->rcv_wnd; rcv_wnd_old = atomic64_read(&msk->rcv_wnd_sent); @@ -1362,7 +1348,7 @@ raise_win: update_wspace: WRITE_ONCE(msk->old_wspace, tp->rcv_wnd); - subflow->rcv_wnd_sent = rcv_wnd_new; + return rcv_wnd_new; } static void mptcp_track_rwin(struct tcp_sock *tp) @@ -1474,13 +1460,25 @@ void mptcp_write_options(struct tcphdr *th, __be32 *ptr, struct tcp_sock *tp, *ptr++ = mptcp_option(MPTCPOPT_DSS, len, 0, flags); if (mpext->use_ack) { + struct mptcp_sock *msk; + u64 ack_seq; + + /* DSS option is set only by mptcp_established_options, + * the caller is __tcp_transmit_skb() and ssk is always + * not NULL. + */ + subflow = mptcp_subflow_ctx(ssk); + msk = mptcp_sk(subflow->conn); + ack_seq = READ_ONCE(msk->ack_seq); if (mpext->ack64) { - put_unaligned_be64(mpext->data_ack, ptr); + put_unaligned_be64(ack_seq, ptr); ptr += 2; } else { - put_unaligned_be32(mpext->data_ack32, ptr); + put_unaligned_be32(ack_seq, ptr); ptr += 1; } + subflow->rcv_wnd_sent = mptcp_set_rwin(msk, tp, th, + ack_seq); } if (mpext->use_map) { @@ -1708,9 +1706,6 @@ mp_capable_done: i += 4; } } - - if (tp) - mptcp_set_rwin(tp, th); } __be32 mptcp_get_reset_option(const struct sk_buff *skb) diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 3e770c7407e1..470501470fe5 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -887,10 +887,9 @@ void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq) } } -bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb, - unsigned int opt_size, unsigned int remaining, - struct mptcp_addr_info *addr, bool *echo, - bool *drop_other_suboptions) +bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, unsigned int opt_size, + unsigned int remaining, + struct mptcp_addr_info *addr, bool *echo) { bool skip_add_addr = false; int ret = false; @@ -908,10 +907,7 @@ bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb, * plain dup-ack from TCP perspective. The other MPTCP-relevant info, * if any, will be carried by the 'original' TCP ack */ - if (skb && skb_is_tcp_pure_ack(skb)) { - remaining += opt_size; - *drop_other_suboptions = true; - } + remaining += opt_size; *echo = mptcp_pm_should_add_signal_echo(msk); if (*echo) { @@ -929,9 +925,6 @@ bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb, if (remaining < mptcp_add_addr_len(family, *echo, port)) { struct net *net = sock_net((struct sock *)msk); - if (!*drop_other_suboptions) - goto out_unlock; - if (*echo) { MPTCP_INC_STATS(net, MPTCP_MIB_ECHOADDTXDROP); } else { diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 8cbc1920afb4..0d3a95e676f1 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -408,19 +408,21 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info) local.flags = entry.flags; local.ifindex = entry.ifindex; + spin_lock_bh(&msk->pm.lock); + msk->pm.extra_subflows++; + spin_unlock_bh(&msk->pm.lock); + lock_sock(sk); err = __mptcp_subflow_connect(sk, &local, &addr_r); release_sock(sk); - if (err) + if (err) { GENL_SET_ERR_MSG_FMT(info, "connect error: %d", err); - spin_lock_bh(&msk->pm.lock); - if (err) + spin_lock_bh(&msk->pm.lock); mptcp_userspace_pm_delete_local_addr(msk, &entry); - else - msk->pm.extra_subflows++; - spin_unlock_bh(&msk->pm.lock); + spin_unlock_bh(&msk->pm.lock); + } create_err: sock_put(sk); diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index a72a6ad6ee8b..cb9515f505aa 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2276,6 +2276,10 @@ static bool mptcp_move_skbs(struct sock *sk) mptcp_backlog_spooled(sk, moved, &skbs); } mptcp_data_unlock(sk); + + if (enqueued && mptcp_epollin_ready(sk)) + sk->sk_data_ready(sk); + return enqueued; } @@ -2865,6 +2869,10 @@ static void __mptcp_retrans(struct sock *sk) msk->bytes_retrans += len; dfrag->already_sent = max(dfrag->already_sent, len); + /* With csum enabled retransmission can send new data. */ + if (after64(dfrag->already_sent + dfrag->data_seq, msk->snd_nxt)) + WRITE_ONCE(msk->snd_nxt, dfrag->already_sent + dfrag->data_seq); + reset_timer: mptcp_check_and_set_pending(sk); @@ -4420,6 +4428,8 @@ static int __mptcp_read_sock(struct sock *sk, read_descriptor_t *desc, } mptcp_eat_recv_skb(sk, skb); + if (!desc->count) + break; } if (noack) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index e4f5aba24da7..b93b878478d2 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -1229,10 +1229,9 @@ static inline int mptcp_rm_addr_len(const struct mptcp_rm_list *rm_list) return TCPOLEN_MPTCP_RM_ADDR_BASE + roundup(rm_list->nr - 1, 4) + 1; } -bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb, - unsigned int opt_size, unsigned int remaining, - struct mptcp_addr_info *addr, bool *echo, - bool *drop_other_suboptions); +bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, unsigned int opt_size, + unsigned int remaining, + struct mptcp_addr_info *addr, bool *echo); bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining, struct mptcp_rm_list *rm_list); int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc); diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 87b5796d0135..fcf6feb2a9eb 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -241,15 +241,19 @@ static int mptcp_setsockopt_sol_socket_timestamping(struct mptcp_sock *msk, mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + int err; lock_sock(ssk); - sock_set_timestamping(ssk, optname, timestamping); + err = sock_set_timestamping(ssk, optname, timestamping); release_sock(ssk); + + if (err < 0 && ret == 0) + ret = err; } release_sock(sk); - return 0; + return ret; } static int mptcp_setsockopt_sol_socket_linger(struct mptcp_sock *msk, sockptr_t optval, @@ -813,10 +817,11 @@ static int mptcp_setsockopt_all_sf(struct mptcp_sock *msk, int level, mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + int err; - ret = tcp_setsockopt(ssk, level, optname, optval, optlen); - if (ret) - break; + err = tcp_setsockopt(ssk, level, optname, optval, optlen); + if (err < 0 && ret == 0) + ret = err; } if (!ret) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index bd9cae44d214..16daba8cac83 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1898,7 +1898,7 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, if (ret_hooks >= 0) ip_vs_unregister_hooks(ipvs, u->af); if (svc != NULL) { - ip_vs_unbind_scheduler(svc, sched); + ip_vs_unbind_scheduler(svc); ip_vs_service_free(svc); } ip_vs_scheduler_put(sched); @@ -1962,9 +1962,8 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) old_sched = rcu_dereference_protected(svc->scheduler, 1); if (sched != old_sched) { if (old_sched) { - ip_vs_unbind_scheduler(svc, old_sched); - RCU_INIT_POINTER(svc->scheduler, NULL); - /* Wait all svc->sched_data users */ + ip_vs_unbind_scheduler(svc); + /* Wait all svc->scheduler/sched_data users */ synchronize_rcu(); } /* Bind the new scheduler */ @@ -1972,6 +1971,10 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) ret = ip_vs_bind_scheduler(svc, sched); if (ret) { ip_vs_scheduler_put(sched); + /* Try to restore the old_sched */ + if (old_sched && + !ip_vs_bind_scheduler(svc, old_sched)) + old_sched = NULL; goto out; } } @@ -2027,7 +2030,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup) /* Unbind scheduler */ old_sched = rcu_dereference_protected(svc->scheduler, 1); - ip_vs_unbind_scheduler(svc, old_sched); + ip_vs_unbind_scheduler(svc); ip_vs_scheduler_put(old_sched); /* Unbind persistence engine, keep svc->pe */ diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c index c6e421c4e299..24adc38942a0 100644 --- a/net/netfilter/ipvs/ip_vs_sched.c +++ b/net/netfilter/ipvs/ip_vs_sched.c @@ -56,19 +56,19 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc, /* * Unbind a service with its scheduler */ -void ip_vs_unbind_scheduler(struct ip_vs_service *svc, - struct ip_vs_scheduler *sched) +void ip_vs_unbind_scheduler(struct ip_vs_service *svc) { - struct ip_vs_scheduler *cur_sched; + struct ip_vs_scheduler *sched; - cur_sched = rcu_dereference_protected(svc->scheduler, 1); - /* This check proves that old 'sched' was installed */ - if (!cur_sched) + sched = rcu_dereference_protected(svc->scheduler, 1); + if (!sched) return; + /* Reset the scheduler before initiating any RCU callbacks */ + rcu_assign_pointer(svc->scheduler, NULL); + smp_wmb(); /* paired with smp_rmb() in ip_vs_schedule() */ if (sched->done_service) sched->done_service(svc); - /* svc->scheduler can be set to NULL only by caller */ } diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index 522183b9a604..2ebe4cb47cf6 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -203,7 +203,7 @@ static int help(struct sk_buff *skb, unsigned int protoff, if (parse_dcc(data, data_limit, &dcc_ip, &dcc_port, &addr_beg_p, &addr_end_p)) { pr_debug("unable to parse dcc command\n"); - continue; + goto out; } pr_debug("DCC bound ip/port: %pI4:%u\n", @@ -217,7 +217,7 @@ static int help(struct sk_buff *skb, unsigned int protoff, net_warn_ratelimited("Forged DCC command from %pI4: %pI4:%u\n", &tuple->src.u3.ip, &dcc_ip, dcc_port); - continue; + goto out; } exp = nf_ct_expect_alloc(ct); diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index 036c8586f49b..ed00114f65f3 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -22,6 +22,8 @@ #include <net/netfilter/nf_conntrack_zones.h> #include <net/netfilter/nf_synproxy.h> +static DEFINE_MUTEX(synproxy_mutex); + unsigned int synproxy_net_id; EXPORT_SYMBOL_GPL(synproxy_net_id); @@ -769,26 +771,31 @@ static const struct nf_hook_ops ipv4_synproxy_ops[] = { int nf_synproxy_ipv4_init(struct synproxy_net *snet, struct net *net) { - int err; + int err = 0; + mutex_lock(&synproxy_mutex); if (snet->hook_ref4 == 0) { err = nf_register_net_hooks(net, ipv4_synproxy_ops, ARRAY_SIZE(ipv4_synproxy_ops)); if (err) - return err; + goto out; } snet->hook_ref4++; - return 0; +out: + mutex_unlock(&synproxy_mutex); + return err; } EXPORT_SYMBOL_GPL(nf_synproxy_ipv4_init); void nf_synproxy_ipv4_fini(struct synproxy_net *snet, struct net *net) { + mutex_lock(&synproxy_mutex); snet->hook_ref4--; if (snet->hook_ref4 == 0) nf_unregister_net_hooks(net, ipv4_synproxy_ops, ARRAY_SIZE(ipv4_synproxy_ops)); + mutex_unlock(&synproxy_mutex); } EXPORT_SYMBOL_GPL(nf_synproxy_ipv4_fini); @@ -1193,27 +1200,32 @@ static const struct nf_hook_ops ipv6_synproxy_ops[] = { int nf_synproxy_ipv6_init(struct synproxy_net *snet, struct net *net) { - int err; + int err = 0; + mutex_lock(&synproxy_mutex); if (snet->hook_ref6 == 0) { err = nf_register_net_hooks(net, ipv6_synproxy_ops, ARRAY_SIZE(ipv6_synproxy_ops)); if (err) - return err; + goto out; } snet->hook_ref6++; - return 0; +out: + mutex_unlock(&synproxy_mutex); + return err; } EXPORT_SYMBOL_GPL(nf_synproxy_ipv6_init); void nf_synproxy_ipv6_fini(struct synproxy_net *snet, struct net *net) { + mutex_lock(&synproxy_mutex); snet->hook_ref6--; if (snet->hook_ref6 == 0) nf_unregister_net_hooks(net, ipv6_synproxy_ops, ARRAY_SIZE(ipv6_synproxy_ops)); + mutex_unlock(&synproxy_mutex); } EXPORT_SYMBOL_GPL(nf_synproxy_ipv6_fini); #endif /* CONFIG_IPV6 */ diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c index 2316c77f4228..dfd41fc8d9b8 100644 --- a/net/netfilter/nft_byteorder.c +++ b/net/netfilter/nft_byteorder.c @@ -19,7 +19,6 @@ struct nft_byteorder { u8 sreg; u8 dreg; enum nft_byteorder_ops op:8; - u8 len; u8 size; }; @@ -28,13 +27,8 @@ void nft_byteorder_eval(const struct nft_expr *expr, const struct nft_pktinfo *pkt) { const struct nft_byteorder *priv = nft_expr_priv(expr); - u32 *src = ®s->data[priv->sreg]; + const u32 *src = ®s->data[priv->sreg]; u32 *dst = ®s->data[priv->dreg]; - u16 *s16, *d16; - unsigned int i; - - s16 = (void *)src; - d16 = (void *)dst; switch (priv->size) { case 8: { @@ -43,18 +37,14 @@ void nft_byteorder_eval(const struct nft_expr *expr, switch (priv->op) { case NFT_BYTEORDER_NTOH: - for (i = 0; i < priv->len / 8; i++) { - src64 = nft_reg_load64(&src[i]); - nft_reg_store64(&dst64[i], - be64_to_cpu((__force __be64)src64)); - } + src64 = nft_reg_load64(src); + + nft_reg_store64(dst64, be64_to_cpu((__force __be64)src64)); break; case NFT_BYTEORDER_HTON: - for (i = 0; i < priv->len / 8; i++) { - src64 = (__force __u64) - cpu_to_be64(nft_reg_load64(&src[i])); - nft_reg_store64(&dst64[i], src64); - } + src64 = (__force __u64)cpu_to_be64(nft_reg_load64(src)); + + nft_reg_store64(dst64, src64); break; } break; @@ -62,24 +52,20 @@ void nft_byteorder_eval(const struct nft_expr *expr, case 4: switch (priv->op) { case NFT_BYTEORDER_NTOH: - for (i = 0; i < priv->len / 4; i++) - dst[i] = ntohl((__force __be32)src[i]); + *dst = ntohl((__force __be32)*src); break; case NFT_BYTEORDER_HTON: - for (i = 0; i < priv->len / 4; i++) - dst[i] = (__force __u32)htonl(src[i]); + *dst = (__force __u32)htonl(*src); break; } break; case 2: switch (priv->op) { case NFT_BYTEORDER_NTOH: - for (i = 0; i < priv->len / 2; i++) - d16[i] = ntohs((__force __be16)s16[i]); + nft_reg_store16(dst, ntohs(nft_reg_load_be16(src))); break; case NFT_BYTEORDER_HTON: - for (i = 0; i < priv->len / 2; i++) - d16[i] = (__force __u16)htons(s16[i]); + nft_reg_store_be16(dst, htons(nft_reg_load16(src))); break; } break; @@ -137,20 +123,22 @@ static int nft_byteorder_init(const struct nft_ctx *ctx, if (err < 0) return err; - priv->len = len; + /* no longer support multi-reg conversions */ + if (len != size) + return -EOPNOTSUPP; err = nft_parse_register_load(ctx, tb[NFTA_BYTEORDER_SREG], &priv->sreg, - priv->len); + len); if (err < 0) return err; err = nft_parse_register_store(ctx, tb[NFTA_BYTEORDER_DREG], &priv->dreg, NULL, NFT_DATA_VALUE, - priv->len); + len); if (err < 0) return err; - if (nft_reg_overlap(priv->sreg, priv->dreg, priv->len)) + if (nft_reg_overlap(priv->sreg, priv->dreg, len)) return -EINVAL; return 0; @@ -167,10 +155,11 @@ static int nft_byteorder_dump(struct sk_buff *skb, goto nla_put_failure; if (nla_put_be32(skb, NFTA_BYTEORDER_OP, htonl(priv->op))) goto nla_put_failure; - if (nla_put_be32(skb, NFTA_BYTEORDER_LEN, htonl(priv->len))) - goto nla_put_failure; if (nla_put_be32(skb, NFTA_BYTEORDER_SIZE, htonl(priv->size))) goto nla_put_failure; + /* compatibility for old userspace which permitted size != len */ + if (nla_put_be32(skb, NFTA_BYTEORDER_LEN, htonl(priv->size))) + goto nla_put_failure; return 0; nla_put_failure: diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index fa2cc556331c..357513c6dcea 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -78,7 +78,7 @@ static void nft_ct_get_eval(const struct nft_expr *expr, break; } - if (ct == NULL) + if (!ct || nf_ct_is_template(ct)) goto err; switch (priv->key) { @@ -180,12 +180,10 @@ static void nft_ct_get_eval(const struct nft_expr *expr, tuple = &ct->tuplehash[priv->dir].tuple; switch (priv->key) { case NFT_CT_SRC: - memcpy(dest, tuple->src.u3.all, - nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16); + memcpy(dest, tuple->src.u3.all, priv->len); return; case NFT_CT_DST: - memcpy(dest, tuple->dst.u3.all, - nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16); + memcpy(dest, tuple->dst.u3.all, priv->len); return; case NFT_CT_PROTO_SRC: nft_reg_store16(dest, (__force u16)tuple->src.u.all); diff --git a/net/netfilter/nft_ct_fast.c b/net/netfilter/nft_ct_fast.c index e684c8a91848..ecf7b3a404be 100644 --- a/net/netfilter/nft_ct_fast.c +++ b/net/netfilter/nft_ct_fast.c @@ -30,7 +30,7 @@ void nft_ct_get_fast_eval(const struct nft_expr *expr, break; } - if (!ct) { + if (!ct || nf_ct_is_template(ct)) { regs->verdict.code = NFT_BREAK; return; } diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c index 0b987bc2132a..68f7cfbbee06 100644 --- a/net/netfilter/nft_tunnel.c +++ b/net/netfilter/nft_tunnel.c @@ -676,7 +676,7 @@ static void nft_tunnel_obj_destroy(const struct nft_ctx *ctx, { struct nft_tunnel_obj *priv = nft_obj_data(obj); - metadata_dst_free(priv->md); + dst_release(&priv->md->dst); } static struct nft_object_type nft_tunnel_obj_type; diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index 466da23e36ff..b32d153e3a18 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -91,7 +91,7 @@ nfqueue_tg_v3(struct sk_buff *skb, const struct xt_action_param *par) if (info->queues_total > 1) { if (info->flags & NFQ_FLAG_CPU_FANOUT) { - int cpu = smp_processor_id(); + int cpu = raw_smp_processor_id(); queue = info->queuenum + cpu % info->queues_total; } else { diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 0c64c504f79d..4001de0c4959 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -656,6 +656,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn) sends_out: vfree(ic->i_sends); + ic->i_sends = NULL; ack_dma_out: rds_dma_hdr_free(rds_ibdev->dev, ic->i_ack, ic->i_ack_dma, diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 332fd9695e54..04ea11c90e03 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -112,11 +112,6 @@ struct tcf_chain *tcf_action_set_ctrlact(struct tc_action *a, int action, } EXPORT_SYMBOL(tcf_action_set_ctrlact); -/* XXX: For standalone actions, we don't need a RCU grace period either, because - * actions are always connected to filters and filters are already destroyed in - * RCU callbacks, so after a RCU grace period actions are already disconnected - * from filters. Readers later can not find us. - */ static void free_tcf(struct tc_action *p) { struct tcf_chain *chain = rcu_dereference_protected(p->goto_chain, 1); @@ -129,7 +124,7 @@ static void free_tcf(struct tc_action *p) if (chain) tcf_chain_put_by_act(chain); - kfree(p); + kfree_rcu(p, tcfa_rcu); } static void offload_action_hw_count_set(struct tc_action *act, diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index bc20f08a2789..bd3b1da3cd63 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -16,6 +16,8 @@ #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/slab.h> +#include <linux/overflow.h> +#include <linux/unaligned.h> #include <net/ipv6.h> #include <net/netlink.h> #include <net/pkt_sched.h> @@ -242,7 +244,6 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, goto out_free_ex; } - nparms->tcfp_off_max_hint = 0; nparms->tcfp_flags = parm->flags; nparms->tcfp_nkeys = parm->nkeys; @@ -268,14 +269,6 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, BITS_PER_TYPE(int) - 1, nparms->tcfp_keys[i].shift); - /* The AT option can read a single byte, we can bound the actual - * value with uchar max. - */ - cur += (0xff & offmask) >> nparms->tcfp_keys[i].shift; - - /* Each key touches 4 bytes starting from the computed offset */ - nparms->tcfp_off_max_hint = - max(nparms->tcfp_off_max_hint, cur + 4); } p = to_pedit(*a); @@ -318,15 +311,12 @@ static void tcf_pedit_cleanup(struct tc_action *a) call_rcu(&parms->rcu, tcf_pedit_cleanup_rcu); } -static bool offset_valid(struct sk_buff *skb, int offset) +static bool offset_valid(struct sk_buff *skb, int offset, int len) { - if (offset > 0 && offset > skb->len) - return false; - - if (offset < 0 && -offset > skb_headroom(skb)) + if (offset < -(int)skb_headroom(skb)) return false; - return true; + return offset <= (int)skb->len - len; } static int pedit_l4_skb_offset(struct sk_buff *skb, int *hoffset, const int header_type) @@ -393,18 +383,10 @@ TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb, struct tcf_pedit_key_ex *tkey_ex; struct tcf_pedit_parms *parms; struct tc_pedit_key *tkey; - u32 max_offset; int i; parms = rcu_dereference_bh(p->parms); - max_offset = (skb_transport_header_was_set(skb) ? - skb_transport_offset(skb) : - skb_network_offset(skb)) + - parms->tcfp_off_max_hint; - if (skb_ensure_writable(skb, min(skb->len, max_offset))) - goto done; - tcf_lastuse_update(&p->tcf_tm); tcf_action_update_bstats(&p->common, skb); @@ -412,10 +394,11 @@ TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb, tkey_ex = parms->tcfp_keys_ex; for (i = parms->tcfp_nkeys; i > 0; i--, tkey++) { + int write_offset, write_len; int offset = tkey->off; int hoffset = 0; - u32 *ptr, hdata; - u32 val; + u32 cur_val, val; + u32 *ptr; int rc; if (tkey_ex) { @@ -433,13 +416,15 @@ TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb, if (tkey->offmask) { u8 *d, _d; + int at_offset; - if (!offset_valid(skb, hoffset + tkey->at)) { + if (check_add_overflow(hoffset, (int)tkey->at, &at_offset) || + !offset_valid(skb, at_offset, sizeof(_d))) { pr_info_ratelimited("tc action pedit 'at' offset %d out of bounds\n", hoffset + tkey->at); goto bad; } - d = skb_header_pointer(skb, hoffset + tkey->at, + d = skb_header_pointer(skb, at_offset, sizeof(_d), &_d); if (!d) goto bad; @@ -451,31 +436,51 @@ TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb, } } - if (!offset_valid(skb, hoffset + offset)) { - pr_info_ratelimited("tc action pedit offset %d out of bounds\n", hoffset + offset); + if (check_add_overflow(hoffset, offset, &write_offset)) { + pr_info_ratelimited("tc action pedit offset overflow\n"); goto bad; } - ptr = skb_header_pointer(skb, hoffset + offset, - sizeof(hdata), &hdata); - if (!ptr) + if (!offset_valid(skb, write_offset, sizeof(*ptr))) { + pr_info_ratelimited("tc action pedit offset %d out of bounds\n", + write_offset); goto bad; + } + + if (write_offset < 0) { + if (skb_cow(skb, -write_offset)) + goto bad; + if (write_offset + (int)sizeof(*ptr) > 0) { + if (skb_ensure_writable(skb, + min_t(int, skb->len, + write_offset + (int)sizeof(*ptr)))) + goto bad; + } + } else { + if (check_add_overflow(write_offset, (int)sizeof(*ptr), + &write_len)) + goto bad; + if (skb_ensure_writable(skb, min_t(int, skb->len, + write_len))) + goto bad; + } + + ptr = (u32 *)(skb->data + write_offset); + cur_val = get_unaligned(ptr); /* just do it, baby */ switch (cmd) { case TCA_PEDIT_KEY_EX_CMD_SET: val = tkey->val; break; case TCA_PEDIT_KEY_EX_CMD_ADD: - val = (*ptr + tkey->val) & ~tkey->mask; + val = (cur_val + tkey->val) & ~tkey->mask; break; default: pr_info_ratelimited("tc action pedit bad command (%d)\n", cmd); goto bad; } - *ptr = ((*ptr & tkey->mask) ^ val); - if (ptr == &hdata) - skb_store_bits(skb, hoffset + offset, ptr, 4); + put_unaligned((cur_val & tkey->mask) ^ val, ptr); } goto done; diff --git a/net/sctp/diag.c b/net/sctp/diag.c index 2afb376299fe..d758f5c3e06e 100644 --- a/net/sctp/diag.c +++ b/net/sctp/diag.c @@ -266,15 +266,15 @@ static int sctp_sock_dump_one(struct sctp_endpoint *ep, struct sctp_transport *t lock_sock(sk); - rep = nlmsg_new(inet_assoc_attr_size(sk, assoc), GFP_KERNEL); - if (!rep) { - release_sock(sk); - return -ENOMEM; + if (ep != assoc->ep || assoc->base.dead) { + err = -ESTALE; + goto out_unlock; } - if (ep != assoc->ep) { - err = -EAGAIN; - goto out; + rep = nlmsg_new(inet_assoc_attr_size(sk, assoc), GFP_KERNEL); + if (!rep) { + err = -ENOMEM; + goto out_unlock; } err = inet_sctp_diag_fill(sk, assoc, rep, req, sk_user_ns(NETLINK_CB(skb).sk), @@ -289,8 +289,9 @@ static int sctp_sock_dump_one(struct sctp_endpoint *ep, struct sctp_transport *t return nlmsg_unicast(sock_net(skb->sk)->diag_nlsk, rep, NETLINK_CB(skb).portid); out: - release_sock(sk); kfree_skb(rep); +out_unlock: + release_sock(sk); return err; } diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index de86ac088289..85264862fb6b 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1730,6 +1730,7 @@ struct sctp_association *sctp_unpack_cookie( struct sctp_signed_cookie *cookie; struct sk_buff *skb = chunk->skb; struct sctp_cookie *bear_cookie; + struct sctp_chunkhdr *ch; enum sctp_scope scope; unsigned int len; ktime_t kt; @@ -1759,6 +1760,10 @@ struct sctp_association *sctp_unpack_cookie( cookie = chunk->subh.cookie_hdr; bear_cookie = &cookie->c; + ch = (struct sctp_chunkhdr *)(bear_cookie + 1); + if (ntohs(ch->length) > len - fixed_size) + goto malformed; + /* Verify the cookie's MAC, if cookie authentication is enabled. */ if (sctp_sk(ep->base.sk)->cookie_auth_enable) { u8 mac[SHA256_DIGEST_SIZE]; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 8e89a870780c..9b23c11cbb9e 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -2598,11 +2598,7 @@ static enum sctp_disposition sctp_sf_do_5_2_6_stale( */ sctp_add_cmd_sf(commands, SCTP_CMD_DEL_NON_PRIMARY, SCTP_NULL()); - /* If we've sent any data bundled with COOKIE-ECHO we will need to - * resend - */ - sctp_add_cmd_sf(commands, SCTP_CMD_T1_RETRAN, - SCTP_TRANSPORT(asoc->peer.primary_path)); + sctp_add_cmd_sf(commands, SCTP_CMD_PURGE_OUTQUEUE, SCTP_NULL()); /* Cast away the const modifier, as we want to just * rerun it through as a sideffect. diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index dc71ed79be4a..0d9cd977c7b7 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2886,7 +2886,7 @@ static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor) return -EAGAIN; } - WRITE_ONCE(u->inq_len, u->inq_len - skb->len); + WRITE_ONCE(u->inq_len, u->inq_len - unix_skb_len(skb)); #if IS_ENABLED(CONFIG_AF_UNIX_OOB) if (skb == u->oob_skb) { @@ -3063,11 +3063,12 @@ unlock: unix_detach_fds(&scm, skb); } - if (unix_skb_len(skb)) - break; - spin_lock(&sk->sk_receive_queue.lock); - WRITE_ONCE(u->inq_len, u->inq_len - skb->len); + WRITE_ONCE(u->inq_len, u->inq_len - chunk); + if (unix_skb_len(skb)) { + spin_unlock(&sk->sk_receive_queue.lock); + break; + } __skb_unlink(skb, &sk->sk_receive_queue); spin_unlock(&sk->sk_receive_queue.lock); diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 5c1ecd5bfdbc..91516488a742 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -980,8 +980,10 @@ static int vmci_transport_recv_listen(struct sock *sk, err = -EINVAL; } - if (err < 0) + if (err < 0) { vsock_remove_pending(sk, pending); + sk_acceptq_removed(sk); + } release_sock(pending); vmci_transport_release_pending(pending); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 7db9cd433801..76c537a6e8b5 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -6366,6 +6366,9 @@ nl80211_parse_rnr_elems(struct wiphy *wiphy, struct nlattr *attrs, if (ret) return ERR_PTR(ret); + if (num_elems >= 255) + return ERR_PTR(-EINVAL); + num_elems++; } @@ -6711,6 +6714,12 @@ static int nl80211_calculate_ap_params(struct cfg80211_ap_settings *params) return -EINVAL; } + if (!!params->he_cap != !!params->he_oper) + return -EINVAL; + + if (!!params->eht_cap != !!params->eht_oper) + return -EINVAL; + return 0; } diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 358cbc9e43d8..27a56ee2e8f0 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -1071,6 +1071,7 @@ int cfg80211_scan(struct cfg80211_registered_device *rdev) struct cfg80211_scan_request_int *request; struct cfg80211_scan_request_int *rdev_req = rdev->scan_req; u32 n_channels = 0, idx, i; + int err; if (!(rdev->wiphy.flags & WIPHY_FLAG_SPLIT_SCAN_6GHZ)) { rdev_req->req.first_part = true; @@ -1100,8 +1101,14 @@ int cfg80211_scan(struct cfg80211_registered_device *rdev) rdev_req->req.scan_6ghz = false; rdev_req->req.first_part = true; + err = rdev_scan(rdev, request); + if (err) { + kfree(request); + return err; + } + rdev->int_scan_req = request; - return rdev_scan(rdev, request); + return 0; } void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 5e5786cd9af5..f8c8a8c9dfba 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -802,6 +802,7 @@ static int xsk_skb_metadata(struct sk_buff *skb, void *buffer, u32 hr) { struct xsk_tx_metadata *meta = NULL; + u16 csum_start, csum_offset; if (unlikely(pool->tx_metadata_len == 0)) return -EINVAL; @@ -811,13 +812,15 @@ static int xsk_skb_metadata(struct sk_buff *skb, void *buffer, return -EINVAL; if (meta->flags & XDP_TXMD_FLAGS_CHECKSUM) { - if (unlikely(meta->request.csum_start + - meta->request.csum_offset + + csum_start = READ_ONCE(meta->request.csum_start); + csum_offset = READ_ONCE(meta->request.csum_offset); + + if (unlikely(csum_start + csum_offset + sizeof(__sum16) > desc->len)) return -EINVAL; - skb->csum_start = hr + meta->request.csum_start; - skb->csum_offset = meta->request.csum_offset; + skb->csum_start = hr + csum_start; + skb->csum_offset = csum_offset; skb->ip_summed = CHECKSUM_PARTIAL; if (unlikely(pool->tx_sw_csum)) { diff --git a/scripts/kconfig/tests/err_repeated_inc/expected_stderr b/scripts/kconfig/tests/err_repeated_inc/expected_stderr index 95d90d6a93c5..53071430ea7d 100644 --- a/scripts/kconfig/tests/err_repeated_inc/expected_stderr +++ b/scripts/kconfig/tests/err_repeated_inc/expected_stderr @@ -1,2 +1,2 @@ -Kconfig.inc1:4: error: Repeated inclusion of Kconfig.inc3 -Kconfig.inc2:3: note: Location of first inclusion of Kconfig.inc3 +Kconfig.inc1:4: error: repeated inclusion of Kconfig.inc3 +Kconfig.inc2:3: note: location of first inclusion of Kconfig.inc3 diff --git a/tools/sched_ext/scx_show_state.py b/tools/sched_ext/scx_show_state.py index 02e43c184d43..446d82807f90 100644 --- a/tools/sched_ext/scx_show_state.py +++ b/tools/sched_ext/scx_show_state.py @@ -27,18 +27,25 @@ def read_static_key(name): def state_str(state): return prog['scx_enable_state_str'][state].string_().decode() +def read_root_ops_name(): + if root: + return root.ops.name.string_().decode() + return '' + +def read_root_field(name, default): + if root: + return getattr(root, name).value_() + return default + root = prog['scx_root'] enable_state = read_atomic("scx_enable_state_var") -if root: - print(f'ops : {root.ops.name.string_().decode()}') -else: - print('ops : ') +print(f'ops : {read_root_ops_name()}') print(f'enabled : {read_static_key("__scx_enabled")}') print(f'switching_all : {read_int("scx_switching_all")}') print(f'switched_all : {read_static_key("__scx_switched_all")}') print(f'enable_state : {state_str(enable_state)} ({enable_state})') -print(f'aborting : {prog["scx_aborting"].value_()}') -print(f'bypass_depth : {prog["scx_bypass_depth"].value_()}') +print(f'aborting : {read_root_field("aborting", False)}') +print(f'bypass_depth : {read_root_field("bypass_depth", 0)}') print(f'nr_rejected : {read_atomic("scx_nr_rejected")}') print(f'enable_seq : {read_atomic("scx_enable_seq")}') diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh index a56f4153c64d..683b05062810 100755 --- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh +++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh @@ -492,6 +492,16 @@ REMOTE_TEST_MATRIX=( " C1-5:P1 . C1-4:P1 C2-3 . . \ . . . P1 . . p1:5|c11:1-4|c12:5 \ p1:P1|c11:P1|c12:P-1" + # Narrowing cpuset.cpus to previously sibling-excluded CPUs should + # not return CPUs that were never actually owned. + " C1-4:P1 . C1-2:P1 C1-3:P2 . . \ + . . . C3 . . p1:4|c11:1-2|c12:3 \ + p1:P1|c11:P1|c12:P2 3" + # Expanding cpuset.cpus to include a previously sibling-excluded CPU + # after the sibling has become a member should correctly request it. + " C1-4:P1 . C1-2:P1 C1-3:P2 . . \ + . . P0 C2-3 . . p1:1,4|c11:1|c12:2-3 \ + p1:P1|c11:P0|c12:P2 2-3" ) # diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/eprobes_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/dynevent/eprobes_syntax_errors.tc index 4f5e8c665156..2a680c086047 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/eprobes_syntax_errors.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/eprobes_syntax_errors.tc @@ -20,7 +20,7 @@ check_error 'e:foo/^123456789012345678901234567890123456789012345678901234567890 check_error 'e:foo/^bar.1 syscalls/sys_enter_openat' # BAD_EVENT_NAME check_error 'e:foo/bar syscalls/sys_enter_openat arg=^dfd' # BAD_FETCH_ARG -check_error 'e:foo/bar syscalls/sys_enter_openat ^arg=$foo' # BAD_ATTACH_ARG +check_error 'e:foo/bar syscalls/sys_enter_openat arg=^$foo' # BAD_ATTACH_ARG if grep -q '<attached-group>\.<attached-event>.*\[if <filter>\]' README; then check_error 'e:foo/bar syscalls/sys_enter_openat if ^' # NO_EP_FILTER diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index f3da38c54d27..2ed7d803eb54 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -109,6 +109,7 @@ TEST_PROGS := \ test_vxlan_nh.sh \ test_vxlan_nolocalbypass.sh \ test_vxlan_under_vrf.sh \ + test_vxlan_vnifilter_notify.sh \ test_vxlan_vnifiltering.sh \ tfo_passive.sh \ traceroute.sh \ diff --git a/tools/testing/selftests/net/af_unix/scm_inq.c b/tools/testing/selftests/net/af_unix/scm_inq.c index 3a86be9bda17..6268b5bf50be 100644 --- a/tools/testing/selftests/net/af_unix/scm_inq.c +++ b/tools/testing/selftests/net/af_unix/scm_inq.c @@ -8,8 +8,9 @@ #include "kselftest_harness.h" -#define NR_CHUNKS 100 -#define MSG_LEN 256 +#define NR_CHUNKS 100 +#define MSG_LEN 256 +#define NR_PARTIAL_READS 3 FIXTURE(scm_inq) { @@ -120,4 +121,53 @@ TEST_F(scm_inq, basic) recv_chunks(_metadata, self); } +TEST_F(scm_inq, partial_read) +{ + char buf[MSG_LEN * NR_PARTIAL_READS] = {}; + char cmsg_buf[CMSG_SPACE(sizeof(int))]; + struct msghdr msg = {}; + struct iovec iov = {}; + struct cmsghdr *cmsg; + int err, inq, ret, i; + int remain; + + err = setsockopt(self->fd[1], SOL_SOCKET, SO_INQ, &(int){1}, sizeof(int)); + if (variant->type != SOCK_STREAM) { + ASSERT_EQ(-ENOPROTOOPT, -errno); + return; + } + ASSERT_EQ(0, err); + + ret = send(self->fd[0], buf, sizeof(buf), 0); + ASSERT_EQ(sizeof(buf), ret); + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = cmsg_buf; + msg.msg_controllen = sizeof(cmsg_buf); + + iov.iov_base = buf; + iov.iov_len = MSG_LEN; + + for (i = 0; i < NR_PARTIAL_READS; i++) { + remain = MSG_LEN * (NR_PARTIAL_READS - 1 - i); + + memset(buf, 0, MSG_LEN); + memset(cmsg_buf, 0, sizeof(cmsg_buf)); + ret = recvmsg(self->fd[1], &msg, 0); + ASSERT_EQ(MSG_LEN, ret); + + cmsg = CMSG_FIRSTHDR(&msg); + ASSERT_NE(NULL, cmsg); + ASSERT_EQ(CMSG_LEN(sizeof(int)), cmsg->cmsg_len); + ASSERT_EQ(SOL_SOCKET, cmsg->cmsg_level); + ASSERT_EQ(SCM_INQ, cmsg->cmsg_type); + ASSERT_EQ(remain, *(int *)CMSG_DATA(cmsg)); + + ret = ioctl(self->fd[1], SIOCINQ, &inq); + ASSERT_EQ(0, ret); + ASSERT_EQ(remain, inq); + } +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 5acd12021e6e..4b3f71e66609 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -4100,6 +4100,10 @@ userspace_tests() chk_rm_nr 0 1 chk_mptcp_info subflows 0 subflows 0 chk_subflows_total 1 1 + # check counters are not affected by errors at creation time + userspace_pm_add_sf $ns2 10.0.12.2 10 2>/dev/null + chk_mptcp_info subflows 0 subflows 0 + chk_subflows_total 1 1 kill_events_pids mptcp_lib_kill_group_wait $tests_pid fi diff --git a/tools/testing/selftests/net/test_vxlan_vnifilter_notify.sh b/tools/testing/selftests/net/test_vxlan_vnifilter_notify.sh new file mode 100755 index 000000000000..9d51a9e02ae0 --- /dev/null +++ b/tools/testing/selftests/net/test_vxlan_vnifilter_notify.sh @@ -0,0 +1,184 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# shellcheck disable=SC2034,SC2154,SC2317,SC2329 +# +# Test for VXLAN vnifilter netlink notifications (RTM_NEWTUNNEL / +# RTM_DELTUNNEL). +# +# Verifies that: +# - Adding a new VNI sends a notification +# - Adding a new VNI with a remote sends a notification +# - Deleting a VNI sends a notification +# - Re-adding an existing VNI with the same attributes does not send +# a spurious notification +# - Updating an existing VNI's remote sends a notification +# - Deleting a non-existent VNI does not send a notification + +source lib.sh + +require_command bridge + +VXLAN_DEV=vxlan100 + +ALL_TESTS=" + test_vni_add_notify + test_vni_add_remote_notify + test_vni_del_notify + test_vni_readd_no_notify + test_vni_update_remote_notify + test_vni_del_nonexistent_no_notify +" + +setup_prepare() +{ + setup_ns NS1 + defer cleanup_all_ns + + ip -n "$NS1" link add $VXLAN_DEV type vxlan dstport 4789 \ + local 10.0.0.1 nolearning external vnifilter + ip -n "$NS1" link set $VXLAN_DEV up +} + +# Run bridge monitor in the background, execute a command, then count +# the notification lines. +# Usage: vni_notify_check <command> [args...] +# Sets: NOTIFY_COUNT with the number of notifications observed. +vni_notify_check() +{ + local tmpf cmd_ret monitor_pid + + tmpf=$(mktemp) + defer rm "$tmpf" + + defer_scope_push + ip netns exec "$NS1" bridge monitor vni > "$tmpf" 2>/dev/null & + monitor_pid=$! + defer kill_process "$monitor_pid" + + sleep 0.5 + if [ ! -e "/proc/$monitor_pid" ]; then + RET=$ksft_skip + log_test "iproute2 'bridge monitor vni' not supported" + return "$RET" + fi + + "$@" + cmd_ret=$? + sleep 0.2 + defer_scope_pop + + NOTIFY_COUNT=$(grep -c "$VXLAN_DEV" "$tmpf") + NOTIFY_COUNT=${NOTIFY_COUNT:-0} + return "$cmd_ret" +} + +# Adding a brand new VNI should produce a notification. +test_vni_add_notify() +{ + RET=0 + + vni_notify_check \ + bridge -n "$NS1" vni add vni 1000 dev "$VXLAN_DEV" + check_err $? "Failed to add VNI" + + [ "$NOTIFY_COUNT" -eq 1 ] + check_err $? "Expected 1 notification for VNI add, got $NOTIFY_COUNT" + + bridge -n "$NS1" vni delete vni 1000 dev "$VXLAN_DEV" 2>/dev/null + + log_test "VNI add sends notification" +} + +# Adding a VNI with a remote should produce a notification. +test_vni_add_remote_notify() +{ + RET=0 + + vni_notify_check \ + bridge -n "$NS1" vni add vni 4000 remote 10.0.0.2 dev "$VXLAN_DEV" + check_err $? "Failed to add VNI with remote" + + [ "$NOTIFY_COUNT" -eq 1 ] + check_err $? "Expected 1 notification for VNI add with remote, got $NOTIFY_COUNT" + + bridge -n "$NS1" vni delete vni 4000 dev "$VXLAN_DEV" + + log_test "VNI add with remote sends notification" +} + +# Deleting a VNI should produce a notification. +test_vni_del_notify() +{ + RET=0 + + bridge -n "$NS1" vni add vni 2000 dev "$VXLAN_DEV" + + vni_notify_check \ + bridge -n "$NS1" vni delete vni 2000 dev "$VXLAN_DEV" + check_err $? "Failed to delete VNI" + + [ "$NOTIFY_COUNT" -eq 1 ] + check_err $? "Expected 1 notification for VNI del, got $NOTIFY_COUNT" + + log_test "VNI delete sends notification" +} + +# Re-adding an existing VNI with the same attributes should not produce +# a notification. +test_vni_readd_no_notify() +{ + RET=0 + + bridge -n "$NS1" vni add vni 3000 dev "$VXLAN_DEV" + + vni_notify_check \ + bridge -n "$NS1" vni add vni 3000 dev "$VXLAN_DEV" + check_err $? "Failed to re-add VNI" + + [ "$NOTIFY_COUNT" -eq 0 ] + check_err $? "Expected 0 notifications for VNI re-add, got $NOTIFY_COUNT" + + bridge -n "$NS1" vni delete vni 3000 dev "$VXLAN_DEV" + + log_test "VNI re-add does not send spurious notification" +} + +# Updating an existing VNI's remote should produce a notification. +test_vni_update_remote_notify() +{ + RET=0 + + bridge -n "$NS1" vni add vni 5000 remote 10.0.0.2 dev "$VXLAN_DEV" + + vni_notify_check \ + bridge -n "$NS1" vni add vni 5000 remote 10.0.0.3 dev "$VXLAN_DEV" + check_err $? "Failed to update VNI remote" + + [ "$NOTIFY_COUNT" -eq 1 ] + check_err $? "Expected 1 notification for VNI remote update, got $NOTIFY_COUNT" + + bridge -n "$NS1" vni delete vni 5000 dev "$VXLAN_DEV" + + log_test "VNI remote update sends notification" +} + +# Deleting a non-existent VNI should not produce a notification. +test_vni_del_nonexistent_no_notify() +{ + RET=0 + + vni_notify_check \ + bridge -n "$NS1" vni delete vni 9999 dev "$VXLAN_DEV" 2>/dev/null + + [ "$NOTIFY_COUNT" -eq 0 ] + check_err $? "Expected 0 notifications for non-existent VNI del, got $NOTIFY_COUNT" + + log_test "Non-existent VNI delete does not send notification" +} + +trap defer_scopes_cleanup EXIT + +setup_prepare +tests_run + +exit "$EXIT_STATUS" diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 89489996fbc1..881f92d7a469 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3520,7 +3520,8 @@ void mark_page_dirty_in_slot(struct kvm *kvm, if (WARN_ON_ONCE(vcpu && vcpu->kvm != kvm)) return; - WARN_ON_ONCE(!vcpu && !kvm_arch_allow_write_without_running_vcpu(kvm)); + WARN_ON_ONCE(!vcpu && refcount_read(&kvm->users_count) && + !kvm_arch_allow_write_without_running_vcpu(kvm)); #endif if (memslot && kvm_slot_dirty_track_enabled(memslot)) { |
