Diffstat (limited to 'arch')
34 files changed, 238 insertions, 132 deletions
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index cabdd8f4a248..e8e1c866e413 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -1450,7 +1450,8 @@ ENTRY(efi_enter_kernel) @ running beyond the PoU, and so calling cache_off below from @ inside the PE/COFF loader allocated region is unsafe unless @ we explicitly clean it to the PoC. - adr r0, call_cache_fn @ region of code we will + ARM( adrl r0, call_cache_fn ) + THUMB( adr r0, call_cache_fn ) @ region of code we will adr r1, 0f @ run with MMU off bl cache_clean_flush bl cache_off diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi index 47982889d774..98da446aa0f2 100644 --- a/arch/arm/boot/dts/imx6qdl.dtsi +++ b/arch/arm/boot/dts/imx6qdl.dtsi @@ -1039,13 +1039,13 @@ compatible = "fsl,imx6q-fec"; reg = <0x02188000 0x4000>; interrupt-names = "int0", "pps"; - interrupts-extended = - <&intc 0 118 IRQ_TYPE_LEVEL_HIGH>, - <&intc 0 119 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <0 118 IRQ_TYPE_LEVEL_HIGH>, + <0 119 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clks IMX6QDL_CLK_ENET>, <&clks IMX6QDL_CLK_ENET>, <&clks IMX6QDL_CLK_ENET_REF>; clock-names = "ipg", "ahb", "ptp"; + gpr = <&gpr>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/imx6qp.dtsi b/arch/arm/boot/dts/imx6qp.dtsi index 93b89dc1f53b..b310f13a53f2 100644 --- a/arch/arm/boot/dts/imx6qp.dtsi +++ b/arch/arm/boot/dts/imx6qp.dtsi @@ -77,7 +77,6 @@ }; &fec { - /delete-property/interrupts-extended; interrupts = <0 118 IRQ_TYPE_LEVEL_HIGH>, <0 119 IRQ_TYPE_LEVEL_HIGH>; }; diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index cc29869d12a3..bf85d6db4931 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -929,7 +929,11 @@ static inline void emit_a32_rsh_i64(const s8 dst[], rd = arm_bpf_get_reg64(dst, tmp, ctx); /* Do LSR operation */ - if (val < 32) { + if (val == 0) { + /* An immediate value of 0 encodes a shift amount of 32 + * for LSR. To shift by 0, don't do anything. + */ + } else if (val < 32) { emit(ARM_MOV_SI(tmp2[1], rd[1], SRTYPE_LSR, val), ctx); emit(ARM_ORR_SI(rd[1], tmp2[1], rd[0], SRTYPE_ASL, 32 - val), ctx); emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_LSR, val), ctx); @@ -955,7 +959,11 @@ static inline void emit_a32_arsh_i64(const s8 dst[], rd = arm_bpf_get_reg64(dst, tmp, ctx); /* Do ARSH operation */ - if (val < 32) { + if (val == 0) { + /* An immediate value of 0 encodes a shift amount of 32 + * for ASR. To shift by 0, don't do anything. + */ + } else if (val < 32) { emit(ARM_MOV_SI(tmp2[1], rd[1], SRTYPE_LSR, val), ctx); emit(ARM_ORR_SI(rd[1], tmp2[1], rd[0], SRTYPE_ASL, 32 - val), ctx); emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_ASR, val), ctx); @@ -992,21 +1000,35 @@ static inline void emit_a32_mul_r64(const s8 dst[], const s8 src[], arm_bpf_put_reg32(dst_hi, rd[0], ctx); } +static bool is_ldst_imm(s16 off, const u8 size) +{ + s16 off_max = 0; + + switch (size) { + case BPF_B: + case BPF_W: + off_max = 0xfff; + break; + case BPF_H: + off_max = 0xff; + break; + case BPF_DW: + /* Need to make sure off+4 does not overflow. 
*/ + off_max = 0xfff - 4; + break; + } + return -off_max <= off && off <= off_max; +} + /* *(size *)(dst + off) = src */ static inline void emit_str_r(const s8 dst, const s8 src[], - s32 off, struct jit_ctx *ctx, const u8 sz){ + s16 off, struct jit_ctx *ctx, const u8 sz){ const s8 *tmp = bpf2a32[TMP_REG_1]; - s32 off_max; s8 rd; rd = arm_bpf_get_reg32(dst, tmp[1], ctx); - if (sz == BPF_H) - off_max = 0xff; - else - off_max = 0xfff; - - if (off < 0 || off > off_max) { + if (!is_ldst_imm(off, sz)) { emit_a32_mov_i(tmp[0], off, ctx); emit(ARM_ADD_R(tmp[0], tmp[0], rd), ctx); rd = tmp[0]; @@ -1035,18 +1057,12 @@ static inline void emit_str_r(const s8 dst, const s8 src[], /* dst = *(size*)(src + off) */ static inline void emit_ldx_r(const s8 dst[], const s8 src, - s32 off, struct jit_ctx *ctx, const u8 sz){ + s16 off, struct jit_ctx *ctx, const u8 sz){ const s8 *tmp = bpf2a32[TMP_REG_1]; const s8 *rd = is_stacked(dst_lo) ? tmp : dst; s8 rm = src; - s32 off_max; - - if (sz == BPF_H) - off_max = 0xff; - else - off_max = 0xfff; - if (off < 0 || off > off_max) { + if (!is_ldst_imm(off, sz)) { emit_a32_mov_i(tmp[0], off, ctx); emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx); rm = tmp[0]; diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index dd6804a64f1a..fd4e1ce1daf9 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -36,7 +36,7 @@ #include <linux/mm.h> -struct start_info _xen_start_info; +static struct start_info _xen_start_info; struct start_info *xen_start_info = &_xen_start_info; EXPORT_SYMBOL(xen_start_info); diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index ebc622432831..c4ac0ac25a00 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -49,7 +49,9 @@ #ifndef CONFIG_BROKEN_GAS_INST #ifdef __ASSEMBLY__ -#define __emit_inst(x) .inst (x) +// The space separator is omitted so that __emit_inst(x) can be parsed as +// either an assembler directive or an assembler macro argument. 
+#define __emit_inst(x) .inst(x) #else #define __emit_inst(x) ".inst " __stringify((x)) "\n\t" #endif diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 354b11e27c07..033a48f30dbb 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -260,18 +260,7 @@ static int __aarch32_alloc_vdso_pages(void) if (ret) return ret; - ret = aarch32_alloc_kuser_vdso_page(); - if (ret) { - unsigned long c_vvar = - (unsigned long)page_to_virt(aarch32_vdso_pages[C_VVAR]); - unsigned long c_vdso = - (unsigned long)page_to_virt(aarch32_vdso_pages[C_VDSO]); - - free_page(c_vvar); - free_page(c_vdso); - } - - return ret; + return aarch32_alloc_kuser_vdso_page(); } #else static int __aarch32_alloc_vdso_pages(void) diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild index a0765aa60ea9..1bff55aa2d54 100644 --- a/arch/m68k/include/asm/Kbuild +++ b/arch/m68k/include/asm/Kbuild @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 generated-y += syscall_table.h generic-y += extable.h -generic-y += hardirq.h generic-y += kvm_para.h generic-y += local64.h generic-y += mcs_spinlock.h diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 6404df613ea3..2b35f9bcf892 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -604,18 +604,19 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, */ local_irq_disable(); ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift); + pte = __pte(0); + if (ptep) + pte = *ptep; + local_irq_enable(); /* * If the PTE disappeared temporarily due to a THP * collapse, just return and let the guest try again. */ - if (!ptep) { - local_irq_enable(); + if (!pte_present(pte)) { if (page) put_page(page); return RESUME_GUEST; } - pte = *ptep; - local_irq_enable(); hpa = pte_pfn(pte) << PAGE_SHIFT; pte_size = PAGE_SIZE; if (shift) diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 9f050064d2a2..aa12cd4078b3 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -815,18 +815,19 @@ int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu, */ local_irq_disable(); ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift); + pte = __pte(0); + if (ptep) + pte = *ptep; + local_irq_enable(); /* * If the PTE disappeared temporarily due to a THP * collapse, just return and let the guest try again. 
*/ - if (!ptep) { - local_irq_enable(); + if (!pte_present(pte)) { if (page) put_page(page); return RESUME_GUEST; } - pte = *ptep; - local_irq_enable(); /* If we're logging dirty pages, always map single pages */ large_enable = !(memslot->flags & KVM_MEM_LOG_DIRTY_PAGES); diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index a197258595ef..62f7bfeb709e 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -55,7 +55,7 @@ config RISCV select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_MMIOWB select ARCH_HAS_DEBUG_VIRTUAL - select HAVE_EBPF_JIT + select HAVE_EBPF_JIT if MMU select EDAC_SUPPORT select ARCH_HAS_GIGANTIC_PAGE select ARCH_HAS_SET_DIRECT_MAP diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index cc1985d8750a..d208a9fd6c52 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -110,6 +110,16 @@ static bool is_32b_int(s64 val) return -(1L << 31) <= val && val < (1L << 31); } +static bool in_auipc_jalr_range(s64 val) +{ + /* + * auipc+jalr can reach any signed PC-relative offset in the range + * [-2^31 - 2^11, 2^31 - 2^11). + */ + return (-(1L << 31) - (1L << 11)) <= val && + val < ((1L << 31) - (1L << 11)); +} + static void emit_imm(u8 rd, s64 val, struct rv_jit_context *ctx) { /* Note that the immediate from the add is sign-extended, @@ -380,20 +390,24 @@ static void emit_sext_32_rd(u8 *rd, struct rv_jit_context *ctx) *rd = RV_REG_T2; } -static void emit_jump_and_link(u8 rd, s64 rvoff, bool force_jalr, - struct rv_jit_context *ctx) +static int emit_jump_and_link(u8 rd, s64 rvoff, bool force_jalr, + struct rv_jit_context *ctx) { s64 upper, lower; if (rvoff && is_21b_int(rvoff) && !force_jalr) { emit(rv_jal(rd, rvoff >> 1), ctx); - return; + return 0; + } else if (in_auipc_jalr_range(rvoff)) { + upper = (rvoff + (1 << 11)) >> 12; + lower = rvoff & 0xfff; + emit(rv_auipc(RV_REG_T1, upper), ctx); + emit(rv_jalr(rd, RV_REG_T1, lower), ctx); + return 0; } - upper = (rvoff + (1 << 11)) >> 12; - lower = rvoff & 0xfff; - emit(rv_auipc(RV_REG_T1, upper), ctx); - emit(rv_jalr(rd, RV_REG_T1, lower), ctx); + pr_err("bpf-jit: target offset 0x%llx is out of range\n", rvoff); + return -ERANGE; } static bool is_signed_bpf_cond(u8 cond) @@ -407,18 +421,16 @@ static int emit_call(bool fixed, u64 addr, struct rv_jit_context *ctx) s64 off = 0; u64 ip; u8 rd; + int ret; if (addr && ctx->insns) { ip = (u64)(long)(ctx->insns + ctx->ninsns); off = addr - ip; - if (!is_32b_int(off)) { - pr_err("bpf-jit: target call addr %pK is out of range\n", - (void *)addr); - return -ERANGE; - } } - emit_jump_and_link(RV_REG_RA, off, !fixed, ctx); + ret = emit_jump_and_link(RV_REG_RA, off, !fixed, ctx); + if (ret) + return ret; rd = bpf_to_rv_reg(BPF_REG_0, ctx); emit(rv_addi(rd, RV_REG_A0, 0), ctx); return 0; @@ -429,7 +441,7 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, { bool is64 = BPF_CLASS(insn->code) == BPF_ALU64 || BPF_CLASS(insn->code) == BPF_JMP; - int s, e, rvoff, i = insn - ctx->prog->insnsi; + int s, e, rvoff, ret, i = insn - ctx->prog->insnsi; struct bpf_prog_aux *aux = ctx->prog->aux; u8 rd = -1, rs = -1, code = insn->code; s16 off = insn->off; @@ -699,7 +711,9 @@ out_be: /* JUMP off */ case BPF_JMP | BPF_JA: rvoff = rv_offset(i, off, ctx); - emit_jump_and_link(RV_REG_ZERO, rvoff, false, ctx); + ret = emit_jump_and_link(RV_REG_ZERO, rvoff, false, ctx); + if (ret) + return ret; break; /* IF (dst COND src) JUMP off */ @@ -801,7 +815,6 @@ out_be: case BPF_JMP | BPF_CALL: { bool fixed; - int ret; u64 addr; 
mark_call(ctx); @@ -826,7 +839,9 @@ out_be: break; rvoff = epilogue_offset(ctx); - emit_jump_and_link(RV_REG_ZERO, rvoff, false, ctx); + ret = emit_jump_and_link(RV_REG_ZERO, rvoff, false, ctx); + if (ret) + return ret; break; /* dst = imm64 */ diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 8191106bf7b9..bfb481134994 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -393,7 +393,7 @@ static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu) if (psw_mchk_disabled(vcpu)) active_mask &= ~IRQ_PEND_MCHK_MASK; /* PV guest cpus can have a single interruption injected at a time. */ - if (kvm_s390_pv_cpu_is_protected(vcpu) && + if (kvm_s390_pv_cpu_get_handle(vcpu) && vcpu->arch.sie_block->iictl != IICTL_CODE_NONE) active_mask &= ~(IRQ_PEND_EXT_II_MASK | IRQ_PEND_IO_MASK | diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 19a81024fe16..5dcf9ff12828 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1939,6 +1939,9 @@ static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn) start = slot + 1; } + if (start >= slots->used_slots) + return slots->used_slots - 1; + if (gfn >= memslots[start].base_gfn && gfn < memslots[start].base_gfn + memslots[start].npages) { atomic_set(&slots->lru_slot, start); diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index b9de2d4fa57e..8d2a68aea1fc 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -412,7 +412,7 @@ int arch_add_memory(int nid, u64 start, u64 size, unsigned long nr_pages = size >> PAGE_SHIFT; int ret; - if (WARN_ON_ONCE(params->pgprot.pgprot != PAGE_KERNEL.pgprot) + if (WARN_ON_ONCE(params->pgprot.pgprot != PAGE_KERNEL.pgprot)) return -EINVAL; /* We only have ZONE_NORMAL, so this is easy.. */ diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 1d6104ea8af0..1197b5596d5a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -149,7 +149,7 @@ config X86 select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if X86_64 - select HAVE_ARCH_USERFAULTFD_WP if USERFAULTFD + select HAVE_ARCH_USERFAULTFD_WP if X86_64 && USERFAULTFD select HAVE_ARCH_VMAP_STACK if X86_64 select HAVE_ARCH_WITHIN_STACK_FRAMES select HAVE_ASM_MODVERSIONS diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index b0da5320bcff..624f5d9b0f79 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -20,6 +20,7 @@ #include <linux/mm.h> #include <linux/hyperv.h> #include <linux/slab.h> +#include <linux/kernel.h> #include <linux/cpuhotplug.h> #include <linux/syscore_ops.h> #include <clocksource/hyperv_timer.h> @@ -419,11 +420,14 @@ void hyperv_cleanup(void) } EXPORT_SYMBOL_GPL(hyperv_cleanup); -void hyperv_report_panic(struct pt_regs *regs, long err) +void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die) { static bool panic_reported; u64 guest_id; + if (in_die && !panic_on_oops) + return; + /* * We prefer to report panic on 'die' chain as we have proper * registers to report, but if we miss it (e.g. 
on BUG()) we need diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index cdcf48d52a12..8391c115c0ec 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -178,8 +178,10 @@ extern void efi_free_boot_services(void); extern pgd_t * __init efi_uv1_memmap_phys_prolog(void); extern void __init efi_uv1_memmap_phys_epilog(pgd_t *save_pgd); +/* kexec external ABI */ struct efi_setup_data { u64 fw_vendor; + u64 __unused; u64 tables; u64 smbios; u64 reserved[8]; diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h index 6685e1218959..7063b5a43220 100644 --- a/arch/x86/include/asm/microcode_amd.h +++ b/arch/x86/include/asm/microcode_amd.h @@ -41,7 +41,7 @@ struct microcode_amd { unsigned int mpb[0]; }; -#define PATCH_MAX_SIZE PAGE_SIZE +#define PATCH_MAX_SIZE (3 * PAGE_SIZE) #ifdef CONFIG_MICROCODE_AMD extern void __init load_ucode_amd_bsp(unsigned int family); diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 07e95dcb40ad..7e9a281e2660 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -237,27 +237,6 @@ enum ssb_mitigation { extern char __indirect_thunk_start[]; extern char __indirect_thunk_end[]; -/* - * On VMEXIT we must ensure that no RSB predictions learned in the guest - * can be followed in the host, by overwriting the RSB completely. Both - * retpoline and IBRS mitigations for Spectre v2 need this; only on future - * CPUs with IBRS_ALL *might* it be avoided. - */ -static inline void vmexit_fill_RSB(void) -{ -#ifdef CONFIG_RETPOLINE - unsigned long loops; - - asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE - ALTERNATIVE("jmp 910f", - __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)), - X86_FEATURE_RETPOLINE) - "910:" - : "=r" (loops), ASM_CALL_CONSTRAINT - : : "memory" ); -#endif -} - static __always_inline void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature) { diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index bf08d4508ecb..a19a680542ce 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -1119,35 +1119,53 @@ void switch_to_sld(unsigned long tifn) sld_update_msr(!(tifn & _TIF_SLD)); } -#define SPLIT_LOCK_CPU(model) {X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY} - /* - * The following processors have the split lock detection feature. But - * since they don't have the IA32_CORE_CAPABILITIES MSR, the feature cannot - * be enumerated. Enable it by family and model matching on these - * processors. + * Bits in the IA32_CORE_CAPABILITIES are not architectural, so they should + * only be trusted if it is confirmed that a CPU model implements a + * specific feature at a particular bit position. + * + * The possible driver data field values: + * + * - 0: CPU models that are known to have the per-core split-lock detection + * feature even though they do not enumerate IA32_CORE_CAPABILITIES. + * + * - 1: CPU models which may enumerate IA32_CORE_CAPABILITIES and if so use + * bit 5 to enumerate the per-core split-lock detection feature. 
*/ static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = { - SPLIT_LOCK_CPU(INTEL_FAM6_ICELAKE_X), - SPLIT_LOCK_CPU(INTEL_FAM6_ICELAKE_L), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, 0), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, 0), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT, 1), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, 1), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L, 1), {} }; void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c) { - u64 ia32_core_caps = 0; + const struct x86_cpu_id *m; + u64 ia32_core_caps; + + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) + return; - if (c->x86_vendor != X86_VENDOR_INTEL) + m = x86_match_cpu(split_lock_cpu_ids); + if (!m) return; - if (cpu_has(c, X86_FEATURE_CORE_CAPABILITIES)) { - /* Enumerate features reported in IA32_CORE_CAPABILITIES MSR. */ + + switch (m->driver_data) { + case 0: + break; + case 1: + if (!cpu_has(c, X86_FEATURE_CORE_CAPABILITIES)) + return; rdmsrl(MSR_IA32_CORE_CAPS, ia32_core_caps); - } else if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) { - /* Enumerate split lock detection by family and model. */ - if (x86_match_cpu(split_lock_cpu_ids)) - ia32_core_caps |= MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT; + if (!(ia32_core_caps & MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT)) + return; + break; + default: + return; } - if (ia32_core_caps & MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT) - split_lock_setup(); + split_lock_setup(); } diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index caa032ce3fe3..ebf34c7bc8bc 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -227,8 +227,8 @@ static void __init ms_hyperv_init_platform(void) ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES); ms_hyperv.hints = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO); - pr_info("Hyper-V: features 0x%x, hints 0x%x\n", - ms_hyperv.features, ms_hyperv.hints); + pr_info("Hyper-V: features 0x%x, hints 0x%x, misc 0x%x\n", + ms_hyperv.features, ms_hyperv.hints, ms_hyperv.misc_features); ms_hyperv.max_vp_index = cpuid_eax(HYPERV_CPUID_IMPLEMENT_LIMITS); ms_hyperv.max_lp_index = cpuid_ebx(HYPERV_CPUID_IMPLEMENT_LIMITS); @@ -263,6 +263,16 @@ static void __init ms_hyperv_init_platform(void) cpuid_eax(HYPERV_CPUID_NESTED_FEATURES); } + /* + * Hyper-V expects to get crash register data or kmsg when + * crash enlightment is available and system crashes. Set + * crash_kexec_post_notifiers to be true to make sure that + * calling crash enlightment interface before running kdump + * kernel. 
+ */ + if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) + crash_kexec_post_notifiers = true; + #ifdef CONFIG_X86_LOCAL_APIC if (ms_hyperv.features & HV_X64_ACCESS_FREQUENCY_MSRS && ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE) { diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 89049b343c7a..d8cc5223b7ce 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -578,6 +578,8 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r) d->id = id; cpumask_set_cpu(cpu, &d->cpu_mask); + rdt_domain_reconfigure_cdp(r); + if (r->alloc_capable && domain_setup_ctrlval(r, d)) { kfree(d); return; diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 181c992f448c..3dd13f3a8b23 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -601,5 +601,6 @@ bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d); void __check_limbo(struct rdt_domain *d, bool force_free); bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r); bool cbm_validate_amd(char *buf, u32 *data, struct rdt_resource *r); +void rdt_domain_reconfigure_cdp(struct rdt_resource *r); #endif /* _ASM_X86_RESCTRL_INTERNAL_H */ diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 064e9ef44cd6..5a359d9fcc05 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -1859,6 +1859,19 @@ static int set_cache_qos_cfg(int level, bool enable) return 0; } +/* Restore the qos cfg state when a domain comes online */ +void rdt_domain_reconfigure_cdp(struct rdt_resource *r) +{ + if (!r->alloc_capable) + return; + + if (r == &rdt_resources_all[RDT_RESOURCE_L2DATA]) + l2_qos_cfg_update(&r->alloc_enabled); + + if (r == &rdt_resources_all[RDT_RESOURCE_L3DATA]) + l3_qos_cfg_update(&r->alloc_enabled); +} + /* * Enable or disable the MBA software controller * which helps user specify bandwidth in MBps. @@ -3072,7 +3085,8 @@ static int rdtgroup_rmdir(struct kernfs_node *kn) * If the rdtgroup is a mon group and parent directory * is a valid "mon_groups" directory, remove the mon group. 
*/ - if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn) { + if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn && + rdtgrp != &rdtgroup_default) { if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP || rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) { ret = rdtgroup_ctrl_remove(kn, rdtgrp); diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c index 4d732a444711..8d5cbe1bbb3b 100644 --- a/arch/x86/kernel/umip.c +++ b/arch/x86/kernel/umip.c @@ -81,7 +81,7 @@ #define UMIP_INST_SLDT 3 /* 0F 00 /0 */ #define UMIP_INST_STR 4 /* 0F 00 /1 */ -const char * const umip_insns[5] = { +static const char * const umip_insns[5] = { [UMIP_INST_SGDT] = "SGDT", [UMIP_INST_SIDT] = "SIDT", [UMIP_INST_SMSW] = "SMSW", diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index a789759b7261..4a3081e9f4b5 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -3,6 +3,10 @@ ccflags-y += -Iarch/x86/kvm ccflags-$(CONFIG_KVM_WERROR) += -Werror +ifeq ($(CONFIG_FRAME_POINTER),y) +OBJECT_FILES_NON_STANDARD_vmenter.o := y +endif + KVM := ../../../virt/kvm kvm-y += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \ diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 0e3fc311d7da..cf912b4aaba8 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -12,6 +12,7 @@ #include <linux/kernel.h> #include <linux/highmem.h> #include <linux/psp-sev.h> +#include <linux/pagemap.h> #include <linux/swap.h> #include "x86.h" @@ -1117,7 +1118,7 @@ int __init sev_hardware_setup(void) /* Maximum number of encrypted guests supported simultaneously */ max_sev_asid = cpuid_ecx(0x8000001F); - if (!max_sev_asid) + if (!svm_sev_enabled()) return 1; /* Minimum ASID value that should be used for SEV guest */ @@ -1156,6 +1157,9 @@ err: void sev_hardware_teardown(void) { + if (!svm_sev_enabled()) + return; + bitmap_free(sev_asid_bitmap); bitmap_free(sev_reclaim_asid_bitmap); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 2be5bbae3a40..2f379bacbb26 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3276,7 +3276,7 @@ static void svm_cancel_injection(struct kvm_vcpu *vcpu) svm_complete_interrupts(svm); } -bool __svm_vcpu_run(unsigned long vmcb_pa, unsigned long *regs); +void __svm_vcpu_run(unsigned long vmcb_pa, unsigned long *regs); static void svm_vcpu_run(struct kvm_vcpu *vcpu) { @@ -3330,13 +3330,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) */ x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl); - local_irq_enable(); - __svm_vcpu_run(svm->vmcb_pa, (unsigned long *)&svm->vcpu.arch.regs); - /* Eliminate branch target predictions from guest mode */ - vmexit_fill_RSB(); - #ifdef CONFIG_X86_64 wrmsrl(MSR_GS_BASE, svm->host.gs_base); #else @@ -3366,8 +3361,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) reload_tss(vcpu); - local_irq_disable(); - x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl); vcpu->arch.cr2 = svm->vmcb->save.cr2; @@ -3411,7 +3404,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) mark_all_clean(svm->vmcb); } -STACK_FRAME_NON_STANDARD(svm_vcpu_run); static void svm_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long root) { diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index fa1af90067e9..bf944334003a 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -3,6 +3,7 @@ #include <asm/asm.h> #include <asm/bitsperlong.h> #include <asm/kvm_vcpu_regs.h> +#include <asm/nospec-branch.h> #define WORD_SIZE (BITS_PER_LONG / 8) @@ -35,7 +36,6 @@ */ 
SYM_FUNC_START(__svm_vcpu_run) push %_ASM_BP - mov %_ASM_SP, %_ASM_BP #ifdef CONFIG_X86_64 push %r15 push %r14 @@ -78,6 +78,7 @@ SYM_FUNC_START(__svm_vcpu_run) pop %_ASM_AX /* Enter guest mode */ + sti 1: vmload %_ASM_AX jmp 3f 2: cmpb $0, kvm_rebooting @@ -99,6 +100,13 @@ SYM_FUNC_START(__svm_vcpu_run) ud2 _ASM_EXTABLE(5b, 6b) 7: + cli + +#ifdef CONFIG_RETPOLINE + /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */ + FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE +#endif + /* "POP" @regs to RAX. */ pop %_ASM_AX diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index cbc9ea2de28f..fd78ffbde644 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -5533,8 +5533,25 @@ static bool nested_vmx_exit_handled_vmcs_access(struct kvm_vcpu *vcpu, return 1 & (b >> (field & 7)); } +static bool nested_vmx_exit_handled_mtf(struct vmcs12 *vmcs12) +{ + u32 entry_intr_info = vmcs12->vm_entry_intr_info_field; + + if (nested_cpu_has_mtf(vmcs12)) + return true; + + /* + * An MTF VM-exit may be injected into the guest by setting the + * interruption-type to 7 (other event) and the vector field to 0. Such + * is the case regardless of the 'monitor trap flag' VM-execution + * control. + */ + return entry_intr_info == (INTR_INFO_VALID_MASK + | INTR_TYPE_OTHER_EVENT); +} + /* - * Return 1 if we should exit from L2 to L1 to handle an exit, or 0 if we + * Return true if we should exit from L2 to L1 to handle an exit, or false if we * should handle it ourselves in L0 (and then continue L2). Only call this * when in is_guest_mode (L2). */ @@ -5633,7 +5650,7 @@ bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) case EXIT_REASON_MWAIT_INSTRUCTION: return nested_cpu_has(vmcs12, CPU_BASED_MWAIT_EXITING); case EXIT_REASON_MONITOR_TRAP_FLAG: - return nested_cpu_has_mtf(vmcs12); + return nested_vmx_exit_handled_mtf(vmcs12); case EXIT_REASON_MONITOR_INSTRUCTION: return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_EXITING); case EXIT_REASON_PAUSE_INSTRUCTION: diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 83050977490c..c2c6335a998c 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4572,7 +4572,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, */ static void kvm_machine_check(void) { -#if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_64) +#if defined(CONFIG_X86_MCE) struct pt_regs regs = { .cs = 3, /* Fake ring 3 no matter what the guest ran on */ .flags = X86_EFLAGS_IF, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3bf2ecafd027..c5835f9cb9ad 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3060,6 +3060,17 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_PERF_CTL: case MSR_AMD64_DC_CFG: case MSR_F15H_EX_CFG: + /* + * Intel Sandy Bridge CPUs must support the RAPL (running average power + * limit) MSRs. Just return 0, as we do not want to expose the host + * data here. Do not conditionalize this on CPUID, as KVM does not do + * so for existing CPU-specific MSRs. + */ + case MSR_RAPL_POWER_UNIT: + case MSR_PP0_ENERGY_STATUS: /* Power plane 0 (core) */ + case MSR_PP1_ENERGY_STATUS: /* Power plane 1 (graphics uncore) */ + case MSR_PKG_ENERGY_STATUS: /* Total package */ + case MSR_DRAM_ENERGY_STATUS: /* DRAM controller */ msr_info->data = 0; break; case MSR_F15H_PERF_CTL0 ... 
MSR_F15H_PERF_CTR5: @@ -5049,10 +5060,13 @@ set_identity_unlock: r = -EFAULT; if (copy_from_user(&u.ps, argp, sizeof(u.ps))) goto out; + mutex_lock(&kvm->lock); r = -ENXIO; if (!kvm->arch.vpit) - goto out; + goto set_pit_out; r = kvm_vm_ioctl_set_pit(kvm, &u.ps); +set_pit_out: + mutex_unlock(&kvm->lock); break; } case KVM_GET_PIT2: { @@ -5072,10 +5086,13 @@ set_identity_unlock: r = -EFAULT; if (copy_from_user(&u.ps2, argp, sizeof(u.ps2))) goto out; + mutex_lock(&kvm->lock); r = -ENXIO; if (!kvm->arch.vpit) - goto out; + goto set_pit2_out; r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2); +set_pit2_out: + mutex_unlock(&kvm->lock); break; } case KVM_REINJECT_CONTROL: { diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 211bb9358b73..c5e393f8bb3f 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -202,7 +202,7 @@ virt_to_phys_or_null_size(void *va, unsigned long size) int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) { - unsigned long pfn, text, pf; + unsigned long pfn, text, pf, rodata; struct page *page; unsigned npages; pgd_t *pgd = efi_mm.pgd; @@ -256,7 +256,7 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) efi_scratch.phys_stack = page_to_phys(page + 1); /* stack grows down */ - npages = (__end_rodata_aligned - _text) >> PAGE_SHIFT; + npages = (_etext - _text) >> PAGE_SHIFT; text = __pa(_text); pfn = text >> PAGE_SHIFT; @@ -266,6 +266,14 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) return 1; } + npages = (__end_rodata - __start_rodata) >> PAGE_SHIFT; + rodata = __pa(__start_rodata); + pfn = rodata >> PAGE_SHIFT; + if (kernel_map_pages_in_pgd(pgd, pfn, rodata, npages, pf)) { + pr_err("Failed to map kernel rodata 1:1\n"); + return 1; + } + return 0; } @@ -638,7 +646,7 @@ efi_thunk_set_variable(efi_char16_t *name, efi_guid_t *vendor, phys_vendor = virt_to_phys_or_null(vnd); phys_data = virt_to_phys_or_null_size(data, data_size); - if (!phys_name || !phys_data) + if (!phys_name || (data && !phys_data)) status = EFI_INVALID_PARAMETER; else status = efi_thunk(set_variable, phys_name, phys_vendor, @@ -669,7 +677,7 @@ efi_thunk_set_variable_nonblocking(efi_char16_t *name, efi_guid_t *vendor, phys_vendor = virt_to_phys_or_null(vnd); phys_data = virt_to_phys_or_null_size(data, data_size); - if (!phys_name || !phys_data) + if (!phys_name || (data && !phys_data)) status = EFI_INVALID_PARAMETER; else status = efi_thunk(set_variable, phys_name, phys_vendor, |
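
Side note (not part of any commit above): the riscv bpf_jit_comp64.c hunk adds in_auipc_jalr_range() because an auipc+jalr pair can only reach signed PC-relative offsets in [-2^31 - 2^11, 2^31 - 2^11) — auipc supplies a sign-extended upper 20 bits and jalr a sign-extended lower 12 bits. The standalone C sketch below is only an illustration of that boundary arithmetic: the check itself is copied from the patch, while the probe values and the main() harness are mine and use long long in place of the kernel's s64.

    /* Illustration only; mirrors the range check added to the riscv BPF JIT. */
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    static bool in_auipc_jalr_range(int64_t off)
    {
            /* auipc+jalr can reach [-2^31 - 2^11, 2^31 - 2^11). */
            return (-(1LL << 31) - (1LL << 11)) <= off &&
                   off < ((1LL << 31) - (1LL << 11));
    }

    int main(void)
    {
            int64_t probes[] = {
                    0,
                    -(1LL << 31) - (1LL << 11),     /* lowest reachable offset */
                    -(1LL << 31) - (1LL << 11) - 1, /* one below: unreachable  */
                    (1LL << 31) - (1LL << 11) - 1,  /* highest reachable offset */
                    (1LL << 31) - (1LL << 11),      /* one above: unreachable  */
            };

            for (size_t i = 0; i < sizeof(probes) / sizeof(probes[0]); i++)
                    printf("%21lld -> %s\n", (long long)probes[i],
                           in_auipc_jalr_range(probes[i]) ?
                           "emit auipc+jalr" : "-ERANGE");
            return 0;
    }

In the patch itself this check lets emit_jump_and_link() report out-of-range targets: instead of silently emitting a truncated auipc+jalr pair, the JIT now logs the offending offset and fails the program with -ERANGE.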