diff options
Diffstat (limited to 'arch')
80 files changed, 1654 insertions, 1632 deletions
diff --git a/arch/arm/include/asm/kprobes.h b/arch/arm/include/asm/kprobes.h index 213607a1f45c..e26a278d301a 100644 --- a/arch/arm/include/asm/kprobes.h +++ b/arch/arm/include/asm/kprobes.h @@ -44,20 +44,20 @@ int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data); /* optinsn template addresses */ -extern __visible kprobe_opcode_t optprobe_template_entry; -extern __visible kprobe_opcode_t optprobe_template_val; -extern __visible kprobe_opcode_t optprobe_template_call; -extern __visible kprobe_opcode_t optprobe_template_end; -extern __visible kprobe_opcode_t optprobe_template_sub_sp; -extern __visible kprobe_opcode_t optprobe_template_add_sp; -extern __visible kprobe_opcode_t optprobe_template_restore_begin; -extern __visible kprobe_opcode_t optprobe_template_restore_orig_insn; -extern __visible kprobe_opcode_t optprobe_template_restore_end; +extern __visible kprobe_opcode_t optprobe_template_entry[]; +extern __visible kprobe_opcode_t optprobe_template_val[]; +extern __visible kprobe_opcode_t optprobe_template_call[]; +extern __visible kprobe_opcode_t optprobe_template_end[]; +extern __visible kprobe_opcode_t optprobe_template_sub_sp[]; +extern __visible kprobe_opcode_t optprobe_template_add_sp[]; +extern __visible kprobe_opcode_t optprobe_template_restore_begin[]; +extern __visible kprobe_opcode_t optprobe_template_restore_orig_insn[]; +extern __visible kprobe_opcode_t optprobe_template_restore_end[]; #define MAX_OPTIMIZED_LENGTH 4 #define MAX_OPTINSN_SIZE \ - ((unsigned long)&optprobe_template_end - \ - (unsigned long)&optprobe_template_entry) + ((unsigned long)optprobe_template_end - \ + (unsigned long)optprobe_template_entry) #define RELATIVEJUMP_SIZE 4 struct arch_optimized_insn { diff --git a/arch/arm/kernel/perf_regs.c b/arch/arm/kernel/perf_regs.c index 05fe92aa7d98..0529f90395c9 100644 --- a/arch/arm/kernel/perf_regs.c +++ b/arch/arm/kernel/perf_regs.c @@ -32,8 +32,7 @@ u64 perf_reg_abi(struct task_struct *task) } void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy) + struct pt_regs *regs) { regs_user->regs = task_pt_regs(current); regs_user->abi = perf_reg_abi(current); diff --git a/arch/arm/probes/kprobes/opt-arm.c b/arch/arm/probes/kprobes/opt-arm.c index 7a449df0b359..c78180172120 100644 --- a/arch/arm/probes/kprobes/opt-arm.c +++ b/arch/arm/probes/kprobes/opt-arm.c @@ -85,21 +85,21 @@ asm ( "optprobe_template_end:\n"); #define TMPL_VAL_IDX \ - ((unsigned long *)&optprobe_template_val - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_val - (unsigned long *)optprobe_template_entry) #define TMPL_CALL_IDX \ - ((unsigned long *)&optprobe_template_call - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_call - (unsigned long *)optprobe_template_entry) #define TMPL_END_IDX \ - ((unsigned long *)&optprobe_template_end - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_end - (unsigned long *)optprobe_template_entry) #define TMPL_ADD_SP \ - ((unsigned long *)&optprobe_template_add_sp - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_add_sp - (unsigned long *)optprobe_template_entry) #define TMPL_SUB_SP \ - ((unsigned long *)&optprobe_template_sub_sp - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_sub_sp - (unsigned long *)optprobe_template_entry) #define TMPL_RESTORE_BEGIN \ - ((unsigned long *)&optprobe_template_restore_begin - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_restore_begin - (unsigned long *)optprobe_template_entry) #define TMPL_RESTORE_ORIGN_INSN \ - ((unsigned long *)&optprobe_template_restore_orig_insn - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_restore_orig_insn - (unsigned long *)optprobe_template_entry) #define TMPL_RESTORE_END \ - ((unsigned long *)&optprobe_template_restore_end - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_restore_end - (unsigned long *)optprobe_template_entry) /* * ARM can always optimize an instruction when using ARM ISA, except @@ -234,7 +234,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *or } /* Copy arch-dep-instance from template. */ - memcpy(code, (unsigned long *)&optprobe_template_entry, + memcpy(code, (unsigned long *)optprobe_template_entry, TMPL_END_IDX * sizeof(kprobe_opcode_t)); /* Adjust buffer according to instruction. */ diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts index f46eb47cfa4d..8161dd237971 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts @@ -75,6 +75,7 @@ &enetc_port0 { phy-handle = <&phy0>; phy-connection-type = "sgmii"; + managed = "in-band-status"; status = "okay"; mdio { diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index e7d98997c09c..162539d4c8cd 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -21,7 +21,7 @@ #define ARM64_HAS_VIRT_HOST_EXTN 11 #define ARM64_WORKAROUND_CAVIUM_27456 12 #define ARM64_HAS_32BIT_EL0 13 -#define ARM64_HARDEN_EL2_VECTORS 14 +#define ARM64_SPECTRE_V3A 14 #define ARM64_HAS_CNP 15 #define ARM64_HAS_NO_FPSIMD 16 #define ARM64_WORKAROUND_REPEAT_TLBI 17 diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 04ab88db4b43..461277ae7a48 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -268,6 +268,8 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; /* * CPU feature detected at boot time based on feature of one or more CPUs. * All possible conflicts for a late CPU are ignored. + * NOTE: this means that a late CPU with the feature will *not* cause the + * capability to be advertised by cpus_have_*cap()! */ #define ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE \ (ARM64_CPUCAP_SCOPE_LOCAL_CPU | \ diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 9e2e9a63c7b6..ef5b040dee44 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -86,6 +86,8 @@ #define QCOM_CPU_PART_FALKOR_V1 0x800 #define QCOM_CPU_PART_FALKOR 0xC00 #define QCOM_CPU_PART_KRYO 0x200 +#define QCOM_CPU_PART_KRYO_2XX_GOLD 0x800 +#define QCOM_CPU_PART_KRYO_2XX_SILVER 0x801 #define QCOM_CPU_PART_KRYO_3XX_SILVER 0x803 #define QCOM_CPU_PART_KRYO_4XX_GOLD 0x804 #define QCOM_CPU_PART_KRYO_4XX_SILVER 0x805 @@ -116,6 +118,8 @@ #define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1) #define MIDR_QCOM_FALKOR MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR) #define MIDR_QCOM_KRYO MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO) +#define MIDR_QCOM_KRYO_2XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_2XX_GOLD) +#define MIDR_QCOM_KRYO_2XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_2XX_SILVER) #define MIDR_QCOM_KRYO_3XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_3XX_SILVER) #define MIDR_QCOM_KRYO_4XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_GOLD) #define MIDR_QCOM_KRYO_4XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_SILVER) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 54387ccd1ab2..4a6a77d8d13e 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -34,8 +34,6 @@ */ #define KVM_VECTOR_PREAMBLE (2 * AARCH64_INSN_SIZE) -#define __SMCCC_WORKAROUND_1_SMC_SZ 36 - #define KVM_HOST_SMCCC_ID(id) \ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ ARM_SMCCC_SMC_64, \ @@ -175,7 +173,6 @@ extern unsigned long kvm_arm_hyp_percpu_base[NR_CPUS]; DECLARE_KVM_NVHE_SYM(__per_cpu_start); DECLARE_KVM_NVHE_SYM(__per_cpu_end); -extern atomic_t arm64_el2_vector_last_slot; DECLARE_KVM_HYP_SYM(__bp_harden_hyp_vecs); #define __bp_harden_hyp_vecs CHOOSE_HYP_SYM(__bp_harden_hyp_vecs) @@ -189,8 +186,6 @@ extern void __kvm_timer_set_cntvoff(u64 cntvoff); extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); -extern void __kvm_enable_ssbs(void); - extern u64 __vgic_v3_get_ich_vtr_el2(void); extern u64 __vgic_v3_read_vmcr(void); extern void __vgic_v3_write_vmcr(u32 vmcr); @@ -198,8 +193,6 @@ extern void __vgic_v3_init_lrs(void); extern u32 __kvm_get_mdcr_el2(void); -extern char __smccc_workaround_1_smc[__SMCCC_WORKAROUND_1_SMC_SZ]; - /* * Obtain the PC-relative address of a kernel symbol * s: symbol diff --git a/arch/arm64/include/asm/kvm_coproc.h b/arch/arm64/include/asm/kvm_coproc.h deleted file mode 100644 index d6bb40122fdb..000000000000 --- a/arch/arm64/include/asm/kvm_coproc.h +++ /dev/null @@ -1,38 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2012,2013 - ARM Ltd - * Author: Marc Zyngier <marc.zyngier@arm.com> - * - * Derived from arch/arm/include/asm/kvm_coproc.h - * Copyright (C) 2012 Rusty Russell IBM Corporation - */ - -#ifndef __ARM64_KVM_COPROC_H__ -#define __ARM64_KVM_COPROC_H__ - -#include <linux/kvm_host.h> - -void kvm_reset_sys_regs(struct kvm_vcpu *vcpu); - -struct kvm_sys_reg_table { - const struct sys_reg_desc *table; - size_t num; -}; - -int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu); -int kvm_handle_cp14_32(struct kvm_vcpu *vcpu); -int kvm_handle_cp14_64(struct kvm_vcpu *vcpu); -int kvm_handle_cp15_32(struct kvm_vcpu *vcpu); -int kvm_handle_cp15_64(struct kvm_vcpu *vcpu); -int kvm_handle_sys_reg(struct kvm_vcpu *vcpu); - -#define kvm_coproc_table_init kvm_sys_reg_table_init -void kvm_sys_reg_table_init(void); - -struct kvm_one_reg; -int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices); -int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); -int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); -unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu); - -#endif /* __ARM64_KVM_COPROC_H__ */ diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 5ef2669ccd6c..c8f550a53516 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -21,20 +21,25 @@ #include <asm/cputype.h> #include <asm/virt.h> -unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num); -unsigned long vcpu_read_spsr32(const struct kvm_vcpu *vcpu); -void vcpu_write_spsr32(struct kvm_vcpu *vcpu, unsigned long v); +#define CURRENT_EL_SP_EL0_VECTOR 0x0 +#define CURRENT_EL_SP_ELx_VECTOR 0x200 +#define LOWER_EL_AArch64_VECTOR 0x400 +#define LOWER_EL_AArch32_VECTOR 0x600 + +enum exception_type { + except_type_sync = 0, + except_type_irq = 0x80, + except_type_fiq = 0x100, + except_type_serror = 0x180, +}; bool kvm_condition_valid32(const struct kvm_vcpu *vcpu); -void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr); +void kvm_skip_instr32(struct kvm_vcpu *vcpu); void kvm_inject_undefined(struct kvm_vcpu *vcpu); void kvm_inject_vabt(struct kvm_vcpu *vcpu); void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr); void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); -void kvm_inject_undef32(struct kvm_vcpu *vcpu); -void kvm_inject_dabt32(struct kvm_vcpu *vcpu, unsigned long addr); -void kvm_inject_pabt32(struct kvm_vcpu *vcpu, unsigned long addr); static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu) { @@ -168,30 +173,6 @@ static __always_inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num, vcpu_gp_regs(vcpu)->regs[reg_num] = val; } -static inline unsigned long vcpu_read_spsr(const struct kvm_vcpu *vcpu) -{ - if (vcpu_mode_is_32bit(vcpu)) - return vcpu_read_spsr32(vcpu); - - if (vcpu->arch.sysregs_loaded_on_cpu) - return read_sysreg_el1(SYS_SPSR); - else - return __vcpu_sys_reg(vcpu, SPSR_EL1); -} - -static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v) -{ - if (vcpu_mode_is_32bit(vcpu)) { - vcpu_write_spsr32(vcpu, v); - return; - } - - if (vcpu->arch.sysregs_loaded_on_cpu) - write_sysreg_el1(v, SYS_SPSR); - else - __vcpu_sys_reg(vcpu, SPSR_EL1) = v; -} - /* * The layout of SPSR for an AArch32 state is different when observed from an * AArch64 SPSR_ELx or an AArch32 SPSR_*. This function generates the AArch32 @@ -472,32 +453,9 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu, return data; /* Leave LE untouched */ } -static __always_inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr) -{ - if (vcpu_mode_is_32bit(vcpu)) { - kvm_skip_instr32(vcpu, is_wide_instr); - } else { - *vcpu_pc(vcpu) += 4; - *vcpu_cpsr(vcpu) &= ~PSR_BTYPE_MASK; - } - - /* advance the singlestep state machine */ - *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS; -} - -/* - * Skip an instruction which has been emulated at hyp while most guest sysregs - * are live. - */ -static __always_inline void __kvm_skip_instr(struct kvm_vcpu *vcpu) +static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu) { - *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR); - vcpu_gp_regs(vcpu)->pstate = read_sysreg_el2(SYS_SPSR); - - kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); - - write_sysreg_el2(vcpu_gp_regs(vcpu)->pstate, SYS_SPSR); - write_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR); + vcpu->arch.flags |= KVM_ARM64_INCREMENT_PC; } #endif /* __ARM64_KVM_EMULATE_H__ */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 724c74cfacdd..21ce5c420247 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -119,6 +119,9 @@ struct kvm_arch { */ unsigned long *pmu_filter; unsigned int pmuver; + + u8 pfr0_csv2; + u8 pfr0_csv3; }; struct kvm_vcpu_fault_info { @@ -202,48 +205,6 @@ enum vcpu_sysreg { NR_SYS_REGS /* Nothing after this line! */ }; -/* 32bit mapping */ -#define c0_MPIDR (MPIDR_EL1 * 2) /* MultiProcessor ID Register */ -#define c0_CSSELR (CSSELR_EL1 * 2)/* Cache Size Selection Register */ -#define c1_SCTLR (SCTLR_EL1 * 2) /* System Control Register */ -#define c1_ACTLR (ACTLR_EL1 * 2) /* Auxiliary Control Register */ -#define c1_CPACR (CPACR_EL1 * 2) /* Coprocessor Access Control */ -#define c2_TTBR0 (TTBR0_EL1 * 2) /* Translation Table Base Register 0 */ -#define c2_TTBR0_high (c2_TTBR0 + 1) /* TTBR0 top 32 bits */ -#define c2_TTBR1 (TTBR1_EL1 * 2) /* Translation Table Base Register 1 */ -#define c2_TTBR1_high (c2_TTBR1 + 1) /* TTBR1 top 32 bits */ -#define c2_TTBCR (TCR_EL1 * 2) /* Translation Table Base Control R. */ -#define c3_DACR (DACR32_EL2 * 2)/* Domain Access Control Register */ -#define c5_DFSR (ESR_EL1 * 2) /* Data Fault Status Register */ -#define c5_IFSR (IFSR32_EL2 * 2)/* Instruction Fault Status Register */ -#define c5_ADFSR (AFSR0_EL1 * 2) /* Auxiliary Data Fault Status R */ -#define c5_AIFSR (AFSR1_EL1 * 2) /* Auxiliary Instr Fault Status R */ -#define c6_DFAR (FAR_EL1 * 2) /* Data Fault Address Register */ -#define c6_IFAR (c6_DFAR + 1) /* Instruction Fault Address Register */ -#define c7_PAR (PAR_EL1 * 2) /* Physical Address Register */ -#define c7_PAR_high (c7_PAR + 1) /* PAR top 32 bits */ -#define c10_PRRR (MAIR_EL1 * 2) /* Primary Region Remap Register */ -#define c10_NMRR (c10_PRRR + 1) /* Normal Memory Remap Register */ -#define c12_VBAR (VBAR_EL1 * 2) /* Vector Base Address Register */ -#define c13_CID (CONTEXTIDR_EL1 * 2) /* Context ID Register */ -#define c13_TID_URW (TPIDR_EL0 * 2) /* Thread ID, User R/W */ -#define c13_TID_URO (TPIDRRO_EL0 * 2)/* Thread ID, User R/O */ -#define c13_TID_PRIV (TPIDR_EL1 * 2) /* Thread ID, Privileged */ -#define c10_AMAIR0 (AMAIR_EL1 * 2) /* Aux Memory Attr Indirection Reg */ -#define c10_AMAIR1 (c10_AMAIR0 + 1)/* Aux Memory Attr Indirection Reg */ -#define c14_CNTKCTL (CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */ - -#define cp14_DBGDSCRext (MDSCR_EL1 * 2) -#define cp14_DBGBCR0 (DBGBCR0_EL1 * 2) -#define cp14_DBGBVR0 (DBGBVR0_EL1 * 2) -#define cp14_DBGBXVR0 (cp14_DBGBVR0 + 1) -#define cp14_DBGWCR0 (DBGWCR0_EL1 * 2) -#define cp14_DBGWVR0 (DBGWVR0_EL1 * 2) -#define cp14_DBGDCCINT (MDCCINT_EL1 * 2) -#define cp14_DBGVCR (DBGVCR32_EL2 * 2) - -#define NR_COPRO_REGS (NR_SYS_REGS * 2) - struct kvm_cpu_context { struct user_pt_regs regs; /* sp = sp_el0 */ @@ -254,10 +215,7 @@ struct kvm_cpu_context { struct user_fpsimd_state fp_regs; - union { - u64 sys_regs[NR_SYS_REGS]; - u32 copro[NR_COPRO_REGS]; - }; + u64 sys_regs[NR_SYS_REGS]; struct kvm_vcpu *__hyp_running_vcpu; }; @@ -408,8 +366,33 @@ struct kvm_vcpu_arch { #define KVM_ARM64_GUEST_HAS_SVE (1 << 5) /* SVE exposed to guest */ #define KVM_ARM64_VCPU_SVE_FINALIZED (1 << 6) /* SVE config completed */ #define KVM_ARM64_GUEST_HAS_PTRAUTH (1 << 7) /* PTRAUTH exposed to guest */ +#define KVM_ARM64_PENDING_EXCEPTION (1 << 8) /* Exception pending */ +#define KVM_ARM64_EXCEPT_MASK (7 << 9) /* Target EL/MODE */ + +/* + * When KVM_ARM64_PENDING_EXCEPTION is set, KVM_ARM64_EXCEPT_MASK can + * take the following values: + * + * For AArch32 EL1: + */ +#define KVM_ARM64_EXCEPT_AA32_UND (0 << 9) +#define KVM_ARM64_EXCEPT_AA32_IABT (1 << 9) +#define KVM_ARM64_EXCEPT_AA32_DABT (2 << 9) +/* For AArch64: */ +#define KVM_ARM64_EXCEPT_AA64_ELx_SYNC (0 << 9) +#define KVM_ARM64_EXCEPT_AA64_ELx_IRQ (1 << 9) +#define KVM_ARM64_EXCEPT_AA64_ELx_FIQ (2 << 9) +#define KVM_ARM64_EXCEPT_AA64_ELx_SERR (3 << 9) +#define KVM_ARM64_EXCEPT_AA64_EL1 (0 << 11) +#define KVM_ARM64_EXCEPT_AA64_EL2 (1 << 11) -#define vcpu_has_sve(vcpu) (system_supports_sve() && \ +/* + * Overlaps with KVM_ARM64_EXCEPT_MASK on purpose so that it can't be + * set together with an exception... + */ +#define KVM_ARM64_INCREMENT_PC (1 << 9) /* Increment PC */ + +#define vcpu_has_sve(vcpu) (system_supports_sve() && \ ((vcpu)->arch.flags & KVM_ARM64_GUEST_HAS_SVE)) #ifdef CONFIG_ARM64_PTR_AUTH @@ -439,14 +422,96 @@ struct kvm_vcpu_arch { u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg); void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg); -/* - * CP14 and CP15 live in the same array, as they are backed by the - * same system registers. - */ -#define CPx_BIAS IS_ENABLED(CONFIG_CPU_BIG_ENDIAN) +static inline bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val) +{ + /* + * *** VHE ONLY *** + * + * System registers listed in the switch are not saved on every + * exit from the guest but are only saved on vcpu_put. + * + * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but + * should never be listed below, because the guest cannot modify its + * own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's + * thread when emulating cross-VCPU communication. + */ + if (!has_vhe()) + return false; + + switch (reg) { + case CSSELR_EL1: *val = read_sysreg_s(SYS_CSSELR_EL1); break; + case SCTLR_EL1: *val = read_sysreg_s(SYS_SCTLR_EL12); break; + case CPACR_EL1: *val = read_sysreg_s(SYS_CPACR_EL12); break; + case TTBR0_EL1: *val = read_sysreg_s(SYS_TTBR0_EL12); break; + case TTBR1_EL1: *val = read_sysreg_s(SYS_TTBR1_EL12); break; + case TCR_EL1: *val = read_sysreg_s(SYS_TCR_EL12); break; + case ESR_EL1: *val = read_sysreg_s(SYS_ESR_EL12); break; + case AFSR0_EL1: *val = read_sysreg_s(SYS_AFSR0_EL12); break; + case AFSR1_EL1: *val = read_sysreg_s(SYS_AFSR1_EL12); break; + case FAR_EL1: *val = read_sysreg_s(SYS_FAR_EL12); break; + case MAIR_EL1: *val = read_sysreg_s(SYS_MAIR_EL12); break; + case VBAR_EL1: *val = read_sysreg_s(SYS_VBAR_EL12); break; + case CONTEXTIDR_EL1: *val = read_sysreg_s(SYS_CONTEXTIDR_EL12);break; + case TPIDR_EL0: *val = read_sysreg_s(SYS_TPIDR_EL0); break; + case TPIDRRO_EL0: *val = read_sysreg_s(SYS_TPIDRRO_EL0); break; + case TPIDR_EL1: *val = read_sysreg_s(SYS_TPIDR_EL1); break; + case AMAIR_EL1: *val = read_sysreg_s(SYS_AMAIR_EL12); break; + case CNTKCTL_EL1: *val = read_sysreg_s(SYS_CNTKCTL_EL12); break; + case ELR_EL1: *val = read_sysreg_s(SYS_ELR_EL12); break; + case PAR_EL1: *val = read_sysreg_par(); break; + case DACR32_EL2: *val = read_sysreg_s(SYS_DACR32_EL2); break; + case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break; + case DBGVCR32_EL2: *val = read_sysreg_s(SYS_DBGVCR32_EL2); break; + default: return false; + } + + return true; +} -#define vcpu_cp14(v,r) ((v)->arch.ctxt.copro[(r) ^ CPx_BIAS]) -#define vcpu_cp15(v,r) ((v)->arch.ctxt.copro[(r) ^ CPx_BIAS]) +static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg) +{ + /* + * *** VHE ONLY *** + * + * System registers listed in the switch are not restored on every + * entry to the guest but are only restored on vcpu_load. + * + * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but + * should never be listed below, because the MPIDR should only be set + * once, before running the VCPU, and never changed later. + */ + if (!has_vhe()) + return false; + + switch (reg) { + case CSSELR_EL1: write_sysreg_s(val, SYS_CSSELR_EL1); break; + case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); break; + case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); break; + case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); break; + case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); break; + case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); break; + case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); break; + case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); break; + case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); break; + case FAR_EL1: write_sysreg_s(val, SYS_FAR_EL12); break; + case MAIR_EL1: write_sysreg_s(val, SYS_MAIR_EL12); break; + case VBAR_EL1: write_sysreg_s(val, SYS_VBAR_EL12); break; + case CONTEXTIDR_EL1: write_sysreg_s(val, SYS_CONTEXTIDR_EL12);break; + case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); break; + case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); break; + case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); break; + case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); break; + case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); break; + case ELR_EL1: write_sysreg_s(val, SYS_ELR_EL12); break; + case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); break; + case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); break; + case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break; + case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); break; + default: return false; + } + + return true; +} struct kvm_vm_stat { ulong remote_tlb_flush; @@ -472,6 +537,12 @@ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); + +unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu); +int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices); +int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); +int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); + int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu, struct kvm_vcpu_events *events); @@ -534,6 +605,17 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot); int handle_exit(struct kvm_vcpu *vcpu, int exception_index); void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index); +int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu); +int kvm_handle_cp14_32(struct kvm_vcpu *vcpu); +int kvm_handle_cp14_64(struct kvm_vcpu *vcpu); +int kvm_handle_cp15_32(struct kvm_vcpu *vcpu); +int kvm_handle_cp15_64(struct kvm_vcpu *vcpu); +int kvm_handle_sys_reg(struct kvm_vcpu *vcpu); + +void kvm_reset_sys_regs(struct kvm_vcpu *vcpu); + +void kvm_sys_reg_table_init(void); + /* MMIO helpers */ void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data); unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len); @@ -653,4 +735,7 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu); #define kvm_arm_vcpu_sve_finalized(vcpu) \ ((vcpu)->arch.flags & KVM_ARM64_VCPU_SVE_FINALIZED) +#define kvm_vcpu_has_pmu(vcpu) \ + (test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features)) + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 331394306cce..e298191a854d 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -72,6 +72,28 @@ alternative_cb kvm_update_va_mask alternative_cb_end .endm +/* + * Convert a kernel image address to a PA + * reg: kernel address to be converted in place + * tmp: temporary register + * + * The actual code generation takes place in kvm_get_kimage_voffset, and + * the instructions below are only there to reserve the space and + * perform the register allocation (kvm_get_kimage_voffset uses the + * specific registers encoded in the instructions). + */ +.macro kimg_pa reg, tmp +alternative_cb kvm_get_kimage_voffset + movz \tmp, #0 + movk \tmp, #0, lsl #16 + movk \tmp, #0, lsl #32 + movk \tmp, #0, lsl #48 +alternative_cb_end + + /* reg = __pa(reg) */ + sub \reg, \reg, \tmp +.endm + #else #include <linux/pgtable.h> @@ -98,6 +120,24 @@ static __always_inline unsigned long __kern_hyp_va(unsigned long v) #define kern_hyp_va(v) ((typeof(v))(__kern_hyp_va((unsigned long)(v)))) +static __always_inline unsigned long __kimg_hyp_va(unsigned long v) +{ + unsigned long offset; + + asm volatile(ALTERNATIVE_CB("movz %0, #0\n" + "movk %0, #0, lsl #16\n" + "movk %0, #0, lsl #32\n" + "movk %0, #0, lsl #48\n", + kvm_update_kimg_phys_offset) + : "=r" (offset)); + + return __kern_hyp_va((v - offset) | PAGE_OFFSET); +} + +#define kimg_fn_hyp_va(v) ((typeof(*v))(__kimg_hyp_va((unsigned long)(v)))) + +#define kimg_fn_ptr(x) (typeof(x) **)(x) + /* * We currently support using a VM-specified IPA size. For backward * compatibility, the default IPA size is fixed to 40bits. @@ -208,52 +248,6 @@ static inline int kvm_write_guest_lock(struct kvm *kvm, gpa_t gpa, return ret; } -/* - * EL2 vectors can be mapped and rerouted in a number of ways, - * depending on the kernel configuration and CPU present: - * - * - If the CPU is affected by Spectre-v2, the hardening sequence is - * placed in one of the vector slots, which is executed before jumping - * to the real vectors. - * - * - If the CPU also has the ARM64_HARDEN_EL2_VECTORS cap, the slot - * containing the hardening sequence is mapped next to the idmap page, - * and executed before jumping to the real vectors. - * - * - If the CPU only has the ARM64_HARDEN_EL2_VECTORS cap, then an - * empty slot is selected, mapped next to the idmap page, and - * executed before jumping to the real vectors. - * - * Note that ARM64_HARDEN_EL2_VECTORS is somewhat incompatible with - * VHE, as we don't have hypervisor-specific mappings. If the system - * is VHE and yet selects this capability, it will be ignored. - */ -extern void *__kvm_bp_vect_base; -extern int __kvm_harden_el2_vector_slot; - -static inline void *kvm_get_hyp_vector(void) -{ - struct bp_hardening_data *data = arm64_get_bp_hardening_data(); - void *vect = kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector)); - int slot = -1; - - if (cpus_have_const_cap(ARM64_SPECTRE_V2) && data->fn) { - vect = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs)); - slot = data->hyp_vectors_slot; - } - - if (this_cpu_has_cap(ARM64_HARDEN_EL2_VECTORS) && !has_vhe()) { - vect = __kvm_bp_vect_base; - if (slot == -1) - slot = __kvm_harden_el2_vector_slot; - } - - if (slot != -1) - vect += slot * SZ_2K; - - return vect; -} - #define kvm_phys_to_vttbr(addr) phys_to_ttbr(addr) static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu) diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index b2e91c187e2a..75beffe2ee8a 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -12,9 +12,6 @@ #define USER_ASID_FLAG (UL(1) << USER_ASID_BIT) #define TTBR_ASID_MASK (UL(0xffff) << 48) -#define BP_HARDEN_EL2_SLOTS 4 -#define __BP_HARDEN_HYP_VECS_SZ (BP_HARDEN_EL2_SLOTS * SZ_2K) - #ifndef __ASSEMBLY__ #include <linux/refcount.h> @@ -41,32 +38,6 @@ static inline bool arm64_kernel_unmapped_at_el0(void) return cpus_have_const_cap(ARM64_UNMAP_KERNEL_AT_EL0); } -typedef void (*bp_hardening_cb_t)(void); - -struct bp_hardening_data { - int hyp_vectors_slot; - bp_hardening_cb_t fn; -}; - -DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); - -static inline struct bp_hardening_data *arm64_get_bp_hardening_data(void) -{ - return this_cpu_ptr(&bp_hardening_data); -} - -static inline void arm64_apply_bp_hardening(void) -{ - struct bp_hardening_data *d; - - if (!cpus_have_const_cap(ARM64_SPECTRE_V2)) - return; - - d = arm64_get_bp_hardening_data(); - if (d->fn) - d->fn(); -} - extern void arm64_memblock_init(void); extern void paging_init(void); extern void bootmem_init(void); diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h index fcdfbce302bd..f62ca39da6c5 100644 --- a/arch/arm64/include/asm/spectre.h +++ b/arch/arm64/include/asm/spectre.h @@ -9,7 +9,15 @@ #ifndef __ASM_SPECTRE_H #define __ASM_SPECTRE_H +#define BP_HARDEN_EL2_SLOTS 4 +#define __BP_HARDEN_HYP_VECS_SZ ((BP_HARDEN_EL2_SLOTS - 1) * SZ_2K) + +#ifndef __ASSEMBLY__ + +#include <linux/percpu.h> + #include <asm/cpufeature.h> +#include <asm/virt.h> /* Watch out, ordering is important here. */ enum mitigation_state { @@ -20,13 +28,70 @@ enum mitigation_state { struct task_struct; +/* + * Note: the order of this enum corresponds to __bp_harden_hyp_vecs and + * we rely on having the direct vectors first. + */ +enum arm64_hyp_spectre_vector { + /* + * Take exceptions directly to __kvm_hyp_vector. This must be + * 0 so that it used by default when mitigations are not needed. + */ + HYP_VECTOR_DIRECT, + + /* + * Bounce via a slot in the hypervisor text mapping of + * __bp_harden_hyp_vecs, which contains an SMC call. + */ + HYP_VECTOR_SPECTRE_DIRECT, + + /* + * Bounce via a slot in a special mapping of __bp_harden_hyp_vecs + * next to the idmap page. + */ + HYP_VECTOR_INDIRECT, + + /* + * Bounce via a slot in a special mapping of __bp_harden_hyp_vecs + * next to the idmap page, which contains an SMC call. + */ + HYP_VECTOR_SPECTRE_INDIRECT, +}; + +typedef void (*bp_hardening_cb_t)(void); + +struct bp_hardening_data { + enum arm64_hyp_spectre_vector slot; + bp_hardening_cb_t fn; +}; + +DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); + +static inline void arm64_apply_bp_hardening(void) +{ + struct bp_hardening_data *d; + + if (!cpus_have_const_cap(ARM64_SPECTRE_V2)) + return; + + d = this_cpu_ptr(&bp_hardening_data); + if (d->fn) + d->fn(); +} + enum mitigation_state arm64_get_spectre_v2_state(void); bool has_spectre_v2(const struct arm64_cpu_capabilities *cap, int scope); void spectre_v2_enable_mitigation(const struct arm64_cpu_capabilities *__unused); +bool has_spectre_v3a(const struct arm64_cpu_capabilities *cap, int scope); +void spectre_v3a_enable_mitigation(const struct arm64_cpu_capabilities *__unused); + enum mitigation_state arm64_get_spectre_v4_state(void); bool has_spectre_v4(const struct arm64_cpu_capabilities *cap, int scope); void spectre_v4_enable_mitigation(const struct arm64_cpu_capabilities *__unused); void spectre_v4_enable_task_mitigation(struct task_struct *tsk); +enum mitigation_state arm64_get_meltdown_state(void); + +#endif /* __ASSEMBLY__ */ #endif /* __ASM_SPECTRE_H */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 174817ba119c..500efe405b48 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -372,6 +372,8 @@ #define SYS_CONTEXTIDR_EL1 sys_reg(3, 0, 13, 0, 1) #define SYS_TPIDR_EL1 sys_reg(3, 0, 13, 0, 4) +#define SYS_SCXTNUM_EL1 sys_reg(3, 0, 13, 0, 7) + #define SYS_CNTKCTL_EL1 sys_reg(3, 0, 14, 1, 0) #define SYS_CCSIDR_EL1 sys_reg(3, 1, 0, 0, 0) @@ -404,6 +406,8 @@ #define SYS_TPIDR_EL0 sys_reg(3, 3, 13, 0, 2) #define SYS_TPIDRRO_EL0 sys_reg(3, 3, 13, 0, 3) +#define SYS_SCXTNUM_EL0 sys_reg(3, 3, 13, 0, 7) + /* Definitions for system register interface to AMU for ARMv8.4 onwards */ #define SYS_AM_EL0(crm, op2) sys_reg(3, 3, 13, (crm), (op2)) #define SYS_AMCR_EL0 SYS_AM_EL0(2, 0) @@ -461,6 +465,7 @@ #define SYS_PMCCFILTR_EL0 sys_reg(3, 3, 14, 15, 7) +#define SYS_SCTLR_EL2 sys_reg(3, 4, 1, 0, 0) #define SYS_ZCR_EL2 sys_reg(3, 4, 1, 2, 0) #define SYS_DACR32_EL2 sys_reg(3, 4, 3, 0, 0) #define SYS_SPSR_EL2 sys_reg(3, 4, 4, 0, 0) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 61314fd70f13..a63428301f42 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -196,16 +196,6 @@ has_neoverse_n1_erratum_1542419(const struct arm64_cpu_capabilities *entry, return is_midr_in_range(midr, &range) && has_dic; } -#ifdef CONFIG_RANDOMIZE_BASE - -static const struct midr_range ca57_a72[] = { - MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), - MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), - {}, -}; - -#endif - #ifdef CONFIG_ARM64_WORKAROUND_REPEAT_TLBI static const struct arm64_cpu_capabilities arm64_repeat_tlbi_list[] = { #ifdef CONFIG_QCOM_FALKOR_ERRATUM_1009 @@ -299,6 +289,8 @@ static const struct midr_range erratum_845719_list[] = { MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 4), /* Brahma-B53 r0p[0] */ MIDR_REV(MIDR_BRAHMA_B53, 0, 0), + /* Kryo2XX Silver rAp4 */ + MIDR_REV(MIDR_QCOM_KRYO_2XX_SILVER, 0xa, 0x4), {}, }; #endif @@ -459,9 +451,12 @@ const struct arm64_cpu_capabilities arm64_errata[] = { }, #ifdef CONFIG_RANDOMIZE_BASE { - .desc = "EL2 vector hardening", - .capability = ARM64_HARDEN_EL2_VECTORS, - ERRATA_MIDR_RANGE_LIST(ca57_a72), + /* Must come after the Spectre-v2 entry */ + .desc = "Spectre-v3a", + .capability = ARM64_SPECTRE_V3A, + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + .matches = has_spectre_v3a, + .cpu_enable = spectre_v3a_enable_mitigation, }, #endif { diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index dcc165b3fc04..280b10762f6b 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1337,6 +1337,8 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), MIDR_ALL_VERSIONS(MIDR_HISI_TSV110), MIDR_ALL_VERSIONS(MIDR_NVIDIA_CARMEL), + MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_2XX_GOLD), + MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_2XX_SILVER), MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_3XX_SILVER), MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_SILVER), { /* sentinel */ } @@ -2844,14 +2846,28 @@ static int __init enable_mrs_emulation(void) core_initcall(enable_mrs_emulation); +enum mitigation_state arm64_get_meltdown_state(void) +{ + if (__meltdown_safe) + return SPECTRE_UNAFFECTED; + + if (arm64_kernel_unmapped_at_el0()) + return SPECTRE_MITIGATED; + + return SPECTRE_VULNERABLE; +} + ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) { - if (__meltdown_safe) + switch (arm64_get_meltdown_state()) { + case SPECTRE_UNAFFECTED: return sprintf(buf, "Not affected\n"); - if (arm64_kernel_unmapped_at_el0()) + case SPECTRE_MITIGATED: return sprintf(buf, "Mitigation: PTI\n"); - return sprintf(buf, "Vulnerable\n"); + default: + return sprintf(buf, "Vulnerable\n"); + } } diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index c615b285ff5b..4b32588918d9 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -64,13 +64,12 @@ __efistub__ctype = _ctype; /* Alternative callbacks for init-time patching of nVHE hyp code. */ KVM_NVHE_ALIAS(kvm_patch_vector_branch); KVM_NVHE_ALIAS(kvm_update_va_mask); +KVM_NVHE_ALIAS(kvm_update_kimg_phys_offset); +KVM_NVHE_ALIAS(kvm_get_kimage_voffset); /* Global kernel state accessed by nVHE hyp code. */ KVM_NVHE_ALIAS(kvm_vgic_global_state); -/* Kernel constant needed to compute idmap addresses. */ -KVM_NVHE_ALIAS(kimage_voffset); - /* Kernel symbols used to call panic() from nVHE hyp code (via ERET). */ KVM_NVHE_ALIAS(__hyp_panic_string); KVM_NVHE_ALIAS(panic); diff --git a/arch/arm64/kernel/kexec_image.c b/arch/arm64/kernel/kexec_image.c index 66adee8b5fc8..9ec34690e255 100644 --- a/arch/arm64/kernel/kexec_image.c +++ b/arch/arm64/kernel/kexec_image.c @@ -127,7 +127,7 @@ static void *image_load(struct kimage *image, kernel_segment->mem, kbuf.bufsz, kernel_segment->memsz); - return 0; + return NULL; } #ifdef CONFIG_KEXEC_IMAGE_VERIFY_SIG diff --git a/arch/arm64/kernel/perf_regs.c b/arch/arm64/kernel/perf_regs.c index 94e8718e7229..f6f58e6265df 100644 --- a/arch/arm64/kernel/perf_regs.c +++ b/arch/arm64/kernel/perf_regs.c @@ -73,8 +73,7 @@ u64 perf_reg_abi(struct task_struct *task) } void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy) + struct pt_regs *regs) { regs_user->regs = task_pt_regs(current); regs_user->abi = perf_reg_abi(current); diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 4784011cecac..a47a40ec6ad9 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -522,14 +522,13 @@ static void erratum_1418040_thread_switch(struct task_struct *prev, bool prev32, next32; u64 val; - if (!(IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040) && - cpus_have_const_cap(ARM64_WORKAROUND_1418040))) + if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040)) return; prev32 = is_compat_thread(task_thread_info(prev)); next32 = is_compat_thread(task_thread_info(next)); - if (prev32 == next32) + if (prev32 == next32 || !this_cpu_has_cap(ARM64_WORKAROUND_1418040)) return; val = read_sysreg(cntkctl_el1); diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index c18eb7d41274..fca03648e270 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Handle detection, reporting and mitigation of Spectre v1, v2 and v4, as + * Handle detection, reporting and mitigation of Spectre v1, v2, v3a and v4, as * detailed at: * * https://developer.arm.com/support/arm-security-updates/speculative-processor-vulnerability @@ -26,6 +26,7 @@ #include <asm/spectre.h> #include <asm/traps.h> +#include <asm/virt.h> /* * We try to ensure that the mitigation state can never change as the result of @@ -118,6 +119,7 @@ static enum mitigation_state spectre_v2_get_cpu_hw_mitigation_state(void) MIDR_ALL_VERSIONS(MIDR_CORTEX_A55), MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53), MIDR_ALL_VERSIONS(MIDR_HISI_TSV110), + MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_2XX_SILVER), MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_3XX_SILVER), MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_SILVER), { /* sentinel */ } @@ -169,72 +171,26 @@ bool has_spectre_v2(const struct arm64_cpu_capabilities *entry, int scope) return true; } -DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); - enum mitigation_state arm64_get_spectre_v2_state(void) { return spectre_v2_state; } -#ifdef CONFIG_KVM -#include <asm/cacheflush.h> -#include <asm/kvm_asm.h> - -atomic_t arm64_el2_vector_last_slot = ATOMIC_INIT(-1); - -static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start, - const char *hyp_vecs_end) -{ - void *dst = lm_alias(__bp_harden_hyp_vecs + slot * SZ_2K); - int i; - - for (i = 0; i < SZ_2K; i += 0x80) - memcpy(dst + i, hyp_vecs_start, hyp_vecs_end - hyp_vecs_start); - - __flush_icache_range((uintptr_t)dst, (uintptr_t)dst + SZ_2K); -} +DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); static void install_bp_hardening_cb(bp_hardening_cb_t fn) { - static DEFINE_RAW_SPINLOCK(bp_lock); - int cpu, slot = -1; - const char *hyp_vecs_start = __smccc_workaround_1_smc; - const char *hyp_vecs_end = __smccc_workaround_1_smc + - __SMCCC_WORKAROUND_1_SMC_SZ; + __this_cpu_write(bp_hardening_data.fn, fn); /* * Vinz Clortho takes the hyp_vecs start/end "keys" at * the door when we're a guest. Skip the hyp-vectors work. */ - if (!is_hyp_mode_available()) { - __this_cpu_write(bp_hardening_data.fn, fn); + if (!is_hyp_mode_available()) return; - } - - raw_spin_lock(&bp_lock); - for_each_possible_cpu(cpu) { - if (per_cpu(bp_hardening_data.fn, cpu) == fn) { - slot = per_cpu(bp_hardening_data.hyp_vectors_slot, cpu); - break; - } - } - - if (slot == -1) { - slot = atomic_inc_return(&arm64_el2_vector_last_slot); - BUG_ON(slot >= BP_HARDEN_EL2_SLOTS); - __copy_hyp_vect_bpi(slot, hyp_vecs_start, hyp_vecs_end); - } - __this_cpu_write(bp_hardening_data.hyp_vectors_slot, slot); - __this_cpu_write(bp_hardening_data.fn, fn); - raw_spin_unlock(&bp_lock); -} -#else -static void install_bp_hardening_cb(bp_hardening_cb_t fn) -{ - __this_cpu_write(bp_hardening_data.fn, fn); + __this_cpu_write(bp_hardening_data.slot, HYP_VECTOR_SPECTRE_DIRECT); } -#endif /* CONFIG_KVM */ static void call_smc_arch_workaround_1(void) { @@ -316,6 +272,33 @@ void spectre_v2_enable_mitigation(const struct arm64_cpu_capabilities *__unused) } /* + * Spectre-v3a. + * + * Phew, there's not an awful lot to do here! We just instruct EL2 to use + * an indirect trampoline for the hyp vectors so that guests can't read + * VBAR_EL2 to defeat randomisation of the hypervisor VA layout. + */ +bool has_spectre_v3a(const struct arm64_cpu_capabilities *entry, int scope) +{ + static const struct midr_range spectre_v3a_unsafe_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), + {}, + }; + + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + return is_midr_in_range_list(read_cpuid_id(), spectre_v3a_unsafe_list); +} + +void spectre_v3a_enable_mitigation(const struct arm64_cpu_capabilities *__unused) +{ + struct bp_hardening_data *data = this_cpu_ptr(&bp_hardening_data); + + if (this_cpu_has_cap(ARM64_SPECTRE_V3A)) + data->slot += HYP_VECTOR_INDIRECT; +} + +/* * Spectre v4. * * If you thought Spectre v2 was nasty, wait until you see this mess. A CPU is diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index 43ae4e0c968f..62d2bda7adb8 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -66,7 +66,6 @@ static int cpu_psci_cpu_disable(unsigned int cpu) static void cpu_psci_cpu_die(unsigned int cpu) { - int ret; /* * There are no known implementations of PSCI actually using the * power state field, pass a sensible default for now. @@ -74,9 +73,7 @@ static void cpu_psci_cpu_die(unsigned int cpu) u32 state = PSCI_POWER_STATE_TYPE_POWER_DOWN << PSCI_0_2_POWER_STATE_TYPE_SHIFT; - ret = psci_ops.cpu_off(state); - - pr_crit("unable to power off CPU%u (%d)\n", cpu, ret); + psci_ops.cpu_off(state); } static int cpu_psci_cpu_kill(unsigned int cpu) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 09c96f57818c..18e9727d3f64 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -413,6 +413,7 @@ void cpu_die_early(void) /* Mark this CPU absent */ set_cpu_present(cpu, 0); + rcu_report_dead(cpu); if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) { update_cpu_boot_status(CPU_KILL_ME); diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 1504c81fbf5d..60fd181df624 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -13,10 +13,10 @@ obj-$(CONFIG_KVM) += hyp/ kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \ $(KVM)/vfio.o $(KVM)/irqchip.o \ arm.o mmu.o mmio.o psci.o perf.o hypercalls.o pvtime.o \ - inject_fault.o regmap.o va_layout.o handle_exit.o \ + inject_fault.o va_layout.o handle_exit.o \ guest.o debug.o reset.o sys_regs.o \ vgic-sys-reg-v3.o fpsimd.o pmu.o \ - aarch32.o arch_timer.o \ + arch_timer.o \ vgic/vgic.o vgic/vgic-init.o \ vgic/vgic-irqfd.o vgic/vgic-v2.o \ vgic/vgic-v3.o vgic/vgic-v4.o \ diff --git a/arch/arm64/kvm/aarch32.c b/arch/arm64/kvm/aarch32.c deleted file mode 100644 index 40a62a99fbf8..000000000000 --- a/arch/arm64/kvm/aarch32.c +++ /dev/null @@ -1,232 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * (not much of an) Emulation layer for 32bit guests. - * - * Copyright (C) 2012,2013 - ARM Ltd - * Author: Marc Zyngier <marc.zyngier@arm.com> - * - * based on arch/arm/kvm/emulate.c - * Copyright (C) 2012 - Virtual Open Systems and Columbia University - * Author: Christoffer Dall <c.dall@virtualopensystems.com> - */ - -#include <linux/bits.h> -#include <linux/kvm_host.h> -#include <asm/kvm_emulate.h> -#include <asm/kvm_hyp.h> - -#define DFSR_FSC_EXTABT_LPAE 0x10 -#define DFSR_FSC_EXTABT_nLPAE 0x08 -#define DFSR_LPAE BIT(9) - -/* - * Table taken from ARMv8 ARM DDI0487B-B, table G1-10. - */ -static const u8 return_offsets[8][2] = { - [0] = { 0, 0 }, /* Reset, unused */ - [1] = { 4, 2 }, /* Undefined */ - [2] = { 0, 0 }, /* SVC, unused */ - [3] = { 4, 4 }, /* Prefetch abort */ - [4] = { 8, 8 }, /* Data abort */ - [5] = { 0, 0 }, /* HVC, unused */ - [6] = { 4, 4 }, /* IRQ, unused */ - [7] = { 4, 4 }, /* FIQ, unused */ -}; - -static bool pre_fault_synchronize(struct kvm_vcpu *vcpu) -{ - preempt_disable(); - if (vcpu->arch.sysregs_loaded_on_cpu) { - kvm_arch_vcpu_put(vcpu); - return true; - } - - preempt_enable(); - return false; -} - -static void post_fault_synchronize(struct kvm_vcpu *vcpu, bool loaded) -{ - if (loaded) { - kvm_arch_vcpu_load(vcpu, smp_processor_id()); - preempt_enable(); - } -} - -/* - * When an exception is taken, most CPSR fields are left unchanged in the - * handler. However, some are explicitly overridden (e.g. M[4:0]). - * - * The SPSR/SPSR_ELx layouts differ, and the below is intended to work with - * either format. Note: SPSR.J bit doesn't exist in SPSR_ELx, but this bit was - * obsoleted by the ARMv7 virtualization extensions and is RES0. - * - * For the SPSR layout seen from AArch32, see: - * - ARM DDI 0406C.d, page B1-1148 - * - ARM DDI 0487E.a, page G8-6264 - * - * For the SPSR_ELx layout for AArch32 seen from AArch64, see: - * - ARM DDI 0487E.a, page C5-426 - * - * Here we manipulate the fields in order of the AArch32 SPSR_ELx layout, from - * MSB to LSB. - */ -static unsigned long get_except32_cpsr(struct kvm_vcpu *vcpu, u32 mode) -{ - u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR); - unsigned long old, new; - - old = *vcpu_cpsr(vcpu); - new = 0; - - new |= (old & PSR_AA32_N_BIT); - new |= (old & PSR_AA32_Z_BIT); - new |= (old & PSR_AA32_C_BIT); - new |= (old & PSR_AA32_V_BIT); - new |= (old & PSR_AA32_Q_BIT); - - // CPSR.IT[7:0] are set to zero upon any exception - // See ARM DDI 0487E.a, section G1.12.3 - // See ARM DDI 0406C.d, section B1.8.3 - - new |= (old & PSR_AA32_DIT_BIT); - - // CPSR.SSBS is set to SCTLR.DSSBS upon any exception - // See ARM DDI 0487E.a, page G8-6244 - if (sctlr & BIT(31)) - new |= PSR_AA32_SSBS_BIT; - - // CPSR.PAN is unchanged unless SCTLR.SPAN == 0b0 - // SCTLR.SPAN is RES1 when ARMv8.1-PAN is not implemented - // See ARM DDI 0487E.a, page G8-6246 - new |= (old & PSR_AA32_PAN_BIT); - if (!(sctlr & BIT(23))) - new |= PSR_AA32_PAN_BIT; - - // SS does not exist in AArch32, so ignore - - // CPSR.IL is set to zero upon any exception - // See ARM DDI 0487E.a, page G1-5527 - - new |= (old & PSR_AA32_GE_MASK); - - // CPSR.IT[7:0] are set to zero upon any exception - // See prior comment above - - // CPSR.E is set to SCTLR.EE upon any exception - // See ARM DDI 0487E.a, page G8-6245 - // See ARM DDI 0406C.d, page B4-1701 - if (sctlr & BIT(25)) - new |= PSR_AA32_E_BIT; - - // CPSR.A is unchanged upon an exception to Undefined, Supervisor - // CPSR.A is set upon an exception to other modes - // See ARM DDI 0487E.a, pages G1-5515 to G1-5516 - // See ARM DDI 0406C.d, page B1-1182 - new |= (old & PSR_AA32_A_BIT); - if (mode != PSR_AA32_MODE_UND && mode != PSR_AA32_MODE_SVC) - new |= PSR_AA32_A_BIT; - - // CPSR.I is set upon any exception - // See ARM DDI 0487E.a, pages G1-5515 to G1-5516 - // See ARM DDI 0406C.d, page B1-1182 - new |= PSR_AA32_I_BIT; - - // CPSR.F is set upon an exception to FIQ - // CPSR.F is unchanged upon an exception to other modes - // See ARM DDI 0487E.a, pages G1-5515 to G1-5516 - // See ARM DDI 0406C.d, page B1-1182 - new |= (old & PSR_AA32_F_BIT); - if (mode == PSR_AA32_MODE_FIQ) - new |= PSR_AA32_F_BIT; - - // CPSR.T is set to SCTLR.TE upon any exception - // See ARM DDI 0487E.a, page G8-5514 - // See ARM DDI 0406C.d, page B1-1181 - if (sctlr & BIT(30)) - new |= PSR_AA32_T_BIT; - - new |= mode; - - return new; -} - -static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset) -{ - unsigned long spsr = *vcpu_cpsr(vcpu); - bool is_thumb = (spsr & PSR_AA32_T_BIT); - u32 return_offset = return_offsets[vect_offset >> 2][is_thumb]; - u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR); - - *vcpu_cpsr(vcpu) = get_except32_cpsr(vcpu, mode); - - /* Note: These now point to the banked copies */ - vcpu_write_spsr(vcpu, host_spsr_to_spsr32(spsr)); - *vcpu_reg32(vcpu, 14) = *vcpu_pc(vcpu) + return_offset; - - /* Branch to exception vector */ - if (sctlr & (1 << 13)) - vect_offset += 0xffff0000; - else /* always have security exceptions */ - vect_offset += vcpu_cp15(vcpu, c12_VBAR); - - *vcpu_pc(vcpu) = vect_offset; -} - -void kvm_inject_undef32(struct kvm_vcpu *vcpu) -{ - bool loaded = pre_fault_synchronize(vcpu); - - prepare_fault32(vcpu, PSR_AA32_MODE_UND, 4); - post_fault_synchronize(vcpu, loaded); -} - -/* - * Modelled after TakeDataAbortException() and TakePrefetchAbortException - * pseudocode. - */ -static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, - unsigned long addr) -{ - u32 vect_offset; - u32 *far, *fsr; - bool is_lpae; - bool loaded; - - loaded = pre_fault_synchronize(vcpu); - - if (is_pabt) { - vect_offset = 12; - far = &vcpu_cp15(vcpu, c6_IFAR); - fsr = &vcpu_cp15(vcpu, c5_IFSR); - } else { /* !iabt */ - vect_offset = 16; - far = &vcpu_cp15(vcpu, c6_DFAR); - fsr = &vcpu_cp15(vcpu, c5_DFSR); - } - - prepare_fault32(vcpu, PSR_AA32_MODE_ABT, vect_offset); - - *far = addr; - - /* Give the guest an IMPLEMENTATION DEFINED exception */ - is_lpae = (vcpu_cp15(vcpu, c2_TTBCR) >> 31); - if (is_lpae) { - *fsr = DFSR_LPAE | DFSR_FSC_EXTABT_LPAE; - } else { - /* no need to shuffle FS[4] into DFSR[10] as its 0 */ - *fsr = DFSR_FSC_EXTABT_nLPAE; - } - - post_fault_synchronize(vcpu, loaded); -} - -void kvm_inject_dabt32(struct kvm_vcpu *vcpu, unsigned long addr) -{ - inject_abt32(vcpu, false, addr); -} - -void kvm_inject_pabt32(struct kvm_vcpu *vcpu, unsigned long addr) -{ - inject_abt32(vcpu, true, addr); -} diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 97a9332f12cc..7e86207fa2fc 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -35,7 +35,6 @@ #include <asm/kvm_asm.h> #include <asm/kvm_mmu.h> #include <asm/kvm_emulate.h> -#include <asm/kvm_coproc.h> #include <asm/sections.h> #include <kvm/arm_hypercalls.h> @@ -102,6 +101,22 @@ static int kvm_arm_default_max_vcpus(void) return vgic_present ? kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS; } +static void set_default_spectre(struct kvm *kvm) +{ + /* + * The default is to expose CSV2 == 1 if the HW isn't affected. + * Although this is a per-CPU feature, we make it global because + * asymmetric systems are just a nuisance. + * + * Userspace can override this as long as it doesn't promise + * the impossible. + */ + if (arm64_get_spectre_v2_state() == SPECTRE_UNAFFECTED) + kvm->arch.pfr0_csv2 = 1; + if (arm64_get_meltdown_state() == SPECTRE_UNAFFECTED) + kvm->arch.pfr0_csv3 = 1; +} + /** * kvm_arch_init_vm - initializes a VM data structure * @kvm: pointer to the KVM struct @@ -127,6 +142,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) /* The maximum number of VCPUs is limited by the host's GIC model */ kvm->arch.max_vcpus = kvm_arm_default_max_vcpus(); + set_default_spectre(kvm); + return ret; out_free_stage2_pgd: kvm_free_stage2_pgd(&kvm->arch.mmu); @@ -1322,37 +1339,44 @@ static unsigned long nvhe_percpu_order(void) return size ? get_order(size) : 0; } -static int kvm_map_vectors(void) +/* A lookup table holding the hypervisor VA for each vector slot */ +static void *hyp_spectre_vector_selector[BP_HARDEN_EL2_SLOTS]; + +static int __kvm_vector_slot2idx(enum arm64_hyp_spectre_vector slot) { - /* - * SV2 = ARM64_SPECTRE_V2 - * HEL2 = ARM64_HARDEN_EL2_VECTORS - * - * !SV2 + !HEL2 -> use direct vectors - * SV2 + !HEL2 -> use hardened vectors in place - * !SV2 + HEL2 -> allocate one vector slot and use exec mapping - * SV2 + HEL2 -> use hardened vectors and use exec mapping - */ - if (cpus_have_const_cap(ARM64_SPECTRE_V2)) { - __kvm_bp_vect_base = kvm_ksym_ref(__bp_harden_hyp_vecs); - __kvm_bp_vect_base = kern_hyp_va(__kvm_bp_vect_base); - } + return slot - (slot != HYP_VECTOR_DIRECT); +} - if (cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS)) { - phys_addr_t vect_pa = __pa_symbol(__bp_harden_hyp_vecs); - unsigned long size = __BP_HARDEN_HYP_VECS_SZ; +static void kvm_init_vector_slot(void *base, enum arm64_hyp_spectre_vector slot) +{ + int idx = __kvm_vector_slot2idx(slot); - /* - * Always allocate a spare vector slot, as we don't - * know yet which CPUs have a BP hardening slot that - * we can reuse. - */ - __kvm_harden_el2_vector_slot = atomic_inc_return(&arm64_el2_vector_last_slot); - BUG_ON(__kvm_harden_el2_vector_slot >= BP_HARDEN_EL2_SLOTS); - return create_hyp_exec_mappings(vect_pa, size, - &__kvm_bp_vect_base); + hyp_spectre_vector_selector[slot] = base + (idx * SZ_2K); +} + +static int kvm_init_vector_slots(void) +{ + int err; + void *base; + + base = kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector)); + kvm_init_vector_slot(base, HYP_VECTOR_DIRECT); + + base = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs)); + kvm_init_vector_slot(base, HYP_VECTOR_SPECTRE_DIRECT); + + if (!cpus_have_const_cap(ARM64_SPECTRE_V3A)) + return 0; + + if (!has_vhe()) { + err = create_hyp_exec_mappings(__pa_symbol(__bp_harden_hyp_vecs), + __BP_HARDEN_HYP_VECS_SZ, &base); + if (err) + return err; } + kvm_init_vector_slot(base, HYP_VECTOR_INDIRECT); + kvm_init_vector_slot(base, HYP_VECTOR_SPECTRE_INDIRECT); return 0; } @@ -1407,13 +1431,40 @@ static void cpu_hyp_reset(void) __hyp_reset_vectors(); } +/* + * EL2 vectors can be mapped and rerouted in a number of ways, + * depending on the kernel configuration and CPU present: + * + * - If the CPU is affected by Spectre-v2, the hardening sequence is + * placed in one of the vector slots, which is executed before jumping + * to the real vectors. + * + * - If the CPU also has the ARM64_SPECTRE_V3A cap, the slot + * containing the hardening sequence is mapped next to the idmap page, + * and executed before jumping to the real vectors. + * + * - If the CPU only has the ARM64_SPECTRE_V3A cap, then an + * empty slot is selected, mapped next to the idmap page, and + * executed before jumping to the real vectors. + * + * Note that ARM64_SPECTRE_V3A is somewhat incompatible with + * VHE, as we don't have hypervisor-specific mappings. If the system + * is VHE and yet selects this capability, it will be ignored. + */ +static void cpu_set_hyp_vector(void) +{ + struct bp_hardening_data *data = this_cpu_ptr(&bp_hardening_data); + void *vector = hyp_spectre_vector_selector[data->slot]; + + *this_cpu_ptr_hyp_sym(kvm_hyp_vector) = (unsigned long)vector; +} + static void cpu_hyp_reinit(void) { kvm_init_host_cpu_context(&this_cpu_ptr_hyp_sym(kvm_host_data)->host_ctxt); cpu_hyp_reset(); - - *this_cpu_ptr_hyp_sym(kvm_hyp_vector) = (unsigned long)kvm_get_hyp_vector(); + cpu_set_hyp_vector(); if (is_kernel_in_hyp_mode()) kvm_timer_init_vhe(); @@ -1552,7 +1603,7 @@ static int init_subsystems(void) goto out; kvm_perf_init(); - kvm_coproc_table_init(); + kvm_sys_reg_table_init(); out: on_each_cpu(_kvm_arch_hardware_disable, NULL, 1); @@ -1643,12 +1694,6 @@ static int init_hyp_mode(void) goto out_err; } - err = kvm_map_vectors(); - if (err) { - kvm_err("Cannot map vectors\n"); - goto out_err; - } - /* * Map the Hyp stack pages */ @@ -1792,6 +1837,12 @@ int kvm_arch_init(void *opaque) goto out_err; } + err = kvm_init_vector_slots(); + if (err) { + kvm_err("Cannot initialise vector slots\n"); + goto out_err; + } + err = init_subsystems(); if (err) goto out_hyp; diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index dfb5218137ca..9bbd30e62799 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -24,7 +24,6 @@ #include <asm/fpsimd.h> #include <asm/kvm.h> #include <asm/kvm_emulate.h> -#include <asm/kvm_coproc.h> #include <asm/sigcontext.h> #include "trace.h" @@ -252,10 +251,32 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) memcpy(addr, valp, KVM_REG_SIZE(reg->id)); if (*vcpu_cpsr(vcpu) & PSR_MODE32_BIT) { - int i; + int i, nr_reg; + + switch (*vcpu_cpsr(vcpu)) { + /* + * Either we are dealing with user mode, and only the + * first 15 registers (+ PC) must be narrowed to 32bit. + * AArch32 r0-r14 conveniently map to AArch64 x0-x14. + */ + case PSR_AA32_MODE_USR: + case PSR_AA32_MODE_SYS: + nr_reg = 15; + break; + + /* + * Otherwide, this is a priviledged mode, and *all* the + * registers must be narrowed to 32bit. + */ + default: + nr_reg = 31; + break; + } + + for (i = 0; i < nr_reg; i++) + vcpu_set_reg(vcpu, i, (u32)vcpu_get_reg(vcpu, i)); - for (i = 0; i < 16; i++) - *vcpu_reg32(vcpu, i) = (u32)*vcpu_reg32(vcpu, i); + *vcpu_pc(vcpu) = (u32)*vcpu_pc(vcpu); } out: return err; diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 5d690d60ccad..cebe39f3b1b6 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -14,7 +14,6 @@ #include <asm/esr.h> #include <asm/exception.h> #include <asm/kvm_asm.h> -#include <asm/kvm_coproc.h> #include <asm/kvm_emulate.h> #include <asm/kvm_mmu.h> #include <asm/debug-monitors.h> @@ -61,7 +60,7 @@ static int handle_smc(struct kvm_vcpu *vcpu) * otherwise return to the same address... */ vcpu_set_reg(vcpu, 0, ~0UL); - kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + kvm_incr_pc(vcpu); return 1; } @@ -100,7 +99,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu) kvm_clear_request(KVM_REQ_UNHALT, vcpu); } - kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + kvm_incr_pc(vcpu); return 1; } @@ -221,7 +220,7 @@ static int handle_trap_exceptions(struct kvm_vcpu *vcpu) * that fail their condition code check" */ if (!kvm_condition_valid(vcpu)) { - kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + kvm_incr_pc(vcpu); handled = 1; } else { exit_handle_fn exit_handler; @@ -241,23 +240,6 @@ int handle_exit(struct kvm_vcpu *vcpu, int exception_index) { struct kvm_run *run = vcpu->run; - if (ARM_SERROR_PENDING(exception_index)) { - u8 esr_ec = ESR_ELx_EC(kvm_vcpu_get_esr(vcpu)); - - /* - * HVC/SMC already have an adjusted PC, which we need - * to correct in order to return to after having - * injected the SError. - */ - if (esr_ec == ESR_ELx_EC_HVC32 || esr_ec == ESR_ELx_EC_HVC64 || - esr_ec == ESR_ELx_EC_SMC32 || esr_ec == ESR_ELx_EC_SMC64) { - u32 adj = kvm_vcpu_trap_il_is32bit(vcpu) ? 4 : 2; - *vcpu_pc(vcpu) -= adj; - } - - return 1; - } - exception_index = ARM_EXCEPTION_CODE(exception_index); switch (exception_index) { diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index 4a81eddabcd8..687598e41b21 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -10,4 +10,4 @@ subdir-ccflags-y := -I$(incdir) \ -DDISABLE_BRANCH_PROFILING \ $(DISABLE_STACKLEAK_PLUGIN) -obj-$(CONFIG_KVM) += vhe/ nvhe/ pgtable.o smccc_wa.o +obj-$(CONFIG_KVM) += vhe/ nvhe/ pgtable.o diff --git a/arch/arm64/kvm/hyp/aarch32.c b/arch/arm64/kvm/hyp/aarch32.c index ae56d8a4b382..f98cbe2626a1 100644 --- a/arch/arm64/kvm/hyp/aarch32.c +++ b/arch/arm64/kvm/hyp/aarch32.c @@ -123,13 +123,13 @@ static void kvm_adjust_itstate(struct kvm_vcpu *vcpu) * kvm_skip_instr - skip a trapped instruction and proceed to the next * @vcpu: The vcpu pointer */ -void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr) +void kvm_skip_instr32(struct kvm_vcpu *vcpu) { u32 pc = *vcpu_pc(vcpu); bool is_thumb; is_thumb = !!(*vcpu_cpsr(vcpu) & PSR_AA32_T_BIT); - if (is_thumb && !is_wide_instr) + if (is_thumb && !kvm_vcpu_trap_il_is32bit(vcpu)) pc += 2; else pc += 4; diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c new file mode 100644 index 000000000000..73629094f903 --- /dev/null +++ b/arch/arm64/kvm/hyp/exception.c @@ -0,0 +1,331 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Fault injection for both 32 and 64bit guests. + * + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * Based on arch/arm/kvm/emulate.c + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall <c.dall@virtualopensystems.com> + */ + +#include <hyp/adjust_pc.h> +#include <linux/kvm_host.h> +#include <asm/kvm_emulate.h> + +#if !defined (__KVM_NVHE_HYPERVISOR__) && !defined (__KVM_VHE_HYPERVISOR__) +#error Hypervisor code only! +#endif + +static inline u64 __vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg) +{ + u64 val; + + if (__vcpu_read_sys_reg_from_cpu(reg, &val)) + return val; + + return __vcpu_sys_reg(vcpu, reg); +} + +static inline void __vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) +{ + if (__vcpu_write_sys_reg_to_cpu(val, reg)) + return; + + __vcpu_sys_reg(vcpu, reg) = val; +} + +static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, u64 val) +{ + write_sysreg_el1(val, SYS_SPSR); +} + +static void __vcpu_write_spsr_abt(struct kvm_vcpu *vcpu, u64 val) +{ + if (has_vhe()) + write_sysreg(val, spsr_abt); + else + vcpu->arch.ctxt.spsr_abt = val; +} + +static void __vcpu_write_spsr_und(struct kvm_vcpu *vcpu, u64 val) +{ + if (has_vhe()) + write_sysreg(val, spsr_und); + else + vcpu->arch.ctxt.spsr_und = val; +} + +/* + * This performs the exception entry at a given EL (@target_mode), stashing PC + * and PSTATE into ELR and SPSR respectively, and compute the new PC/PSTATE. + * The EL passed to this function *must* be a non-secure, privileged mode with + * bit 0 being set (PSTATE.SP == 1). + * + * When an exception is taken, most PSTATE fields are left unchanged in the + * handler. However, some are explicitly overridden (e.g. M[4:0]). Luckily all + * of the inherited bits have the same position in the AArch64/AArch32 SPSR_ELx + * layouts, so we don't need to shuffle these for exceptions from AArch32 EL0. + * + * For the SPSR_ELx layout for AArch64, see ARM DDI 0487E.a page C5-429. + * For the SPSR_ELx layout for AArch32, see ARM DDI 0487E.a page C5-426. + * + * Here we manipulate the fields in order of the AArch64 SPSR_ELx layout, from + * MSB to LSB. + */ +static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode, + enum exception_type type) +{ + unsigned long sctlr, vbar, old, new, mode; + u64 exc_offset; + + mode = *vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT); + + if (mode == target_mode) + exc_offset = CURRENT_EL_SP_ELx_VECTOR; + else if ((mode | PSR_MODE_THREAD_BIT) == target_mode) + exc_offset = CURRENT_EL_SP_EL0_VECTOR; + else if (!(mode & PSR_MODE32_BIT)) + exc_offset = LOWER_EL_AArch64_VECTOR; + else + exc_offset = LOWER_EL_AArch32_VECTOR; + + switch (target_mode) { + case PSR_MODE_EL1h: + vbar = __vcpu_read_sys_reg(vcpu, VBAR_EL1); + sctlr = __vcpu_read_sys_reg(vcpu, SCTLR_EL1); + __vcpu_write_sys_reg(vcpu, *vcpu_pc(vcpu), ELR_EL1); + break; + default: + /* Don't do that */ + BUG(); + } + + *vcpu_pc(vcpu) = vbar + exc_offset + type; + + old = *vcpu_cpsr(vcpu); + new = 0; + + new |= (old & PSR_N_BIT); + new |= (old & PSR_Z_BIT); + new |= (old & PSR_C_BIT); + new |= (old & PSR_V_BIT); + + // TODO: TCO (if/when ARMv8.5-MemTag is exposed to guests) + + new |= (old & PSR_DIT_BIT); + + // PSTATE.UAO is set to zero upon any exception to AArch64 + // See ARM DDI 0487E.a, page D5-2579. + + // PSTATE.PAN is unchanged unless SCTLR_ELx.SPAN == 0b0 + // SCTLR_ELx.SPAN is RES1 when ARMv8.1-PAN is not implemented + // See ARM DDI 0487E.a, page D5-2578. + new |= (old & PSR_PAN_BIT); + if (!(sctlr & SCTLR_EL1_SPAN)) + new |= PSR_PAN_BIT; + + // PSTATE.SS is set to zero upon any exception to AArch64 + // See ARM DDI 0487E.a, page D2-2452. + + // PSTATE.IL is set to zero upon any exception to AArch64 + // See ARM DDI 0487E.a, page D1-2306. + + // PSTATE.SSBS is set to SCTLR_ELx.DSSBS upon any exception to AArch64 + // See ARM DDI 0487E.a, page D13-3258 + if (sctlr & SCTLR_ELx_DSSBS) + new |= PSR_SSBS_BIT; + + // PSTATE.BTYPE is set to zero upon any exception to AArch64 + // See ARM DDI 0487E.a, pages D1-2293 to D1-2294. + + new |= PSR_D_BIT; + new |= PSR_A_BIT; + new |= PSR_I_BIT; + new |= PSR_F_BIT; + + new |= target_mode; + + *vcpu_cpsr(vcpu) = new; + __vcpu_write_spsr(vcpu, old); +} + +/* + * When an exception is taken, most CPSR fields are left unchanged in the + * handler. However, some are explicitly overridden (e.g. M[4:0]). + * + * The SPSR/SPSR_ELx layouts differ, and the below is intended to work with + * either format. Note: SPSR.J bit doesn't exist in SPSR_ELx, but this bit was + * obsoleted by the ARMv7 virtualization extensions and is RES0. + * + * For the SPSR layout seen from AArch32, see: + * - ARM DDI 0406C.d, page B1-1148 + * - ARM DDI 0487E.a, page G8-6264 + * + * For the SPSR_ELx layout for AArch32 seen from AArch64, see: + * - ARM DDI 0487E.a, page C5-426 + * + * Here we manipulate the fields in order of the AArch32 SPSR_ELx layout, from + * MSB to LSB. + */ +static unsigned long get_except32_cpsr(struct kvm_vcpu *vcpu, u32 mode) +{ + u32 sctlr = __vcpu_read_sys_reg(vcpu, SCTLR_EL1); + unsigned long old, new; + + old = *vcpu_cpsr(vcpu); + new = 0; + + new |= (old & PSR_AA32_N_BIT); + new |= (old & PSR_AA32_Z_BIT); + new |= (old & PSR_AA32_C_BIT); + new |= (old & PSR_AA32_V_BIT); + new |= (old & PSR_AA32_Q_BIT); + + // CPSR.IT[7:0] are set to zero upon any exception + // See ARM DDI 0487E.a, section G1.12.3 + // See ARM DDI 0406C.d, section B1.8.3 + + new |= (old & PSR_AA32_DIT_BIT); + + // CPSR.SSBS is set to SCTLR.DSSBS upon any exception + // See ARM DDI 0487E.a, page G8-6244 + if (sctlr & BIT(31)) + new |= PSR_AA32_SSBS_BIT; + + // CPSR.PAN is unchanged unless SCTLR.SPAN == 0b0 + // SCTLR.SPAN is RES1 when ARMv8.1-PAN is not implemented + // See ARM DDI 0487E.a, page G8-6246 + new |= (old & PSR_AA32_PAN_BIT); + if (!(sctlr & BIT(23))) + new |= PSR_AA32_PAN_BIT; + + // SS does not exist in AArch32, so ignore + + // CPSR.IL is set to zero upon any exception + // See ARM DDI 0487E.a, page G1-5527 + + new |= (old & PSR_AA32_GE_MASK); + + // CPSR.IT[7:0] are set to zero upon any exception + // See prior comment above + + // CPSR.E is set to SCTLR.EE upon any exception + // See ARM DDI 0487E.a, page G8-6245 + // See ARM DDI 0406C.d, page B4-1701 + if (sctlr & BIT(25)) + new |= PSR_AA32_E_BIT; + + // CPSR.A is unchanged upon an exception to Undefined, Supervisor + // CPSR.A is set upon an exception to other modes + // See ARM DDI 0487E.a, pages G1-5515 to G1-5516 + // See ARM DDI 0406C.d, page B1-1182 + new |= (old & PSR_AA32_A_BIT); + if (mode != PSR_AA32_MODE_UND && mode != PSR_AA32_MODE_SVC) + new |= PSR_AA32_A_BIT; + + // CPSR.I is set upon any exception + // See ARM DDI 0487E.a, pages G1-5515 to G1-5516 + // See ARM DDI 0406C.d, page B1-1182 + new |= PSR_AA32_I_BIT; + + // CPSR.F is set upon an exception to FIQ + // CPSR.F is unchanged upon an exception to other modes + // See ARM DDI 0487E.a, pages G1-5515 to G1-5516 + // See ARM DDI 0406C.d, page B1-1182 + new |= (old & PSR_AA32_F_BIT); + if (mode == PSR_AA32_MODE_FIQ) + new |= PSR_AA32_F_BIT; + + // CPSR.T is set to SCTLR.TE upon any exception + // See ARM DDI 0487E.a, page G8-5514 + // See ARM DDI 0406C.d, page B1-1181 + if (sctlr & BIT(30)) + new |= PSR_AA32_T_BIT; + + new |= mode; + + return new; +} + +/* + * Table taken from ARMv8 ARM DDI0487B-B, table G1-10. + */ +static const u8 return_offsets[8][2] = { + [0] = { 0, 0 }, /* Reset, unused */ + [1] = { 4, 2 }, /* Undefined */ + [2] = { 0, 0 }, /* SVC, unused */ + [3] = { 4, 4 }, /* Prefetch abort */ + [4] = { 8, 8 }, /* Data abort */ + [5] = { 0, 0 }, /* HVC, unused */ + [6] = { 4, 4 }, /* IRQ, unused */ + [7] = { 4, 4 }, /* FIQ, unused */ +}; + +static void enter_exception32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset) +{ + unsigned long spsr = *vcpu_cpsr(vcpu); + bool is_thumb = (spsr & PSR_AA32_T_BIT); + u32 sctlr = __vcpu_read_sys_reg(vcpu, SCTLR_EL1); + u32 return_address; + + *vcpu_cpsr(vcpu) = get_except32_cpsr(vcpu, mode); + return_address = *vcpu_pc(vcpu); + return_address += return_offsets[vect_offset >> 2][is_thumb]; + + /* KVM only enters the ABT and UND modes, so only deal with those */ + switch(mode) { + case PSR_AA32_MODE_ABT: + __vcpu_write_spsr_abt(vcpu, host_spsr_to_spsr32(spsr)); + vcpu_gp_regs(vcpu)->compat_lr_abt = return_address; + break; + + case PSR_AA32_MODE_UND: + __vcpu_write_spsr_und(vcpu, host_spsr_to_spsr32(spsr)); + vcpu_gp_regs(vcpu)->compat_lr_und = return_address; + break; + } + + /* Branch to exception vector */ + if (sctlr & (1 << 13)) + vect_offset += 0xffff0000; + else /* always have security exceptions */ + vect_offset += __vcpu_read_sys_reg(vcpu, VBAR_EL1); + + *vcpu_pc(vcpu) = vect_offset; +} + +void kvm_inject_exception(struct kvm_vcpu *vcpu) +{ + if (vcpu_el1_is_32bit(vcpu)) { + switch (vcpu->arch.flags & KVM_ARM64_EXCEPT_MASK) { + case KVM_ARM64_EXCEPT_AA32_UND: + enter_exception32(vcpu, PSR_AA32_MODE_UND, 4); + break; + case KVM_ARM64_EXCEPT_AA32_IABT: + enter_exception32(vcpu, PSR_AA32_MODE_ABT, 12); + break; + case KVM_ARM64_EXCEPT_AA32_DABT: + enter_exception32(vcpu, PSR_AA32_MODE_ABT, 16); + break; + default: + /* Err... */ + break; + } + } else { + switch (vcpu->arch.flags & KVM_ARM64_EXCEPT_MASK) { + case (KVM_ARM64_EXCEPT_AA64_ELx_SYNC | + KVM_ARM64_EXCEPT_AA64_EL1): + enter_exception64(vcpu, PSR_MODE_EL1h, except_type_sync); + break; + default: + /* + * Only EL1_SYNC makes sense so far, EL2_{SYNC,IRQ} + * will be implemented at some point. Everything + * else gets silently ignored. + */ + break; + } + } +} diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index 0a5b36eb54b3..d179056e1af8 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -13,6 +13,7 @@ #include <asm/kvm_arm.h> #include <asm/kvm_asm.h> #include <asm/mmu.h> +#include <asm/spectre.h> .macro save_caller_saved_regs_vect /* x0 and x1 were saved in the vector entry */ @@ -187,52 +188,60 @@ SYM_CODE_START(__kvm_hyp_vector) valid_vect el1_error // Error 32-bit EL1 SYM_CODE_END(__kvm_hyp_vector) -.macro hyp_ventry - .align 7 +.macro spectrev2_smccc_wa1_smc + sub sp, sp, #(8 * 4) + stp x2, x3, [sp, #(8 * 0)] + stp x0, x1, [sp, #(8 * 2)] + mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1 + smc #0 + ldp x2, x3, [sp, #(8 * 0)] + add sp, sp, #(8 * 2) +.endm + +.macro hyp_ventry indirect, spectrev2 + .align 7 1: esb - .rept 26 - nop - .endr -/* - * The default sequence is to directly branch to the KVM vectors, - * using the computed offset. This applies for VHE as well as - * !ARM64_HARDEN_EL2_VECTORS. The first vector must always run the preamble. - * - * For ARM64_HARDEN_EL2_VECTORS configurations, this gets replaced - * with: - * - * stp x0, x1, [sp, #-16]! - * movz x0, #(addr & 0xffff) - * movk x0, #((addr >> 16) & 0xffff), lsl #16 - * movk x0, #((addr >> 32) & 0xffff), lsl #32 - * br x0 - * - * Where: - * addr = kern_hyp_va(__kvm_hyp_vector) + vector-offset + KVM_VECTOR_PREAMBLE. - * See kvm_patch_vector_branch for details. - */ -alternative_cb kvm_patch_vector_branch + .if \spectrev2 != 0 + spectrev2_smccc_wa1_smc + .else stp x0, x1, [sp, #-16]! - b __kvm_hyp_vector + (1b - 0b + KVM_VECTOR_PREAMBLE) + .endif + .if \indirect != 0 + alternative_cb kvm_patch_vector_branch + /* + * For ARM64_SPECTRE_V3A configurations, these NOPs get replaced with: + * + * movz x0, #(addr & 0xffff) + * movk x0, #((addr >> 16) & 0xffff), lsl #16 + * movk x0, #((addr >> 32) & 0xffff), lsl #32 + * br x0 + * + * Where: + * addr = kern_hyp_va(__kvm_hyp_vector) + vector-offset + KVM_VECTOR_PREAMBLE. + * See kvm_patch_vector_branch for details. + */ nop nop nop -alternative_cb_end + nop + alternative_cb_end + .endif + b __kvm_hyp_vector + (1b - 0b + KVM_VECTOR_PREAMBLE) .endm -.macro generate_vectors +.macro generate_vectors indirect, spectrev2 0: .rept 16 - hyp_ventry + hyp_ventry \indirect, \spectrev2 .endr .org 0b + SZ_2K // Safety measure .endm .align 11 SYM_CODE_START(__bp_harden_hyp_vecs) - .rept BP_HARDEN_EL2_SLOTS - generate_vectors - .endr + generate_vectors indirect = 0, spectrev2 = 1 // HYP_VECTOR_SPECTRE_DIRECT + generate_vectors indirect = 1, spectrev2 = 0 // HYP_VECTOR_INDIRECT + generate_vectors indirect = 1, spectrev2 = 1 // HYP_VECTOR_SPECTRE_INDIRECT 1: .org __bp_harden_hyp_vecs + __BP_HARDEN_HYP_VECS_SZ .org 1b SYM_CODE_END(__bp_harden_hyp_vecs) diff --git a/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h b/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h new file mode 100644 index 000000000000..b1f60923a8fe --- /dev/null +++ b/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Guest PC manipulation helpers + * + * Copyright (C) 2012,2013 - ARM Ltd + * Copyright (C) 2020 - Google LLC + * Author: Marc Zyngier <maz@kernel.org> + */ + +#ifndef __ARM64_KVM_HYP_ADJUST_PC_H__ +#define __ARM64_KVM_HYP_ADJUST_PC_H__ + +#include <asm/kvm_emulate.h> +#include <asm/kvm_host.h> + +void kvm_inject_exception(struct kvm_vcpu *vcpu); + +static inline void kvm_skip_instr(struct kvm_vcpu *vcpu) +{ + if (vcpu_mode_is_32bit(vcpu)) { + kvm_skip_instr32(vcpu); + } else { + *vcpu_pc(vcpu) += 4; + *vcpu_cpsr(vcpu) &= ~PSR_BTYPE_MASK; + } + + /* advance the singlestep state machine */ + *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS; +} + +/* + * Skip an instruction which has been emulated at hyp while most guest sysregs + * are live. + */ +static inline void __kvm_skip_instr(struct kvm_vcpu *vcpu) +{ + *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR); + vcpu_gp_regs(vcpu)->pstate = read_sysreg_el2(SYS_SPSR); + + kvm_skip_instr(vcpu); + + write_sysreg_el2(vcpu_gp_regs(vcpu)->pstate, SYS_SPSR); + write_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR); +} + +/* + * Adjust the guest PC on entry, depending on flags provided by EL1 + * for the purpose of emulation (MMIO, sysreg) or exception injection. + */ +static inline void __adjust_pc(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.flags & KVM_ARM64_PENDING_EXCEPTION) { + kvm_inject_exception(vcpu); + vcpu->arch.flags &= ~(KVM_ARM64_PENDING_EXCEPTION | + KVM_ARM64_EXCEPT_MASK); + } else if (vcpu->arch.flags & KVM_ARM64_INCREMENT_PC) { + kvm_skip_instr(vcpu); + vcpu->arch.flags &= ~KVM_ARM64_INCREMENT_PC; + } +} + +#endif diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 1f875a8f20c4..84473574c2e7 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -7,6 +7,8 @@ #ifndef __ARM64_KVM_HYP_SWITCH_H__ #define __ARM64_KVM_HYP_SWITCH_H__ +#include <hyp/adjust_pc.h> + #include <linux/arm-smccc.h> #include <linux/kvm_host.h> #include <linux/types.h> @@ -409,6 +411,21 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ) vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR); + if (ARM_SERROR_PENDING(*exit_code)) { + u8 esr_ec = kvm_vcpu_trap_get_class(vcpu); + + /* + * HVC already have an adjusted PC, which we need to + * correct in order to return to after having injected + * the SError. + * + * SMC, on the other hand, is *trapped*, meaning its + * preferred return address is the SMC itself. + */ + if (esr_ec == ESR_ELx_EC_HVC32 || esr_ec == ESR_ELx_EC_HVC64) + write_sysreg_el2(read_sysreg_el2(SYS_ELR) - 4, SYS_ELR); + } + /* * We're using the raw exception code in order to only process * the trap if no SError is pending. We will come back to the diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index ddde15fe85f2..77b8c4e06f2f 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -8,7 +8,7 @@ ccflags-y := -D__KVM_NVHE_HYPERVISOR__ obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o hyp-main.o obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ - ../fpsimd.o ../hyp-entry.o + ../fpsimd.o ../hyp-entry.o ../exception.o ## ## Build rules for compiling nVHE hyp code diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S index ed27f06a31ba..fe2740b224cf 100644 --- a/arch/arm64/kvm/hyp/nvhe/host.S +++ b/arch/arm64/kvm/hyp/nvhe/host.S @@ -13,8 +13,6 @@ .text SYM_FUNC_START(__host_exit) - stp x0, x1, [sp, #-16]! - get_host_ctxt x0, x1 /* Store the host regs x2 and x3 */ @@ -99,13 +97,15 @@ SYM_FUNC_END(__hyp_do_panic) mrs x0, esr_el2 lsr x0, x0, #ESR_ELx_EC_SHIFT cmp x0, #ESR_ELx_EC_HVC64 - ldp x0, x1, [sp], #16 b.ne __host_exit + ldp x0, x1, [sp] // Don't fixup the stack yet + /* Check for a stub HVC call */ cmp x0, #HVC_STUB_HCALL_NR b.hs __host_exit + add sp, sp, #16 /* * Compute the idmap address of __kvm_handle_stub_hvc and * jump there. Since we use kimage_voffset, do not use the @@ -115,10 +115,7 @@ SYM_FUNC_END(__hyp_do_panic) * Preserve x0-x4, which may contain stub parameters. */ ldr x5, =__kvm_handle_stub_hvc - ldr_l x6, kimage_voffset - - /* x5 = __pa(x5) */ - sub x5, x5, x6 + kimg_pa x5, x6 br x5 .L__vect_end\@: .if ((.L__vect_end\@ - .L__vect_start\@) > 0x80) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index e2eafe2c93af..82df7fc24760 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -12,106 +12,150 @@ #include <asm/kvm_hyp.h> #include <asm/kvm_mmu.h> -#include <kvm/arm_hypercalls.h> - -static void handle_host_hcall(unsigned long func_id, - struct kvm_cpu_context *host_ctxt) -{ - unsigned long ret = 0; - - switch (func_id) { - case KVM_HOST_SMCCC_FUNC(__kvm_vcpu_run): { - unsigned long r1 = host_ctxt->regs.regs[1]; - struct kvm_vcpu *vcpu = (struct kvm_vcpu *)r1; - - ret = __kvm_vcpu_run(kern_hyp_va(vcpu)); - break; - } - case KVM_HOST_SMCCC_FUNC(__kvm_flush_vm_context): - __kvm_flush_vm_context(); - break; - case KVM_HOST_SMCCC_FUNC(__kvm_tlb_flush_vmid_ipa): { - unsigned long r1 = host_ctxt->regs.regs[1]; - struct kvm_s2_mmu *mmu = (struct kvm_s2_mmu *)r1; - phys_addr_t ipa = host_ctxt->regs.regs[2]; - int level = host_ctxt->regs.regs[3]; - - __kvm_tlb_flush_vmid_ipa(kern_hyp_va(mmu), ipa, level); - break; - } - case KVM_HOST_SMCCC_FUNC(__kvm_tlb_flush_vmid): { - unsigned long r1 = host_ctxt->regs.regs[1]; - struct kvm_s2_mmu *mmu = (struct kvm_s2_mmu *)r1; - - __kvm_tlb_flush_vmid(kern_hyp_va(mmu)); - break; - } - case KVM_HOST_SMCCC_FUNC(__kvm_tlb_flush_local_vmid): { - unsigned long r1 = host_ctxt->regs.regs[1]; - struct kvm_s2_mmu *mmu = (struct kvm_s2_mmu *)r1; - - __kvm_tlb_flush_local_vmid(kern_hyp_va(mmu)); - break; - } - case KVM_HOST_SMCCC_FUNC(__kvm_timer_set_cntvoff): { - u64 cntvoff = host_ctxt->regs.regs[1]; - - __kvm_timer_set_cntvoff(cntvoff); - break; - } - case KVM_HOST_SMCCC_FUNC(__kvm_enable_ssbs): - __kvm_enable_ssbs(); - break; - case KVM_HOST_SMCCC_FUNC(__vgic_v3_get_ich_vtr_el2): - ret = __vgic_v3_get_ich_vtr_el2(); - break; - case KVM_HOST_SMCCC_FUNC(__vgic_v3_read_vmcr): - ret = __vgic_v3_read_vmcr(); - break; - case KVM_HOST_SMCCC_FUNC(__vgic_v3_write_vmcr): { - u32 vmcr = host_ctxt->regs.regs[1]; - - __vgic_v3_write_vmcr(vmcr); - break; - } - case KVM_HOST_SMCCC_FUNC(__vgic_v3_init_lrs): - __vgic_v3_init_lrs(); - break; - case KVM_HOST_SMCCC_FUNC(__kvm_get_mdcr_el2): - ret = __kvm_get_mdcr_el2(); - break; - case KVM_HOST_SMCCC_FUNC(__vgic_v3_save_aprs): { - unsigned long r1 = host_ctxt->regs.regs[1]; - struct vgic_v3_cpu_if *cpu_if = (struct vgic_v3_cpu_if *)r1; - - __vgic_v3_save_aprs(kern_hyp_va(cpu_if)); - break; - } - case KVM_HOST_SMCCC_FUNC(__vgic_v3_restore_aprs): { - unsigned long r1 = host_ctxt->regs.regs[1]; - struct vgic_v3_cpu_if *cpu_if = (struct vgic_v3_cpu_if *)r1; - - __vgic_v3_restore_aprs(kern_hyp_va(cpu_if)); - break; - } - default: - /* Invalid host HVC. */ - host_ctxt->regs.regs[0] = SMCCC_RET_NOT_SUPPORTED; - return; - } - - host_ctxt->regs.regs[0] = SMCCC_RET_SUCCESS; - host_ctxt->regs.regs[1] = ret; +#define cpu_reg(ctxt, r) (ctxt)->regs.regs[r] +#define DECLARE_REG(type, name, ctxt, reg) \ + type name = (type)cpu_reg(ctxt, (reg)) + +static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(struct kvm_vcpu *, vcpu, host_ctxt, 1); + + cpu_reg(host_ctxt, 1) = __kvm_vcpu_run(kern_hyp_va(vcpu)); +} + +static void handle___kvm_flush_vm_context(struct kvm_cpu_context *host_ctxt) +{ + __kvm_flush_vm_context(); +} + +static void handle___kvm_tlb_flush_vmid_ipa(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1); + DECLARE_REG(phys_addr_t, ipa, host_ctxt, 2); + DECLARE_REG(int, level, host_ctxt, 3); + + __kvm_tlb_flush_vmid_ipa(kern_hyp_va(mmu), ipa, level); +} + +static void handle___kvm_tlb_flush_vmid(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1); + + __kvm_tlb_flush_vmid(kern_hyp_va(mmu)); +} + +static void handle___kvm_tlb_flush_local_vmid(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1); + + __kvm_tlb_flush_local_vmid(kern_hyp_va(mmu)); +} + +static void handle___kvm_timer_set_cntvoff(struct kvm_cpu_context *host_ctxt) +{ + __kvm_timer_set_cntvoff(cpu_reg(host_ctxt, 1)); +} + +static void handle___kvm_enable_ssbs(struct kvm_cpu_context *host_ctxt) +{ + u64 tmp; + + tmp = read_sysreg_el2(SYS_SCTLR); + tmp |= SCTLR_ELx_DSSBS; + write_sysreg_el2(tmp, SYS_SCTLR); +} + +static void handle___vgic_v3_get_ich_vtr_el2(struct kvm_cpu_context *host_ctxt) +{ + cpu_reg(host_ctxt, 1) = __vgic_v3_get_ich_vtr_el2(); +} + +static void handle___vgic_v3_read_vmcr(struct kvm_cpu_context *host_ctxt) +{ + cpu_reg(host_ctxt, 1) = __vgic_v3_read_vmcr(); +} + +static void handle___vgic_v3_write_vmcr(struct kvm_cpu_context *host_ctxt) +{ + __vgic_v3_write_vmcr(cpu_reg(host_ctxt, 1)); +} + +static void handle___vgic_v3_init_lrs(struct kvm_cpu_context *host_ctxt) +{ + __vgic_v3_init_lrs(); +} + +static void handle___kvm_get_mdcr_el2(struct kvm_cpu_context *host_ctxt) +{ + cpu_reg(host_ctxt, 1) = __kvm_get_mdcr_el2(); +} + +static void handle___vgic_v3_save_aprs(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(struct vgic_v3_cpu_if *, cpu_if, host_ctxt, 1); + + __vgic_v3_save_aprs(kern_hyp_va(cpu_if)); +} + +static void handle___vgic_v3_restore_aprs(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(struct vgic_v3_cpu_if *, cpu_if, host_ctxt, 1); + + __vgic_v3_restore_aprs(kern_hyp_va(cpu_if)); +} + +typedef void (*hcall_t)(struct kvm_cpu_context *); + +#define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = kimg_fn_ptr(handle_##x) + +static const hcall_t *host_hcall[] = { + HANDLE_FUNC(__kvm_vcpu_run), + HANDLE_FUNC(__kvm_flush_vm_context), + HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa), + HANDLE_FUNC(__kvm_tlb_flush_vmid), + HANDLE_FUNC(__kvm_tlb_flush_local_vmid), + HANDLE_FUNC(__kvm_timer_set_cntvoff), + HANDLE_FUNC(__kvm_enable_ssbs), + HANDLE_FUNC(__vgic_v3_get_ich_vtr_el2), + HANDLE_FUNC(__vgic_v3_read_vmcr), + HANDLE_FUNC(__vgic_v3_write_vmcr), + HANDLE_FUNC(__vgic_v3_init_lrs), + HANDLE_FUNC(__kvm_get_mdcr_el2), + HANDLE_FUNC(__vgic_v3_save_aprs), + HANDLE_FUNC(__vgic_v3_restore_aprs), +}; + +static void handle_host_hcall(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(unsigned long, id, host_ctxt, 0); + const hcall_t *kfn; + hcall_t hfn; + + id -= KVM_HOST_SMCCC_ID(0); + + if (unlikely(id >= ARRAY_SIZE(host_hcall))) + goto inval; + + kfn = host_hcall[id]; + if (unlikely(!kfn)) + goto inval; + + cpu_reg(host_ctxt, 0) = SMCCC_RET_SUCCESS; + + hfn = kimg_fn_hyp_va(kfn); + hfn(host_ctxt); + + return; +inval: + cpu_reg(host_ctxt, 0) = SMCCC_RET_NOT_SUPPORTED; } void handle_trap(struct kvm_cpu_context *host_ctxt) { u64 esr = read_sysreg_el2(SYS_ESR); - unsigned long func_id; - if (ESR_ELx_EC(esr) != ESR_ELx_EC_HVC64) + if (unlikely(ESR_ELx_EC(esr) != ESR_ELx_EC_HVC64)) hyp_panic(); - func_id = host_ctxt->regs.regs[0]; - handle_host_hcall(func_id, host_ctxt); + handle_host_hcall(host_ctxt); } diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 8ae8160bc93a..3e50ff35aa4f 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -4,6 +4,7 @@ * Author: Marc Zyngier <marc.zyngier@arm.com> */ +#include <hyp/adjust_pc.h> #include <hyp/switch.h> #include <hyp/sysreg-sr.h> @@ -189,6 +190,8 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) __sysreg_save_state_nvhe(host_ctxt); + __adjust_pc(vcpu); + /* * We must restore the 32-bit state before the sysregs, thanks * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72). diff --git a/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c index 88a25fc8fcd3..29305022bc04 100644 --- a/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c @@ -33,14 +33,3 @@ void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt) __sysreg_restore_user_state(ctxt); __sysreg_restore_el2_return_state(ctxt); } - -void __kvm_enable_ssbs(void) -{ - u64 tmp; - - asm volatile( - "mrs %0, sctlr_el2\n" - "orr %0, %0, %1\n" - "msr sctlr_el2, %0" - : "=&r" (tmp) : "L" (SCTLR_ELx_DSSBS)); -} diff --git a/arch/arm64/kvm/hyp/smccc_wa.S b/arch/arm64/kvm/hyp/smccc_wa.S deleted file mode 100644 index b0441dbdf68b..000000000000 --- a/arch/arm64/kvm/hyp/smccc_wa.S +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2015-2018 - ARM Ltd - * Author: Marc Zyngier <marc.zyngier@arm.com> - */ - -#include <linux/arm-smccc.h> -#include <linux/linkage.h> - -#include <asm/kvm_asm.h> -#include <asm/kvm_mmu.h> - - /* - * This is not executed directly and is instead copied into the vectors - * by install_bp_hardening_cb(). - */ - .data - .pushsection .rodata - .global __smccc_workaround_1_smc -SYM_DATA_START(__smccc_workaround_1_smc) - esb - sub sp, sp, #(8 * 4) - stp x2, x3, [sp, #(8 * 0)] - stp x0, x1, [sp, #(8 * 2)] - mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1 - smc #0 - ldp x2, x3, [sp, #(8 * 0)] - ldp x0, x1, [sp, #(8 * 2)] - add sp, sp, #(8 * 4) -1: .org __smccc_workaround_1_smc + __SMCCC_WORKAROUND_1_SMC_SZ - .org 1b -SYM_DATA_END(__smccc_workaround_1_smc) diff --git a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c index bd1bab551d48..8f0585640241 100644 --- a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c +++ b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c @@ -4,6 +4,8 @@ * Author: Marc Zyngier <marc.zyngier@arm.com> */ +#include <hyp/adjust_pc.h> + #include <linux/compiler.h> #include <linux/irqchip/arm-gic.h> #include <linux/kvm_host.h> diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c index 452f4cacd674..80406f463c28 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -4,6 +4,8 @@ * Author: Marc Zyngier <marc.zyngier@arm.com> */ +#include <hyp/adjust_pc.h> + #include <linux/compiler.h> #include <linux/irqchip/arm-gic-v3.h> #include <linux/kvm_host.h> diff --git a/arch/arm64/kvm/hyp/vhe/Makefile b/arch/arm64/kvm/hyp/vhe/Makefile index 461e97c375cc..96bec0ecf9dd 100644 --- a/arch/arm64/kvm/hyp/vhe/Makefile +++ b/arch/arm64/kvm/hyp/vhe/Makefile @@ -8,4 +8,4 @@ ccflags-y := -D__KVM_VHE_HYPERVISOR__ obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ - ../fpsimd.o ../hyp-entry.o + ../fpsimd.o ../hyp-entry.o ../exception.o diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 62546e20b251..af8e940d0f03 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -4,6 +4,7 @@ * Author: Marc Zyngier <marc.zyngier@arm.com> */ +#include <hyp/adjust_pc.h> #include <hyp/switch.h> #include <linux/arm-smccc.h> @@ -133,6 +134,8 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) __load_guest_stage2(vcpu->arch.hw_mmu); __activate_traps(vcpu); + __adjust_pc(vcpu); + sysreg_restore_guest_state_vhe(guest_ctxt); __debug_switch_to_guest(vcpu); diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index 34a96ab244fa..b47df73e98d7 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -14,119 +14,15 @@ #include <asm/kvm_emulate.h> #include <asm/esr.h> -#define CURRENT_EL_SP_EL0_VECTOR 0x0 -#define CURRENT_EL_SP_ELx_VECTOR 0x200 -#define LOWER_EL_AArch64_VECTOR 0x400 -#define LOWER_EL_AArch32_VECTOR 0x600 - -enum exception_type { - except_type_sync = 0, - except_type_irq = 0x80, - except_type_fiq = 0x100, - except_type_serror = 0x180, -}; - -/* - * This performs the exception entry at a given EL (@target_mode), stashing PC - * and PSTATE into ELR and SPSR respectively, and compute the new PC/PSTATE. - * The EL passed to this function *must* be a non-secure, privileged mode with - * bit 0 being set (PSTATE.SP == 1). - * - * When an exception is taken, most PSTATE fields are left unchanged in the - * handler. However, some are explicitly overridden (e.g. M[4:0]). Luckily all - * of the inherited bits have the same position in the AArch64/AArch32 SPSR_ELx - * layouts, so we don't need to shuffle these for exceptions from AArch32 EL0. - * - * For the SPSR_ELx layout for AArch64, see ARM DDI 0487E.a page C5-429. - * For the SPSR_ELx layout for AArch32, see ARM DDI 0487E.a page C5-426. - * - * Here we manipulate the fields in order of the AArch64 SPSR_ELx layout, from - * MSB to LSB. - */ -static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode, - enum exception_type type) -{ - unsigned long sctlr, vbar, old, new, mode; - u64 exc_offset; - - mode = *vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT); - - if (mode == target_mode) - exc_offset = CURRENT_EL_SP_ELx_VECTOR; - else if ((mode | PSR_MODE_THREAD_BIT) == target_mode) - exc_offset = CURRENT_EL_SP_EL0_VECTOR; - else if (!(mode & PSR_MODE32_BIT)) - exc_offset = LOWER_EL_AArch64_VECTOR; - else - exc_offset = LOWER_EL_AArch32_VECTOR; - - switch (target_mode) { - case PSR_MODE_EL1h: - vbar = vcpu_read_sys_reg(vcpu, VBAR_EL1); - sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1); - vcpu_write_sys_reg(vcpu, *vcpu_pc(vcpu), ELR_EL1); - break; - default: - /* Don't do that */ - BUG(); - } - - *vcpu_pc(vcpu) = vbar + exc_offset + type; - - old = *vcpu_cpsr(vcpu); - new = 0; - - new |= (old & PSR_N_BIT); - new |= (old & PSR_Z_BIT); - new |= (old & PSR_C_BIT); - new |= (old & PSR_V_BIT); - - // TODO: TCO (if/when ARMv8.5-MemTag is exposed to guests) - - new |= (old & PSR_DIT_BIT); - - // PSTATE.UAO is set to zero upon any exception to AArch64 - // See ARM DDI 0487E.a, page D5-2579. - - // PSTATE.PAN is unchanged unless SCTLR_ELx.SPAN == 0b0 - // SCTLR_ELx.SPAN is RES1 when ARMv8.1-PAN is not implemented - // See ARM DDI 0487E.a, page D5-2578. - new |= (old & PSR_PAN_BIT); - if (!(sctlr & SCTLR_EL1_SPAN)) - new |= PSR_PAN_BIT; - - // PSTATE.SS is set to zero upon any exception to AArch64 - // See ARM DDI 0487E.a, page D2-2452. - - // PSTATE.IL is set to zero upon any exception to AArch64 - // See ARM DDI 0487E.a, page D1-2306. - - // PSTATE.SSBS is set to SCTLR_ELx.DSSBS upon any exception to AArch64 - // See ARM DDI 0487E.a, page D13-3258 - if (sctlr & SCTLR_ELx_DSSBS) - new |= PSR_SSBS_BIT; - - // PSTATE.BTYPE is set to zero upon any exception to AArch64 - // See ARM DDI 0487E.a, pages D1-2293 to D1-2294. - - new |= PSR_D_BIT; - new |= PSR_A_BIT; - new |= PSR_I_BIT; - new |= PSR_F_BIT; - - new |= target_mode; - - *vcpu_cpsr(vcpu) = new; - vcpu_write_spsr(vcpu, old); -} - static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr) { unsigned long cpsr = *vcpu_cpsr(vcpu); bool is_aarch32 = vcpu_mode_is_32bit(vcpu); u32 esr = 0; - enter_exception64(vcpu, PSR_MODE_EL1h, except_type_sync); + vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA64_EL1 | + KVM_ARM64_EXCEPT_AA64_ELx_SYNC | + KVM_ARM64_PENDING_EXCEPTION); vcpu_write_sys_reg(vcpu, addr, FAR_EL1); @@ -156,7 +52,9 @@ static void inject_undef64(struct kvm_vcpu *vcpu) { u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT); - enter_exception64(vcpu, PSR_MODE_EL1h, except_type_sync); + vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA64_EL1 | + KVM_ARM64_EXCEPT_AA64_ELx_SYNC | + KVM_ARM64_PENDING_EXCEPTION); /* * Build an unknown exception, depending on the instruction @@ -168,6 +66,53 @@ static void inject_undef64(struct kvm_vcpu *vcpu) vcpu_write_sys_reg(vcpu, esr, ESR_EL1); } +#define DFSR_FSC_EXTABT_LPAE 0x10 +#define DFSR_FSC_EXTABT_nLPAE 0x08 +#define DFSR_LPAE BIT(9) +#define TTBCR_EAE BIT(31) + +static void inject_undef32(struct kvm_vcpu *vcpu) +{ + vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA32_UND | + KVM_ARM64_PENDING_EXCEPTION); +} + +/* + * Modelled after TakeDataAbortException() and TakePrefetchAbortException + * pseudocode. + */ +static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, u32 addr) +{ + u64 far; + u32 fsr; + + /* Give the guest an IMPLEMENTATION DEFINED exception */ + if (vcpu_read_sys_reg(vcpu, TCR_EL1) & TTBCR_EAE) { + fsr = DFSR_LPAE | DFSR_FSC_EXTABT_LPAE; + } else { + /* no need to shuffle FS[4] into DFSR[10] as its 0 */ + fsr = DFSR_FSC_EXTABT_nLPAE; + } + + far = vcpu_read_sys_reg(vcpu, FAR_EL1); + + if (is_pabt) { + vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA32_IABT | + KVM_ARM64_PENDING_EXCEPTION); + far &= GENMASK(31, 0); + far |= (u64)addr << 32; + vcpu_write_sys_reg(vcpu, fsr, IFSR32_EL2); + } else { /* !iabt */ + vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA32_DABT | + KVM_ARM64_PENDING_EXCEPTION); + far &= GENMASK(63, 32); + far |= addr; + vcpu_write_sys_reg(vcpu, fsr, ESR_EL1); + } + + vcpu_write_sys_reg(vcpu, far, FAR_EL1); +} + /** * kvm_inject_dabt - inject a data abort into the guest * @vcpu: The VCPU to receive the data abort @@ -179,7 +124,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu) void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr) { if (vcpu_el1_is_32bit(vcpu)) - kvm_inject_dabt32(vcpu, addr); + inject_abt32(vcpu, false, addr); else inject_abt64(vcpu, false, addr); } @@ -195,7 +140,7 @@ void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr) void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr) { if (vcpu_el1_is_32bit(vcpu)) - kvm_inject_pabt32(vcpu, addr); + inject_abt32(vcpu, true, addr); else inject_abt64(vcpu, true, addr); } @@ -210,7 +155,7 @@ void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr) void kvm_inject_undefined(struct kvm_vcpu *vcpu) { if (vcpu_el1_is_32bit(vcpu)) - kvm_inject_undef32(vcpu); + inject_undef32(vcpu); else inject_undef64(vcpu); } diff --git a/arch/arm64/kvm/mmio.c b/arch/arm64/kvm/mmio.c index 6a2826f1bf5e..3e2d8ba11a02 100644 --- a/arch/arm64/kvm/mmio.c +++ b/arch/arm64/kvm/mmio.c @@ -115,7 +115,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu) * The MMIO instruction is emulated and should not be re-executed * in the guest. */ - kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + kvm_incr_pc(vcpu); return 0; } diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 57972bdb213a..1f41173e6149 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -788,10 +788,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, } switch (vma_shift) { +#ifndef __PAGETABLE_PMD_FOLDED case PUD_SHIFT: if (fault_supports_stage2_huge_mapping(memslot, hva, PUD_SIZE)) break; fallthrough; +#endif case CONT_PMD_SHIFT: vma_shift = PMD_SHIFT; fallthrough; @@ -1014,7 +1016,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) * cautious, and skip the instruction. */ if (kvm_is_error_hva(hva) && kvm_vcpu_dabt_is_cm(vcpu)) { - kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + kvm_incr_pc(vcpu); ret = 1; goto out_unlock; } diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 2ed5ef8f274b..398f6df1bbe4 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -384,7 +384,7 @@ static void kvm_pmu_update_state(struct kvm_vcpu *vcpu) struct kvm_pmu *pmu = &vcpu->arch.pmu; bool overflow; - if (!kvm_arm_pmu_v3_ready(vcpu)) + if (!kvm_vcpu_has_pmu(vcpu)) return; overflow = !!kvm_pmu_overflow_status(vcpu); @@ -825,9 +825,12 @@ bool kvm_arm_support_pmu_v3(void) int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu) { - if (!vcpu->arch.pmu.created) + if (!kvm_vcpu_has_pmu(vcpu)) return 0; + if (!vcpu->arch.pmu.created) + return -EINVAL; + /* * A valid interrupt configuration for the PMU is either to have a * properly configured interrupt number and using an in-kernel @@ -835,9 +838,6 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu) */ if (irqchip_in_kernel(vcpu->kvm)) { int irq = vcpu->arch.pmu.irq_num; - if (!kvm_arm_pmu_irq_initialized(vcpu)) - return -EINVAL; - /* * If we are using an in-kernel vgic, at this point we know * the vgic will be initialized, so we can check the PMU irq @@ -851,7 +851,6 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu) } kvm_pmu_vcpu_reset(vcpu); - vcpu->arch.pmu.ready = true; return 0; } @@ -913,8 +912,7 @@ static bool pmu_irq_is_valid(struct kvm *kvm, int irq) int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { - if (!kvm_arm_support_pmu_v3() || - !test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features)) + if (!kvm_vcpu_has_pmu(vcpu)) return -ENODEV; if (vcpu->arch.pmu.created) @@ -1015,7 +1013,7 @@ int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) if (!irqchip_in_kernel(vcpu->kvm)) return -EINVAL; - if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features)) + if (!kvm_vcpu_has_pmu(vcpu)) return -ENODEV; if (!kvm_arm_pmu_irq_initialized(vcpu)) @@ -1035,8 +1033,7 @@ int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) case KVM_ARM_VCPU_PMU_V3_IRQ: case KVM_ARM_VCPU_PMU_V3_INIT: case KVM_ARM_VCPU_PMU_V3_FILTER: - if (kvm_arm_support_pmu_v3() && - test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features)) + if (kvm_vcpu_has_pmu(vcpu)) return 0; } diff --git a/arch/arm64/kvm/regmap.c b/arch/arm64/kvm/regmap.c deleted file mode 100644 index accc1d5fba61..000000000000 --- a/arch/arm64/kvm/regmap.c +++ /dev/null @@ -1,224 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2012,2013 - ARM Ltd - * Author: Marc Zyngier <marc.zyngier@arm.com> - * - * Derived from arch/arm/kvm/emulate.c: - * Copyright (C) 2012 - Virtual Open Systems and Columbia University - * Author: Christoffer Dall <c.dall@virtualopensystems.com> - */ - -#include <linux/mm.h> -#include <linux/kvm_host.h> -#include <asm/kvm_emulate.h> -#include <asm/ptrace.h> - -#define VCPU_NR_MODES 6 -#define REG_OFFSET(_reg) \ - (offsetof(struct user_pt_regs, _reg) / sizeof(unsigned long)) - -#define USR_REG_OFFSET(R) REG_OFFSET(compat_usr(R)) - -static const unsigned long vcpu_reg_offsets[VCPU_NR_MODES][16] = { - /* USR Registers */ - { - USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), - USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), - USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), - USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), - USR_REG_OFFSET(12), USR_REG_OFFSET(13), USR_REG_OFFSET(14), - REG_OFFSET(pc) - }, - - /* FIQ Registers */ - { - USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), - USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), - USR_REG_OFFSET(6), USR_REG_OFFSET(7), - REG_OFFSET(compat_r8_fiq), /* r8 */ - REG_OFFSET(compat_r9_fiq), /* r9 */ - REG_OFFSET(compat_r10_fiq), /* r10 */ - REG_OFFSET(compat_r11_fiq), /* r11 */ - REG_OFFSET(compat_r12_fiq), /* r12 */ - REG_OFFSET(compat_sp_fiq), /* r13 */ - REG_OFFSET(compat_lr_fiq), /* r14 */ - REG_OFFSET(pc) - }, - - /* IRQ Registers */ - { - USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), - USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), - USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), - USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), - USR_REG_OFFSET(12), - REG_OFFSET(compat_sp_irq), /* r13 */ - REG_OFFSET(compat_lr_irq), /* r14 */ - REG_OFFSET(pc) - }, - - /* SVC Registers */ - { - USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), - USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), - USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), - USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), - USR_REG_OFFSET(12), - REG_OFFSET(compat_sp_svc), /* r13 */ - REG_OFFSET(compat_lr_svc), /* r14 */ - REG_OFFSET(pc) - }, - - /* ABT Registers */ - { - USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), - USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), - USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), - USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), - USR_REG_OFFSET(12), - REG_OFFSET(compat_sp_abt), /* r13 */ - REG_OFFSET(compat_lr_abt), /* r14 */ - REG_OFFSET(pc) - }, - - /* UND Registers */ - { - USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), - USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), - USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), - USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), - USR_REG_OFFSET(12), - REG_OFFSET(compat_sp_und), /* r13 */ - REG_OFFSET(compat_lr_und), /* r14 */ - REG_OFFSET(pc) - }, -}; - -/* - * Return a pointer to the register number valid in the current mode of - * the virtual CPU. - */ -unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num) -{ - unsigned long *reg_array = (unsigned long *)&vcpu->arch.ctxt.regs; - unsigned long mode = *vcpu_cpsr(vcpu) & PSR_AA32_MODE_MASK; - - switch (mode) { - case PSR_AA32_MODE_USR ... PSR_AA32_MODE_SVC: - mode &= ~PSR_MODE32_BIT; /* 0 ... 3 */ - break; - - case PSR_AA32_MODE_ABT: - mode = 4; - break; - - case PSR_AA32_MODE_UND: - mode = 5; - break; - - case PSR_AA32_MODE_SYS: - mode = 0; /* SYS maps to USR */ - break; - - default: - BUG(); - } - - return reg_array + vcpu_reg_offsets[mode][reg_num]; -} - -/* - * Return the SPSR for the current mode of the virtual CPU. - */ -static int vcpu_spsr32_mode(const struct kvm_vcpu *vcpu) -{ - unsigned long mode = *vcpu_cpsr(vcpu) & PSR_AA32_MODE_MASK; - switch (mode) { - case PSR_AA32_MODE_SVC: return KVM_SPSR_SVC; - case PSR_AA32_MODE_ABT: return KVM_SPSR_ABT; - case PSR_AA32_MODE_UND: return KVM_SPSR_UND; - case PSR_AA32_MODE_IRQ: return KVM_SPSR_IRQ; - case PSR_AA32_MODE_FIQ: return KVM_SPSR_FIQ; - default: BUG(); - } -} - -unsigned long vcpu_read_spsr32(const struct kvm_vcpu *vcpu) -{ - int spsr_idx = vcpu_spsr32_mode(vcpu); - - if (!vcpu->arch.sysregs_loaded_on_cpu) { - switch (spsr_idx) { - case KVM_SPSR_SVC: - return __vcpu_sys_reg(vcpu, SPSR_EL1); - case KVM_SPSR_ABT: - return vcpu->arch.ctxt.spsr_abt; - case KVM_SPSR_UND: - return vcpu->arch.ctxt.spsr_und; - case KVM_SPSR_IRQ: - return vcpu->arch.ctxt.spsr_irq; - case KVM_SPSR_FIQ: - return vcpu->arch.ctxt.spsr_fiq; - } - } - - switch (spsr_idx) { - case KVM_SPSR_SVC: - return read_sysreg_el1(SYS_SPSR); - case KVM_SPSR_ABT: - return read_sysreg(spsr_abt); - case KVM_SPSR_UND: - return read_sysreg(spsr_und); - case KVM_SPSR_IRQ: - return read_sysreg(spsr_irq); - case KVM_SPSR_FIQ: - return read_sysreg(spsr_fiq); - default: - BUG(); - } -} - -void vcpu_write_spsr32(struct kvm_vcpu *vcpu, unsigned long v) -{ - int spsr_idx = vcpu_spsr32_mode(vcpu); - - if (!vcpu->arch.sysregs_loaded_on_cpu) { - switch (spsr_idx) { - case KVM_SPSR_SVC: - __vcpu_sys_reg(vcpu, SPSR_EL1) = v; - break; - case KVM_SPSR_ABT: - vcpu->arch.ctxt.spsr_abt = v; - break; - case KVM_SPSR_UND: - vcpu->arch.ctxt.spsr_und = v; - break; - case KVM_SPSR_IRQ: - vcpu->arch.ctxt.spsr_irq = v; - break; - case KVM_SPSR_FIQ: - vcpu->arch.ctxt.spsr_fiq = v; - break; - } - - return; - } - - switch (spsr_idx) { - case KVM_SPSR_SVC: - write_sysreg_el1(v, SYS_SPSR); - break; - case KVM_SPSR_ABT: - write_sysreg(v, spsr_abt); - break; - case KVM_SPSR_UND: - write_sysreg(v, spsr_und); - break; - case KVM_SPSR_IRQ: - write_sysreg(v, spsr_irq); - break; - case KVM_SPSR_FIQ: - write_sysreg(v, spsr_fiq); - break; - } -} diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index c7985f4607a9..47f3f035f3ea 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -25,7 +25,6 @@ #include <asm/ptrace.h> #include <asm/kvm_arm.h> #include <asm/kvm_asm.h> -#include <asm/kvm_coproc.h> #include <asm/kvm_emulate.h> #include <asm/kvm_mmu.h> #include <asm/virt.h> @@ -234,6 +233,10 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) pstate = VCPU_RESET_PSTATE_EL1; } + if (kvm_vcpu_has_pmu(vcpu) && !kvm_arm_support_pmu_v3()) { + ret = -EINVAL; + goto out; + } break; } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index fb12d3ef423a..3313dedfa505 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -20,7 +20,6 @@ #include <asm/debug-monitors.h> #include <asm/esr.h> #include <asm/kvm_arm.h> -#include <asm/kvm_coproc.h> #include <asm/kvm_emulate.h> #include <asm/kvm_hyp.h> #include <asm/kvm_mmu.h> @@ -64,87 +63,6 @@ static bool write_to_read_only(struct kvm_vcpu *vcpu, return false; } -static bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val) -{ - /* - * System registers listed in the switch are not saved on every - * exit from the guest but are only saved on vcpu_put. - * - * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but - * should never be listed below, because the guest cannot modify its - * own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's - * thread when emulating cross-VCPU communication. - */ - switch (reg) { - case CSSELR_EL1: *val = read_sysreg_s(SYS_CSSELR_EL1); break; - case SCTLR_EL1: *val = read_sysreg_s(SYS_SCTLR_EL12); break; - case CPACR_EL1: *val = read_sysreg_s(SYS_CPACR_EL12); break; - case TTBR0_EL1: *val = read_sysreg_s(SYS_TTBR0_EL12); break; - case TTBR1_EL1: *val = read_sysreg_s(SYS_TTBR1_EL12); break; - case TCR_EL1: *val = read_sysreg_s(SYS_TCR_EL12); break; - case ESR_EL1: *val = read_sysreg_s(SYS_ESR_EL12); break; - case AFSR0_EL1: *val = read_sysreg_s(SYS_AFSR0_EL12); break; - case AFSR1_EL1: *val = read_sysreg_s(SYS_AFSR1_EL12); break; - case FAR_EL1: *val = read_sysreg_s(SYS_FAR_EL12); break; - case MAIR_EL1: *val = read_sysreg_s(SYS_MAIR_EL12); break; - case VBAR_EL1: *val = read_sysreg_s(SYS_VBAR_EL12); break; - case CONTEXTIDR_EL1: *val = read_sysreg_s(SYS_CONTEXTIDR_EL12);break; - case TPIDR_EL0: *val = read_sysreg_s(SYS_TPIDR_EL0); break; - case TPIDRRO_EL0: *val = read_sysreg_s(SYS_TPIDRRO_EL0); break; - case TPIDR_EL1: *val = read_sysreg_s(SYS_TPIDR_EL1); break; - case AMAIR_EL1: *val = read_sysreg_s(SYS_AMAIR_EL12); break; - case CNTKCTL_EL1: *val = read_sysreg_s(SYS_CNTKCTL_EL12); break; - case ELR_EL1: *val = read_sysreg_s(SYS_ELR_EL12); break; - case PAR_EL1: *val = read_sysreg_par(); break; - case DACR32_EL2: *val = read_sysreg_s(SYS_DACR32_EL2); break; - case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break; - case DBGVCR32_EL2: *val = read_sysreg_s(SYS_DBGVCR32_EL2); break; - default: return false; - } - - return true; -} - -static bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg) -{ - /* - * System registers listed in the switch are not restored on every - * entry to the guest but are only restored on vcpu_load. - * - * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but - * should never be listed below, because the MPIDR should only be set - * once, before running the VCPU, and never changed later. - */ - switch (reg) { - case CSSELR_EL1: write_sysreg_s(val, SYS_CSSELR_EL1); break; - case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); break; - case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); break; - case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); break; - case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); break; - case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); break; - case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); break; - case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); break; - case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); break; - case FAR_EL1: write_sysreg_s(val, SYS_FAR_EL12); break; - case MAIR_EL1: write_sysreg_s(val, SYS_MAIR_EL12); break; - case VBAR_EL1: write_sysreg_s(val, SYS_VBAR_EL12); break; - case CONTEXTIDR_EL1: write_sysreg_s(val, SYS_CONTEXTIDR_EL12);break; - case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); break; - case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); break; - case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); break; - case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); break; - case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); break; - case ELR_EL1: write_sysreg_s(val, SYS_ELR_EL12); break; - case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); break; - case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); break; - case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break; - case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); break; - default: return false; - } - - return true; -} - u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg) { u64 val = 0x8badf00d8badf00d; @@ -169,7 +87,7 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) static u32 cache_levels; /* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */ -#define CSSELR_MAX 12 +#define CSSELR_MAX 14 /* Which cache CCSIDR represents depends on CSSELR value. */ static u32 get_ccsidr(u32 csselr) @@ -209,6 +127,24 @@ static bool access_dcsw(struct kvm_vcpu *vcpu, return true; } +static void get_access_mask(const struct sys_reg_desc *r, u64 *mask, u64 *shift) +{ + switch (r->aarch32_map) { + case AA32_LO: + *mask = GENMASK_ULL(31, 0); + *shift = 0; + break; + case AA32_HI: + *mask = GENMASK_ULL(63, 32); + *shift = 32; + break; + default: + *mask = GENMASK_ULL(63, 0); + *shift = 0; + break; + } +} + /* * Generic accessor for VM registers. Only called as long as HCR_TVM * is set. If the guest enables the MMU, we stop trapping the VM @@ -219,26 +155,21 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { bool was_enabled = vcpu_has_cache_enabled(vcpu); - u64 val; - int reg = r->reg; + u64 val, mask, shift; BUG_ON(!p->is_write); - /* See the 32bit mapping in kvm_host.h */ - if (p->is_aarch32) - reg = r->reg / 2; + get_access_mask(r, &mask, &shift); - if (!p->is_aarch32 || !p->is_32bit) { - val = p->regval; + if (~mask) { + val = vcpu_read_sys_reg(vcpu, r->reg); + val &= ~mask; } else { - val = vcpu_read_sys_reg(vcpu, reg); - if (r->reg % 2) - val = (p->regval << 32) | (u64)lower_32_bits(val); - else - val = ((u64)upper_32_bits(val) << 32) | - lower_32_bits(p->regval); + val = 0; } - vcpu_write_sys_reg(vcpu, val, reg); + + val |= (p->regval & (mask >> shift)) << shift; + vcpu_write_sys_reg(vcpu, val, r->reg); kvm_toggle_cache(vcpu, was_enabled); return true; @@ -248,17 +179,13 @@ static bool access_actlr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { + u64 mask, shift; + if (p->is_write) return ignore_write(vcpu, p); - p->regval = vcpu_read_sys_reg(vcpu, ACTLR_EL1); - - if (p->is_aarch32) { - if (r->Op2 & 2) - p->regval = upper_32_bits(p->regval); - else - p->regval = lower_32_bits(p->regval); - } + get_access_mask(r, &mask, &shift); + p->regval = (vcpu_read_sys_reg(vcpu, r->reg) & mask) >> shift; return true; } @@ -285,7 +212,7 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu, * equivalent to ICC_SGI0R_EL1, as there is no "alternative" secure * group. */ - if (p->is_aarch32) { + if (p->Op0 == 0) { /* AArch32 */ switch (p->Op1) { default: /* Keep GCC quiet */ case 0: /* ICC_SGI1R */ @@ -296,7 +223,7 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu, g1 = false; break; } - } else { + } else { /* AArch64 */ switch (p->Op2) { default: /* Keep GCC quiet */ case 5: /* ICC_SGI1R_EL1 */ @@ -438,26 +365,30 @@ static bool trap_debug_regs(struct kvm_vcpu *vcpu, */ static void reg_to_dbg(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + const struct sys_reg_desc *rd, u64 *dbg_reg) { - u64 val = p->regval; + u64 mask, shift, val; - if (p->is_32bit) { - val &= 0xffffffffUL; - val |= ((*dbg_reg >> 32) << 32); - } + get_access_mask(rd, &mask, &shift); + val = *dbg_reg; + val &= ~mask; + val |= (p->regval & (mask >> shift)) << shift; *dbg_reg = val; + vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY; } static void dbg_to_reg(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + const struct sys_reg_desc *rd, u64 *dbg_reg) { - p->regval = *dbg_reg; - if (p->is_32bit) - p->regval &= 0xffffffffUL; + u64 mask, shift; + + get_access_mask(rd, &mask, &shift); + p->regval = (*dbg_reg & mask) >> shift; } static bool trap_bvr(struct kvm_vcpu *vcpu, @@ -467,9 +398,9 @@ static bool trap_bvr(struct kvm_vcpu *vcpu, u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; if (p->is_write) - reg_to_dbg(vcpu, p, dbg_reg); + reg_to_dbg(vcpu, p, rd, dbg_reg); else - dbg_to_reg(vcpu, p, dbg_reg); + dbg_to_reg(vcpu, p, rd, dbg_reg); trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg); @@ -509,9 +440,9 @@ static bool trap_bcr(struct kvm_vcpu *vcpu, u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg]; if (p->is_write) - reg_to_dbg(vcpu, p, dbg_reg); + reg_to_dbg(vcpu, p, rd, dbg_reg); else - dbg_to_reg(vcpu, p, dbg_reg); + dbg_to_reg(vcpu, p, rd, dbg_reg); trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg); @@ -552,9 +483,9 @@ static bool trap_wvr(struct kvm_vcpu *vcpu, u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]; if (p->is_write) - reg_to_dbg(vcpu, p, dbg_reg); + reg_to_dbg(vcpu, p, rd, dbg_reg); else - dbg_to_reg(vcpu, p, dbg_reg); + dbg_to_reg(vcpu, p, rd, dbg_reg); trace_trap_reg(__func__, rd->reg, p->is_write, vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]); @@ -595,9 +526,9 @@ static bool trap_wcr(struct kvm_vcpu *vcpu, u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg]; if (p->is_write) - reg_to_dbg(vcpu, p, dbg_reg); + reg_to_dbg(vcpu, p, rd, dbg_reg); else - dbg_to_reg(vcpu, p, dbg_reg); + dbg_to_reg(vcpu, p, rd, dbg_reg); trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg); @@ -678,8 +609,9 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) static bool check_pmu_access_disabled(struct kvm_vcpu *vcpu, u64 flags) { u64 reg = __vcpu_sys_reg(vcpu, PMUSERENR_EL0); - bool enabled = (reg & flags) || vcpu_mode_priv(vcpu); + bool enabled = kvm_vcpu_has_pmu(vcpu); + enabled &= (reg & flags) || vcpu_mode_priv(vcpu); if (!enabled) kvm_inject_undefined(vcpu); @@ -711,9 +643,6 @@ static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, { u64 val; - if (!kvm_arm_pmu_v3_ready(vcpu)) - return trap_raz_wi(vcpu, p, r); - if (pmu_access_el0_disabled(vcpu)) return false; @@ -740,9 +669,6 @@ static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, static bool access_pmselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { - if (!kvm_arm_pmu_v3_ready(vcpu)) - return trap_raz_wi(vcpu, p, r); - if (pmu_access_event_counter_el0_disabled(vcpu)) return false; @@ -761,9 +687,6 @@ static bool access_pmceid(struct kvm_vcpu *vcpu, struct sys_reg_params *p, { u64 pmceid; - if (!kvm_arm_pmu_v3_ready(vcpu)) - return trap_raz_wi(vcpu, p, r); - BUG_ON(p->is_write); if (pmu_access_el0_disabled(vcpu)) @@ -794,10 +717,7 @@ static bool access_pmu_evcntr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { - u64 idx; - - if (!kvm_arm_pmu_v3_ready(vcpu)) - return trap_raz_wi(vcpu, p, r); + u64 idx = ~0UL; if (r->CRn == 9 && r->CRm == 13) { if (r->Op2 == 2) { @@ -813,8 +733,6 @@ static bool access_pmu_evcntr(struct kvm_vcpu *vcpu, return false; idx = ARMV8_PMU_CYCLE_IDX; - } else { - return false; } } else if (r->CRn == 0 && r->CRm == 9) { /* PMCCNTR */ @@ -828,10 +746,11 @@ static bool access_pmu_evcntr(struct kvm_vcpu *vcpu, return false; idx = ((r->CRm & 3) << 3) | (r->Op2 & 7); - } else { - return false; } + /* Catch any decoding mistake */ + WARN_ON(idx == ~0UL); + if (!pmu_counter_idx_valid(vcpu, idx)) return false; @@ -852,9 +771,6 @@ static bool access_pmu_evtyper(struct kvm_vcpu *vcpu, struct sys_reg_params *p, { u64 idx, reg; - if (!kvm_arm_pmu_v3_ready(vcpu)) - return trap_raz_wi(vcpu, p, r); - if (pmu_access_el0_disabled(vcpu)) return false; @@ -892,9 +808,6 @@ static bool access_pmcnten(struct kvm_vcpu *vcpu, struct sys_reg_params *p, { u64 val, mask; - if (!kvm_arm_pmu_v3_ready(vcpu)) - return trap_raz_wi(vcpu, p, r); - if (pmu_access_el0_disabled(vcpu)) return false; @@ -923,13 +836,8 @@ static bool access_pminten(struct kvm_vcpu *vcpu, struct sys_reg_params *p, { u64 mask = kvm_pmu_valid_counter_mask(vcpu); - if (!kvm_arm_pmu_v3_ready(vcpu)) - return trap_raz_wi(vcpu, p, r); - - if (!vcpu_mode_priv(vcpu)) { - kvm_inject_undefined(vcpu); + if (check_pmu_access_disabled(vcpu, 0)) return false; - } if (p->is_write) { u64 val = p->regval & mask; @@ -952,9 +860,6 @@ static bool access_pmovs(struct kvm_vcpu *vcpu, struct sys_reg_params *p, { u64 mask = kvm_pmu_valid_counter_mask(vcpu); - if (!kvm_arm_pmu_v3_ready(vcpu)) - return trap_raz_wi(vcpu, p, r); - if (pmu_access_el0_disabled(vcpu)) return false; @@ -977,9 +882,6 @@ static bool access_pmswinc(struct kvm_vcpu *vcpu, struct sys_reg_params *p, { u64 mask; - if (!kvm_arm_pmu_v3_ready(vcpu)) - return trap_raz_wi(vcpu, p, r); - if (!p->is_write) return read_from_write_only(vcpu, p, r); @@ -994,8 +896,10 @@ static bool access_pmswinc(struct kvm_vcpu *vcpu, struct sys_reg_params *p, static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { - if (!kvm_arm_pmu_v3_ready(vcpu)) - return trap_raz_wi(vcpu, p, r); + if (!kvm_vcpu_has_pmu(vcpu)) { + kvm_inject_undefined(vcpu); + return false; + } if (p->is_write) { if (!vcpu_mode_priv(vcpu)) { @@ -1038,8 +942,8 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, { SYS_DESC(SYS_PMEVTYPERn_EL0(n)), \ access_pmu_evtyper, reset_unknown, (PMEVTYPER0_EL0 + n), } -static bool access_amu(struct kvm_vcpu *vcpu, struct sys_reg_params *p, - const struct sys_reg_desc *r) +static bool undef_access(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + const struct sys_reg_desc *r) { kvm_inject_undefined(vcpu); @@ -1047,33 +951,25 @@ static bool access_amu(struct kvm_vcpu *vcpu, struct sys_reg_params *p, } /* Macro to expand the AMU counter and type registers*/ -#define AMU_AMEVCNTR0_EL0(n) { SYS_DESC(SYS_AMEVCNTR0_EL0(n)), access_amu } -#define AMU_AMEVTYPER0_EL0(n) { SYS_DESC(SYS_AMEVTYPER0_EL0(n)), access_amu } -#define AMU_AMEVCNTR1_EL0(n) { SYS_DESC(SYS_AMEVCNTR1_EL0(n)), access_amu } -#define AMU_AMEVTYPER1_EL0(n) { SYS_DESC(SYS_AMEVTYPER1_EL0(n)), access_amu } - -static bool trap_ptrauth(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *rd) -{ - /* - * If we land here, that is because we didn't fixup the access on exit - * by allowing the PtrAuth sysregs. The only way this happens is when - * the guest does not have PtrAuth support enabled. - */ - kvm_inject_undefined(vcpu); - - return false; -} +#define AMU_AMEVCNTR0_EL0(n) { SYS_DESC(SYS_AMEVCNTR0_EL0(n)), undef_access } +#define AMU_AMEVTYPER0_EL0(n) { SYS_DESC(SYS_AMEVTYPER0_EL0(n)), undef_access } +#define AMU_AMEVCNTR1_EL0(n) { SYS_DESC(SYS_AMEVCNTR1_EL0(n)), undef_access } +#define AMU_AMEVTYPER1_EL0(n) { SYS_DESC(SYS_AMEVTYPER1_EL0(n)), undef_access } static unsigned int ptrauth_visibility(const struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd) { - return vcpu_has_ptrauth(vcpu) ? 0 : REG_HIDDEN_USER | REG_HIDDEN_GUEST; + return vcpu_has_ptrauth(vcpu) ? 0 : REG_HIDDEN; } +/* + * If we land here on a PtrAuth access, that is because we didn't + * fixup the access on exit by allowing the PtrAuth sysregs. The only + * way this happens is when the guest does not have PtrAuth support + * enabled. + */ #define __PTRAUTH_KEY(k) \ - { SYS_DESC(SYS_## k), trap_ptrauth, reset_unknown, k, \ + { SYS_DESC(SYS_## k), undef_access, reset_unknown, k, \ .visibility = ptrauth_visibility} #define PTRAUTH_KEY(k) \ @@ -1128,9 +1024,10 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, if (!vcpu_has_sve(vcpu)) val &= ~(0xfUL << ID_AA64PFR0_SVE_SHIFT); val &= ~(0xfUL << ID_AA64PFR0_AMU_SHIFT); - if (!(val & (0xfUL << ID_AA64PFR0_CSV2_SHIFT)) && - arm64_get_spectre_v2_state() == SPECTRE_UNAFFECTED) - val |= (1UL << ID_AA64PFR0_CSV2_SHIFT); + val &= ~(0xfUL << ID_AA64PFR0_CSV2_SHIFT); + val |= ((u64)vcpu->kvm->arch.pfr0_csv2 << ID_AA64PFR0_CSV2_SHIFT); + val &= ~(0xfUL << ID_AA64PFR0_CSV3_SHIFT); + val |= ((u64)vcpu->kvm->arch.pfr0_csv3 << ID_AA64PFR0_CSV3_SHIFT); } else if (id == SYS_ID_AA64PFR1_EL1) { val &= ~(0xfUL << ID_AA64PFR1_MTE_SHIFT); } else if (id == SYS_ID_AA64ISAR1_EL1 && !vcpu_has_ptrauth(vcpu)) { @@ -1139,10 +1036,15 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, (0xfUL << ID_AA64ISAR1_GPA_SHIFT) | (0xfUL << ID_AA64ISAR1_GPI_SHIFT)); } else if (id == SYS_ID_AA64DFR0_EL1) { + u64 cap = 0; + /* Limit guests to PMUv3 for ARMv8.1 */ + if (kvm_vcpu_has_pmu(vcpu)) + cap = ID_AA64DFR0_PMUVER_8_1; + val = cpuid_feature_cap_perfmon_field(val, ID_AA64DFR0_PMUVER_SHIFT, - ID_AA64DFR0_PMUVER_8_1); + cap); } else if (id == SYS_ID_DFR0_EL1) { /* Limit guests to PMUv3 for ARMv8.1 */ val = cpuid_feature_cap_perfmon_field(val, @@ -1153,6 +1055,22 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, return val; } +static unsigned int id_visibility(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *r) +{ + u32 id = sys_reg((u32)r->Op0, (u32)r->Op1, + (u32)r->CRn, (u32)r->CRm, (u32)r->Op2); + + switch (id) { + case SYS_ID_AA64ZFR0_EL1: + if (!vcpu_has_sve(vcpu)) + return REG_RAZ; + break; + } + + return 0; +} + /* cpufeature ID register access trap handlers */ static bool __access_id_reg(struct kvm_vcpu *vcpu, @@ -1171,7 +1089,9 @@ static bool access_id_reg(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { - return __access_id_reg(vcpu, p, r, false); + bool raz = sysreg_visible_as_raz(vcpu, r); + + return __access_id_reg(vcpu, p, r, raz); } static bool access_raz_id_reg(struct kvm_vcpu *vcpu, @@ -1192,71 +1112,48 @@ static unsigned int sve_visibility(const struct kvm_vcpu *vcpu, if (vcpu_has_sve(vcpu)) return 0; - return REG_HIDDEN_USER | REG_HIDDEN_GUEST; + return REG_HIDDEN; } -/* Visibility overrides for SVE-specific ID registers */ -static unsigned int sve_id_visibility(const struct kvm_vcpu *vcpu, - const struct sys_reg_desc *rd) -{ - if (vcpu_has_sve(vcpu)) - return 0; - - return REG_HIDDEN_USER; -} - -/* Generate the emulated ID_AA64ZFR0_EL1 value exposed to the guest */ -static u64 guest_id_aa64zfr0_el1(const struct kvm_vcpu *vcpu) -{ - if (!vcpu_has_sve(vcpu)) - return 0; - - return read_sanitised_ftr_reg(SYS_ID_AA64ZFR0_EL1); -} - -static bool access_id_aa64zfr0_el1(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *rd) -{ - if (p->is_write) - return write_to_read_only(vcpu, p, rd); - - p->regval = guest_id_aa64zfr0_el1(vcpu); - return true; -} - -static int get_id_aa64zfr0_el1(struct kvm_vcpu *vcpu, - const struct sys_reg_desc *rd, - const struct kvm_one_reg *reg, void __user *uaddr) -{ - u64 val; - - if (WARN_ON(!vcpu_has_sve(vcpu))) - return -ENOENT; - - val = guest_id_aa64zfr0_el1(vcpu); - return reg_to_user(uaddr, &val, reg->id); -} - -static int set_id_aa64zfr0_el1(struct kvm_vcpu *vcpu, - const struct sys_reg_desc *rd, - const struct kvm_one_reg *reg, void __user *uaddr) +static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd, + const struct kvm_one_reg *reg, void __user *uaddr) { const u64 id = sys_reg_to_index(rd); + u8 csv2, csv3; int err; u64 val; - if (WARN_ON(!vcpu_has_sve(vcpu))) - return -ENOENT; - err = reg_from_user(&val, uaddr, id); if (err) return err; - /* This is what we mean by invariant: you can't change it. */ - if (val != guest_id_aa64zfr0_el1(vcpu)) + /* + * Allow AA64PFR0_EL1.CSV2 to be set from userspace as long as + * it doesn't promise more than what is actually provided (the + * guest could otherwise be covered in ectoplasmic residue). + */ + csv2 = cpuid_feature_extract_unsigned_field(val, ID_AA64PFR0_CSV2_SHIFT); + if (csv2 > 1 || + (csv2 && arm64_get_spectre_v2_state() != SPECTRE_UNAFFECTED)) return -EINVAL; + /* Same thing for CSV3 */ + csv3 = cpuid_feature_extract_unsigned_field(val, ID_AA64PFR0_CSV3_SHIFT); + if (csv3 > 1 || + (csv3 && arm64_get_meltdown_state() != SPECTRE_UNAFFECTED)) + return -EINVAL; + + /* We can only differ with CSV[23], and anything else is an error */ + val ^= read_id_reg(vcpu, rd, false); + val &= ~((0xFUL << ID_AA64PFR0_CSV2_SHIFT) | + (0xFUL << ID_AA64PFR0_CSV3_SHIFT)); + if (val) + return -EINVAL; + + vcpu->kvm->arch.pfr0_csv2 = csv2; + vcpu->kvm->arch.pfr0_csv3 = csv3 ; + return 0; } @@ -1299,13 +1196,17 @@ static int __set_id_reg(const struct kvm_vcpu *vcpu, static int get_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, const struct kvm_one_reg *reg, void __user *uaddr) { - return __get_id_reg(vcpu, rd, uaddr, false); + bool raz = sysreg_visible_as_raz(vcpu, rd); + + return __get_id_reg(vcpu, rd, uaddr, raz); } static int set_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, const struct kvm_one_reg *reg, void __user *uaddr) { - return __set_id_reg(vcpu, rd, uaddr, false); + bool raz = sysreg_visible_as_raz(vcpu, rd); + + return __set_id_reg(vcpu, rd, uaddr, raz); } static int get_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, @@ -1345,10 +1246,6 @@ static bool access_csselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, { int reg = r->reg; - /* See the 32bit mapping in kvm_host.h */ - if (p->is_aarch32) - reg = r->reg / 2; - if (p->is_write) vcpu_write_sys_reg(vcpu, p->regval, reg); else @@ -1384,19 +1281,13 @@ static bool access_ccsidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, return true; } -static bool access_mte_regs(struct kvm_vcpu *vcpu, struct sys_reg_params *p, - const struct sys_reg_desc *r) -{ - kvm_inject_undefined(vcpu); - return false; -} - /* sys_reg_desc initialiser for known cpufeature ID registers */ #define ID_SANITISED(name) { \ SYS_DESC(SYS_##name), \ .access = access_id_reg, \ .get_user = get_id_reg, \ .set_user = set_id_reg, \ + .visibility = id_visibility, \ } /* @@ -1514,11 +1405,12 @@ static const struct sys_reg_desc sys_reg_descs[] = { /* AArch64 ID registers */ /* CRm=4 */ - ID_SANITISED(ID_AA64PFR0_EL1), + { SYS_DESC(SYS_ID_AA64PFR0_EL1), .access = access_id_reg, + .get_user = get_id_reg, .set_user = set_id_aa64pfr0_el1, }, ID_SANITISED(ID_AA64PFR1_EL1), ID_UNALLOCATED(4,2), ID_UNALLOCATED(4,3), - { SYS_DESC(SYS_ID_AA64ZFR0_EL1), access_id_aa64zfr0_el1, .get_user = get_id_aa64zfr0_el1, .set_user = set_id_aa64zfr0_el1, .visibility = sve_id_visibility }, + ID_SANITISED(ID_AA64ZFR0_EL1), ID_UNALLOCATED(4,5), ID_UNALLOCATED(4,6), ID_UNALLOCATED(4,7), @@ -1557,8 +1449,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_ACTLR_EL1), access_actlr, reset_actlr, ACTLR_EL1 }, { SYS_DESC(SYS_CPACR_EL1), NULL, reset_val, CPACR_EL1, 0 }, - { SYS_DESC(SYS_RGSR_EL1), access_mte_regs }, - { SYS_DESC(SYS_GCR_EL1), access_mte_regs }, + { SYS_DESC(SYS_RGSR_EL1), undef_access }, + { SYS_DESC(SYS_GCR_EL1), undef_access }, { SYS_DESC(SYS_ZCR_EL1), NULL, reset_val, ZCR_EL1, 0, .visibility = sve_visibility }, { SYS_DESC(SYS_TTBR0_EL1), access_vm_reg, reset_unknown, TTBR0_EL1 }, @@ -1584,8 +1476,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_ERXMISC0_EL1), trap_raz_wi }, { SYS_DESC(SYS_ERXMISC1_EL1), trap_raz_wi }, - { SYS_DESC(SYS_TFSR_EL1), access_mte_regs }, - { SYS_DESC(SYS_TFSRE0_EL1), access_mte_regs }, + { SYS_DESC(SYS_TFSR_EL1), undef_access }, + { SYS_DESC(SYS_TFSRE0_EL1), undef_access }, { SYS_DESC(SYS_FAR_EL1), access_vm_reg, reset_unknown, FAR_EL1 }, { SYS_DESC(SYS_PAR_EL1), NULL, reset_unknown, PAR_EL1 }, @@ -1621,6 +1513,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_CONTEXTIDR_EL1), access_vm_reg, reset_val, CONTEXTIDR_EL1, 0 }, { SYS_DESC(SYS_TPIDR_EL1), NULL, reset_unknown, TPIDR_EL1 }, + { SYS_DESC(SYS_SCXTNUM_EL1), undef_access }, + { SYS_DESC(SYS_CNTKCTL_EL1), NULL, reset_val, CNTKCTL_EL1, 0}, { SYS_DESC(SYS_CCSIDR_EL1), access_ccsidr }, @@ -1649,14 +1543,16 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_TPIDR_EL0), NULL, reset_unknown, TPIDR_EL0 }, { SYS_DESC(SYS_TPIDRRO_EL0), NULL, reset_unknown, TPIDRRO_EL0 }, - { SYS_DESC(SYS_AMCR_EL0), access_amu }, - { SYS_DESC(SYS_AMCFGR_EL0), access_amu }, - { SYS_DESC(SYS_AMCGCR_EL0), access_amu }, - { SYS_DESC(SYS_AMUSERENR_EL0), access_amu }, - { SYS_DESC(SYS_AMCNTENCLR0_EL0), access_amu }, - { SYS_DESC(SYS_AMCNTENSET0_EL0), access_amu }, - { SYS_DESC(SYS_AMCNTENCLR1_EL0), access_amu }, - { SYS_DESC(SYS_AMCNTENSET1_EL0), access_amu }, + { SYS_DESC(SYS_SCXTNUM_EL0), undef_access }, + + { SYS_DESC(SYS_AMCR_EL0), undef_access }, + { SYS_DESC(SYS_AMCFGR_EL0), undef_access }, + { SYS_DESC(SYS_AMCGCR_EL0), undef_access }, + { SYS_DESC(SYS_AMUSERENR_EL0), undef_access }, + { SYS_DESC(SYS_AMCNTENCLR0_EL0), undef_access }, + { SYS_DESC(SYS_AMCNTENSET0_EL0), undef_access }, + { SYS_DESC(SYS_AMCNTENCLR1_EL0), undef_access }, + { SYS_DESC(SYS_AMCNTENSET1_EL0), undef_access }, AMU_AMEVCNTR0_EL0(0), AMU_AMEVCNTR0_EL0(1), AMU_AMEVCNTR0_EL0(2), @@ -1820,66 +1716,27 @@ static bool trap_dbgidr(struct kvm_vcpu *vcpu, } } -static bool trap_debug32(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *r) -{ - if (p->is_write) { - vcpu_cp14(vcpu, r->reg) = p->regval; - vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY; - } else { - p->regval = vcpu_cp14(vcpu, r->reg); - } - - return true; -} - -/* AArch32 debug register mappings +/* + * AArch32 debug register mappings * * AArch32 DBGBVRn is mapped to DBGBVRn_EL1[31:0] * AArch32 DBGBXVRn is mapped to DBGBVRn_EL1[63:32] * - * All control registers and watchpoint value registers are mapped to - * the lower 32 bits of their AArch64 equivalents. We share the trap - * handlers with the above AArch64 code which checks what mode the - * system is in. + * None of the other registers share their location, so treat them as + * if they were 64bit. */ - -static bool trap_xvr(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *rd) -{ - u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; - - if (p->is_write) { - u64 val = *dbg_reg; - - val &= 0xffffffffUL; - val |= p->regval << 32; - *dbg_reg = val; - - vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY; - } else { - p->regval = *dbg_reg >> 32; - } - - trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg); - - return true; -} - -#define DBG_BCR_BVR_WCR_WVR(n) \ - /* DBGBVRn */ \ - { Op1( 0), CRn( 0), CRm((n)), Op2( 4), trap_bvr, NULL, n }, \ - /* DBGBCRn */ \ - { Op1( 0), CRn( 0), CRm((n)), Op2( 5), trap_bcr, NULL, n }, \ - /* DBGWVRn */ \ - { Op1( 0), CRn( 0), CRm((n)), Op2( 6), trap_wvr, NULL, n }, \ - /* DBGWCRn */ \ +#define DBG_BCR_BVR_WCR_WVR(n) \ + /* DBGBVRn */ \ + { AA32(LO), Op1( 0), CRn( 0), CRm((n)), Op2( 4), trap_bvr, NULL, n }, \ + /* DBGBCRn */ \ + { Op1( 0), CRn( 0), CRm((n)), Op2( 5), trap_bcr, NULL, n }, \ + /* DBGWVRn */ \ + { Op1( 0), CRn( 0), CRm((n)), Op2( 6), trap_wvr, NULL, n }, \ + /* DBGWCRn */ \ { Op1( 0), CRn( 0), CRm((n)), Op2( 7), trap_wcr, NULL, n } -#define DBGBXVR(n) \ - { Op1( 0), CRn( 1), CRm((n)), Op2( 1), trap_xvr, NULL, n } +#define DBGBXVR(n) \ + { AA32(HI), Op1( 0), CRn( 1), CRm((n)), Op2( 1), trap_bvr, NULL, n } /* * Trapped cp14 registers. We generally ignore most of the external @@ -1897,9 +1754,9 @@ static const struct sys_reg_desc cp14_regs[] = { { Op1( 0), CRn( 0), CRm( 1), Op2( 0), trap_raz_wi }, DBG_BCR_BVR_WCR_WVR(1), /* DBGDCCINT */ - { Op1( 0), CRn( 0), CRm( 2), Op2( 0), trap_debug32, NULL, cp14_DBGDCCINT }, + { Op1( 0), CRn( 0), CRm( 2), Op2( 0), trap_debug_regs, NULL, MDCCINT_EL1 }, /* DBGDSCRext */ - { Op1( 0), CRn( 0), CRm( 2), Op2( 2), trap_debug32, NULL, cp14_DBGDSCRext }, + { Op1( 0), CRn( 0), CRm( 2), Op2( 2), trap_debug_regs, NULL, MDSCR_EL1 }, DBG_BCR_BVR_WCR_WVR(2), /* DBGDTR[RT]Xint */ { Op1( 0), CRn( 0), CRm( 3), Op2( 0), trap_raz_wi }, @@ -1914,7 +1771,7 @@ static const struct sys_reg_desc cp14_regs[] = { { Op1( 0), CRn( 0), CRm( 6), Op2( 2), trap_raz_wi }, DBG_BCR_BVR_WCR_WVR(6), /* DBGVCR */ - { Op1( 0), CRn( 0), CRm( 7), Op2( 0), trap_debug32, NULL, cp14_DBGVCR }, + { Op1( 0), CRn( 0), CRm( 7), Op2( 0), trap_debug_regs, NULL, DBGVCR32_EL2 }, DBG_BCR_BVR_WCR_WVR(7), DBG_BCR_BVR_WCR_WVR(8), DBG_BCR_BVR_WCR_WVR(9), @@ -2000,19 +1857,29 @@ static const struct sys_reg_desc cp14_64_regs[] = { */ static const struct sys_reg_desc cp15_regs[] = { { Op1( 0), CRn( 0), CRm( 0), Op2( 1), access_ctr }, - { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_vm_reg, NULL, c1_SCTLR }, - { Op1( 0), CRn( 1), CRm( 0), Op2( 1), access_actlr }, - { Op1( 0), CRn( 1), CRm( 0), Op2( 3), access_actlr }, - { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, - { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 }, - { Op1( 0), CRn( 2), CRm( 0), Op2( 2), access_vm_reg, NULL, c2_TTBCR }, - { Op1( 0), CRn( 3), CRm( 0), Op2( 0), access_vm_reg, NULL, c3_DACR }, - { Op1( 0), CRn( 5), CRm( 0), Op2( 0), access_vm_reg, NULL, c5_DFSR }, - { Op1( 0), CRn( 5), CRm( 0), Op2( 1), access_vm_reg, NULL, c5_IFSR }, - { Op1( 0), CRn( 5), CRm( 1), Op2( 0), access_vm_reg, NULL, c5_ADFSR }, - { Op1( 0), CRn( 5), CRm( 1), Op2( 1), access_vm_reg, NULL, c5_AIFSR }, - { Op1( 0), CRn( 6), CRm( 0), Op2( 0), access_vm_reg, NULL, c6_DFAR }, - { Op1( 0), CRn( 6), CRm( 0), Op2( 2), access_vm_reg, NULL, c6_IFAR }, + { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_vm_reg, NULL, SCTLR_EL1 }, + /* ACTLR */ + { AA32(LO), Op1( 0), CRn( 1), CRm( 0), Op2( 1), access_actlr, NULL, ACTLR_EL1 }, + /* ACTLR2 */ + { AA32(HI), Op1( 0), CRn( 1), CRm( 0), Op2( 3), access_actlr, NULL, ACTLR_EL1 }, + { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, TTBR0_EL1 }, + { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, TTBR1_EL1 }, + /* TTBCR */ + { AA32(LO), Op1( 0), CRn( 2), CRm( 0), Op2( 2), access_vm_reg, NULL, TCR_EL1 }, + /* TTBCR2 */ + { AA32(HI), Op1( 0), CRn( 2), CRm( 0), Op2( 3), access_vm_reg, NULL, TCR_EL1 }, + { Op1( 0), CRn( 3), CRm( 0), Op2( 0), access_vm_reg, NULL, DACR32_EL2 }, + /* DFSR */ + { Op1( 0), CRn( 5), CRm( 0), Op2( 0), access_vm_reg, NULL, ESR_EL1 }, + { Op1( 0), CRn( 5), CRm( 0), Op2( 1), access_vm_reg, NULL, IFSR32_EL2 }, + /* ADFSR */ + { Op1( 0), CRn( 5), CRm( 1), Op2( 0), access_vm_reg, NULL, AFSR0_EL1 }, + /* AIFSR */ + { Op1( 0), CRn( 5), CRm( 1), Op2( 1), access_vm_reg, NULL, AFSR1_EL1 }, + /* DFAR */ + { AA32(LO), Op1( 0), CRn( 6), CRm( 0), Op2( 0), access_vm_reg, NULL, FAR_EL1 }, + /* IFAR */ + { AA32(HI), Op1( 0), CRn( 6), CRm( 0), Op2( 2), access_vm_reg, NULL, FAR_EL1 }, /* * DC{C,I,CI}SW operations: @@ -2038,15 +1905,19 @@ static const struct sys_reg_desc cp15_regs[] = { { Op1( 0), CRn( 9), CRm(14), Op2( 2), access_pminten }, { Op1( 0), CRn( 9), CRm(14), Op2( 3), access_pmovs }, - { Op1( 0), CRn(10), CRm( 2), Op2( 0), access_vm_reg, NULL, c10_PRRR }, - { Op1( 0), CRn(10), CRm( 2), Op2( 1), access_vm_reg, NULL, c10_NMRR }, - { Op1( 0), CRn(10), CRm( 3), Op2( 0), access_vm_reg, NULL, c10_AMAIR0 }, - { Op1( 0), CRn(10), CRm( 3), Op2( 1), access_vm_reg, NULL, c10_AMAIR1 }, + /* PRRR/MAIR0 */ + { AA32(LO), Op1( 0), CRn(10), CRm( 2), Op2( 0), access_vm_reg, NULL, MAIR_EL1 }, + /* NMRR/MAIR1 */ + { AA32(HI), Op1( 0), CRn(10), CRm( 2), Op2( 1), access_vm_reg, NULL, MAIR_EL1 }, + /* AMAIR0 */ + { AA32(LO), Op1( 0), CRn(10), CRm( 3), Op2( 0), access_vm_reg, NULL, AMAIR_EL1 }, + /* AMAIR1 */ + { AA32(HI), Op1( 0), CRn(10), CRm( 3), Op2( 1), access_vm_reg, NULL, AMAIR_EL1 }, /* ICC_SRE */ { Op1( 0), CRn(12), CRm(12), Op2( 5), access_gic_sre }, - { Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID }, + { Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, CONTEXTIDR_EL1 }, /* Arch Tmers */ { SYS_DESC(SYS_AARCH32_CNTP_TVAL), access_arch_timer }, @@ -2121,14 +1992,14 @@ static const struct sys_reg_desc cp15_regs[] = { { Op1(1), CRn( 0), CRm( 0), Op2(0), access_ccsidr }, { Op1(1), CRn( 0), CRm( 0), Op2(1), access_clidr }, - { Op1(2), CRn( 0), CRm( 0), Op2(0), access_csselr, NULL, c0_CSSELR }, + { Op1(2), CRn( 0), CRm( 0), Op2(0), access_csselr, NULL, CSSELR_EL1 }, }; static const struct sys_reg_desc cp15_64_regs[] = { - { Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, + { Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, TTBR0_EL1 }, { Op1( 0), CRn( 0), CRm( 9), Op2( 0), access_pmu_evcntr }, { Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI1R */ - { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 }, + { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, TTBR1_EL1 }, { Op1( 1), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_ASGI1R */ { Op1( 2), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI0R */ { SYS_DESC(SYS_AARCH32_CNTP_CVAL), access_arch_timer }, @@ -2185,7 +2056,7 @@ static void perform_access(struct kvm_vcpu *vcpu, trace_kvm_sys_access(*vcpu_pc(vcpu), params, r); /* Check for regs disabled by runtime config */ - if (sysreg_hidden_from_guest(vcpu, r)) { + if (sysreg_hidden(vcpu, r)) { kvm_inject_undefined(vcpu); return; } @@ -2199,7 +2070,7 @@ static void perform_access(struct kvm_vcpu *vcpu, /* Skip instruction if instructed so */ if (likely(r->access(vcpu, params, r))) - kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + kvm_incr_pc(vcpu); } /* @@ -2272,8 +2143,6 @@ static int kvm_handle_cp_64(struct kvm_vcpu *vcpu, int Rt = kvm_vcpu_sys_get_rt(vcpu); int Rt2 = (esr >> 10) & 0x1f; - params.is_aarch32 = true; - params.is_32bit = false; params.CRm = (esr >> 1) & 0xf; params.is_write = ((esr & 1) == 0); @@ -2323,8 +2192,6 @@ static int kvm_handle_cp_32(struct kvm_vcpu *vcpu, u32 esr = kvm_vcpu_get_esr(vcpu); int Rt = kvm_vcpu_sys_get_rt(vcpu); - params.is_aarch32 = true; - params.is_32bit = true; params.CRm = (esr >> 1) & 0xf; params.regval = vcpu_get_reg(vcpu, Rt); params.is_write = ((esr & 1) == 0); @@ -2418,8 +2285,6 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu) trace_kvm_handle_sys_reg(esr); - params.is_aarch32 = false; - params.is_32bit = false; params.Op0 = (esr >> 20) & 3; params.Op1 = (esr >> 14) & 0x7; params.CRn = (esr >> 10) & 0xf; @@ -2684,7 +2549,7 @@ int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg return get_invariant_sys_reg(reg->id, uaddr); /* Check for regs disabled by runtime config */ - if (sysreg_hidden_from_user(vcpu, r)) + if (sysreg_hidden(vcpu, r)) return -ENOENT; if (r->get_user) @@ -2709,7 +2574,7 @@ int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg return set_invariant_sys_reg(reg->id, uaddr); /* Check for regs disabled by runtime config */ - if (sysreg_hidden_from_user(vcpu, r)) + if (sysreg_hidden(vcpu, r)) return -ENOENT; if (r->set_user) @@ -2780,7 +2645,7 @@ static int walk_one_sys_reg(const struct kvm_vcpu *vcpu, if (!(rd->reg || rd->get_user)) return 0; - if (sysreg_hidden_from_user(vcpu, rd)) + if (sysreg_hidden(vcpu, rd)) return 0; if (!copy_reg_to_user(rd, uind)) diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index 5a6fc30f5989..9d0621417c2a 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h @@ -19,14 +19,18 @@ struct sys_reg_params { u8 Op2; u64 regval; bool is_write; - bool is_aarch32; - bool is_32bit; /* Only valid if is_aarch32 is true */ }; struct sys_reg_desc { /* Sysreg string for debug */ const char *name; + enum { + AA32_ZEROHIGH, + AA32_LO, + AA32_HI, + } aarch32_map; + /* MRS/MSR instruction which accesses it. */ u8 Op0; u8 Op1; @@ -59,8 +63,8 @@ struct sys_reg_desc { const struct sys_reg_desc *rd); }; -#define REG_HIDDEN_USER (1 << 0) /* hidden from userspace ioctls */ -#define REG_HIDDEN_GUEST (1 << 1) /* hidden from guest */ +#define REG_HIDDEN (1 << 0) /* hidden from userspace and guest */ +#define REG_RAZ (1 << 1) /* RAZ from userspace and guest */ static __printf(2, 3) inline void print_sys_reg_msg(const struct sys_reg_params *p, @@ -111,22 +115,22 @@ static inline void reset_val(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r __vcpu_sys_reg(vcpu, r->reg) = r->val; } -static inline bool sysreg_hidden_from_guest(const struct kvm_vcpu *vcpu, - const struct sys_reg_desc *r) +static inline bool sysreg_hidden(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *r) { if (likely(!r->visibility)) return false; - return r->visibility(vcpu, r) & REG_HIDDEN_GUEST; + return r->visibility(vcpu, r) & REG_HIDDEN; } -static inline bool sysreg_hidden_from_user(const struct kvm_vcpu *vcpu, - const struct sys_reg_desc *r) +static inline bool sysreg_visible_as_raz(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *r) { if (likely(!r->visibility)) return false; - return r->visibility(vcpu, r) & REG_HIDDEN_USER; + return r->visibility(vcpu, r) & REG_RAZ; } static inline int cmp_sys_reg(const struct sys_reg_desc *i1, @@ -153,6 +157,7 @@ const struct sys_reg_desc *find_reg_by_id(u64 id, const struct sys_reg_desc table[], unsigned int num); +#define AA32(_x) .aarch32_map = AA32_##_x #define Op0(_x) .Op0 = _x #define Op1(_x) .Op1 = _x #define CRn(_x) .CRn = _x diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c index e0404bcab019..4130b72e6891 100644 --- a/arch/arm64/kvm/va_layout.c +++ b/arch/arm64/kvm/va_layout.c @@ -11,6 +11,7 @@ #include <asm/debug-monitors.h> #include <asm/insn.h> #include <asm/kvm_mmu.h> +#include <asm/memory.h> /* * The LSB of the HYP VA tag @@ -131,21 +132,16 @@ void __init kvm_update_va_mask(struct alt_instr *alt, } } -void *__kvm_bp_vect_base; -int __kvm_harden_el2_vector_slot; - void kvm_patch_vector_branch(struct alt_instr *alt, __le32 *origptr, __le32 *updptr, int nr_inst) { u64 addr; u32 insn; - BUG_ON(nr_inst != 5); + BUG_ON(nr_inst != 4); - if (has_vhe() || !cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS)) { - WARN_ON_ONCE(cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS)); + if (!cpus_have_const_cap(ARM64_SPECTRE_V3A) || WARN_ON_ONCE(has_vhe())) return; - } /* * Compute HYP VA by using the same computation as kern_hyp_va() @@ -163,15 +159,6 @@ void kvm_patch_vector_branch(struct alt_instr *alt, */ addr += KVM_VECTOR_PREAMBLE; - /* stp x0, x1, [sp, #-16]! */ - insn = aarch64_insn_gen_load_store_pair(AARCH64_INSN_REG_0, - AARCH64_INSN_REG_1, - AARCH64_INSN_REG_SP, - -16, - AARCH64_INSN_VARIANT_64BIT, - AARCH64_INSN_LDST_STORE_PAIR_PRE_INDEX); - *updptr++ = cpu_to_le32(insn); - /* movz x0, #(addr & 0xffff) */ insn = aarch64_insn_gen_movewide(AARCH64_INSN_REG_0, (u16)addr, @@ -201,3 +188,58 @@ void kvm_patch_vector_branch(struct alt_instr *alt, AARCH64_INSN_BRANCH_NOLINK); *updptr++ = cpu_to_le32(insn); } + +static void generate_mov_q(u64 val, __le32 *origptr, __le32 *updptr, int nr_inst) +{ + u32 insn, oinsn, rd; + + BUG_ON(nr_inst != 4); + + /* Compute target register */ + oinsn = le32_to_cpu(*origptr); + rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, oinsn); + + /* movz rd, #(val & 0xffff) */ + insn = aarch64_insn_gen_movewide(rd, + (u16)val, + 0, + AARCH64_INSN_VARIANT_64BIT, + AARCH64_INSN_MOVEWIDE_ZERO); + *updptr++ = cpu_to_le32(insn); + + /* movk rd, #((val >> 16) & 0xffff), lsl #16 */ + insn = aarch64_insn_gen_movewide(rd, + (u16)(val >> 16), + 16, + AARCH64_INSN_VARIANT_64BIT, + AARCH64_INSN_MOVEWIDE_KEEP); + *updptr++ = cpu_to_le32(insn); + + /* movk rd, #((val >> 32) & 0xffff), lsl #32 */ + insn = aarch64_insn_gen_movewide(rd, + (u16)(val >> 32), + 32, + AARCH64_INSN_VARIANT_64BIT, + AARCH64_INSN_MOVEWIDE_KEEP); + *updptr++ = cpu_to_le32(insn); + + /* movk rd, #((val >> 48) & 0xffff), lsl #48 */ + insn = aarch64_insn_gen_movewide(rd, + (u16)(val >> 48), + 48, + AARCH64_INSN_VARIANT_64BIT, + AARCH64_INSN_MOVEWIDE_KEEP); + *updptr++ = cpu_to_le32(insn); +} + +void kvm_update_kimg_phys_offset(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst) +{ + generate_mov_q(kimage_voffset + PHYS_OFFSET, origptr, updptr, nr_inst); +} + +void kvm_get_kimage_voffset(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst) +{ + generate_mov_q(kimage_voffset, origptr, updptr, nr_inst); +} diff --git a/arch/arm64/kvm/vgic-sys-reg-v3.c b/arch/arm64/kvm/vgic-sys-reg-v3.c index 2f92bdcb1188..07d5271e9f05 100644 --- a/arch/arm64/kvm/vgic-sys-reg-v3.c +++ b/arch/arm64/kvm/vgic-sys-reg-v3.c @@ -268,8 +268,6 @@ int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, bool is_write, u64 id, params.regval = *reg; params.is_write = is_write; - params.is_aarch32 = false; - params.is_32bit = false; if (find_reg_by_id(sysreg, ¶ms, gic_v3_icc_reg_descs, ARRAY_SIZE(gic_v3_icc_reg_descs))) @@ -288,8 +286,6 @@ int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu, bool is_write, u64 id, if (is_write) params.regval = *reg; params.is_write = is_write; - params.is_aarch32 = false; - params.is_32bit = false; r = find_reg_by_id(sysreg, ¶ms, gic_v3_icc_reg_descs, ARRAY_SIZE(gic_v3_icc_reg_descs)); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 1c0f3e02f731..ca692a815731 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1444,11 +1444,28 @@ static void __remove_pgd_mapping(pgd_t *pgdir, unsigned long start, u64 size) free_empty_tables(start, end, PAGE_OFFSET, PAGE_END); } +static bool inside_linear_region(u64 start, u64 size) +{ + /* + * Linear mapping region is the range [PAGE_OFFSET..(PAGE_END - 1)] + * accommodating both its ends but excluding PAGE_END. Max physical + * range which can be mapped inside this linear mapping range, must + * also be derived from its end points. + */ + return start >= __pa(_PAGE_OFFSET(vabits_actual)) && + (start + size - 1) <= __pa(PAGE_END - 1); +} + int arch_add_memory(int nid, u64 start, u64 size, struct mhp_params *params) { int ret, flags = 0; + if (!inside_linear_region(start, size)) { + pr_err("[%llx %llx] is outside linear mapping region\n", start, start + size); + return -EINVAL; + } + if (rodata_full || debug_pagealloc_enabled()) flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; diff --git a/arch/csky/kernel/perf_regs.c b/arch/csky/kernel/perf_regs.c index eb32838b8210..09b7f88a2d6a 100644 --- a/arch/csky/kernel/perf_regs.c +++ b/arch/csky/kernel/perf_regs.c @@ -32,8 +32,7 @@ u64 perf_reg_abi(struct task_struct *task) } void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy) + struct pt_regs *regs) { regs_user->regs = task_pt_regs(current); regs_user->abi = perf_reg_abi(current); diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index 9ed4fcccf8a9..7b25548ec42b 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -1336,7 +1336,7 @@ static void dump_trace_imc_data(struct perf_event *event) /* If this is a valid record, create the sample */ struct perf_output_handle handle; - if (perf_output_begin(&handle, event, header.size)) + if (perf_output_begin(&handle, &data, event, header.size)) return; perf_output_sample(&handle, &header, &data, event); diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c index 8e53f2fc3fe0..6f681b105eec 100644 --- a/arch/powerpc/perf/perf_regs.c +++ b/arch/powerpc/perf/perf_regs.c @@ -144,8 +144,7 @@ u64 perf_reg_abi(struct task_struct *task) } void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy) + struct pt_regs *regs) { regs_user->regs = task_pt_regs(current); regs_user->abi = (regs_user->regs) ? perf_reg_abi(current) : diff --git a/arch/riscv/kernel/perf_regs.c b/arch/riscv/kernel/perf_regs.c index 04a38fbeb9c7..fd304a248de6 100644 --- a/arch/riscv/kernel/perf_regs.c +++ b/arch/riscv/kernel/perf_regs.c @@ -36,8 +36,7 @@ u64 perf_reg_abi(struct task_struct *task) } void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy) + struct pt_regs *regs) { regs_user->regs = task_pt_regs(current); regs_user->abi = perf_reg_abi(current); diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 4f9e4626df55..00255ae3979d 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -672,7 +672,7 @@ static void cpumsf_output_event_pid(struct perf_event *event, rcu_read_lock(); perf_prepare_sample(&header, data, event, regs); - if (perf_output_begin(&handle, event, header.size)) + if (perf_output_begin(&handle, data, event, header.size)) goto out; /* Update the process ID (see also kernel/events/core.c) */ diff --git a/arch/s390/kernel/perf_regs.c b/arch/s390/kernel/perf_regs.c index 4352a504f235..6e9e5d5e927e 100644 --- a/arch/s390/kernel/perf_regs.c +++ b/arch/s390/kernel/perf_regs.c @@ -53,8 +53,7 @@ u64 perf_reg_abi(struct task_struct *task) } void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy) + struct pt_regs *regs) { /* * Use the regs from the first interruption and let diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h index 5393e13e07e0..2bbf28cf3aa9 100644 --- a/arch/um/include/asm/pgalloc.h +++ b/arch/um/include/asm/pgalloc.h @@ -33,7 +33,13 @@ do { \ } while (0) #ifdef CONFIG_3_LEVEL_PGTABLES -#define __pmd_free_tlb(tlb,x, address) tlb_remove_page((tlb),virt_to_page(x)) + +#define __pmd_free_tlb(tlb, pmd, address) \ +do { \ + pgtable_pmd_page_dtor(virt_to_page(pmd)); \ + tlb_remove_page((tlb),virt_to_page(pmd)); \ +} while (0) \ + #endif #endif diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index f1926e9f2143..af457f8cb29d 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2630,7 +2630,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) u64 pebs_enabled = cpuc->pebs_enabled; handled++; - x86_pmu.drain_pebs(regs); + x86_pmu.drain_pebs(regs, &data); status &= x86_pmu.intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI; /* @@ -4987,6 +4987,12 @@ __init int intel_pmu_init(void) x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */ + if (version >= 5) { + x86_pmu.intel_cap.anythread_deprecated = edx.split.anythread_deprecated; + if (x86_pmu.intel_cap.anythread_deprecated) + pr_cont(" AnyThread deprecated, "); + } + /* * Install the hw-cache-events table: */ @@ -5512,6 +5518,10 @@ __init int intel_pmu_init(void) x86_pmu.intel_ctrl |= ((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED; + /* AnyThread may be deprecated on arch perfmon v5 or later */ + if (x86_pmu.intel_cap.anythread_deprecated) + x86_pmu.format_attrs = intel_arch_formats_attr; + if (x86_pmu.event_constraints) { /* * event on fixed counter2 (REF_CYCLES) only works on this diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 404315df1e16..b47cc4226934 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -642,8 +642,8 @@ int intel_pmu_drain_bts_buffer(void) rcu_read_lock(); perf_prepare_sample(&header, &data, event, ®s); - if (perf_output_begin(&handle, event, header.size * - (top - base - skip))) + if (perf_output_begin(&handle, &data, event, + header.size * (top - base - skip))) goto unlock; for (at = base; at < top; at++) { @@ -670,7 +670,9 @@ unlock: static inline void intel_pmu_drain_pebs_buffer(void) { - x86_pmu.drain_pebs(NULL); + struct perf_sample_data data; + + x86_pmu.drain_pebs(NULL, &data); } /* @@ -1719,23 +1721,24 @@ intel_pmu_save_and_restart_reload(struct perf_event *event, int count) return 0; } -static void __intel_pmu_pebs_event(struct perf_event *event, - struct pt_regs *iregs, - void *base, void *top, - int bit, int count, - void (*setup_sample)(struct perf_event *, - struct pt_regs *, - void *, - struct perf_sample_data *, - struct pt_regs *)) +static __always_inline void +__intel_pmu_pebs_event(struct perf_event *event, + struct pt_regs *iregs, + struct perf_sample_data *data, + void *base, void *top, + int bit, int count, + void (*setup_sample)(struct perf_event *, + struct pt_regs *, + void *, + struct perf_sample_data *, + struct pt_regs *)) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; - struct perf_sample_data data; struct x86_perf_regs perf_regs; struct pt_regs *regs = &perf_regs.regs; void *at = get_next_pebs_record_by_bit(base, top, bit); - struct pt_regs dummy_iregs; + static struct pt_regs dummy_iregs; if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) { /* @@ -1752,14 +1755,14 @@ static void __intel_pmu_pebs_event(struct perf_event *event, iregs = &dummy_iregs; while (count > 1) { - setup_sample(event, iregs, at, &data, regs); - perf_event_output(event, &data, regs); + setup_sample(event, iregs, at, data, regs); + perf_event_output(event, data, regs); at += cpuc->pebs_record_size; at = get_next_pebs_record_by_bit(at, top, bit); count--; } - setup_sample(event, iregs, at, &data, regs); + setup_sample(event, iregs, at, data, regs); if (iregs == &dummy_iregs) { /* * The PEBS records may be drained in the non-overflow context, @@ -1767,18 +1770,18 @@ static void __intel_pmu_pebs_event(struct perf_event *event, * last record the same as other PEBS records, and doesn't * invoke the generic overflow handler. */ - perf_event_output(event, &data, regs); + perf_event_output(event, data, regs); } else { /* * All but the last records are processed. * The last one is left to be able to call the overflow handler. */ - if (perf_event_overflow(event, &data, regs)) + if (perf_event_overflow(event, data, regs)) x86_pmu_stop(event, 0); } } -static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) +static void intel_pmu_drain_pebs_core(struct pt_regs *iregs, struct perf_sample_data *data) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct debug_store *ds = cpuc->ds; @@ -1812,7 +1815,7 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) return; } - __intel_pmu_pebs_event(event, iregs, at, top, 0, n, + __intel_pmu_pebs_event(event, iregs, data, at, top, 0, n, setup_pebs_fixed_sample_data); } @@ -1835,7 +1838,7 @@ static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, int } } -static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) +static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_data *data) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct debug_store *ds = cpuc->ds; @@ -1942,14 +1945,14 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) } if (counts[bit]) { - __intel_pmu_pebs_event(event, iregs, base, + __intel_pmu_pebs_event(event, iregs, data, base, top, bit, counts[bit], setup_pebs_fixed_sample_data); } } } -static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs) +static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_data *data) { short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {}; struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); @@ -1997,7 +2000,7 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs) if (WARN_ON_ONCE(!event->attr.precise_ip)) continue; - __intel_pmu_pebs_event(event, iregs, base, + __intel_pmu_pebs_event(event, iregs, data, base, top, bit, counts[bit], setup_pebs_adaptive_sample_data); } diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index 39e632ed6ca9..bbd1120ae161 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -475,7 +475,7 @@ enum perf_snb_uncore_imc_freerunning_types { static struct freerunning_counters snb_uncore_imc_freerunning[] = { [SNB_PCI_UNCORE_IMC_DATA_READS] = { SNB_UNCORE_PCI_IMC_DATA_READS_BASE, 0x0, 0x0, 1, 32 }, - [SNB_PCI_UNCORE_IMC_DATA_READS] = { SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE, + [SNB_PCI_UNCORE_IMC_DATA_WRITES] = { SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE, 0x0, 0x0, 1, 32 }, [SNB_PCI_UNCORE_IMC_GT_REQUESTS] = { SNB_UNCORE_PCI_IMC_GT_REQUESTS_BASE, 0x0, 0x0, 1, 32 }, diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index ee2b9b9fc2a5..6a8edfe59b09 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -585,6 +585,7 @@ union perf_capabilities { u64 pebs_baseline:1; u64 perf_metrics:1; u64 pebs_output_pt_available:1; + u64 anythread_deprecated:1; }; u64 capabilities; }; @@ -727,7 +728,7 @@ struct x86_pmu { int pebs_record_size; int pebs_buffer_size; int max_pebs_events; - void (*drain_pebs)(struct pt_regs *regs); + void (*drain_pebs)(struct pt_regs *regs, struct perf_sample_data *data); struct event_constraint *pebs_constraints; void (*pebs_aliases)(struct perf_event *event); unsigned long large_pebs_flags; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index d44858b69353..324ddd7fd0aa 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -639,6 +639,7 @@ struct kvm_vcpu_arch { int cpuid_nent; struct kvm_cpuid_entry2 *cpuid_entries; + unsigned long cr3_lm_rsvd_bits; int maxphyaddr; int max_tdp_level; diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 6960cd6d1f23..b9a7fd0a27e2 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -137,7 +137,9 @@ union cpuid10_edx { struct { unsigned int num_counters_fixed:5; unsigned int bit_width_fixed:8; - unsigned int reserved:19; + unsigned int reserved1:2; + unsigned int anythread_deprecated:1; + unsigned int reserved2:16; } split; unsigned int full; }; diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h index 172d3e4a9e4b..648eb23fe7f0 100644 --- a/arch/x86/include/asm/uv/uv.h +++ b/arch/x86/include/asm/uv/uv.h @@ -2,14 +2,8 @@ #ifndef _ASM_X86_UV_UV_H #define _ASM_X86_UV_UV_H -#include <asm/tlbflush.h> - enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC}; -struct cpumask; -struct mm_struct; -struct flush_tlb_info; - #ifdef CONFIG_X86_UV #include <linux/efi.h> @@ -44,10 +38,6 @@ static inline int is_uv_system(void) { return 0; } static inline int is_uv_hubbed(int uv) { return 0; } static inline void uv_cpu_init(void) { } static inline void uv_system_init(void) { } -static inline const struct cpumask * -uv_flush_tlb_others(const struct cpumask *cpumask, - const struct flush_tlb_info *info) -{ return cpumask; } #endif /* X86_UV */ diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 3115caa7d7d0..1b98f8c12b96 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -33,7 +33,7 @@ static union uvh_apicid uvh_apicid; static int uv_node_id; /* Unpack AT/OEM/TABLE ID's to be NULL terminated strings */ -static u8 uv_archtype[UV_AT_SIZE]; +static u8 uv_archtype[UV_AT_SIZE + 1]; static u8 oem_id[ACPI_OEM_ID_SIZE + 1]; static u8 oem_table_id[ACPI_OEM_TABLE_ID_SIZE + 1]; @@ -320,7 +320,7 @@ static int __init decode_arch_type(unsigned long ptr) if (n > 0 && n < sizeof(uv_ate->archtype)) { pr_info("UV: UVarchtype received from BIOS\n"); - uv_stringify(UV_AT_SIZE, uv_archtype, uv_ate->archtype); + uv_stringify(sizeof(uv_archtype), uv_archtype, uv_ate->archtype); return 1; } return 0; @@ -378,7 +378,7 @@ static int __init uv_set_system_type(char *_oem_id, char *_oem_table_id) if (!early_get_arch_type()) /* If not use OEM ID for UVarchtype */ - uv_stringify(UV_AT_SIZE, uv_archtype, _oem_id); + uv_stringify(sizeof(uv_archtype), uv_archtype, oem_id); /* Check if not hubbed */ if (strncmp(uv_archtype, "SGI", 3) != 0) { diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c index bb7e1132290b..f9e5352b3bef 100644 --- a/arch/x86/kernel/perf_regs.c +++ b/arch/x86/kernel/perf_regs.c @@ -101,8 +101,7 @@ u64 perf_reg_abi(struct task_struct *task) } void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy) + struct pt_regs *regs) { regs_user->regs = task_pt_regs(current); regs_user->abi = perf_reg_abi(current); @@ -129,12 +128,20 @@ u64 perf_reg_abi(struct task_struct *task) return PERF_SAMPLE_REGS_ABI_64; } +static DEFINE_PER_CPU(struct pt_regs, nmi_user_regs); + void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy) + struct pt_regs *regs) { + struct pt_regs *regs_user_copy = this_cpu_ptr(&nmi_user_regs); struct pt_regs *user_regs = task_pt_regs(current); + if (!in_nmi()) { + regs_user->regs = user_regs; + regs_user->abi = perf_reg_abi(current); + return; + } + /* * If we're in an NMI that interrupted task_pt_regs setup, then * we can't sample user regs at all. This check isn't really diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 06a278b3701d..83637a2ff605 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -90,6 +90,20 @@ static int kvm_check_cpuid(struct kvm_cpuid_entry2 *entries, int nent) return 0; } +void kvm_update_pv_runtime(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, KVM_CPUID_FEATURES, 0); + + /* + * save the feature bitmap to avoid cpuid lookup for every PV + * operation + */ + if (best) + vcpu->arch.pv_cpuid.features = best->eax; +} + void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; @@ -124,13 +138,6 @@ void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu) (best->eax & (1 << KVM_FEATURE_PV_UNHALT))) best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT); - /* - * save the feature bitmap to avoid cpuid lookup for every PV - * operation - */ - if (best) - vcpu->arch.pv_cpuid.features = best->eax; - if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)) { best = kvm_find_cpuid_entry(vcpu, 0x1, 0); if (best) @@ -162,6 +169,8 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) vcpu->arch.guest_supported_xcr0 = (best->eax | ((u64)best->edx << 32)) & supported_xcr0; + kvm_update_pv_runtime(vcpu); + vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); kvm_mmu_reset_context(vcpu); @@ -169,6 +178,8 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) vcpu->arch.cr4_guest_rsvd_bits = __cr4_reserved_bits(guest_cpuid_has, vcpu); + vcpu->arch.cr3_lm_rsvd_bits = rsvd_bits(cpuid_maxphyaddr(vcpu), 63); + /* Invoke the vendor callback only after the above state is updated. */ kvm_x86_ops.vcpu_after_set_cpuid(vcpu); } @@ -672,7 +683,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) edx.split.num_counters_fixed = min(cap.num_counters_fixed, MAX_FIXED_COUNTERS); edx.split.bit_width_fixed = cap.bit_width_fixed; - edx.split.reserved = 0; + edx.split.anythread_deprecated = 1; + edx.split.reserved1 = 0; + edx.split.reserved2 = 0; entry->eax = eax.full; entry->ebx = cap.events_mask; diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index bf8577947ed2..f7a6e8f83783 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -11,6 +11,7 @@ extern u32 kvm_cpu_caps[NCAPINTS] __read_mostly; void kvm_set_cpu_caps(void); void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu); +void kvm_update_pv_runtime(struct kvm_vcpu *vcpu); struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, u32 function, u32 index); int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid, diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 0d917eb70319..56cae1ff9e3f 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4046,6 +4046,12 @@ static int em_clflush(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } +static int em_clflushopt(struct x86_emulate_ctxt *ctxt) +{ + /* emulating clflushopt regardless of cpuid */ + return X86EMUL_CONTINUE; +} + static int em_movsxd(struct x86_emulate_ctxt *ctxt) { ctxt->dst.val = (s32) ctxt->src.val; @@ -4585,7 +4591,7 @@ static const struct opcode group11[] = { }; static const struct gprefix pfx_0f_ae_7 = { - I(SrcMem | ByteOp, em_clflush), N, N, N, + I(SrcMem | ByteOp, em_clflush), I(SrcMem | ByteOp, em_clflushopt), N, N, }; static const struct group_dual group15 = { { diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 1f96adff8dc4..5bb1939b65d8 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -856,12 +856,14 @@ static int pte_list_add(struct kvm_vcpu *vcpu, u64 *spte, } else { rmap_printk("pte_list_add: %p %llx many->many\n", spte, *spte); desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); - while (desc->sptes[PTE_LIST_EXT-1] && desc->more) { - desc = desc->more; + while (desc->sptes[PTE_LIST_EXT-1]) { count += PTE_LIST_EXT; - } - if (desc->sptes[PTE_LIST_EXT-1]) { - desc->more = mmu_alloc_pte_list_desc(vcpu); + + if (!desc->more) { + desc->more = mmu_alloc_pte_list_desc(vcpu); + desc = desc->more; + break; + } desc = desc->more; } for (i = 0; desc->sptes[i]; ++i) diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 27e381c9da6c..ff28a5c6abd6 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -49,7 +49,14 @@ bool is_tdp_mmu_root(struct kvm *kvm, hpa_t hpa) { struct kvm_mmu_page *sp; + if (!kvm->arch.tdp_mmu_enabled) + return false; + if (WARN_ON(!VALID_PAGE(hpa))) + return false; + sp = to_shadow_page(hpa); + if (WARN_ON(!sp)) + return false; return sp->tdp_mmu_page && sp->root_count; } diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 2f32fd09e259..1e81cfebd491 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3741,6 +3741,7 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); + struct kvm_cpuid_entry2 *best; vcpu->arch.xsaves_enabled = guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && boot_cpu_has(X86_FEATURE_XSAVE) && @@ -3753,6 +3754,13 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) /* Check again if INVPCID interception if required */ svm_check_invpcid(svm); + /* For sev guests, the memory encryption bit is not reserved in CR3. */ + if (sev_guest(vcpu->kvm)) { + best = kvm_find_cpuid_entry(vcpu, 0x8000001F, 0); + if (best) + vcpu->arch.cr3_lm_rsvd_bits &= ~(1UL << (best->ebx & 0x3f)); + } + if (!kvm_vcpu_apicv_active(vcpu)) return; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f5ede41bf9e6..078a39d489fe 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -255,11 +255,10 @@ static struct kmem_cache *x86_emulator_cache; /* * When called, it means the previous get/set msr reached an invalid msr. - * Return 0 if we want to ignore/silent this failed msr access, or 1 if we want - * to fail the caller. + * Return true if we want to ignore/silent this failed msr access. */ -static int kvm_msr_ignored_check(struct kvm_vcpu *vcpu, u32 msr, - u64 data, bool write) +static bool kvm_msr_ignored_check(struct kvm_vcpu *vcpu, u32 msr, + u64 data, bool write) { const char *op = write ? "wrmsr" : "rdmsr"; @@ -268,11 +267,11 @@ static int kvm_msr_ignored_check(struct kvm_vcpu *vcpu, u32 msr, kvm_pr_unimpl("ignored %s: 0x%x data 0x%llx\n", op, msr, data); /* Mask the error */ - return 0; + return true; } else { kvm_debug_ratelimited("unhandled %s: 0x%x data 0x%llx\n", op, msr, data); - return -ENOENT; + return false; } } @@ -1042,7 +1041,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) } if (is_long_mode(vcpu) && - (cr3 & rsvd_bits(cpuid_maxphyaddr(vcpu), 63))) + (cr3 & vcpu->arch.cr3_lm_rsvd_bits)) return 1; else if (is_pae_paging(vcpu) && !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) @@ -1416,7 +1415,8 @@ static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data) if (r == KVM_MSR_RET_INVALID) { /* Unconditionally clear the output for simplicity */ *data = 0; - r = kvm_msr_ignored_check(vcpu, index, 0, false); + if (kvm_msr_ignored_check(vcpu, index, 0, false)) + r = 0; } if (r) @@ -1540,7 +1540,7 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data, struct msr_data msr; if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_WRITE)) - return -EPERM; + return KVM_MSR_RET_FILTERED; switch (index) { case MSR_FS_BASE: @@ -1581,7 +1581,8 @@ static int kvm_set_msr_ignored_check(struct kvm_vcpu *vcpu, int ret = __kvm_set_msr(vcpu, index, data, host_initiated); if (ret == KVM_MSR_RET_INVALID) - ret = kvm_msr_ignored_check(vcpu, index, data, true); + if (kvm_msr_ignored_check(vcpu, index, data, true)) + ret = 0; return ret; } @@ -1599,7 +1600,7 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, int ret; if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ)) - return -EPERM; + return KVM_MSR_RET_FILTERED; msr.index = index; msr.host_initiated = host_initiated; @@ -1618,7 +1619,8 @@ static int kvm_get_msr_ignored_check(struct kvm_vcpu *vcpu, if (ret == KVM_MSR_RET_INVALID) { /* Unconditionally clear *data for simplicity */ *data = 0; - ret = kvm_msr_ignored_check(vcpu, index, 0, false); + if (kvm_msr_ignored_check(vcpu, index, 0, false)) + ret = 0; } return ret; @@ -1662,9 +1664,9 @@ static int complete_emulated_wrmsr(struct kvm_vcpu *vcpu) static u64 kvm_msr_reason(int r) { switch (r) { - case -ENOENT: + case KVM_MSR_RET_INVALID: return KVM_MSR_EXIT_REASON_UNKNOWN; - case -EPERM: + case KVM_MSR_RET_FILTERED: return KVM_MSR_EXIT_REASON_FILTER; default: return KVM_MSR_EXIT_REASON_INVAL; @@ -1965,7 +1967,7 @@ static void kvm_write_system_time(struct kvm_vcpu *vcpu, gpa_t system_time, struct kvm_arch *ka = &vcpu->kvm->arch; if (vcpu->vcpu_id == 0 && !host_initiated) { - if (ka->boot_vcpu_runs_old_kvmclock && old_msr) + if (ka->boot_vcpu_runs_old_kvmclock != old_msr) kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu); ka->boot_vcpu_runs_old_kvmclock = old_msr; @@ -3063,9 +3065,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) /* Values other than LBR and BTF are vendor-specific, thus reserved and should throw a #GP */ return 1; - } - vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n", - __func__, data); + } else if (report_ignored_msrs) + vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n", + __func__, data); break; case 0x200 ... 0x2ff: return kvm_mtrr_set_msr(vcpu, msr, data); @@ -3463,29 +3465,63 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = vcpu->arch.efer; break; case MSR_KVM_WALL_CLOCK: + if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE)) + return 1; + + msr_info->data = vcpu->kvm->arch.wall_clock; + break; case MSR_KVM_WALL_CLOCK_NEW: + if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2)) + return 1; + msr_info->data = vcpu->kvm->arch.wall_clock; break; case MSR_KVM_SYSTEM_TIME: + if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE)) + return 1; + + msr_info->data = vcpu->arch.time; + break; case MSR_KVM_SYSTEM_TIME_NEW: + if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2)) + return 1; + msr_info->data = vcpu->arch.time; break; case MSR_KVM_ASYNC_PF_EN: + if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF)) + return 1; + msr_info->data = vcpu->arch.apf.msr_en_val; break; case MSR_KVM_ASYNC_PF_INT: + if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT)) + return 1; + msr_info->data = vcpu->arch.apf.msr_int_val; break; case MSR_KVM_ASYNC_PF_ACK: + if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF)) + return 1; + msr_info->data = 0; break; case MSR_KVM_STEAL_TIME: + if (!guest_pv_has(vcpu, KVM_FEATURE_STEAL_TIME)) + return 1; + msr_info->data = vcpu->arch.st.msr_val; break; case MSR_KVM_PV_EOI_EN: + if (!guest_pv_has(vcpu, KVM_FEATURE_PV_EOI)) + return 1; + msr_info->data = vcpu->arch.pv_eoi.msr_val; break; case MSR_KVM_POLL_CONTROL: + if (!guest_pv_has(vcpu, KVM_FEATURE_POLL_CONTROL)) + return 1; + msr_info->data = vcpu->arch.msr_kvm_poll_control; break; case MSR_IA32_P5_MC_ADDR: @@ -4575,6 +4611,8 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, case KVM_CAP_ENFORCE_PV_FEATURE_CPUID: vcpu->arch.pv_cpuid.enforce = cap->args[0]; + if (vcpu->arch.pv_cpuid.enforce) + kvm_update_pv_runtime(vcpu); return 0; diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 3900ab0c6004..e7ca622a468f 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -376,7 +376,13 @@ int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r, int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva); bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type); -#define KVM_MSR_RET_INVALID 2 +/* + * Internal error codes that are used to indicate that MSR emulation encountered + * an error that should result in #GP in the guest, unless userspace + * handles it. + */ +#define KVM_MSR_RET_INVALID 2 /* in-kernel MSR emulation #GP condition */ +#define KVM_MSR_RET_FILTERED 3 /* #GP due to userspace MSR filter */ #define __cr4_reserved_bits(__cpu_has, __c) \ ({ \ |