Diffstat (limited to 'arch')
-rw-r--r--  arch/arm/include/asm/kprobes.h | 22
-rw-r--r--  arch/arm/kernel/perf_regs.c | 3
-rw-r--r--  arch/arm/probes/kprobes/opt-arm.c | 18
-rw-r--r--  arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts | 1
-rw-r--r--  arch/arm64/include/asm/cpucaps.h | 2
-rw-r--r--  arch/arm64/include/asm/cpufeature.h | 2
-rw-r--r--  arch/arm64/include/asm/cputype.h | 4
-rw-r--r--  arch/arm64/include/asm/kvm_asm.h | 7
-rw-r--r--  arch/arm64/include/asm/kvm_coproc.h | 38
-rw-r--r--  arch/arm64/include/asm/kvm_emulate.h | 70
-rw-r--r--  arch/arm64/include/asm/kvm_host.h | 193
-rw-r--r--  arch/arm64/include/asm/kvm_mmu.h | 86
-rw-r--r--  arch/arm64/include/asm/mmu.h | 29
-rw-r--r--  arch/arm64/include/asm/spectre.h | 65
-rw-r--r--  arch/arm64/include/asm/sysreg.h | 5
-rw-r--r--  arch/arm64/kernel/cpu_errata.c | 21
-rw-r--r--  arch/arm64/kernel/cpufeature.c | 22
-rw-r--r--  arch/arm64/kernel/image-vars.h | 5
-rw-r--r--  arch/arm64/kernel/kexec_image.c | 2
-rw-r--r--  arch/arm64/kernel/perf_regs.c | 3
-rw-r--r--  arch/arm64/kernel/process.c | 5
-rw-r--r--  arch/arm64/kernel/proton-pack.c | 85
-rw-r--r--  arch/arm64/kernel/psci.c | 5
-rw-r--r--  arch/arm64/kernel/smp.c | 1
-rw-r--r--  arch/arm64/kvm/Makefile | 4
-rw-r--r--  arch/arm64/kvm/aarch32.c | 232
-rw-r--r--  arch/arm64/kvm/arm.c | 123
-rw-r--r--  arch/arm64/kvm/guest.c | 29
-rw-r--r--  arch/arm64/kvm/handle_exit.c | 24
-rw-r--r--  arch/arm64/kvm/hyp/Makefile | 2
-rw-r--r--  arch/arm64/kvm/hyp/aarch32.c | 4
-rw-r--r--  arch/arm64/kvm/hyp/exception.c | 331
-rw-r--r--  arch/arm64/kvm/hyp/hyp-entry.S | 71
-rw-r--r--  arch/arm64/kvm/hyp/include/hyp/adjust_pc.h | 62
-rw-r--r--  arch/arm64/kvm/hyp/include/hyp/switch.h | 17
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/Makefile | 2
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/host.S | 11
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/hyp-main.c | 232
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/switch.c | 3
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/sysreg-sr.c | 11
-rw-r--r--  arch/arm64/kvm/hyp/smccc_wa.S | 32
-rw-r--r--  arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c | 2
-rw-r--r--  arch/arm64/kvm/hyp/vgic-v3-sr.c | 2
-rw-r--r--  arch/arm64/kvm/hyp/vhe/Makefile | 2
-rw-r--r--  arch/arm64/kvm/hyp/vhe/switch.c | 3
-rw-r--r--  arch/arm64/kvm/inject_fault.c | 167
-rw-r--r--  arch/arm64/kvm/mmio.c | 2
-rw-r--r--  arch/arm64/kvm/mmu.c | 4
-rw-r--r--  arch/arm64/kvm/pmu-emul.c | 19
-rw-r--r--  arch/arm64/kvm/regmap.c | 224
-rw-r--r--  arch/arm64/kvm/reset.c | 5
-rw-r--r--  arch/arm64/kvm/sys_regs.c | 575
-rw-r--r--  arch/arm64/kvm/sys_regs.h | 25
-rw-r--r--  arch/arm64/kvm/va_layout.c | 74
-rw-r--r--  arch/arm64/kvm/vgic-sys-reg-v3.c | 4
-rw-r--r--  arch/arm64/mm/mmu.c | 17
-rw-r--r--  arch/csky/kernel/perf_regs.c | 3
-rw-r--r--  arch/powerpc/perf/imc-pmu.c | 2
-rw-r--r--  arch/powerpc/perf/perf_regs.c | 3
-rw-r--r--  arch/riscv/kernel/perf_regs.c | 3
-rw-r--r--  arch/s390/kernel/perf_cpum_sf.c | 2
-rw-r--r--  arch/s390/kernel/perf_regs.c | 3
-rw-r--r--  arch/um/include/asm/pgalloc.h | 8
-rw-r--r--  arch/x86/events/intel/core.c | 12
-rw-r--r--  arch/x86/events/intel/ds.c | 53
-rw-r--r--  arch/x86/events/intel/uncore_snb.c | 2
-rw-r--r--  arch/x86/events/perf_event.h | 3
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 1
-rw-r--r--  arch/x86/include/asm/perf_event.h | 4
-rw-r--r--  arch/x86/include/asm/uv/uv.h | 10
-rw-r--r--  arch/x86/kernel/apic/x2apic_uv_x.c | 6
-rw-r--r--  arch/x86/kernel/perf_regs.c | 15
-rw-r--r--  arch/x86/kvm/cpuid.c | 29
-rw-r--r--  arch/x86/kvm/cpuid.h | 1
-rw-r--r--  arch/x86/kvm/emulate.c | 8
-rw-r--r--  arch/x86/kvm/mmu/mmu.c | 12
-rw-r--r--  arch/x86/kvm/mmu/tdp_mmu.c | 7
-rw-r--r--  arch/x86/kvm/svm/svm.c | 8
-rw-r--r--  arch/x86/kvm/x86.c | 74
-rw-r--r--  arch/x86/kvm/x86.h | 8
80 files changed, 1654 insertions(+), 1632 deletions(-)
diff --git a/arch/arm/include/asm/kprobes.h b/arch/arm/include/asm/kprobes.h
index 213607a1f45c..e26a278d301a 100644
--- a/arch/arm/include/asm/kprobes.h
+++ b/arch/arm/include/asm/kprobes.h
@@ -44,20 +44,20 @@ int kprobe_exceptions_notify(struct notifier_block *self,
unsigned long val, void *data);
/* optinsn template addresses */
-extern __visible kprobe_opcode_t optprobe_template_entry;
-extern __visible kprobe_opcode_t optprobe_template_val;
-extern __visible kprobe_opcode_t optprobe_template_call;
-extern __visible kprobe_opcode_t optprobe_template_end;
-extern __visible kprobe_opcode_t optprobe_template_sub_sp;
-extern __visible kprobe_opcode_t optprobe_template_add_sp;
-extern __visible kprobe_opcode_t optprobe_template_restore_begin;
-extern __visible kprobe_opcode_t optprobe_template_restore_orig_insn;
-extern __visible kprobe_opcode_t optprobe_template_restore_end;
+extern __visible kprobe_opcode_t optprobe_template_entry[];
+extern __visible kprobe_opcode_t optprobe_template_val[];
+extern __visible kprobe_opcode_t optprobe_template_call[];
+extern __visible kprobe_opcode_t optprobe_template_end[];
+extern __visible kprobe_opcode_t optprobe_template_sub_sp[];
+extern __visible kprobe_opcode_t optprobe_template_add_sp[];
+extern __visible kprobe_opcode_t optprobe_template_restore_begin[];
+extern __visible kprobe_opcode_t optprobe_template_restore_orig_insn[];
+extern __visible kprobe_opcode_t optprobe_template_restore_end[];
#define MAX_OPTIMIZED_LENGTH 4
#define MAX_OPTINSN_SIZE \
- ((unsigned long)&optprobe_template_end - \
- (unsigned long)&optprobe_template_entry)
+ ((unsigned long)optprobe_template_end - \
+ (unsigned long)optprobe_template_entry)
#define RELATIVEJUMP_SIZE 4
struct arch_optimized_insn {
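(Why arrays? A scalar extern such as "extern kprobe_opcode_t optprobe_template_entry;" tells the compiler the object is a single opcode wide, so a FORTIFY_SOURCE-checked memcpy() spanning the whole template looks like an out-of-bounds read. A minimal sketch of the fixed idiom, with hypothetical names, not part of the patch:

/* Labels defined in an asm() block, declared as incomplete arrays. */
extern kprobe_opcode_t tmpl_entry[];
extern kprobe_opcode_t tmpl_end[];

static unsigned long tmpl_size(void)
{
	/*
	 * Array names decay to pointers: no '&' is needed, and the
	 * compiler can no longer assume a fixed 4-byte object size
	 * when it checks a memcpy() covering the whole template.
	 */
	return (unsigned long)tmpl_end - (unsigned long)tmpl_entry;
})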
diff --git a/arch/arm/kernel/perf_regs.c b/arch/arm/kernel/perf_regs.c
index 05fe92aa7d98..0529f90395c9 100644
--- a/arch/arm/kernel/perf_regs.c
+++ b/arch/arm/kernel/perf_regs.c
@@ -32,8 +32,7 @@ u64 perf_reg_abi(struct task_struct *task)
}
void perf_get_regs_user(struct perf_regs *regs_user,
- struct pt_regs *regs,
- struct pt_regs *regs_user_copy)
+ struct pt_regs *regs)
{
regs_user->regs = task_pt_regs(current);
regs_user->abi = perf_reg_abi(current);
diff --git a/arch/arm/probes/kprobes/opt-arm.c b/arch/arm/probes/kprobes/opt-arm.c
index 7a449df0b359..c78180172120 100644
--- a/arch/arm/probes/kprobes/opt-arm.c
+++ b/arch/arm/probes/kprobes/opt-arm.c
@@ -85,21 +85,21 @@ asm (
"optprobe_template_end:\n");
#define TMPL_VAL_IDX \
- ((unsigned long *)&optprobe_template_val - (unsigned long *)&optprobe_template_entry)
+ ((unsigned long *)optprobe_template_val - (unsigned long *)optprobe_template_entry)
#define TMPL_CALL_IDX \
- ((unsigned long *)&optprobe_template_call - (unsigned long *)&optprobe_template_entry)
+ ((unsigned long *)optprobe_template_call - (unsigned long *)optprobe_template_entry)
#define TMPL_END_IDX \
- ((unsigned long *)&optprobe_template_end - (unsigned long *)&optprobe_template_entry)
+ ((unsigned long *)optprobe_template_end - (unsigned long *)optprobe_template_entry)
#define TMPL_ADD_SP \
- ((unsigned long *)&optprobe_template_add_sp - (unsigned long *)&optprobe_template_entry)
+ ((unsigned long *)optprobe_template_add_sp - (unsigned long *)optprobe_template_entry)
#define TMPL_SUB_SP \
- ((unsigned long *)&optprobe_template_sub_sp - (unsigned long *)&optprobe_template_entry)
+ ((unsigned long *)optprobe_template_sub_sp - (unsigned long *)optprobe_template_entry)
#define TMPL_RESTORE_BEGIN \
- ((unsigned long *)&optprobe_template_restore_begin - (unsigned long *)&optprobe_template_entry)
+ ((unsigned long *)optprobe_template_restore_begin - (unsigned long *)optprobe_template_entry)
#define TMPL_RESTORE_ORIGN_INSN \
- ((unsigned long *)&optprobe_template_restore_orig_insn - (unsigned long *)&optprobe_template_entry)
+ ((unsigned long *)optprobe_template_restore_orig_insn - (unsigned long *)optprobe_template_entry)
#define TMPL_RESTORE_END \
- ((unsigned long *)&optprobe_template_restore_end - (unsigned long *)&optprobe_template_entry)
+ ((unsigned long *)optprobe_template_restore_end - (unsigned long *)optprobe_template_entry)
/*
* ARM can always optimize an instruction when using ARM ISA, except
@@ -234,7 +234,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *or
}
/* Copy arch-dep-instance from template. */
- memcpy(code, (unsigned long *)&optprobe_template_entry,
+ memcpy(code, (unsigned long *)optprobe_template_entry,
TMPL_END_IDX * sizeof(kprobe_opcode_t));
/* Adjust buffer according to instruction. */
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts
index f46eb47cfa4d..8161dd237971 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts
@@ -75,6 +75,7 @@
&enetc_port0 {
phy-handle = <&phy0>;
phy-connection-type = "sgmii";
+ managed = "in-band-status";
status = "okay";
mdio {
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index e7d98997c09c..162539d4c8cd 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -21,7 +21,7 @@
#define ARM64_HAS_VIRT_HOST_EXTN 11
#define ARM64_WORKAROUND_CAVIUM_27456 12
#define ARM64_HAS_32BIT_EL0 13
-#define ARM64_HARDEN_EL2_VECTORS 14
+#define ARM64_SPECTRE_V3A 14
#define ARM64_HAS_CNP 15
#define ARM64_HAS_NO_FPSIMD 16
#define ARM64_WORKAROUND_REPEAT_TLBI 17
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 04ab88db4b43..461277ae7a48 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -268,6 +268,8 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0;
/*
* CPU feature detected at boot time based on feature of one or more CPUs.
* All possible conflicts for a late CPU are ignored.
+ * NOTE: this means that a late CPU with the feature will *not* cause the
+ * capability to be advertised by cpus_have_*cap()!
*/
#define ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE \
(ARM64_CPUCAP_SCOPE_LOCAL_CPU | \
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 9e2e9a63c7b6..ef5b040dee44 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -86,6 +86,8 @@
#define QCOM_CPU_PART_FALKOR_V1 0x800
#define QCOM_CPU_PART_FALKOR 0xC00
#define QCOM_CPU_PART_KRYO 0x200
+#define QCOM_CPU_PART_KRYO_2XX_GOLD 0x800
+#define QCOM_CPU_PART_KRYO_2XX_SILVER 0x801
#define QCOM_CPU_PART_KRYO_3XX_SILVER 0x803
#define QCOM_CPU_PART_KRYO_4XX_GOLD 0x804
#define QCOM_CPU_PART_KRYO_4XX_SILVER 0x805
@@ -116,6 +118,8 @@
#define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1)
#define MIDR_QCOM_FALKOR MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR)
#define MIDR_QCOM_KRYO MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO)
+#define MIDR_QCOM_KRYO_2XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_2XX_GOLD)
+#define MIDR_QCOM_KRYO_2XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_2XX_SILVER)
#define MIDR_QCOM_KRYO_3XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_3XX_SILVER)
#define MIDR_QCOM_KRYO_4XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_GOLD)
#define MIDR_QCOM_KRYO_4XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_SILVER)
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 54387ccd1ab2..4a6a77d8d13e 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -34,8 +34,6 @@
*/
#define KVM_VECTOR_PREAMBLE (2 * AARCH64_INSN_SIZE)
-#define __SMCCC_WORKAROUND_1_SMC_SZ 36
-
#define KVM_HOST_SMCCC_ID(id) \
ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
ARM_SMCCC_SMC_64, \
@@ -175,7 +173,6 @@ extern unsigned long kvm_arm_hyp_percpu_base[NR_CPUS];
DECLARE_KVM_NVHE_SYM(__per_cpu_start);
DECLARE_KVM_NVHE_SYM(__per_cpu_end);
-extern atomic_t arm64_el2_vector_last_slot;
DECLARE_KVM_HYP_SYM(__bp_harden_hyp_vecs);
#define __bp_harden_hyp_vecs CHOOSE_HYP_SYM(__bp_harden_hyp_vecs)
@@ -189,8 +186,6 @@ extern void __kvm_timer_set_cntvoff(u64 cntvoff);
extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
-extern void __kvm_enable_ssbs(void);
-
extern u64 __vgic_v3_get_ich_vtr_el2(void);
extern u64 __vgic_v3_read_vmcr(void);
extern void __vgic_v3_write_vmcr(u32 vmcr);
@@ -198,8 +193,6 @@ extern void __vgic_v3_init_lrs(void);
extern u32 __kvm_get_mdcr_el2(void);
-extern char __smccc_workaround_1_smc[__SMCCC_WORKAROUND_1_SMC_SZ];
-
/*
* Obtain the PC-relative address of a kernel symbol
* s: symbol
diff --git a/arch/arm64/include/asm/kvm_coproc.h b/arch/arm64/include/asm/kvm_coproc.h
deleted file mode 100644
index d6bb40122fdb..000000000000
--- a/arch/arm64/include/asm/kvm_coproc.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2012,2013 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * Derived from arch/arm/include/asm/kvm_coproc.h
- * Copyright (C) 2012 Rusty Russell IBM Corporation
- */
-
-#ifndef __ARM64_KVM_COPROC_H__
-#define __ARM64_KVM_COPROC_H__
-
-#include <linux/kvm_host.h>
-
-void kvm_reset_sys_regs(struct kvm_vcpu *vcpu);
-
-struct kvm_sys_reg_table {
- const struct sys_reg_desc *table;
- size_t num;
-};
-
-int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu);
-int kvm_handle_cp14_32(struct kvm_vcpu *vcpu);
-int kvm_handle_cp14_64(struct kvm_vcpu *vcpu);
-int kvm_handle_cp15_32(struct kvm_vcpu *vcpu);
-int kvm_handle_cp15_64(struct kvm_vcpu *vcpu);
-int kvm_handle_sys_reg(struct kvm_vcpu *vcpu);
-
-#define kvm_coproc_table_init kvm_sys_reg_table_init
-void kvm_sys_reg_table_init(void);
-
-struct kvm_one_reg;
-int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices);
-int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
-int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
-unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu);
-
-#endif /* __ARM64_KVM_COPROC_H__ */
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 5ef2669ccd6c..c8f550a53516 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -21,20 +21,25 @@
#include <asm/cputype.h>
#include <asm/virt.h>
-unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num);
-unsigned long vcpu_read_spsr32(const struct kvm_vcpu *vcpu);
-void vcpu_write_spsr32(struct kvm_vcpu *vcpu, unsigned long v);
+#define CURRENT_EL_SP_EL0_VECTOR 0x0
+#define CURRENT_EL_SP_ELx_VECTOR 0x200
+#define LOWER_EL_AArch64_VECTOR 0x400
+#define LOWER_EL_AArch32_VECTOR 0x600
+
+enum exception_type {
+ except_type_sync = 0,
+ except_type_irq = 0x80,
+ except_type_fiq = 0x100,
+ except_type_serror = 0x180,
+};
bool kvm_condition_valid32(const struct kvm_vcpu *vcpu);
-void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr);
+void kvm_skip_instr32(struct kvm_vcpu *vcpu);
void kvm_inject_undefined(struct kvm_vcpu *vcpu);
void kvm_inject_vabt(struct kvm_vcpu *vcpu);
void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
-void kvm_inject_undef32(struct kvm_vcpu *vcpu);
-void kvm_inject_dabt32(struct kvm_vcpu *vcpu, unsigned long addr);
-void kvm_inject_pabt32(struct kvm_vcpu *vcpu, unsigned long addr);
static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
{
@@ -168,30 +173,6 @@ static __always_inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num,
vcpu_gp_regs(vcpu)->regs[reg_num] = val;
}
-static inline unsigned long vcpu_read_spsr(const struct kvm_vcpu *vcpu)
-{
- if (vcpu_mode_is_32bit(vcpu))
- return vcpu_read_spsr32(vcpu);
-
- if (vcpu->arch.sysregs_loaded_on_cpu)
- return read_sysreg_el1(SYS_SPSR);
- else
- return __vcpu_sys_reg(vcpu, SPSR_EL1);
-}
-
-static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v)
-{
- if (vcpu_mode_is_32bit(vcpu)) {
- vcpu_write_spsr32(vcpu, v);
- return;
- }
-
- if (vcpu->arch.sysregs_loaded_on_cpu)
- write_sysreg_el1(v, SYS_SPSR);
- else
- __vcpu_sys_reg(vcpu, SPSR_EL1) = v;
-}
-
/*
* The layout of SPSR for an AArch32 state is different when observed from an
* AArch64 SPSR_ELx or an AArch32 SPSR_*. This function generates the AArch32
@@ -472,32 +453,9 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
return data; /* Leave LE untouched */
}
-static __always_inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr)
-{
- if (vcpu_mode_is_32bit(vcpu)) {
- kvm_skip_instr32(vcpu, is_wide_instr);
- } else {
- *vcpu_pc(vcpu) += 4;
- *vcpu_cpsr(vcpu) &= ~PSR_BTYPE_MASK;
- }
-
- /* advance the singlestep state machine */
- *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS;
-}
-
-/*
- * Skip an instruction which has been emulated at hyp while most guest sysregs
- * are live.
- */
-static __always_inline void __kvm_skip_instr(struct kvm_vcpu *vcpu)
+static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu)
{
- *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR);
- vcpu_gp_regs(vcpu)->pstate = read_sysreg_el2(SYS_SPSR);
-
- kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
-
- write_sysreg_el2(vcpu_gp_regs(vcpu)->pstate, SYS_SPSR);
- write_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR);
+ vcpu->arch.flags |= KVM_ARM64_INCREMENT_PC;
}
#endif /* __ARM64_KVM_EMULATE_H__ */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 724c74cfacdd..21ce5c420247 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -119,6 +119,9 @@ struct kvm_arch {
*/
unsigned long *pmu_filter;
unsigned int pmuver;
+
+ u8 pfr0_csv2;
+ u8 pfr0_csv3;
};
struct kvm_vcpu_fault_info {
@@ -202,48 +205,6 @@ enum vcpu_sysreg {
NR_SYS_REGS /* Nothing after this line! */
};
-/* 32bit mapping */
-#define c0_MPIDR (MPIDR_EL1 * 2) /* MultiProcessor ID Register */
-#define c0_CSSELR (CSSELR_EL1 * 2)/* Cache Size Selection Register */
-#define c1_SCTLR (SCTLR_EL1 * 2) /* System Control Register */
-#define c1_ACTLR (ACTLR_EL1 * 2) /* Auxiliary Control Register */
-#define c1_CPACR (CPACR_EL1 * 2) /* Coprocessor Access Control */
-#define c2_TTBR0 (TTBR0_EL1 * 2) /* Translation Table Base Register 0 */
-#define c2_TTBR0_high (c2_TTBR0 + 1) /* TTBR0 top 32 bits */
-#define c2_TTBR1 (TTBR1_EL1 * 2) /* Translation Table Base Register 1 */
-#define c2_TTBR1_high (c2_TTBR1 + 1) /* TTBR1 top 32 bits */
-#define c2_TTBCR (TCR_EL1 * 2) /* Translation Table Base Control R. */
-#define c3_DACR (DACR32_EL2 * 2)/* Domain Access Control Register */
-#define c5_DFSR (ESR_EL1 * 2) /* Data Fault Status Register */
-#define c5_IFSR (IFSR32_EL2 * 2)/* Instruction Fault Status Register */
-#define c5_ADFSR (AFSR0_EL1 * 2) /* Auxiliary Data Fault Status R */
-#define c5_AIFSR (AFSR1_EL1 * 2) /* Auxiliary Instr Fault Status R */
-#define c6_DFAR (FAR_EL1 * 2) /* Data Fault Address Register */
-#define c6_IFAR (c6_DFAR + 1) /* Instruction Fault Address Register */
-#define c7_PAR (PAR_EL1 * 2) /* Physical Address Register */
-#define c7_PAR_high (c7_PAR + 1) /* PAR top 32 bits */
-#define c10_PRRR (MAIR_EL1 * 2) /* Primary Region Remap Register */
-#define c10_NMRR (c10_PRRR + 1) /* Normal Memory Remap Register */
-#define c12_VBAR (VBAR_EL1 * 2) /* Vector Base Address Register */
-#define c13_CID (CONTEXTIDR_EL1 * 2) /* Context ID Register */
-#define c13_TID_URW (TPIDR_EL0 * 2) /* Thread ID, User R/W */
-#define c13_TID_URO (TPIDRRO_EL0 * 2)/* Thread ID, User R/O */
-#define c13_TID_PRIV (TPIDR_EL1 * 2) /* Thread ID, Privileged */
-#define c10_AMAIR0 (AMAIR_EL1 * 2) /* Aux Memory Attr Indirection Reg */
-#define c10_AMAIR1 (c10_AMAIR0 + 1)/* Aux Memory Attr Indirection Reg */
-#define c14_CNTKCTL (CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */
-
-#define cp14_DBGDSCRext (MDSCR_EL1 * 2)
-#define cp14_DBGBCR0 (DBGBCR0_EL1 * 2)
-#define cp14_DBGBVR0 (DBGBVR0_EL1 * 2)
-#define cp14_DBGBXVR0 (cp14_DBGBVR0 + 1)
-#define cp14_DBGWCR0 (DBGWCR0_EL1 * 2)
-#define cp14_DBGWVR0 (DBGWVR0_EL1 * 2)
-#define cp14_DBGDCCINT (MDCCINT_EL1 * 2)
-#define cp14_DBGVCR (DBGVCR32_EL2 * 2)
-
-#define NR_COPRO_REGS (NR_SYS_REGS * 2)
-
struct kvm_cpu_context {
struct user_pt_regs regs; /* sp = sp_el0 */
@@ -254,10 +215,7 @@ struct kvm_cpu_context {
struct user_fpsimd_state fp_regs;
- union {
- u64 sys_regs[NR_SYS_REGS];
- u32 copro[NR_COPRO_REGS];
- };
+ u64 sys_regs[NR_SYS_REGS];
struct kvm_vcpu *__hyp_running_vcpu;
};
@@ -408,8 +366,33 @@ struct kvm_vcpu_arch {
#define KVM_ARM64_GUEST_HAS_SVE (1 << 5) /* SVE exposed to guest */
#define KVM_ARM64_VCPU_SVE_FINALIZED (1 << 6) /* SVE config completed */
#define KVM_ARM64_GUEST_HAS_PTRAUTH (1 << 7) /* PTRAUTH exposed to guest */
+#define KVM_ARM64_PENDING_EXCEPTION (1 << 8) /* Exception pending */
+#define KVM_ARM64_EXCEPT_MASK (7 << 9) /* Target EL/MODE */
+
+/*
+ * When KVM_ARM64_PENDING_EXCEPTION is set, KVM_ARM64_EXCEPT_MASK can
+ * take the following values:
+ *
+ * For AArch32 EL1:
+ */
+#define KVM_ARM64_EXCEPT_AA32_UND (0 << 9)
+#define KVM_ARM64_EXCEPT_AA32_IABT (1 << 9)
+#define KVM_ARM64_EXCEPT_AA32_DABT (2 << 9)
+/* For AArch64: */
+#define KVM_ARM64_EXCEPT_AA64_ELx_SYNC (0 << 9)
+#define KVM_ARM64_EXCEPT_AA64_ELx_IRQ (1 << 9)
+#define KVM_ARM64_EXCEPT_AA64_ELx_FIQ (2 << 9)
+#define KVM_ARM64_EXCEPT_AA64_ELx_SERR (3 << 9)
+#define KVM_ARM64_EXCEPT_AA64_EL1 (0 << 11)
+#define KVM_ARM64_EXCEPT_AA64_EL2 (1 << 11)
-#define vcpu_has_sve(vcpu) (system_supports_sve() && \
+/*
+ * Overlaps with KVM_ARM64_EXCEPT_MASK on purpose so that it can't be
+ * set together with an exception...
+ */
+#define KVM_ARM64_INCREMENT_PC (1 << 9) /* Increment PC */
+
+#define vcpu_has_sve(vcpu) (system_supports_sve() && \
((vcpu)->arch.flags & KVM_ARM64_GUEST_HAS_SVE))
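(With this layout, requesting an exception means OR-ing one value from each field together with KVM_ARM64_PENDING_EXCEPTION; because KVM_ARM64_INCREMENT_PC reuses bit 9, a PC increment can never be queued alongside a pending exception. A hedged sketch — the helper name is illustrative:

/* Queue a synchronous exception targeting the guest's AArch64 EL1. */
static void pend_el1_sync_exception(struct kvm_vcpu *vcpu)
{
	vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA64_EL1      |
			     KVM_ARM64_EXCEPT_AA64_ELx_SYNC |
			     KVM_ARM64_PENDING_EXCEPTION);
})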
#ifdef CONFIG_ARM64_PTR_AUTH
@@ -439,14 +422,96 @@ struct kvm_vcpu_arch {
u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg);
void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg);
-/*
- * CP14 and CP15 live in the same array, as they are backed by the
- * same system registers.
- */
-#define CPx_BIAS IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)
+static inline bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val)
+{
+ /*
+ * *** VHE ONLY ***
+ *
+ * System registers listed in the switch are not saved on every
+ * exit from the guest but are only saved on vcpu_put.
+ *
+ * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
+ * should never be listed below, because the guest cannot modify its
+ * own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's
+ * thread when emulating cross-VCPU communication.
+ */
+ if (!has_vhe())
+ return false;
+
+ switch (reg) {
+ case CSSELR_EL1: *val = read_sysreg_s(SYS_CSSELR_EL1); break;
+ case SCTLR_EL1: *val = read_sysreg_s(SYS_SCTLR_EL12); break;
+ case CPACR_EL1: *val = read_sysreg_s(SYS_CPACR_EL12); break;
+ case TTBR0_EL1: *val = read_sysreg_s(SYS_TTBR0_EL12); break;
+ case TTBR1_EL1: *val = read_sysreg_s(SYS_TTBR1_EL12); break;
+ case TCR_EL1: *val = read_sysreg_s(SYS_TCR_EL12); break;
+ case ESR_EL1: *val = read_sysreg_s(SYS_ESR_EL12); break;
+ case AFSR0_EL1: *val = read_sysreg_s(SYS_AFSR0_EL12); break;
+ case AFSR1_EL1: *val = read_sysreg_s(SYS_AFSR1_EL12); break;
+ case FAR_EL1: *val = read_sysreg_s(SYS_FAR_EL12); break;
+ case MAIR_EL1: *val = read_sysreg_s(SYS_MAIR_EL12); break;
+ case VBAR_EL1: *val = read_sysreg_s(SYS_VBAR_EL12); break;
+ case CONTEXTIDR_EL1: *val = read_sysreg_s(SYS_CONTEXTIDR_EL12);break;
+ case TPIDR_EL0: *val = read_sysreg_s(SYS_TPIDR_EL0); break;
+ case TPIDRRO_EL0: *val = read_sysreg_s(SYS_TPIDRRO_EL0); break;
+ case TPIDR_EL1: *val = read_sysreg_s(SYS_TPIDR_EL1); break;
+ case AMAIR_EL1: *val = read_sysreg_s(SYS_AMAIR_EL12); break;
+ case CNTKCTL_EL1: *val = read_sysreg_s(SYS_CNTKCTL_EL12); break;
+ case ELR_EL1: *val = read_sysreg_s(SYS_ELR_EL12); break;
+ case PAR_EL1: *val = read_sysreg_par(); break;
+ case DACR32_EL2: *val = read_sysreg_s(SYS_DACR32_EL2); break;
+ case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break;
+ case DBGVCR32_EL2: *val = read_sysreg_s(SYS_DBGVCR32_EL2); break;
+ default: return false;
+ }
+
+ return true;
+}
-#define vcpu_cp14(v,r) ((v)->arch.ctxt.copro[(r) ^ CPx_BIAS])
-#define vcpu_cp15(v,r) ((v)->arch.ctxt.copro[(r) ^ CPx_BIAS])
+static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg)
+{
+ /*
+ * *** VHE ONLY ***
+ *
+ * System registers listed in the switch are not restored on every
+ * entry to the guest but are only restored on vcpu_load.
+ *
+ * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
+ * should never be listed below, because the MPIDR should only be set
+ * once, before running the VCPU, and never changed later.
+ */
+ if (!has_vhe())
+ return false;
+
+ switch (reg) {
+ case CSSELR_EL1: write_sysreg_s(val, SYS_CSSELR_EL1); break;
+ case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); break;
+ case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); break;
+ case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); break;
+ case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); break;
+ case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); break;
+ case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); break;
+ case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); break;
+ case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); break;
+ case FAR_EL1: write_sysreg_s(val, SYS_FAR_EL12); break;
+ case MAIR_EL1: write_sysreg_s(val, SYS_MAIR_EL12); break;
+ case VBAR_EL1: write_sysreg_s(val, SYS_VBAR_EL12); break;
+ case CONTEXTIDR_EL1: write_sysreg_s(val, SYS_CONTEXTIDR_EL12);break;
+ case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); break;
+ case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); break;
+ case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); break;
+ case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); break;
+ case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); break;
+ case ELR_EL1: write_sysreg_s(val, SYS_ELR_EL12); break;
+ case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); break;
+ case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); break;
+ case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break;
+ case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); break;
+ default: return false;
+ }
+
+ return true;
+}
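(Callers are expected to pair these helpers with the memory-backed copy: try the live system register first, then fall back to the saved context. A sketch of the intended pattern, mirroring vcpu_read_sys_reg() in sys_regs.c — the function name here is illustrative:

static u64 example_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
{
	u64 val;

	/* VHE fast path: the value is live in the hardware register */
	if (vcpu->arch.sysregs_loaded_on_cpu &&
	    __vcpu_read_sys_reg_from_cpu(reg, &val))
		return val;

	/* Otherwise use the in-memory copy */
	return __vcpu_sys_reg(vcpu, reg);
})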
struct kvm_vm_stat {
ulong remote_tlb_flush;
@@ -472,6 +537,12 @@ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
+
+unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu);
+int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices);
+int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
+int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
+
int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
struct kvm_vcpu_events *events);
@@ -534,6 +605,17 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
int handle_exit(struct kvm_vcpu *vcpu, int exception_index);
void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index);
+int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu);
+int kvm_handle_cp14_32(struct kvm_vcpu *vcpu);
+int kvm_handle_cp14_64(struct kvm_vcpu *vcpu);
+int kvm_handle_cp15_32(struct kvm_vcpu *vcpu);
+int kvm_handle_cp15_64(struct kvm_vcpu *vcpu);
+int kvm_handle_sys_reg(struct kvm_vcpu *vcpu);
+
+void kvm_reset_sys_regs(struct kvm_vcpu *vcpu);
+
+void kvm_sys_reg_table_init(void);
+
/* MMIO helpers */
void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data);
unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len);
@@ -653,4 +735,7 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
#define kvm_arm_vcpu_sve_finalized(vcpu) \
((vcpu)->arch.flags & KVM_ARM64_VCPU_SVE_FINALIZED)
+#define kvm_vcpu_has_pmu(vcpu) \
+ (test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features))
+
#endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 331394306cce..e298191a854d 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -72,6 +72,28 @@ alternative_cb kvm_update_va_mask
alternative_cb_end
.endm
+/*
+ * Convert a kernel image address to a PA
+ * reg: kernel address to be converted in place
+ * tmp: temporary register
+ *
+ * The actual code generation takes place in kvm_get_kimage_voffset, and
+ * the instructions below are only there to reserve the space and
+ * perform the register allocation (kvm_get_kimage_voffset uses the
+ * specific registers encoded in the instructions).
+ */
+.macro kimg_pa reg, tmp
+alternative_cb kvm_get_kimage_voffset
+ movz \tmp, #0
+ movk \tmp, #0, lsl #16
+ movk \tmp, #0, lsl #32
+ movk \tmp, #0, lsl #48
+alternative_cb_end
+
+ /* reg = __pa(reg) */
+ sub \reg, \reg, \tmp
+.endm
+
#else
#include <linux/pgtable.h>
@@ -98,6 +120,24 @@ static __always_inline unsigned long __kern_hyp_va(unsigned long v)
#define kern_hyp_va(v) ((typeof(v))(__kern_hyp_va((unsigned long)(v))))
+static __always_inline unsigned long __kimg_hyp_va(unsigned long v)
+{
+ unsigned long offset;
+
+ asm volatile(ALTERNATIVE_CB("movz %0, #0\n"
+ "movk %0, #0, lsl #16\n"
+ "movk %0, #0, lsl #32\n"
+ "movk %0, #0, lsl #48\n",
+ kvm_update_kimg_phys_offset)
+ : "=r" (offset));
+
+ return __kern_hyp_va((v - offset) | PAGE_OFFSET);
+}
+
+#define kimg_fn_hyp_va(v) ((typeof(*v))(__kimg_hyp_va((unsigned long)(v))))
+
+#define kimg_fn_ptr(x) (typeof(x) **)(x)
+
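(The movz/movk run above is only a placeholder: the alternative callback rewrites it at boot with the real 64-bit offset, 16 bits per instruction. For an illustrative, made-up offset of 0x0123456789ab0000, the patched sequence would read:

	movz	x0, #0x0000			// bits [15:0]
	movk	x0, #0x89ab, lsl #16		// bits [31:16]
	movk	x0, #0x4567, lsl #32		// bits [47:32]
	movk	x0, #0x0123, lsl #48		// bits [63:48]
)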
/*
* We currently support using a VM-specified IPA size. For backward
* compatibility, the default IPA size is fixed to 40bits.
@@ -208,52 +248,6 @@ static inline int kvm_write_guest_lock(struct kvm *kvm, gpa_t gpa,
return ret;
}
-/*
- * EL2 vectors can be mapped and rerouted in a number of ways,
- * depending on the kernel configuration and CPU present:
- *
- * - If the CPU is affected by Spectre-v2, the hardening sequence is
- * placed in one of the vector slots, which is executed before jumping
- * to the real vectors.
- *
- * - If the CPU also has the ARM64_HARDEN_EL2_VECTORS cap, the slot
- * containing the hardening sequence is mapped next to the idmap page,
- * and executed before jumping to the real vectors.
- *
- * - If the CPU only has the ARM64_HARDEN_EL2_VECTORS cap, then an
- * empty slot is selected, mapped next to the idmap page, and
- * executed before jumping to the real vectors.
- *
- * Note that ARM64_HARDEN_EL2_VECTORS is somewhat incompatible with
- * VHE, as we don't have hypervisor-specific mappings. If the system
- * is VHE and yet selects this capability, it will be ignored.
- */
-extern void *__kvm_bp_vect_base;
-extern int __kvm_harden_el2_vector_slot;
-
-static inline void *kvm_get_hyp_vector(void)
-{
- struct bp_hardening_data *data = arm64_get_bp_hardening_data();
- void *vect = kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector));
- int slot = -1;
-
- if (cpus_have_const_cap(ARM64_SPECTRE_V2) && data->fn) {
- vect = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs));
- slot = data->hyp_vectors_slot;
- }
-
- if (this_cpu_has_cap(ARM64_HARDEN_EL2_VECTORS) && !has_vhe()) {
- vect = __kvm_bp_vect_base;
- if (slot == -1)
- slot = __kvm_harden_el2_vector_slot;
- }
-
- if (slot != -1)
- vect += slot * SZ_2K;
-
- return vect;
-}
-
#define kvm_phys_to_vttbr(addr) phys_to_ttbr(addr)
static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu)
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index b2e91c187e2a..75beffe2ee8a 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -12,9 +12,6 @@
#define USER_ASID_FLAG (UL(1) << USER_ASID_BIT)
#define TTBR_ASID_MASK (UL(0xffff) << 48)
-#define BP_HARDEN_EL2_SLOTS 4
-#define __BP_HARDEN_HYP_VECS_SZ (BP_HARDEN_EL2_SLOTS * SZ_2K)
-
#ifndef __ASSEMBLY__
#include <linux/refcount.h>
@@ -41,32 +38,6 @@ static inline bool arm64_kernel_unmapped_at_el0(void)
return cpus_have_const_cap(ARM64_UNMAP_KERNEL_AT_EL0);
}
-typedef void (*bp_hardening_cb_t)(void);
-
-struct bp_hardening_data {
- int hyp_vectors_slot;
- bp_hardening_cb_t fn;
-};
-
-DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data);
-
-static inline struct bp_hardening_data *arm64_get_bp_hardening_data(void)
-{
- return this_cpu_ptr(&bp_hardening_data);
-}
-
-static inline void arm64_apply_bp_hardening(void)
-{
- struct bp_hardening_data *d;
-
- if (!cpus_have_const_cap(ARM64_SPECTRE_V2))
- return;
-
- d = arm64_get_bp_hardening_data();
- if (d->fn)
- d->fn();
-}
-
extern void arm64_memblock_init(void);
extern void paging_init(void);
extern void bootmem_init(void);
diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h
index fcdfbce302bd..f62ca39da6c5 100644
--- a/arch/arm64/include/asm/spectre.h
+++ b/arch/arm64/include/asm/spectre.h
@@ -9,7 +9,15 @@
#ifndef __ASM_SPECTRE_H
#define __ASM_SPECTRE_H
+#define BP_HARDEN_EL2_SLOTS 4
+#define __BP_HARDEN_HYP_VECS_SZ ((BP_HARDEN_EL2_SLOTS - 1) * SZ_2K)
+
+#ifndef __ASSEMBLY__
+
+#include <linux/percpu.h>
+
#include <asm/cpufeature.h>
+#include <asm/virt.h>
/* Watch out, ordering is important here. */
enum mitigation_state {
@@ -20,13 +28,70 @@ enum mitigation_state {
struct task_struct;
+/*
+ * Note: the order of this enum corresponds to __bp_harden_hyp_vecs and
+ * we rely on having the direct vectors first.
+ */
+enum arm64_hyp_spectre_vector {
+ /*
+ * Take exceptions directly to __kvm_hyp_vector. This must be
+ * 0 so that it is used by default when mitigations are not needed.
+ */
+ HYP_VECTOR_DIRECT,
+
+ /*
+ * Bounce via a slot in the hypervisor text mapping of
+ * __bp_harden_hyp_vecs, which contains an SMC call.
+ */
+ HYP_VECTOR_SPECTRE_DIRECT,
+
+ /*
+ * Bounce via a slot in a special mapping of __bp_harden_hyp_vecs
+ * next to the idmap page.
+ */
+ HYP_VECTOR_INDIRECT,
+
+ /*
+ * Bounce via a slot in a special mapping of __bp_harden_hyp_vecs
+ * next to the idmap page, which contains an SMC call.
+ */
+ HYP_VECTOR_SPECTRE_INDIRECT,
+};
+
+typedef void (*bp_hardening_cb_t)(void);
+
+struct bp_hardening_data {
+ enum arm64_hyp_spectre_vector slot;
+ bp_hardening_cb_t fn;
+};
+
+DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data);
+
+static inline void arm64_apply_bp_hardening(void)
+{
+ struct bp_hardening_data *d;
+
+ if (!cpus_have_const_cap(ARM64_SPECTRE_V2))
+ return;
+
+ d = this_cpu_ptr(&bp_hardening_data);
+ if (d->fn)
+ d->fn();
+}
+
enum mitigation_state arm64_get_spectre_v2_state(void);
bool has_spectre_v2(const struct arm64_cpu_capabilities *cap, int scope);
void spectre_v2_enable_mitigation(const struct arm64_cpu_capabilities *__unused);
+bool has_spectre_v3a(const struct arm64_cpu_capabilities *cap, int scope);
+void spectre_v3a_enable_mitigation(const struct arm64_cpu_capabilities *__unused);
+
enum mitigation_state arm64_get_spectre_v4_state(void);
bool has_spectre_v4(const struct arm64_cpu_capabilities *cap, int scope);
void spectre_v4_enable_mitigation(const struct arm64_cpu_capabilities *__unused);
void spectre_v4_enable_task_mitigation(struct task_struct *tsk);
+enum mitigation_state arm64_get_meltdown_state(void);
+
+#endif /* __ASSEMBLY__ */
#endif /* __ASM_SPECTRE_H */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 174817ba119c..500efe405b48 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -372,6 +372,8 @@
#define SYS_CONTEXTIDR_EL1 sys_reg(3, 0, 13, 0, 1)
#define SYS_TPIDR_EL1 sys_reg(3, 0, 13, 0, 4)
+#define SYS_SCXTNUM_EL1 sys_reg(3, 0, 13, 0, 7)
+
#define SYS_CNTKCTL_EL1 sys_reg(3, 0, 14, 1, 0)
#define SYS_CCSIDR_EL1 sys_reg(3, 1, 0, 0, 0)
@@ -404,6 +406,8 @@
#define SYS_TPIDR_EL0 sys_reg(3, 3, 13, 0, 2)
#define SYS_TPIDRRO_EL0 sys_reg(3, 3, 13, 0, 3)
+#define SYS_SCXTNUM_EL0 sys_reg(3, 3, 13, 0, 7)
+
/* Definitions for system register interface to AMU for ARMv8.4 onwards */
#define SYS_AM_EL0(crm, op2) sys_reg(3, 3, 13, (crm), (op2))
#define SYS_AMCR_EL0 SYS_AM_EL0(2, 0)
@@ -461,6 +465,7 @@
#define SYS_PMCCFILTR_EL0 sys_reg(3, 3, 14, 15, 7)
+#define SYS_SCTLR_EL2 sys_reg(3, 4, 1, 0, 0)
#define SYS_ZCR_EL2 sys_reg(3, 4, 1, 2, 0)
#define SYS_DACR32_EL2 sys_reg(3, 4, 3, 0, 0)
#define SYS_SPSR_EL2 sys_reg(3, 4, 4, 0, 0)
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 61314fd70f13..a63428301f42 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -196,16 +196,6 @@ has_neoverse_n1_erratum_1542419(const struct arm64_cpu_capabilities *entry,
return is_midr_in_range(midr, &range) && has_dic;
}
-#ifdef CONFIG_RANDOMIZE_BASE
-
-static const struct midr_range ca57_a72[] = {
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
- {},
-};
-
-#endif
-
#ifdef CONFIG_ARM64_WORKAROUND_REPEAT_TLBI
static const struct arm64_cpu_capabilities arm64_repeat_tlbi_list[] = {
#ifdef CONFIG_QCOM_FALKOR_ERRATUM_1009
@@ -299,6 +289,8 @@ static const struct midr_range erratum_845719_list[] = {
MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 4),
/* Brahma-B53 r0p[0] */
MIDR_REV(MIDR_BRAHMA_B53, 0, 0),
+ /* Kryo2XX Silver rAp4 */
+ MIDR_REV(MIDR_QCOM_KRYO_2XX_SILVER, 0xa, 0x4),
{},
};
#endif
@@ -459,9 +451,12 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
},
#ifdef CONFIG_RANDOMIZE_BASE
{
- .desc = "EL2 vector hardening",
- .capability = ARM64_HARDEN_EL2_VECTORS,
- ERRATA_MIDR_RANGE_LIST(ca57_a72),
+ /* Must come after the Spectre-v2 entry */
+ .desc = "Spectre-v3a",
+ .capability = ARM64_SPECTRE_V3A,
+ .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
+ .matches = has_spectre_v3a,
+ .cpu_enable = spectre_v3a_enable_mitigation,
},
#endif
{
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index dcc165b3fc04..280b10762f6b 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1337,6 +1337,8 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
MIDR_ALL_VERSIONS(MIDR_CORTEX_A73),
MIDR_ALL_VERSIONS(MIDR_HISI_TSV110),
MIDR_ALL_VERSIONS(MIDR_NVIDIA_CARMEL),
+ MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_2XX_GOLD),
+ MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_2XX_SILVER),
MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_3XX_SILVER),
MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_SILVER),
{ /* sentinel */ }
@@ -2844,14 +2846,28 @@ static int __init enable_mrs_emulation(void)
core_initcall(enable_mrs_emulation);
+enum mitigation_state arm64_get_meltdown_state(void)
+{
+ if (__meltdown_safe)
+ return SPECTRE_UNAFFECTED;
+
+ if (arm64_kernel_unmapped_at_el0())
+ return SPECTRE_MITIGATED;
+
+ return SPECTRE_VULNERABLE;
+}
+
ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr,
char *buf)
{
- if (__meltdown_safe)
+ switch (arm64_get_meltdown_state()) {
+ case SPECTRE_UNAFFECTED:
return sprintf(buf, "Not affected\n");
- if (arm64_kernel_unmapped_at_el0())
+ case SPECTRE_MITIGATED:
return sprintf(buf, "Mitigation: PTI\n");
- return sprintf(buf, "Vulnerable\n");
+ default:
+ return sprintf(buf, "Vulnerable\n");
+ }
}
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index c615b285ff5b..4b32588918d9 100644
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -64,13 +64,12 @@ __efistub__ctype = _ctype;
/* Alternative callbacks for init-time patching of nVHE hyp code. */
KVM_NVHE_ALIAS(kvm_patch_vector_branch);
KVM_NVHE_ALIAS(kvm_update_va_mask);
+KVM_NVHE_ALIAS(kvm_update_kimg_phys_offset);
+KVM_NVHE_ALIAS(kvm_get_kimage_voffset);
/* Global kernel state accessed by nVHE hyp code. */
KVM_NVHE_ALIAS(kvm_vgic_global_state);
-/* Kernel constant needed to compute idmap addresses. */
-KVM_NVHE_ALIAS(kimage_voffset);
-
/* Kernel symbols used to call panic() from nVHE hyp code (via ERET). */
KVM_NVHE_ALIAS(__hyp_panic_string);
KVM_NVHE_ALIAS(panic);
diff --git a/arch/arm64/kernel/kexec_image.c b/arch/arm64/kernel/kexec_image.c
index 66adee8b5fc8..9ec34690e255 100644
--- a/arch/arm64/kernel/kexec_image.c
+++ b/arch/arm64/kernel/kexec_image.c
@@ -127,7 +127,7 @@ static void *image_load(struct kimage *image,
kernel_segment->mem, kbuf.bufsz,
kernel_segment->memsz);
- return 0;
+ return NULL;
}
#ifdef CONFIG_KEXEC_IMAGE_VERIFY_SIG
diff --git a/arch/arm64/kernel/perf_regs.c b/arch/arm64/kernel/perf_regs.c
index 94e8718e7229..f6f58e6265df 100644
--- a/arch/arm64/kernel/perf_regs.c
+++ b/arch/arm64/kernel/perf_regs.c
@@ -73,8 +73,7 @@ u64 perf_reg_abi(struct task_struct *task)
}
void perf_get_regs_user(struct perf_regs *regs_user,
- struct pt_regs *regs,
- struct pt_regs *regs_user_copy)
+ struct pt_regs *regs)
{
regs_user->regs = task_pt_regs(current);
regs_user->abi = perf_reg_abi(current);
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 4784011cecac..a47a40ec6ad9 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -522,14 +522,13 @@ static void erratum_1418040_thread_switch(struct task_struct *prev,
bool prev32, next32;
u64 val;
- if (!(IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040) &&
- cpus_have_const_cap(ARM64_WORKAROUND_1418040)))
+ if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040))
return;
prev32 = is_compat_thread(task_thread_info(prev));
next32 = is_compat_thread(task_thread_info(next));
- if (prev32 == next32)
+ if (prev32 == next32 || !this_cpu_has_cap(ARM64_WORKAROUND_1418040))
return;
val = read_sysreg(cntkctl_el1);
diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c
index c18eb7d41274..fca03648e270 100644
--- a/arch/arm64/kernel/proton-pack.c
+++ b/arch/arm64/kernel/proton-pack.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Handle detection, reporting and mitigation of Spectre v1, v2 and v4, as
+ * Handle detection, reporting and mitigation of Spectre v1, v2, v3a and v4, as
* detailed at:
*
* https://developer.arm.com/support/arm-security-updates/speculative-processor-vulnerability
@@ -26,6 +26,7 @@
#include <asm/spectre.h>
#include <asm/traps.h>
+#include <asm/virt.h>
/*
* We try to ensure that the mitigation state can never change as the result of
@@ -118,6 +119,7 @@ static enum mitigation_state spectre_v2_get_cpu_hw_mitigation_state(void)
MIDR_ALL_VERSIONS(MIDR_CORTEX_A55),
MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53),
MIDR_ALL_VERSIONS(MIDR_HISI_TSV110),
+ MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_2XX_SILVER),
MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_3XX_SILVER),
MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_SILVER),
{ /* sentinel */ }
@@ -169,72 +171,26 @@ bool has_spectre_v2(const struct arm64_cpu_capabilities *entry, int scope)
return true;
}
-DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data);
-
enum mitigation_state arm64_get_spectre_v2_state(void)
{
return spectre_v2_state;
}
-#ifdef CONFIG_KVM
-#include <asm/cacheflush.h>
-#include <asm/kvm_asm.h>
-
-atomic_t arm64_el2_vector_last_slot = ATOMIC_INIT(-1);
-
-static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start,
- const char *hyp_vecs_end)
-{
- void *dst = lm_alias(__bp_harden_hyp_vecs + slot * SZ_2K);
- int i;
-
- for (i = 0; i < SZ_2K; i += 0x80)
- memcpy(dst + i, hyp_vecs_start, hyp_vecs_end - hyp_vecs_start);
-
- __flush_icache_range((uintptr_t)dst, (uintptr_t)dst + SZ_2K);
-}
+DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data);
static void install_bp_hardening_cb(bp_hardening_cb_t fn)
{
- static DEFINE_RAW_SPINLOCK(bp_lock);
- int cpu, slot = -1;
- const char *hyp_vecs_start = __smccc_workaround_1_smc;
- const char *hyp_vecs_end = __smccc_workaround_1_smc +
- __SMCCC_WORKAROUND_1_SMC_SZ;
+ __this_cpu_write(bp_hardening_data.fn, fn);
/*
* Vinz Clortho takes the hyp_vecs start/end "keys" at
* the door when we're a guest. Skip the hyp-vectors work.
*/
- if (!is_hyp_mode_available()) {
- __this_cpu_write(bp_hardening_data.fn, fn);
+ if (!is_hyp_mode_available())
return;
- }
-
- raw_spin_lock(&bp_lock);
- for_each_possible_cpu(cpu) {
- if (per_cpu(bp_hardening_data.fn, cpu) == fn) {
- slot = per_cpu(bp_hardening_data.hyp_vectors_slot, cpu);
- break;
- }
- }
-
- if (slot == -1) {
- slot = atomic_inc_return(&arm64_el2_vector_last_slot);
- BUG_ON(slot >= BP_HARDEN_EL2_SLOTS);
- __copy_hyp_vect_bpi(slot, hyp_vecs_start, hyp_vecs_end);
- }
- __this_cpu_write(bp_hardening_data.hyp_vectors_slot, slot);
- __this_cpu_write(bp_hardening_data.fn, fn);
- raw_spin_unlock(&bp_lock);
-}
-#else
-static void install_bp_hardening_cb(bp_hardening_cb_t fn)
-{
- __this_cpu_write(bp_hardening_data.fn, fn);
+ __this_cpu_write(bp_hardening_data.slot, HYP_VECTOR_SPECTRE_DIRECT);
}
-#endif /* CONFIG_KVM */
static void call_smc_arch_workaround_1(void)
{
@@ -316,6 +272,33 @@ void spectre_v2_enable_mitigation(const struct arm64_cpu_capabilities *__unused)
}
/*
+ * Spectre-v3a.
+ *
+ * Phew, there's not an awful lot to do here! We just instruct EL2 to use
+ * an indirect trampoline for the hyp vectors so that guests can't read
+ * VBAR_EL2 to defeat randomisation of the hypervisor VA layout.
+ */
+bool has_spectre_v3a(const struct arm64_cpu_capabilities *entry, int scope)
+{
+ static const struct midr_range spectre_v3a_unsafe_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
+ {},
+ };
+
+ WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
+ return is_midr_in_range_list(read_cpuid_id(), spectre_v3a_unsafe_list);
+}
+
+void spectre_v3a_enable_mitigation(const struct arm64_cpu_capabilities *__unused)
+{
+ struct bp_hardening_data *data = this_cpu_ptr(&bp_hardening_data);
+
+ if (this_cpu_has_cap(ARM64_SPECTRE_V3A))
+ data->slot += HYP_VECTOR_INDIRECT;
+}
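(The "+= HYP_VECTOR_INDIRECT" relies on the enum ordering in spectre.h: adding 2 converts either direct slot into its idmap-mapped twin, whether or not Spectre-v2 has already selected the SMC-carrying slot. In sketch form:

/*
 * HYP_VECTOR_DIRECT         (0) + 2 -> HYP_VECTOR_INDIRECT         (2)
 * HYP_VECTOR_SPECTRE_DIRECT (1) + 2 -> HYP_VECTOR_SPECTRE_INDIRECT (3)
 */)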
+
+/*
* Spectre v4.
*
* If you thought Spectre v2 was nasty, wait until you see this mess. A CPU is
diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c
index 43ae4e0c968f..62d2bda7adb8 100644
--- a/arch/arm64/kernel/psci.c
+++ b/arch/arm64/kernel/psci.c
@@ -66,7 +66,6 @@ static int cpu_psci_cpu_disable(unsigned int cpu)
static void cpu_psci_cpu_die(unsigned int cpu)
{
- int ret;
/*
* There are no known implementations of PSCI actually using the
* power state field, pass a sensible default for now.
@@ -74,9 +73,7 @@ static void cpu_psci_cpu_die(unsigned int cpu)
u32 state = PSCI_POWER_STATE_TYPE_POWER_DOWN <<
PSCI_0_2_POWER_STATE_TYPE_SHIFT;
- ret = psci_ops.cpu_off(state);
-
- pr_crit("unable to power off CPU%u (%d)\n", cpu, ret);
+ psci_ops.cpu_off(state);
}
static int cpu_psci_cpu_kill(unsigned int cpu)
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 09c96f57818c..18e9727d3f64 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -413,6 +413,7 @@ void cpu_die_early(void)
/* Mark this CPU absent */
set_cpu_present(cpu, 0);
+ rcu_report_dead(cpu);
if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) {
update_cpu_boot_status(CPU_KILL_ME);
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 1504c81fbf5d..60fd181df624 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -13,10 +13,10 @@ obj-$(CONFIG_KVM) += hyp/
kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \
$(KVM)/vfio.o $(KVM)/irqchip.o \
arm.o mmu.o mmio.o psci.o perf.o hypercalls.o pvtime.o \
- inject_fault.o regmap.o va_layout.o handle_exit.o \
+ inject_fault.o va_layout.o handle_exit.o \
guest.o debug.o reset.o sys_regs.o \
vgic-sys-reg-v3.o fpsimd.o pmu.o \
- aarch32.o arch_timer.o \
+ arch_timer.o \
vgic/vgic.o vgic/vgic-init.o \
vgic/vgic-irqfd.o vgic/vgic-v2.o \
vgic/vgic-v3.o vgic/vgic-v4.o \
diff --git a/arch/arm64/kvm/aarch32.c b/arch/arm64/kvm/aarch32.c
deleted file mode 100644
index 40a62a99fbf8..000000000000
--- a/arch/arm64/kvm/aarch32.c
+++ /dev/null
@@ -1,232 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * (not much of an) Emulation layer for 32bit guests.
- *
- * Copyright (C) 2012,2013 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * based on arch/arm/kvm/emulate.c
- * Copyright (C) 2012 - Virtual Open Systems and Columbia University
- * Author: Christoffer Dall <c.dall@virtualopensystems.com>
- */
-
-#include <linux/bits.h>
-#include <linux/kvm_host.h>
-#include <asm/kvm_emulate.h>
-#include <asm/kvm_hyp.h>
-
-#define DFSR_FSC_EXTABT_LPAE 0x10
-#define DFSR_FSC_EXTABT_nLPAE 0x08
-#define DFSR_LPAE BIT(9)
-
-/*
- * Table taken from ARMv8 ARM DDI0487B-B, table G1-10.
- */
-static const u8 return_offsets[8][2] = {
- [0] = { 0, 0 }, /* Reset, unused */
- [1] = { 4, 2 }, /* Undefined */
- [2] = { 0, 0 }, /* SVC, unused */
- [3] = { 4, 4 }, /* Prefetch abort */
- [4] = { 8, 8 }, /* Data abort */
- [5] = { 0, 0 }, /* HVC, unused */
- [6] = { 4, 4 }, /* IRQ, unused */
- [7] = { 4, 4 }, /* FIQ, unused */
-};
-
-static bool pre_fault_synchronize(struct kvm_vcpu *vcpu)
-{
- preempt_disable();
- if (vcpu->arch.sysregs_loaded_on_cpu) {
- kvm_arch_vcpu_put(vcpu);
- return true;
- }
-
- preempt_enable();
- return false;
-}
-
-static void post_fault_synchronize(struct kvm_vcpu *vcpu, bool loaded)
-{
- if (loaded) {
- kvm_arch_vcpu_load(vcpu, smp_processor_id());
- preempt_enable();
- }
-}
-
-/*
- * When an exception is taken, most CPSR fields are left unchanged in the
- * handler. However, some are explicitly overridden (e.g. M[4:0]).
- *
- * The SPSR/SPSR_ELx layouts differ, and the below is intended to work with
- * either format. Note: SPSR.J bit doesn't exist in SPSR_ELx, but this bit was
- * obsoleted by the ARMv7 virtualization extensions and is RES0.
- *
- * For the SPSR layout seen from AArch32, see:
- * - ARM DDI 0406C.d, page B1-1148
- * - ARM DDI 0487E.a, page G8-6264
- *
- * For the SPSR_ELx layout for AArch32 seen from AArch64, see:
- * - ARM DDI 0487E.a, page C5-426
- *
- * Here we manipulate the fields in order of the AArch32 SPSR_ELx layout, from
- * MSB to LSB.
- */
-static unsigned long get_except32_cpsr(struct kvm_vcpu *vcpu, u32 mode)
-{
- u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR);
- unsigned long old, new;
-
- old = *vcpu_cpsr(vcpu);
- new = 0;
-
- new |= (old & PSR_AA32_N_BIT);
- new |= (old & PSR_AA32_Z_BIT);
- new |= (old & PSR_AA32_C_BIT);
- new |= (old & PSR_AA32_V_BIT);
- new |= (old & PSR_AA32_Q_BIT);
-
- // CPSR.IT[7:0] are set to zero upon any exception
- // See ARM DDI 0487E.a, section G1.12.3
- // See ARM DDI 0406C.d, section B1.8.3
-
- new |= (old & PSR_AA32_DIT_BIT);
-
- // CPSR.SSBS is set to SCTLR.DSSBS upon any exception
- // See ARM DDI 0487E.a, page G8-6244
- if (sctlr & BIT(31))
- new |= PSR_AA32_SSBS_BIT;
-
- // CPSR.PAN is unchanged unless SCTLR.SPAN == 0b0
- // SCTLR.SPAN is RES1 when ARMv8.1-PAN is not implemented
- // See ARM DDI 0487E.a, page G8-6246
- new |= (old & PSR_AA32_PAN_BIT);
- if (!(sctlr & BIT(23)))
- new |= PSR_AA32_PAN_BIT;
-
- // SS does not exist in AArch32, so ignore
-
- // CPSR.IL is set to zero upon any exception
- // See ARM DDI 0487E.a, page G1-5527
-
- new |= (old & PSR_AA32_GE_MASK);
-
- // CPSR.IT[7:0] are set to zero upon any exception
- // See prior comment above
-
- // CPSR.E is set to SCTLR.EE upon any exception
- // See ARM DDI 0487E.a, page G8-6245
- // See ARM DDI 0406C.d, page B4-1701
- if (sctlr & BIT(25))
- new |= PSR_AA32_E_BIT;
-
- // CPSR.A is unchanged upon an exception to Undefined, Supervisor
- // CPSR.A is set upon an exception to other modes
- // See ARM DDI 0487E.a, pages G1-5515 to G1-5516
- // See ARM DDI 0406C.d, page B1-1182
- new |= (old & PSR_AA32_A_BIT);
- if (mode != PSR_AA32_MODE_UND && mode != PSR_AA32_MODE_SVC)
- new |= PSR_AA32_A_BIT;
-
- // CPSR.I is set upon any exception
- // See ARM DDI 0487E.a, pages G1-5515 to G1-5516
- // See ARM DDI 0406C.d, page B1-1182
- new |= PSR_AA32_I_BIT;
-
- // CPSR.F is set upon an exception to FIQ
- // CPSR.F is unchanged upon an exception to other modes
- // See ARM DDI 0487E.a, pages G1-5515 to G1-5516
- // See ARM DDI 0406C.d, page B1-1182
- new |= (old & PSR_AA32_F_BIT);
- if (mode == PSR_AA32_MODE_FIQ)
- new |= PSR_AA32_F_BIT;
-
- // CPSR.T is set to SCTLR.TE upon any exception
- // See ARM DDI 0487E.a, page G8-5514
- // See ARM DDI 0406C.d, page B1-1181
- if (sctlr & BIT(30))
- new |= PSR_AA32_T_BIT;
-
- new |= mode;
-
- return new;
-}
-
-static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset)
-{
- unsigned long spsr = *vcpu_cpsr(vcpu);
- bool is_thumb = (spsr & PSR_AA32_T_BIT);
- u32 return_offset = return_offsets[vect_offset >> 2][is_thumb];
- u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR);
-
- *vcpu_cpsr(vcpu) = get_except32_cpsr(vcpu, mode);
-
- /* Note: These now point to the banked copies */
- vcpu_write_spsr(vcpu, host_spsr_to_spsr32(spsr));
- *vcpu_reg32(vcpu, 14) = *vcpu_pc(vcpu) + return_offset;
-
- /* Branch to exception vector */
- if (sctlr & (1 << 13))
- vect_offset += 0xffff0000;
- else /* always have security exceptions */
- vect_offset += vcpu_cp15(vcpu, c12_VBAR);
-
- *vcpu_pc(vcpu) = vect_offset;
-}
-
-void kvm_inject_undef32(struct kvm_vcpu *vcpu)
-{
- bool loaded = pre_fault_synchronize(vcpu);
-
- prepare_fault32(vcpu, PSR_AA32_MODE_UND, 4);
- post_fault_synchronize(vcpu, loaded);
-}
-
-/*
- * Modelled after TakeDataAbortException() and TakePrefetchAbortException
- * pseudocode.
- */
-static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt,
- unsigned long addr)
-{
- u32 vect_offset;
- u32 *far, *fsr;
- bool is_lpae;
- bool loaded;
-
- loaded = pre_fault_synchronize(vcpu);
-
- if (is_pabt) {
- vect_offset = 12;
- far = &vcpu_cp15(vcpu, c6_IFAR);
- fsr = &vcpu_cp15(vcpu, c5_IFSR);
- } else { /* !iabt */
- vect_offset = 16;
- far = &vcpu_cp15(vcpu, c6_DFAR);
- fsr = &vcpu_cp15(vcpu, c5_DFSR);
- }
-
- prepare_fault32(vcpu, PSR_AA32_MODE_ABT, vect_offset);
-
- *far = addr;
-
- /* Give the guest an IMPLEMENTATION DEFINED exception */
- is_lpae = (vcpu_cp15(vcpu, c2_TTBCR) >> 31);
- if (is_lpae) {
- *fsr = DFSR_LPAE | DFSR_FSC_EXTABT_LPAE;
- } else {
- /* no need to shuffle FS[4] into DFSR[10] as its 0 */
- *fsr = DFSR_FSC_EXTABT_nLPAE;
- }
-
- post_fault_synchronize(vcpu, loaded);
-}
-
-void kvm_inject_dabt32(struct kvm_vcpu *vcpu, unsigned long addr)
-{
- inject_abt32(vcpu, false, addr);
-}
-
-void kvm_inject_pabt32(struct kvm_vcpu *vcpu, unsigned long addr)
-{
- inject_abt32(vcpu, true, addr);
-}
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 97a9332f12cc..7e86207fa2fc 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -35,7 +35,6 @@
#include <asm/kvm_asm.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_emulate.h>
-#include <asm/kvm_coproc.h>
#include <asm/sections.h>
#include <kvm/arm_hypercalls.h>
@@ -102,6 +101,22 @@ static int kvm_arm_default_max_vcpus(void)
return vgic_present ? kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS;
}
+static void set_default_spectre(struct kvm *kvm)
+{
+ /*
+ * The default is to expose CSV2 == 1 if the HW isn't affected.
+ * Although this is a per-CPU feature, we make it global because
+ * asymmetric systems are just a nuisance.
+ *
+ * Userspace can override this as long as it doesn't promise
+ * the impossible.
+ */
+ if (arm64_get_spectre_v2_state() == SPECTRE_UNAFFECTED)
+ kvm->arch.pfr0_csv2 = 1;
+ if (arm64_get_meltdown_state() == SPECTRE_UNAFFECTED)
+ kvm->arch.pfr0_csv3 = 1;
+}
+
/**
* kvm_arch_init_vm - initializes a VM data structure
* @kvm: pointer to the KVM struct
@@ -127,6 +142,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
/* The maximum number of VCPUs is limited by the host's GIC model */
kvm->arch.max_vcpus = kvm_arm_default_max_vcpus();
+ set_default_spectre(kvm);
+
return ret;
out_free_stage2_pgd:
kvm_free_stage2_pgd(&kvm->arch.mmu);
@@ -1322,37 +1339,44 @@ static unsigned long nvhe_percpu_order(void)
return size ? get_order(size) : 0;
}
-static int kvm_map_vectors(void)
+/* A lookup table holding the hypervisor VA for each vector slot */
+static void *hyp_spectre_vector_selector[BP_HARDEN_EL2_SLOTS];
+
+static int __kvm_vector_slot2idx(enum arm64_hyp_spectre_vector slot)
{
- /*
- * SV2 = ARM64_SPECTRE_V2
- * HEL2 = ARM64_HARDEN_EL2_VECTORS
- *
- * !SV2 + !HEL2 -> use direct vectors
- * SV2 + !HEL2 -> use hardened vectors in place
- * !SV2 + HEL2 -> allocate one vector slot and use exec mapping
- * SV2 + HEL2 -> use hardened vectors and use exec mapping
- */
- if (cpus_have_const_cap(ARM64_SPECTRE_V2)) {
- __kvm_bp_vect_base = kvm_ksym_ref(__bp_harden_hyp_vecs);
- __kvm_bp_vect_base = kern_hyp_va(__kvm_bp_vect_base);
- }
+ return slot - (slot != HYP_VECTOR_DIRECT);
+}
- if (cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS)) {
- phys_addr_t vect_pa = __pa_symbol(__bp_harden_hyp_vecs);
- unsigned long size = __BP_HARDEN_HYP_VECS_SZ;
+static void kvm_init_vector_slot(void *base, enum arm64_hyp_spectre_vector slot)
+{
+ int idx = __kvm_vector_slot2idx(slot);
- /*
- * Always allocate a spare vector slot, as we don't
- * know yet which CPUs have a BP hardening slot that
- * we can reuse.
- */
- __kvm_harden_el2_vector_slot = atomic_inc_return(&arm64_el2_vector_last_slot);
- BUG_ON(__kvm_harden_el2_vector_slot >= BP_HARDEN_EL2_SLOTS);
- return create_hyp_exec_mappings(vect_pa, size,
- &__kvm_bp_vect_base);
+ hyp_spectre_vector_selector[slot] = base + (idx * SZ_2K);
+}
+
+static int kvm_init_vector_slots(void)
+{
+ int err;
+ void *base;
+
+ base = kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector));
+ kvm_init_vector_slot(base, HYP_VECTOR_DIRECT);
+
+ base = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs));
+ kvm_init_vector_slot(base, HYP_VECTOR_SPECTRE_DIRECT);
+
+ if (!cpus_have_const_cap(ARM64_SPECTRE_V3A))
+ return 0;
+
+ if (!has_vhe()) {
+ err = create_hyp_exec_mappings(__pa_symbol(__bp_harden_hyp_vecs),
+ __BP_HARDEN_HYP_VECS_SZ, &base);
+ if (err)
+ return err;
}
+ kvm_init_vector_slot(base, HYP_VECTOR_INDIRECT);
+ kvm_init_vector_slot(base, HYP_VECTOR_SPECTRE_INDIRECT);
return 0;
}
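
As a sanity check on the slot arithmetic, here is a minimal standalone C model (enum order assumed from the slot comments next to generate_vectors further down; names are illustrative, not the kernel's, and the direct slot really uses a different base, __kvm_hyp_vector):

#include <stdio.h>

/* Assumed to match the kernel's declaration order. */
enum hyp_spectre_vector {
	HYP_VECTOR_DIRECT,		/* __kvm_hyp_vector itself */
	HYP_VECTOR_SPECTRE_DIRECT,	/* __bp_harden_hyp_vecs slot 0 */
	HYP_VECTOR_INDIRECT,		/* __bp_harden_hyp_vecs slot 1 */
	HYP_VECTOR_SPECTRE_INDIRECT,	/* __bp_harden_hyp_vecs slot 2 */
};

#define SZ_2K 0x800

/* HYP_VECTOR_DIRECT and HYP_VECTOR_SPECTRE_DIRECT both land on index 0;
 * they only differ in the base address they are applied to. */
static int slot2idx(enum hyp_spectre_vector slot)
{
	return slot - (slot != HYP_VECTOR_DIRECT);
}

int main(void)
{
	for (int s = HYP_VECTOR_DIRECT; s <= HYP_VECTOR_SPECTRE_INDIRECT; s++)
		printf("slot %d -> base + %#x\n", s, slot2idx(s) * SZ_2K);
	return 0;
}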
@@ -1407,13 +1431,40 @@ static void cpu_hyp_reset(void)
__hyp_reset_vectors();
}
+/*
+ * EL2 vectors can be mapped and rerouted in a number of ways,
+ * depending on the kernel configuration and CPU present:
+ *
+ * - If the CPU is affected by Spectre-v2, the hardening sequence is
+ * placed in one of the vector slots, which is executed before jumping
+ * to the real vectors.
+ *
+ * - If the CPU also has the ARM64_SPECTRE_V3A cap, the slot
+ * containing the hardening sequence is mapped next to the idmap page,
+ * and executed before jumping to the real vectors.
+ *
+ * - If the CPU only has the ARM64_SPECTRE_V3A cap, then an
+ * empty slot is selected, mapped next to the idmap page, and
+ * executed before jumping to the real vectors.
+ *
+ * Note that ARM64_SPECTRE_V3A is somewhat incompatible with
+ * VHE, as we don't have hypervisor-specific mappings. If the system
+ * is VHE and yet selects this capability, it will be ignored.
+ */
+static void cpu_set_hyp_vector(void)
+{
+ struct bp_hardening_data *data = this_cpu_ptr(&bp_hardening_data);
+ void *vector = hyp_spectre_vector_selector[data->slot];
+
+ *this_cpu_ptr_hyp_sym(kvm_hyp_vector) = (unsigned long)vector;
+}
+
static void cpu_hyp_reinit(void)
{
kvm_init_host_cpu_context(&this_cpu_ptr_hyp_sym(kvm_host_data)->host_ctxt);
cpu_hyp_reset();
-
- *this_cpu_ptr_hyp_sym(kvm_hyp_vector) = (unsigned long)kvm_get_hyp_vector();
+ cpu_set_hyp_vector();
if (is_kernel_in_hyp_mode())
kvm_timer_init_vhe();
@@ -1552,7 +1603,7 @@ static int init_subsystems(void)
goto out;
kvm_perf_init();
- kvm_coproc_table_init();
+ kvm_sys_reg_table_init();
out:
on_each_cpu(_kvm_arch_hardware_disable, NULL, 1);
@@ -1643,12 +1694,6 @@ static int init_hyp_mode(void)
goto out_err;
}
- err = kvm_map_vectors();
- if (err) {
- kvm_err("Cannot map vectors\n");
- goto out_err;
- }
-
/*
* Map the Hyp stack pages
*/
@@ -1792,6 +1837,12 @@ int kvm_arch_init(void *opaque)
goto out_err;
}
+ err = kvm_init_vector_slots();
+ if (err) {
+ kvm_err("Cannot initialise vector slots\n");
+ goto out_err;
+ }
+
err = init_subsystems();
if (err)
goto out_hyp;
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index dfb5218137ca..9bbd30e62799 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -24,7 +24,6 @@
#include <asm/fpsimd.h>
#include <asm/kvm.h>
#include <asm/kvm_emulate.h>
-#include <asm/kvm_coproc.h>
#include <asm/sigcontext.h>
#include "trace.h"
@@ -252,10 +251,32 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
memcpy(addr, valp, KVM_REG_SIZE(reg->id));
if (*vcpu_cpsr(vcpu) & PSR_MODE32_BIT) {
- int i;
+ int i, nr_reg;
+
+ switch (*vcpu_cpsr(vcpu)) {
+ /*
+ * Either we are dealing with user mode, where only the
+ * first 15 registers (+ PC) must be narrowed to 32bit.
+ * AArch32 r0-r14 conveniently map to AArch64 x0-x14.
+ */
+ case PSR_AA32_MODE_USR:
+ case PSR_AA32_MODE_SYS:
+ nr_reg = 15;
+ break;
+
+ /*
+ * Otherwise, this is a privileged mode, and *all* the
+ * registers must be narrowed to 32bit.
+ */
+ default:
+ nr_reg = 31;
+ break;
+ }
+
+ for (i = 0; i < nr_reg; i++)
+ vcpu_set_reg(vcpu, i, (u32)vcpu_get_reg(vcpu, i));
- for (i = 0; i < 16; i++)
- *vcpu_reg32(vcpu, i) = (u32)*vcpu_reg32(vcpu, i);
+ *vcpu_pc(vcpu) = (u32)*vcpu_pc(vcpu);
}
out:
return err;
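
A minimal userspace sketch of the narrowing rule above, assuming the same 15/31 register split; regs[] stands in for the vcpu's x0-x30 and everything else is illustrative:

#include <stdint.h>
#include <stdio.h>

static void narrow_regs(uint64_t *regs, int is_user)
{
	/* USR/SYS: r0-r14 only; privileged modes: every mapped register */
	int nr_reg = is_user ? 15 : 31;

	for (int i = 0; i < nr_reg; i++)
		regs[i] = (uint32_t)regs[i];
}

int main(void)
{
	uint64_t regs[31];

	for (int i = 0; i < 31; i++)
		regs[i] = 0xdeadbeef00000000ULL | i;

	narrow_regs(regs, 1);
	/* x14 is narrowed for user mode, x15 keeps its upper bits */
	printf("x14=%#llx x15=%#llx\n",
	       (unsigned long long)regs[14], (unsigned long long)regs[15]);
	return 0;
}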
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 5d690d60ccad..cebe39f3b1b6 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -14,7 +14,6 @@
#include <asm/esr.h>
#include <asm/exception.h>
#include <asm/kvm_asm.h>
-#include <asm/kvm_coproc.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_mmu.h>
#include <asm/debug-monitors.h>
@@ -61,7 +60,7 @@ static int handle_smc(struct kvm_vcpu *vcpu)
* otherwise return to the same address...
*/
vcpu_set_reg(vcpu, 0, ~0UL);
- kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+ kvm_incr_pc(vcpu);
return 1;
}
@@ -100,7 +99,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu)
kvm_clear_request(KVM_REQ_UNHALT, vcpu);
}
- kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+ kvm_incr_pc(vcpu);
return 1;
}
@@ -221,7 +220,7 @@ static int handle_trap_exceptions(struct kvm_vcpu *vcpu)
* that fail their condition code check"
*/
if (!kvm_condition_valid(vcpu)) {
- kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+ kvm_incr_pc(vcpu);
handled = 1;
} else {
exit_handle_fn exit_handler;
@@ -241,23 +240,6 @@ int handle_exit(struct kvm_vcpu *vcpu, int exception_index)
{
struct kvm_run *run = vcpu->run;
- if (ARM_SERROR_PENDING(exception_index)) {
- u8 esr_ec = ESR_ELx_EC(kvm_vcpu_get_esr(vcpu));
-
- /*
- * HVC/SMC already have an adjusted PC, which we need
- * to correct in order to return to after having
- * injected the SError.
- */
- if (esr_ec == ESR_ELx_EC_HVC32 || esr_ec == ESR_ELx_EC_HVC64 ||
- esr_ec == ESR_ELx_EC_SMC32 || esr_ec == ESR_ELx_EC_SMC64) {
- u32 adj = kvm_vcpu_trap_il_is32bit(vcpu) ? 4 : 2;
- *vcpu_pc(vcpu) -= adj;
- }
-
- return 1;
- }
-
exception_index = ARM_EXCEPTION_CODE(exception_index);
switch (exception_index) {
diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
index 4a81eddabcd8..687598e41b21 100644
--- a/arch/arm64/kvm/hyp/Makefile
+++ b/arch/arm64/kvm/hyp/Makefile
@@ -10,4 +10,4 @@ subdir-ccflags-y := -I$(incdir) \
-DDISABLE_BRANCH_PROFILING \
$(DISABLE_STACKLEAK_PLUGIN)
-obj-$(CONFIG_KVM) += vhe/ nvhe/ pgtable.o smccc_wa.o
+obj-$(CONFIG_KVM) += vhe/ nvhe/ pgtable.o
diff --git a/arch/arm64/kvm/hyp/aarch32.c b/arch/arm64/kvm/hyp/aarch32.c
index ae56d8a4b382..f98cbe2626a1 100644
--- a/arch/arm64/kvm/hyp/aarch32.c
+++ b/arch/arm64/kvm/hyp/aarch32.c
@@ -123,13 +123,13 @@ static void kvm_adjust_itstate(struct kvm_vcpu *vcpu)
* kvm_skip_instr - skip a trapped instruction and proceed to the next
* @vcpu: The vcpu pointer
*/
-void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr)
+void kvm_skip_instr32(struct kvm_vcpu *vcpu)
{
u32 pc = *vcpu_pc(vcpu);
bool is_thumb;
is_thumb = !!(*vcpu_cpsr(vcpu) & PSR_AA32_T_BIT);
- if (is_thumb && !is_wide_instr)
+ if (is_thumb && !kvm_vcpu_trap_il_is32bit(vcpu))
pc += 2;
else
pc += 4;
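
The skip width now derives entirely from vcpu state. A hedged model of the computation, with PSR_AA32_T_BIT's value taken from the arm64 uapi headers and il_is_32bit standing in for kvm_vcpu_trap_il_is32bit():

#include <stdint.h>
#include <stdio.h>

#define PSR_AA32_T_BIT	0x00000020

/* il_is_32bit models ESR_ELx.IL == 1 (a 32bit trapped instruction) */
static uint32_t next_pc(uint32_t pc, uint32_t cpsr, int il_is_32bit)
{
	int is_thumb = !!(cpsr & PSR_AA32_T_BIT);

	return pc + ((is_thumb && !il_is_32bit) ? 2 : 4);
}

int main(void)
{
	/* narrow Thumb instruction: advance by 2 */
	printf("%#x\n", next_pc(0x1000, PSR_AA32_T_BIT, 0));
	/* ARM state: always 4 */
	printf("%#x\n", next_pc(0x1000, 0, 1));
	return 0;
}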
diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c
new file mode 100644
index 000000000000..73629094f903
--- /dev/null
+++ b/arch/arm64/kvm/hyp/exception.c
@@ -0,0 +1,331 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Fault injection for both 32 and 64bit guests.
+ *
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Based on arch/arm/kvm/emulate.c
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ */
+
+#include <hyp/adjust_pc.h>
+#include <linux/kvm_host.h>
+#include <asm/kvm_emulate.h>
+
+#if !defined (__KVM_NVHE_HYPERVISOR__) && !defined (__KVM_VHE_HYPERVISOR__)
+#error Hypervisor code only!
+#endif
+
+static inline u64 __vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
+{
+ u64 val;
+
+ if (__vcpu_read_sys_reg_from_cpu(reg, &val))
+ return val;
+
+ return __vcpu_sys_reg(vcpu, reg);
+}
+
+static inline void __vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
+{
+ if (__vcpu_write_sys_reg_to_cpu(val, reg))
+ return;
+
+ __vcpu_sys_reg(vcpu, reg) = val;
+}
+
+static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, u64 val)
+{
+ write_sysreg_el1(val, SYS_SPSR);
+}
+
+static void __vcpu_write_spsr_abt(struct kvm_vcpu *vcpu, u64 val)
+{
+ if (has_vhe())
+ write_sysreg(val, spsr_abt);
+ else
+ vcpu->arch.ctxt.spsr_abt = val;
+}
+
+static void __vcpu_write_spsr_und(struct kvm_vcpu *vcpu, u64 val)
+{
+ if (has_vhe())
+ write_sysreg(val, spsr_und);
+ else
+ vcpu->arch.ctxt.spsr_und = val;
+}
+
+/*
+ * This performs the exception entry at a given EL (@target_mode), stashing PC
+ * and PSTATE into ELR and SPSR respectively, and computing the new PC/PSTATE.
+ * The EL passed to this function *must* be a non-secure, privileged mode with
+ * bit 0 being set (PSTATE.SP == 1).
+ *
+ * When an exception is taken, most PSTATE fields are left unchanged in the
+ * handler. However, some are explicitly overridden (e.g. M[4:0]). Luckily all
+ * of the inherited bits have the same position in the AArch64/AArch32 SPSR_ELx
+ * layouts, so we don't need to shuffle these for exceptions from AArch32 EL0.
+ *
+ * For the SPSR_ELx layout for AArch64, see ARM DDI 0487E.a page C5-429.
+ * For the SPSR_ELx layout for AArch32, see ARM DDI 0487E.a page C5-426.
+ *
+ * Here we manipulate the fields in order of the AArch64 SPSR_ELx layout, from
+ * MSB to LSB.
+ */
+static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode,
+ enum exception_type type)
+{
+ unsigned long sctlr, vbar, old, new, mode;
+ u64 exc_offset;
+
+ mode = *vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT);
+
+ if (mode == target_mode)
+ exc_offset = CURRENT_EL_SP_ELx_VECTOR;
+ else if ((mode | PSR_MODE_THREAD_BIT) == target_mode)
+ exc_offset = CURRENT_EL_SP_EL0_VECTOR;
+ else if (!(mode & PSR_MODE32_BIT))
+ exc_offset = LOWER_EL_AArch64_VECTOR;
+ else
+ exc_offset = LOWER_EL_AArch32_VECTOR;
+
+ switch (target_mode) {
+ case PSR_MODE_EL1h:
+ vbar = __vcpu_read_sys_reg(vcpu, VBAR_EL1);
+ sctlr = __vcpu_read_sys_reg(vcpu, SCTLR_EL1);
+ __vcpu_write_sys_reg(vcpu, *vcpu_pc(vcpu), ELR_EL1);
+ break;
+ default:
+ /* Don't do that */
+ BUG();
+ }
+
+ *vcpu_pc(vcpu) = vbar + exc_offset + type;
+
+ old = *vcpu_cpsr(vcpu);
+ new = 0;
+
+ new |= (old & PSR_N_BIT);
+ new |= (old & PSR_Z_BIT);
+ new |= (old & PSR_C_BIT);
+ new |= (old & PSR_V_BIT);
+
+ // TODO: TCO (if/when ARMv8.5-MemTag is exposed to guests)
+
+ new |= (old & PSR_DIT_BIT);
+
+ // PSTATE.UAO is set to zero upon any exception to AArch64
+ // See ARM DDI 0487E.a, page D5-2579.
+
+ // PSTATE.PAN is unchanged unless SCTLR_ELx.SPAN == 0b0
+ // SCTLR_ELx.SPAN is RES1 when ARMv8.1-PAN is not implemented
+ // See ARM DDI 0487E.a, page D5-2578.
+ new |= (old & PSR_PAN_BIT);
+ if (!(sctlr & SCTLR_EL1_SPAN))
+ new |= PSR_PAN_BIT;
+
+ // PSTATE.SS is set to zero upon any exception to AArch64
+ // See ARM DDI 0487E.a, page D2-2452.
+
+ // PSTATE.IL is set to zero upon any exception to AArch64
+ // See ARM DDI 0487E.a, page D1-2306.
+
+ // PSTATE.SSBS is set to SCTLR_ELx.DSSBS upon any exception to AArch64
+ // See ARM DDI 0487E.a, page D13-3258
+ if (sctlr & SCTLR_ELx_DSSBS)
+ new |= PSR_SSBS_BIT;
+
+ // PSTATE.BTYPE is set to zero upon any exception to AArch64
+ // See ARM DDI 0487E.a, pages D1-2293 to D1-2294.
+
+ new |= PSR_D_BIT;
+ new |= PSR_A_BIT;
+ new |= PSR_I_BIT;
+ new |= PSR_F_BIT;
+
+ new |= target_mode;
+
+ *vcpu_cpsr(vcpu) = new;
+ __vcpu_write_spsr(vcpu, old);
+}
+
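The exc_offset selection can be checked in isolation. This sketch reuses the vector offsets and mode encodings shown in the inject_fault.c removal below (constant values as in the arm64 headers, to the best of my reading):

#include <stdint.h>
#include <stdio.h>

#define PSR_MODE_EL0t		0x0
#define PSR_MODE_EL1h		0x5
#define PSR_MODE_MASK		0xf
#define PSR_MODE32_BIT		0x10
#define PSR_MODE_THREAD_BIT	0x1

#define CURRENT_EL_SP_EL0_VECTOR	0x0
#define CURRENT_EL_SP_ELx_VECTOR	0x200
#define LOWER_EL_AArch64_VECTOR		0x400
#define LOWER_EL_AArch32_VECTOR		0x600

static uint64_t exc_offset(uint64_t cpsr, uint64_t target_mode)
{
	uint64_t mode = cpsr & (PSR_MODE_MASK | PSR_MODE32_BIT);

	if (mode == target_mode)
		return CURRENT_EL_SP_ELx_VECTOR;
	if ((mode | PSR_MODE_THREAD_BIT) == target_mode)
		return CURRENT_EL_SP_EL0_VECTOR;
	if (!(mode & PSR_MODE32_BIT))
		return LOWER_EL_AArch64_VECTOR;
	return LOWER_EL_AArch32_VECTOR;
}

int main(void)
{
	/* EL0t guest, sync exception to EL1h: vector at VBAR_EL1 + 0x400 */
	printf("%#llx\n",
	       (unsigned long long)exc_offset(PSR_MODE_EL0t, PSR_MODE_EL1h));
	/* EL1h guest: current EL with SP_ELx, VBAR_EL1 + 0x200 */
	printf("%#llx\n",
	       (unsigned long long)exc_offset(PSR_MODE_EL1h, PSR_MODE_EL1h));
	return 0;
}
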
+/*
+ * When an exception is taken, most CPSR fields are left unchanged in the
+ * handler. However, some are explicitly overridden (e.g. M[4:0]).
+ *
+ * The SPSR/SPSR_ELx layouts differ, and the below is intended to work with
+ * either format. Note: SPSR.J bit doesn't exist in SPSR_ELx, but this bit was
+ * obsoleted by the ARMv7 virtualization extensions and is RES0.
+ *
+ * For the SPSR layout seen from AArch32, see:
+ * - ARM DDI 0406C.d, page B1-1148
+ * - ARM DDI 0487E.a, page G8-6264
+ *
+ * For the SPSR_ELx layout for AArch32 seen from AArch64, see:
+ * - ARM DDI 0487E.a, page C5-426
+ *
+ * Here we manipulate the fields in order of the AArch32 SPSR_ELx layout, from
+ * MSB to LSB.
+ */
+static unsigned long get_except32_cpsr(struct kvm_vcpu *vcpu, u32 mode)
+{
+ u32 sctlr = __vcpu_read_sys_reg(vcpu, SCTLR_EL1);
+ unsigned long old, new;
+
+ old = *vcpu_cpsr(vcpu);
+ new = 0;
+
+ new |= (old & PSR_AA32_N_BIT);
+ new |= (old & PSR_AA32_Z_BIT);
+ new |= (old & PSR_AA32_C_BIT);
+ new |= (old & PSR_AA32_V_BIT);
+ new |= (old & PSR_AA32_Q_BIT);
+
+ // CPSR.IT[7:0] are set to zero upon any exception
+ // See ARM DDI 0487E.a, section G1.12.3
+ // See ARM DDI 0406C.d, section B1.8.3
+
+ new |= (old & PSR_AA32_DIT_BIT);
+
+ // CPSR.SSBS is set to SCTLR.DSSBS upon any exception
+ // See ARM DDI 0487E.a, page G8-6244
+ if (sctlr & BIT(31))
+ new |= PSR_AA32_SSBS_BIT;
+
+ // CPSR.PAN is unchanged unless SCTLR.SPAN == 0b0
+ // SCTLR.SPAN is RES1 when ARMv8.1-PAN is not implemented
+ // See ARM DDI 0487E.a, page G8-6246
+ new |= (old & PSR_AA32_PAN_BIT);
+ if (!(sctlr & BIT(23)))
+ new |= PSR_AA32_PAN_BIT;
+
+ // SS does not exist in AArch32, so ignore
+
+ // CPSR.IL is set to zero upon any exception
+ // See ARM DDI 0487E.a, page G1-5527
+
+ new |= (old & PSR_AA32_GE_MASK);
+
+ // CPSR.IT[7:0] are set to zero upon any exception
+ // See prior comment above
+
+ // CPSR.E is set to SCTLR.EE upon any exception
+ // See ARM DDI 0487E.a, page G8-6245
+ // See ARM DDI 0406C.d, page B4-1701
+ if (sctlr & BIT(25))
+ new |= PSR_AA32_E_BIT;
+
+ // CPSR.A is unchanged upon an exception to Undefined, Supervisor
+ // CPSR.A is set upon an exception to other modes
+ // See ARM DDI 0487E.a, pages G1-5515 to G1-5516
+ // See ARM DDI 0406C.d, page B1-1182
+ new |= (old & PSR_AA32_A_BIT);
+ if (mode != PSR_AA32_MODE_UND && mode != PSR_AA32_MODE_SVC)
+ new |= PSR_AA32_A_BIT;
+
+ // CPSR.I is set upon any exception
+ // See ARM DDI 0487E.a, pages G1-5515 to G1-5516
+ // See ARM DDI 0406C.d, page B1-1182
+ new |= PSR_AA32_I_BIT;
+
+ // CPSR.F is set upon an exception to FIQ
+ // CPSR.F is unchanged upon an exception to other modes
+ // See ARM DDI 0487E.a, pages G1-5515 to G1-5516
+ // See ARM DDI 0406C.d, page B1-1182
+ new |= (old & PSR_AA32_F_BIT);
+ if (mode == PSR_AA32_MODE_FIQ)
+ new |= PSR_AA32_F_BIT;
+
+ // CPSR.T is set to SCTLR.TE upon any exception
+ // See ARM DDI 0487E.a, page G8-5514
+ // See ARM DDI 0406C.d, page B1-1181
+ if (sctlr & BIT(30))
+ new |= PSR_AA32_T_BIT;
+
+ new |= mode;
+
+ return new;
+}
+
+/*
+ * Table taken from ARMv8 ARM DDI0487B-B, table G1-10.
+ */
+static const u8 return_offsets[8][2] = {
+ [0] = { 0, 0 }, /* Reset, unused */
+ [1] = { 4, 2 }, /* Undefined */
+ [2] = { 0, 0 }, /* SVC, unused */
+ [3] = { 4, 4 }, /* Prefetch abort */
+ [4] = { 8, 8 }, /* Data abort */
+ [5] = { 0, 0 }, /* HVC, unused */
+ [6] = { 4, 4 }, /* IRQ, unused */
+ [7] = { 4, 4 }, /* FIQ, unused */
+};
+
+static void enter_exception32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset)
+{
+ unsigned long spsr = *vcpu_cpsr(vcpu);
+ bool is_thumb = (spsr & PSR_AA32_T_BIT);
+ u32 sctlr = __vcpu_read_sys_reg(vcpu, SCTLR_EL1);
+ u32 return_address;
+
+ *vcpu_cpsr(vcpu) = get_except32_cpsr(vcpu, mode);
+ return_address = *vcpu_pc(vcpu);
+ return_address += return_offsets[vect_offset >> 2][is_thumb];
+
+ /* KVM only enters the ABT and UND modes, so only deal with those */
+ switch (mode) {
+ case PSR_AA32_MODE_ABT:
+ __vcpu_write_spsr_abt(vcpu, host_spsr_to_spsr32(spsr));
+ vcpu_gp_regs(vcpu)->compat_lr_abt = return_address;
+ break;
+
+ case PSR_AA32_MODE_UND:
+ __vcpu_write_spsr_und(vcpu, host_spsr_to_spsr32(spsr));
+ vcpu_gp_regs(vcpu)->compat_lr_und = return_address;
+ break;
+ }
+
+ /* Branch to exception vector */
+ if (sctlr & (1 << 13))
+ vect_offset += 0xffff0000;
+ else /* always have security exceptions */
+ vect_offset += __vcpu_read_sys_reg(vcpu, VBAR_EL1);
+
+ *vcpu_pc(vcpu) = vect_offset;
+}
+
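A worked example of the return-address fixup using the same table: a Thumb UNDEF at 0x1000 must leave LR_und pointing at 0x1002 (userspace model under that assumption, not kernel code):

#include <stdint.h>
#include <stdio.h>

/* {ARM, Thumb} return-address fixups, as in the table above */
static const uint8_t return_offsets[8][2] = {
	[1] = { 4, 2 },		/* Undefined */
	[3] = { 4, 4 },		/* Prefetch abort */
	[4] = { 8, 8 },		/* Data abort */
};

int main(void)
{
	uint32_t pc = 0x1000;
	int is_thumb = 1;
	uint32_t vect_offset = 4;	/* Undefined instruction vector */

	/* LR_und points past the 2-byte Thumb UNDEF: 0x1002 */
	printf("lr = %#x\n", pc + return_offsets[vect_offset >> 2][is_thumb]);
	return 0;
}
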
+void kvm_inject_exception(struct kvm_vcpu *vcpu)
+{
+ if (vcpu_el1_is_32bit(vcpu)) {
+ switch (vcpu->arch.flags & KVM_ARM64_EXCEPT_MASK) {
+ case KVM_ARM64_EXCEPT_AA32_UND:
+ enter_exception32(vcpu, PSR_AA32_MODE_UND, 4);
+ break;
+ case KVM_ARM64_EXCEPT_AA32_IABT:
+ enter_exception32(vcpu, PSR_AA32_MODE_ABT, 12);
+ break;
+ case KVM_ARM64_EXCEPT_AA32_DABT:
+ enter_exception32(vcpu, PSR_AA32_MODE_ABT, 16);
+ break;
+ default:
+ /* Err... */
+ break;
+ }
+ } else {
+ switch (vcpu->arch.flags & KVM_ARM64_EXCEPT_MASK) {
+ case (KVM_ARM64_EXCEPT_AA64_ELx_SYNC |
+ KVM_ARM64_EXCEPT_AA64_EL1):
+ enter_exception64(vcpu, PSR_MODE_EL1h, except_type_sync);
+ break;
+ default:
+ /*
+ * Only EL1_SYNC makes sense so far, EL2_{SYNC,IRQ}
+ * will be implemented at some point. Everything
+ * else gets silently ignored.
+ */
+ break;
+ }
+ }
+}
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index 0a5b36eb54b3..d179056e1af8 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -13,6 +13,7 @@
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
#include <asm/mmu.h>
+#include <asm/spectre.h>
.macro save_caller_saved_regs_vect
/* x0 and x1 were saved in the vector entry */
@@ -187,52 +188,60 @@ SYM_CODE_START(__kvm_hyp_vector)
valid_vect el1_error // Error 32-bit EL1
SYM_CODE_END(__kvm_hyp_vector)
-.macro hyp_ventry
- .align 7
+.macro spectrev2_smccc_wa1_smc
+ sub sp, sp, #(8 * 4)
+ stp x2, x3, [sp, #(8 * 0)]
+ stp x0, x1, [sp, #(8 * 2)]
+ mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1
+ smc #0
+ ldp x2, x3, [sp, #(8 * 0)]
+ add sp, sp, #(8 * 2)
+.endm
+
+.macro hyp_ventry indirect, spectrev2
+ .align 7
1: esb
- .rept 26
- nop
- .endr
-/*
- * The default sequence is to directly branch to the KVM vectors,
- * using the computed offset. This applies for VHE as well as
- * !ARM64_HARDEN_EL2_VECTORS. The first vector must always run the preamble.
- *
- * For ARM64_HARDEN_EL2_VECTORS configurations, this gets replaced
- * with:
- *
- * stp x0, x1, [sp, #-16]!
- * movz x0, #(addr & 0xffff)
- * movk x0, #((addr >> 16) & 0xffff), lsl #16
- * movk x0, #((addr >> 32) & 0xffff), lsl #32
- * br x0
- *
- * Where:
- * addr = kern_hyp_va(__kvm_hyp_vector) + vector-offset + KVM_VECTOR_PREAMBLE.
- * See kvm_patch_vector_branch for details.
- */
-alternative_cb kvm_patch_vector_branch
+ .if \spectrev2 != 0
+ spectrev2_smccc_wa1_smc
+ .else
stp x0, x1, [sp, #-16]!
- b __kvm_hyp_vector + (1b - 0b + KVM_VECTOR_PREAMBLE)
+ .endif
+ .if \indirect != 0
+ alternative_cb kvm_patch_vector_branch
+ /*
+ * For ARM64_SPECTRE_V3A configurations, these NOPs get replaced with:
+ *
+ * movz x0, #(addr & 0xffff)
+ * movk x0, #((addr >> 16) & 0xffff), lsl #16
+ * movk x0, #((addr >> 32) & 0xffff), lsl #32
+ * br x0
+ *
+ * Where:
+ * addr = kern_hyp_va(__kvm_hyp_vector) + vector-offset + KVM_VECTOR_PREAMBLE.
+ * See kvm_patch_vector_branch for details.
+ */
nop
nop
nop
-alternative_cb_end
+ nop
+ alternative_cb_end
+ .endif
+ b __kvm_hyp_vector + (1b - 0b + KVM_VECTOR_PREAMBLE)
.endm
-.macro generate_vectors
+.macro generate_vectors indirect, spectrev2
0:
.rept 16
- hyp_ventry
+ hyp_ventry \indirect, \spectrev2
.endr
.org 0b + SZ_2K // Safety measure
.endm
.align 11
SYM_CODE_START(__bp_harden_hyp_vecs)
- .rept BP_HARDEN_EL2_SLOTS
- generate_vectors
- .endr
+ generate_vectors indirect = 0, spectrev2 = 1 // HYP_VECTOR_SPECTRE_DIRECT
+ generate_vectors indirect = 1, spectrev2 = 0 // HYP_VECTOR_INDIRECT
+ generate_vectors indirect = 1, spectrev2 = 1 // HYP_VECTOR_SPECTRE_INDIRECT
1: .org __bp_harden_hyp_vecs + __BP_HARDEN_HYP_VECS_SZ
.org 1b
SYM_CODE_END(__bp_harden_hyp_vecs)
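
The .org guard works because each generated slot has a fixed geometry: 16 vectors aligned to 128 bytes (.align 7) come out to exactly SZ_2K, as this trivial check illustrates:

#include <stdio.h>

int main(void)
{
	int nr_vectors = 16;	/* 4 exception types x 4 sources */
	int stride = 1 << 7;	/* .align 7: 128 bytes per vector */

	printf("slot size = %#x\n", nr_vectors * stride);	/* 0x800 */
	return 0;
}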
diff --git a/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h b/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h
new file mode 100644
index 000000000000..b1f60923a8fe
--- /dev/null
+++ b/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Guest PC manipulation helpers
+ *
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Copyright (C) 2020 - Google LLC
+ * Author: Marc Zyngier <maz@kernel.org>
+ */
+
+#ifndef __ARM64_KVM_HYP_ADJUST_PC_H__
+#define __ARM64_KVM_HYP_ADJUST_PC_H__
+
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_host.h>
+
+void kvm_inject_exception(struct kvm_vcpu *vcpu);
+
+static inline void kvm_skip_instr(struct kvm_vcpu *vcpu)
+{
+ if (vcpu_mode_is_32bit(vcpu)) {
+ kvm_skip_instr32(vcpu);
+ } else {
+ *vcpu_pc(vcpu) += 4;
+ *vcpu_cpsr(vcpu) &= ~PSR_BTYPE_MASK;
+ }
+
+ /* advance the singlestep state machine */
+ *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS;
+}
+
+/*
+ * Skip an instruction which has been emulated at hyp while most guest sysregs
+ * are live.
+ */
+static inline void __kvm_skip_instr(struct kvm_vcpu *vcpu)
+{
+ *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR);
+ vcpu_gp_regs(vcpu)->pstate = read_sysreg_el2(SYS_SPSR);
+
+ kvm_skip_instr(vcpu);
+
+ write_sysreg_el2(vcpu_gp_regs(vcpu)->pstate, SYS_SPSR);
+ write_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR);
+}
+
+/*
+ * Adjust the guest PC on entry, depending on flags provided by EL1
+ * for the purpose of emulation (MMIO, sysreg) or exception injection.
+ */
+static inline void __adjust_pc(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->arch.flags & KVM_ARM64_PENDING_EXCEPTION) {
+ kvm_inject_exception(vcpu);
+ vcpu->arch.flags &= ~(KVM_ARM64_PENDING_EXCEPTION |
+ KVM_ARM64_EXCEPT_MASK);
+ } else if (vcpu->arch.flags & KVM_ARM64_INCREMENT_PC) {
+ kvm_skip_instr(vcpu);
+ vcpu->arch.flags &= ~KVM_ARM64_INCREMENT_PC;
+ }
+}
+
+#endif
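
The flag handshake is one-shot by construction: EL1 raises a flag, and the first world switch consumes it. A hedged model of that protocol (bit values illustrative, and the extra EXCEPT_MASK clearing elided; the real masks live in kvm_host.h):

#include <stdint.h>
#include <stdio.h>

#define PENDING_EXCEPTION	(1u << 0)
#define INCREMENT_PC		(1u << 1)

static void adjust_pc(uint32_t *flags)
{
	if (*flags & PENDING_EXCEPTION) {
		printf("inject exception\n");
		*flags &= ~PENDING_EXCEPTION;
	} else if (*flags & INCREMENT_PC) {
		printf("skip instruction\n");
		*flags &= ~INCREMENT_PC;
	}
}

int main(void)
{
	uint32_t flags = INCREMENT_PC;	/* set by EL1 before entering */

	adjust_pc(&flags);	/* first entry: the PC moves forward */
	adjust_pc(&flags);	/* later entries: nothing left to do */
	return 0;
}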
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 1f875a8f20c4..84473574c2e7 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -7,6 +7,8 @@
#ifndef __ARM64_KVM_HYP_SWITCH_H__
#define __ARM64_KVM_HYP_SWITCH_H__
+#include <hyp/adjust_pc.h>
+
#include <linux/arm-smccc.h>
#include <linux/kvm_host.h>
#include <linux/types.h>
@@ -409,6 +411,21 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR);
+ if (ARM_SERROR_PENDING(*exit_code)) {
+ u8 esr_ec = kvm_vcpu_trap_get_class(vcpu);
+
+ /*
+ * HVC already has an adjusted PC, which we need to
+ * correct in order to return to the right address after
+ * having injected the SError.
+ *
+ * SMC, on the other hand, is *trapped*, meaning its
+ * preferred return address is the SMC itself.
+ */
+ if (esr_ec == ESR_ELx_EC_HVC32 || esr_ec == ESR_ELx_EC_HVC64)
+ write_sysreg_el2(read_sysreg_el2(SYS_ELR) - 4, SYS_ELR);
+ }
+
/*
* We're using the raw exception code in order to only process
* the trap if no SError is pending. We will come back to the
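
In effect, a pending SError rewinds ELR_EL2 by one instruction for HVC exits only, so the saved return address points back at the HVC itself; SMC needs no adjustment since it traps with the preferred return address already on the SMC. A small model of that fixup (my reading of the hunk above, not kernel code):

#include <stdint.h>
#include <stdio.h>

static uint64_t fixup_elr(uint64_t elr, int is_hvc, int serror_pending)
{
	/* HVC reports ELR past the instruction; wind it back by one
	 * 4-byte instruction so the hypercall is retaken once the
	 * SError has been delivered. */
	if (serror_pending && is_hvc)
		elr -= 4;
	return elr;
}

int main(void)
{
	printf("%#llx\n", (unsigned long long)fixup_elr(0x8004, 1, 1));
	return 0;
}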
diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
index ddde15fe85f2..77b8c4e06f2f 100644
--- a/arch/arm64/kvm/hyp/nvhe/Makefile
+++ b/arch/arm64/kvm/hyp/nvhe/Makefile
@@ -8,7 +8,7 @@ ccflags-y := -D__KVM_NVHE_HYPERVISOR__
obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o hyp-main.o
obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
- ../fpsimd.o ../hyp-entry.o
+ ../fpsimd.o ../hyp-entry.o ../exception.o
##
## Build rules for compiling nVHE hyp code
diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S
index ed27f06a31ba..fe2740b224cf 100644
--- a/arch/arm64/kvm/hyp/nvhe/host.S
+++ b/arch/arm64/kvm/hyp/nvhe/host.S
@@ -13,8 +13,6 @@
.text
SYM_FUNC_START(__host_exit)
- stp x0, x1, [sp, #-16]!
-
get_host_ctxt x0, x1
/* Store the host regs x2 and x3 */
@@ -99,13 +97,15 @@ SYM_FUNC_END(__hyp_do_panic)
mrs x0, esr_el2
lsr x0, x0, #ESR_ELx_EC_SHIFT
cmp x0, #ESR_ELx_EC_HVC64
- ldp x0, x1, [sp], #16
b.ne __host_exit
+ ldp x0, x1, [sp] // Don't fixup the stack yet
+
/* Check for a stub HVC call */
cmp x0, #HVC_STUB_HCALL_NR
b.hs __host_exit
+ add sp, sp, #16
/*
* Compute the idmap address of __kvm_handle_stub_hvc and
* jump there. Since we use kimage_voffset, do not use the
@@ -115,10 +115,7 @@ SYM_FUNC_END(__hyp_do_panic)
* Preserve x0-x4, which may contain stub parameters.
*/
ldr x5, =__kvm_handle_stub_hvc
- ldr_l x6, kimage_voffset
-
- /* x5 = __pa(x5) */
- sub x5, x5, x6
+ kimg_pa x5, x6
br x5
.L__vect_end\@:
.if ((.L__vect_end\@ - .L__vect_start\@) > 0x80)
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index e2eafe2c93af..82df7fc24760 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -12,106 +12,150 @@
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
-#include <kvm/arm_hypercalls.h>
-
-static void handle_host_hcall(unsigned long func_id,
- struct kvm_cpu_context *host_ctxt)
-{
- unsigned long ret = 0;
-
- switch (func_id) {
- case KVM_HOST_SMCCC_FUNC(__kvm_vcpu_run): {
- unsigned long r1 = host_ctxt->regs.regs[1];
- struct kvm_vcpu *vcpu = (struct kvm_vcpu *)r1;
-
- ret = __kvm_vcpu_run(kern_hyp_va(vcpu));
- break;
- }
- case KVM_HOST_SMCCC_FUNC(__kvm_flush_vm_context):
- __kvm_flush_vm_context();
- break;
- case KVM_HOST_SMCCC_FUNC(__kvm_tlb_flush_vmid_ipa): {
- unsigned long r1 = host_ctxt->regs.regs[1];
- struct kvm_s2_mmu *mmu = (struct kvm_s2_mmu *)r1;
- phys_addr_t ipa = host_ctxt->regs.regs[2];
- int level = host_ctxt->regs.regs[3];
-
- __kvm_tlb_flush_vmid_ipa(kern_hyp_va(mmu), ipa, level);
- break;
- }
- case KVM_HOST_SMCCC_FUNC(__kvm_tlb_flush_vmid): {
- unsigned long r1 = host_ctxt->regs.regs[1];
- struct kvm_s2_mmu *mmu = (struct kvm_s2_mmu *)r1;
-
- __kvm_tlb_flush_vmid(kern_hyp_va(mmu));
- break;
- }
- case KVM_HOST_SMCCC_FUNC(__kvm_tlb_flush_local_vmid): {
- unsigned long r1 = host_ctxt->regs.regs[1];
- struct kvm_s2_mmu *mmu = (struct kvm_s2_mmu *)r1;
-
- __kvm_tlb_flush_local_vmid(kern_hyp_va(mmu));
- break;
- }
- case KVM_HOST_SMCCC_FUNC(__kvm_timer_set_cntvoff): {
- u64 cntvoff = host_ctxt->regs.regs[1];
-
- __kvm_timer_set_cntvoff(cntvoff);
- break;
- }
- case KVM_HOST_SMCCC_FUNC(__kvm_enable_ssbs):
- __kvm_enable_ssbs();
- break;
- case KVM_HOST_SMCCC_FUNC(__vgic_v3_get_ich_vtr_el2):
- ret = __vgic_v3_get_ich_vtr_el2();
- break;
- case KVM_HOST_SMCCC_FUNC(__vgic_v3_read_vmcr):
- ret = __vgic_v3_read_vmcr();
- break;
- case KVM_HOST_SMCCC_FUNC(__vgic_v3_write_vmcr): {
- u32 vmcr = host_ctxt->regs.regs[1];
-
- __vgic_v3_write_vmcr(vmcr);
- break;
- }
- case KVM_HOST_SMCCC_FUNC(__vgic_v3_init_lrs):
- __vgic_v3_init_lrs();
- break;
- case KVM_HOST_SMCCC_FUNC(__kvm_get_mdcr_el2):
- ret = __kvm_get_mdcr_el2();
- break;
- case KVM_HOST_SMCCC_FUNC(__vgic_v3_save_aprs): {
- unsigned long r1 = host_ctxt->regs.regs[1];
- struct vgic_v3_cpu_if *cpu_if = (struct vgic_v3_cpu_if *)r1;
-
- __vgic_v3_save_aprs(kern_hyp_va(cpu_if));
- break;
- }
- case KVM_HOST_SMCCC_FUNC(__vgic_v3_restore_aprs): {
- unsigned long r1 = host_ctxt->regs.regs[1];
- struct vgic_v3_cpu_if *cpu_if = (struct vgic_v3_cpu_if *)r1;
-
- __vgic_v3_restore_aprs(kern_hyp_va(cpu_if));
- break;
- }
- default:
- /* Invalid host HVC. */
- host_ctxt->regs.regs[0] = SMCCC_RET_NOT_SUPPORTED;
- return;
- }
-
- host_ctxt->regs.regs[0] = SMCCC_RET_SUCCESS;
- host_ctxt->regs.regs[1] = ret;
+#define cpu_reg(ctxt, r) (ctxt)->regs.regs[r]
+#define DECLARE_REG(type, name, ctxt, reg) \
+ type name = (type)cpu_reg(ctxt, (reg))
+
+static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt)
+{
+ DECLARE_REG(struct kvm_vcpu *, vcpu, host_ctxt, 1);
+
+ cpu_reg(host_ctxt, 1) = __kvm_vcpu_run(kern_hyp_va(vcpu));
+}
+
+static void handle___kvm_flush_vm_context(struct kvm_cpu_context *host_ctxt)
+{
+ __kvm_flush_vm_context();
+}
+
+static void handle___kvm_tlb_flush_vmid_ipa(struct kvm_cpu_context *host_ctxt)
+{
+ DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
+ DECLARE_REG(phys_addr_t, ipa, host_ctxt, 2);
+ DECLARE_REG(int, level, host_ctxt, 3);
+
+ __kvm_tlb_flush_vmid_ipa(kern_hyp_va(mmu), ipa, level);
+}
+
+static void handle___kvm_tlb_flush_vmid(struct kvm_cpu_context *host_ctxt)
+{
+ DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
+
+ __kvm_tlb_flush_vmid(kern_hyp_va(mmu));
+}
+
+static void handle___kvm_tlb_flush_local_vmid(struct kvm_cpu_context *host_ctxt)
+{
+ DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
+
+ __kvm_tlb_flush_local_vmid(kern_hyp_va(mmu));
+}
+
+static void handle___kvm_timer_set_cntvoff(struct kvm_cpu_context *host_ctxt)
+{
+ __kvm_timer_set_cntvoff(cpu_reg(host_ctxt, 1));
+}
+
+static void handle___kvm_enable_ssbs(struct kvm_cpu_context *host_ctxt)
+{
+ u64 tmp;
+
+ tmp = read_sysreg_el2(SYS_SCTLR);
+ tmp |= SCTLR_ELx_DSSBS;
+ write_sysreg_el2(tmp, SYS_SCTLR);
+}
+
+static void handle___vgic_v3_get_ich_vtr_el2(struct kvm_cpu_context *host_ctxt)
+{
+ cpu_reg(host_ctxt, 1) = __vgic_v3_get_ich_vtr_el2();
+}
+
+static void handle___vgic_v3_read_vmcr(struct kvm_cpu_context *host_ctxt)
+{
+ cpu_reg(host_ctxt, 1) = __vgic_v3_read_vmcr();
+}
+
+static void handle___vgic_v3_write_vmcr(struct kvm_cpu_context *host_ctxt)
+{
+ __vgic_v3_write_vmcr(cpu_reg(host_ctxt, 1));
+}
+
+static void handle___vgic_v3_init_lrs(struct kvm_cpu_context *host_ctxt)
+{
+ __vgic_v3_init_lrs();
+}
+
+static void handle___kvm_get_mdcr_el2(struct kvm_cpu_context *host_ctxt)
+{
+ cpu_reg(host_ctxt, 1) = __kvm_get_mdcr_el2();
+}
+
+static void handle___vgic_v3_save_aprs(struct kvm_cpu_context *host_ctxt)
+{
+ DECLARE_REG(struct vgic_v3_cpu_if *, cpu_if, host_ctxt, 1);
+
+ __vgic_v3_save_aprs(kern_hyp_va(cpu_if));
+}
+
+static void handle___vgic_v3_restore_aprs(struct kvm_cpu_context *host_ctxt)
+{
+ DECLARE_REG(struct vgic_v3_cpu_if *, cpu_if, host_ctxt, 1);
+
+ __vgic_v3_restore_aprs(kern_hyp_va(cpu_if));
+}
+
+typedef void (*hcall_t)(struct kvm_cpu_context *);
+
+#define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = kimg_fn_ptr(handle_##x)
+
+static const hcall_t *host_hcall[] = {
+ HANDLE_FUNC(__kvm_vcpu_run),
+ HANDLE_FUNC(__kvm_flush_vm_context),
+ HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa),
+ HANDLE_FUNC(__kvm_tlb_flush_vmid),
+ HANDLE_FUNC(__kvm_tlb_flush_local_vmid),
+ HANDLE_FUNC(__kvm_timer_set_cntvoff),
+ HANDLE_FUNC(__kvm_enable_ssbs),
+ HANDLE_FUNC(__vgic_v3_get_ich_vtr_el2),
+ HANDLE_FUNC(__vgic_v3_read_vmcr),
+ HANDLE_FUNC(__vgic_v3_write_vmcr),
+ HANDLE_FUNC(__vgic_v3_init_lrs),
+ HANDLE_FUNC(__kvm_get_mdcr_el2),
+ HANDLE_FUNC(__vgic_v3_save_aprs),
+ HANDLE_FUNC(__vgic_v3_restore_aprs),
+};
+
+static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
+{
+ DECLARE_REG(unsigned long, id, host_ctxt, 0);
+ const hcall_t *kfn;
+ hcall_t hfn;
+
+ id -= KVM_HOST_SMCCC_ID(0);
+
+ if (unlikely(id >= ARRAY_SIZE(host_hcall)))
+ goto inval;
+
+ kfn = host_hcall[id];
+ if (unlikely(!kfn))
+ goto inval;
+
+ cpu_reg(host_ctxt, 0) = SMCCC_RET_SUCCESS;
+
+ hfn = kimg_fn_hyp_va(kfn);
+ hfn(host_ctxt);
+
+ return;
+inval:
+ cpu_reg(host_ctxt, 0) = SMCCC_RET_NOT_SUPPORTED;
}
void handle_trap(struct kvm_cpu_context *host_ctxt)
{
u64 esr = read_sysreg_el2(SYS_ESR);
- unsigned long func_id;
- if (ESR_ELx_EC(esr) != ESR_ELx_EC_HVC64)
+ if (unlikely(ESR_ELx_EC(esr) != ESR_ELx_EC_HVC64))
hyp_panic();
- func_id = host_ctxt->regs.regs[0];
- handle_host_hcall(func_id, host_ctxt);
+ handle_host_hcall(host_ctxt);
}
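
The dispatch pattern is easy to model in isolation: index an array of handlers by the rebased function id, reject out-of-range or unpopulated entries, and let each handler pull its arguments out of (and write results back into) the saved context. A self-contained sketch with two made-up hcalls:

#include <stdint.h>
#include <stdio.h>

struct ctxt { uint64_t regs[4]; };

typedef void (*hcall_t)(struct ctxt *);

static void handle_a(struct ctxt *c) { c->regs[1] = 42; }
static void handle_b(struct ctxt *c) { c->regs[1] = c->regs[1] + 1; }

static const hcall_t host_hcall[] = { handle_a, handle_b };

#define SMCCC_RET_SUCCESS	0
#define SMCCC_RET_NOT_SUPPORTED	(-1)

static void dispatch(struct ctxt *c)
{
	uint64_t id = c->regs[0];	/* already rebased to 0 */

	if (id >= sizeof(host_hcall) / sizeof(host_hcall[0]) ||
	    !host_hcall[id]) {
		c->regs[0] = (uint64_t)SMCCC_RET_NOT_SUPPORTED;
		return;
	}
	c->regs[0] = SMCCC_RET_SUCCESS;
	host_hcall[id](c);
}

int main(void)
{
	struct ctxt c = { .regs = { 0, 7 } };

	dispatch(&c);
	printf("ret=%lld r1=%lld\n",
	       (long long)c.regs[0], (long long)c.regs[1]);
	return 0;
}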
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index 8ae8160bc93a..3e50ff35aa4f 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -4,6 +4,7 @@
* Author: Marc Zyngier <marc.zyngier@arm.com>
*/
+#include <hyp/adjust_pc.h>
#include <hyp/switch.h>
#include <hyp/sysreg-sr.h>
@@ -189,6 +190,8 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
__sysreg_save_state_nvhe(host_ctxt);
+ __adjust_pc(vcpu);
+
/*
* We must restore the 32-bit state before the sysregs, thanks
* to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72).
diff --git a/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c
index 88a25fc8fcd3..29305022bc04 100644
--- a/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c
@@ -33,14 +33,3 @@ void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt)
__sysreg_restore_user_state(ctxt);
__sysreg_restore_el2_return_state(ctxt);
}
-
-void __kvm_enable_ssbs(void)
-{
- u64 tmp;
-
- asm volatile(
- "mrs %0, sctlr_el2\n"
- "orr %0, %0, %1\n"
- "msr sctlr_el2, %0"
- : "=&r" (tmp) : "L" (SCTLR_ELx_DSSBS));
-}
diff --git a/arch/arm64/kvm/hyp/smccc_wa.S b/arch/arm64/kvm/hyp/smccc_wa.S
deleted file mode 100644
index b0441dbdf68b..000000000000
--- a/arch/arm64/kvm/hyp/smccc_wa.S
+++ /dev/null
@@ -1,32 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2015-2018 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- */
-
-#include <linux/arm-smccc.h>
-#include <linux/linkage.h>
-
-#include <asm/kvm_asm.h>
-#include <asm/kvm_mmu.h>
-
- /*
- * This is not executed directly and is instead copied into the vectors
- * by install_bp_hardening_cb().
- */
- .data
- .pushsection .rodata
- .global __smccc_workaround_1_smc
-SYM_DATA_START(__smccc_workaround_1_smc)
- esb
- sub sp, sp, #(8 * 4)
- stp x2, x3, [sp, #(8 * 0)]
- stp x0, x1, [sp, #(8 * 2)]
- mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1
- smc #0
- ldp x2, x3, [sp, #(8 * 0)]
- ldp x0, x1, [sp, #(8 * 2)]
- add sp, sp, #(8 * 4)
-1: .org __smccc_workaround_1_smc + __SMCCC_WORKAROUND_1_SMC_SZ
- .org 1b
-SYM_DATA_END(__smccc_workaround_1_smc)
diff --git a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
index bd1bab551d48..8f0585640241 100644
--- a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
+++ b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
@@ -4,6 +4,8 @@
* Author: Marc Zyngier <marc.zyngier@arm.com>
*/
+#include <hyp/adjust_pc.h>
+
#include <linux/compiler.h>
#include <linux/irqchip/arm-gic.h>
#include <linux/kvm_host.h>
diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c
index 452f4cacd674..80406f463c28 100644
--- a/arch/arm64/kvm/hyp/vgic-v3-sr.c
+++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c
@@ -4,6 +4,8 @@
* Author: Marc Zyngier <marc.zyngier@arm.com>
*/
+#include <hyp/adjust_pc.h>
+
#include <linux/compiler.h>
#include <linux/irqchip/arm-gic-v3.h>
#include <linux/kvm_host.h>
diff --git a/arch/arm64/kvm/hyp/vhe/Makefile b/arch/arm64/kvm/hyp/vhe/Makefile
index 461e97c375cc..96bec0ecf9dd 100644
--- a/arch/arm64/kvm/hyp/vhe/Makefile
+++ b/arch/arm64/kvm/hyp/vhe/Makefile
@@ -8,4 +8,4 @@ ccflags-y := -D__KVM_VHE_HYPERVISOR__
obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o
obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
- ../fpsimd.o ../hyp-entry.o
+ ../fpsimd.o ../hyp-entry.o ../exception.o
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 62546e20b251..af8e940d0f03 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -4,6 +4,7 @@
* Author: Marc Zyngier <marc.zyngier@arm.com>
*/
+#include <hyp/adjust_pc.h>
#include <hyp/switch.h>
#include <linux/arm-smccc.h>
@@ -133,6 +134,8 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
__load_guest_stage2(vcpu->arch.hw_mmu);
__activate_traps(vcpu);
+ __adjust_pc(vcpu);
+
sysreg_restore_guest_state_vhe(guest_ctxt);
__debug_switch_to_guest(vcpu);
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
index 34a96ab244fa..b47df73e98d7 100644
--- a/arch/arm64/kvm/inject_fault.c
+++ b/arch/arm64/kvm/inject_fault.c
@@ -14,119 +14,15 @@
#include <asm/kvm_emulate.h>
#include <asm/esr.h>
-#define CURRENT_EL_SP_EL0_VECTOR 0x0
-#define CURRENT_EL_SP_ELx_VECTOR 0x200
-#define LOWER_EL_AArch64_VECTOR 0x400
-#define LOWER_EL_AArch32_VECTOR 0x600
-
-enum exception_type {
- except_type_sync = 0,
- except_type_irq = 0x80,
- except_type_fiq = 0x100,
- except_type_serror = 0x180,
-};
-
-/*
- * This performs the exception entry at a given EL (@target_mode), stashing PC
- * and PSTATE into ELR and SPSR respectively, and compute the new PC/PSTATE.
- * The EL passed to this function *must* be a non-secure, privileged mode with
- * bit 0 being set (PSTATE.SP == 1).
- *
- * When an exception is taken, most PSTATE fields are left unchanged in the
- * handler. However, some are explicitly overridden (e.g. M[4:0]). Luckily all
- * of the inherited bits have the same position in the AArch64/AArch32 SPSR_ELx
- * layouts, so we don't need to shuffle these for exceptions from AArch32 EL0.
- *
- * For the SPSR_ELx layout for AArch64, see ARM DDI 0487E.a page C5-429.
- * For the SPSR_ELx layout for AArch32, see ARM DDI 0487E.a page C5-426.
- *
- * Here we manipulate the fields in order of the AArch64 SPSR_ELx layout, from
- * MSB to LSB.
- */
-static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode,
- enum exception_type type)
-{
- unsigned long sctlr, vbar, old, new, mode;
- u64 exc_offset;
-
- mode = *vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT);
-
- if (mode == target_mode)
- exc_offset = CURRENT_EL_SP_ELx_VECTOR;
- else if ((mode | PSR_MODE_THREAD_BIT) == target_mode)
- exc_offset = CURRENT_EL_SP_EL0_VECTOR;
- else if (!(mode & PSR_MODE32_BIT))
- exc_offset = LOWER_EL_AArch64_VECTOR;
- else
- exc_offset = LOWER_EL_AArch32_VECTOR;
-
- switch (target_mode) {
- case PSR_MODE_EL1h:
- vbar = vcpu_read_sys_reg(vcpu, VBAR_EL1);
- sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
- vcpu_write_sys_reg(vcpu, *vcpu_pc(vcpu), ELR_EL1);
- break;
- default:
- /* Don't do that */
- BUG();
- }
-
- *vcpu_pc(vcpu) = vbar + exc_offset + type;
-
- old = *vcpu_cpsr(vcpu);
- new = 0;
-
- new |= (old & PSR_N_BIT);
- new |= (old & PSR_Z_BIT);
- new |= (old & PSR_C_BIT);
- new |= (old & PSR_V_BIT);
-
- // TODO: TCO (if/when ARMv8.5-MemTag is exposed to guests)
-
- new |= (old & PSR_DIT_BIT);
-
- // PSTATE.UAO is set to zero upon any exception to AArch64
- // See ARM DDI 0487E.a, page D5-2579.
-
- // PSTATE.PAN is unchanged unless SCTLR_ELx.SPAN == 0b0
- // SCTLR_ELx.SPAN is RES1 when ARMv8.1-PAN is not implemented
- // See ARM DDI 0487E.a, page D5-2578.
- new |= (old & PSR_PAN_BIT);
- if (!(sctlr & SCTLR_EL1_SPAN))
- new |= PSR_PAN_BIT;
-
- // PSTATE.SS is set to zero upon any exception to AArch64
- // See ARM DDI 0487E.a, page D2-2452.
-
- // PSTATE.IL is set to zero upon any exception to AArch64
- // See ARM DDI 0487E.a, page D1-2306.
-
- // PSTATE.SSBS is set to SCTLR_ELx.DSSBS upon any exception to AArch64
- // See ARM DDI 0487E.a, page D13-3258
- if (sctlr & SCTLR_ELx_DSSBS)
- new |= PSR_SSBS_BIT;
-
- // PSTATE.BTYPE is set to zero upon any exception to AArch64
- // See ARM DDI 0487E.a, pages D1-2293 to D1-2294.
-
- new |= PSR_D_BIT;
- new |= PSR_A_BIT;
- new |= PSR_I_BIT;
- new |= PSR_F_BIT;
-
- new |= target_mode;
-
- *vcpu_cpsr(vcpu) = new;
- vcpu_write_spsr(vcpu, old);
-}
-
static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr)
{
unsigned long cpsr = *vcpu_cpsr(vcpu);
bool is_aarch32 = vcpu_mode_is_32bit(vcpu);
u32 esr = 0;
- enter_exception64(vcpu, PSR_MODE_EL1h, except_type_sync);
+ vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA64_EL1 |
+ KVM_ARM64_EXCEPT_AA64_ELx_SYNC |
+ KVM_ARM64_PENDING_EXCEPTION);
vcpu_write_sys_reg(vcpu, addr, FAR_EL1);
@@ -156,7 +52,9 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
{
u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
- enter_exception64(vcpu, PSR_MODE_EL1h, except_type_sync);
+ vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA64_EL1 |
+ KVM_ARM64_EXCEPT_AA64_ELx_SYNC |
+ KVM_ARM64_PENDING_EXCEPTION);
/*
* Build an unknown exception, depending on the instruction
@@ -168,6 +66,53 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
vcpu_write_sys_reg(vcpu, esr, ESR_EL1);
}
+#define DFSR_FSC_EXTABT_LPAE 0x10
+#define DFSR_FSC_EXTABT_nLPAE 0x08
+#define DFSR_LPAE BIT(9)
+#define TTBCR_EAE BIT(31)
+
+static void inject_undef32(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA32_UND |
+ KVM_ARM64_PENDING_EXCEPTION);
+}
+
+/*
+ * Modelled after TakeDataAbortException() and TakePrefetchAbortException()
+ * pseudocode.
+ */
+static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, u32 addr)
+{
+ u64 far;
+ u32 fsr;
+
+ /* Give the guest an IMPLEMENTATION DEFINED exception */
+ if (vcpu_read_sys_reg(vcpu, TCR_EL1) & TTBCR_EAE) {
+ fsr = DFSR_LPAE | DFSR_FSC_EXTABT_LPAE;
+ } else {
+ /* no need to shuffle FS[4] into DFSR[10] as it's 0 */
+ fsr = DFSR_FSC_EXTABT_nLPAE;
+ }
+
+ far = vcpu_read_sys_reg(vcpu, FAR_EL1);
+
+ if (is_pabt) {
+ vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA32_IABT |
+ KVM_ARM64_PENDING_EXCEPTION);
+ far &= GENMASK(31, 0);
+ far |= (u64)addr << 32;
+ vcpu_write_sys_reg(vcpu, fsr, IFSR32_EL2);
+ } else { /* !iabt */
+ vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA32_DABT |
+ KVM_ARM64_PENDING_EXCEPTION);
+ far &= GENMASK(63, 32);
+ far |= addr;
+ vcpu_write_sys_reg(vcpu, fsr, ESR_EL1);
+ }
+
+ vcpu_write_sys_reg(vcpu, far, FAR_EL1);
+}
+
/**
* kvm_inject_dabt - inject a data abort into the guest
* @vcpu: The VCPU to receive the data abort
@@ -179,7 +124,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr)
{
if (vcpu_el1_is_32bit(vcpu))
- kvm_inject_dabt32(vcpu, addr);
+ inject_abt32(vcpu, false, addr);
else
inject_abt64(vcpu, false, addr);
}
@@ -195,7 +140,7 @@ void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr)
void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr)
{
if (vcpu_el1_is_32bit(vcpu))
- kvm_inject_pabt32(vcpu, addr);
+ inject_abt32(vcpu, true, addr);
else
inject_abt64(vcpu, true, addr);
}
@@ -210,7 +155,7 @@ void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr)
void kvm_inject_undefined(struct kvm_vcpu *vcpu)
{
if (vcpu_el1_is_32bit(vcpu))
- kvm_inject_undef32(vcpu);
+ inject_undef32(vcpu);
else
inject_undef64(vcpu);
}
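
The FAR_EL1 split mirrors the architected AArch32 mapping (DFAR in bits [31:0], IFAR in bits [63:32]), so a prefetch abort and a data abort update disjoint halves. A standalone model of the packing:

#include <stdint.h>
#include <stdio.h>

static uint64_t update_far(uint64_t far, uint32_t addr, int is_pabt)
{
	if (is_pabt) {
		far &= 0x00000000ffffffffULL;	/* keep DFAR */
		far |= (uint64_t)addr << 32;	/* set IFAR */
	} else {
		far &= 0xffffffff00000000ULL;	/* keep IFAR */
		far |= addr;			/* set DFAR */
	}
	return far;
}

int main(void)
{
	uint64_t far = 0;

	far = update_far(far, 0x1000, 0);	/* data abort at 0x1000 */
	far = update_far(far, 0x2000, 1);	/* prefetch abort at 0x2000 */
	printf("FAR_EL1 = %#llx\n", (unsigned long long)far);
	return 0;
}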
diff --git a/arch/arm64/kvm/mmio.c b/arch/arm64/kvm/mmio.c
index 6a2826f1bf5e..3e2d8ba11a02 100644
--- a/arch/arm64/kvm/mmio.c
+++ b/arch/arm64/kvm/mmio.c
@@ -115,7 +115,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu)
* The MMIO instruction is emulated and should not be re-executed
* in the guest.
*/
- kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+ kvm_incr_pc(vcpu);
return 0;
}
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 57972bdb213a..1f41173e6149 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -788,10 +788,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
}
switch (vma_shift) {
+#ifndef __PAGETABLE_PMD_FOLDED
case PUD_SHIFT:
if (fault_supports_stage2_huge_mapping(memslot, hva, PUD_SIZE))
break;
fallthrough;
+#endif
case CONT_PMD_SHIFT:
vma_shift = PMD_SHIFT;
fallthrough;
@@ -1014,7 +1016,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
* cautious, and skip the instruction.
*/
if (kvm_is_error_hva(hva) && kvm_vcpu_dabt_is_cm(vcpu)) {
- kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+ kvm_incr_pc(vcpu);
ret = 1;
goto out_unlock;
}
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index 2ed5ef8f274b..398f6df1bbe4 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -384,7 +384,7 @@ static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
struct kvm_pmu *pmu = &vcpu->arch.pmu;
bool overflow;
- if (!kvm_arm_pmu_v3_ready(vcpu))
+ if (!kvm_vcpu_has_pmu(vcpu))
return;
overflow = !!kvm_pmu_overflow_status(vcpu);
@@ -825,9 +825,12 @@ bool kvm_arm_support_pmu_v3(void)
int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
{
- if (!vcpu->arch.pmu.created)
+ if (!kvm_vcpu_has_pmu(vcpu))
return 0;
+ if (!vcpu->arch.pmu.created)
+ return -EINVAL;
+
/*
* A valid interrupt configuration for the PMU is either to have a
* properly configured interrupt number and using an in-kernel
@@ -835,9 +838,6 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
*/
if (irqchip_in_kernel(vcpu->kvm)) {
int irq = vcpu->arch.pmu.irq_num;
- if (!kvm_arm_pmu_irq_initialized(vcpu))
- return -EINVAL;
-
/*
* If we are using an in-kernel vgic, at this point we know
* the vgic will be initialized, so we can check the PMU irq
@@ -851,7 +851,6 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
}
kvm_pmu_vcpu_reset(vcpu);
- vcpu->arch.pmu.ready = true;
return 0;
}
@@ -913,8 +912,7 @@ static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
- if (!kvm_arm_support_pmu_v3() ||
- !test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
+ if (!kvm_vcpu_has_pmu(vcpu))
return -ENODEV;
if (vcpu->arch.pmu.created)
@@ -1015,7 +1013,7 @@ int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
if (!irqchip_in_kernel(vcpu->kvm))
return -EINVAL;
- if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
+ if (!kvm_vcpu_has_pmu(vcpu))
return -ENODEV;
if (!kvm_arm_pmu_irq_initialized(vcpu))
@@ -1035,8 +1033,7 @@ int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
case KVM_ARM_VCPU_PMU_V3_IRQ:
case KVM_ARM_VCPU_PMU_V3_INIT:
case KVM_ARM_VCPU_PMU_V3_FILTER:
- if (kvm_arm_support_pmu_v3() &&
- test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
+ if (kvm_vcpu_has_pmu(vcpu))
return 0;
}
diff --git a/arch/arm64/kvm/regmap.c b/arch/arm64/kvm/regmap.c
deleted file mode 100644
index accc1d5fba61..000000000000
--- a/arch/arm64/kvm/regmap.c
+++ /dev/null
@@ -1,224 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2012,2013 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * Derived from arch/arm/kvm/emulate.c:
- * Copyright (C) 2012 - Virtual Open Systems and Columbia University
- * Author: Christoffer Dall <c.dall@virtualopensystems.com>
- */
-
-#include <linux/mm.h>
-#include <linux/kvm_host.h>
-#include <asm/kvm_emulate.h>
-#include <asm/ptrace.h>
-
-#define VCPU_NR_MODES 6
-#define REG_OFFSET(_reg) \
- (offsetof(struct user_pt_regs, _reg) / sizeof(unsigned long))
-
-#define USR_REG_OFFSET(R) REG_OFFSET(compat_usr(R))
-
-static const unsigned long vcpu_reg_offsets[VCPU_NR_MODES][16] = {
- /* USR Registers */
- {
- USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
- USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
- USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
- USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
- USR_REG_OFFSET(12), USR_REG_OFFSET(13), USR_REG_OFFSET(14),
- REG_OFFSET(pc)
- },
-
- /* FIQ Registers */
- {
- USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
- USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
- USR_REG_OFFSET(6), USR_REG_OFFSET(7),
- REG_OFFSET(compat_r8_fiq), /* r8 */
- REG_OFFSET(compat_r9_fiq), /* r9 */
- REG_OFFSET(compat_r10_fiq), /* r10 */
- REG_OFFSET(compat_r11_fiq), /* r11 */
- REG_OFFSET(compat_r12_fiq), /* r12 */
- REG_OFFSET(compat_sp_fiq), /* r13 */
- REG_OFFSET(compat_lr_fiq), /* r14 */
- REG_OFFSET(pc)
- },
-
- /* IRQ Registers */
- {
- USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
- USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
- USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
- USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
- USR_REG_OFFSET(12),
- REG_OFFSET(compat_sp_irq), /* r13 */
- REG_OFFSET(compat_lr_irq), /* r14 */
- REG_OFFSET(pc)
- },
-
- /* SVC Registers */
- {
- USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
- USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
- USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
- USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
- USR_REG_OFFSET(12),
- REG_OFFSET(compat_sp_svc), /* r13 */
- REG_OFFSET(compat_lr_svc), /* r14 */
- REG_OFFSET(pc)
- },
-
- /* ABT Registers */
- {
- USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
- USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
- USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
- USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
- USR_REG_OFFSET(12),
- REG_OFFSET(compat_sp_abt), /* r13 */
- REG_OFFSET(compat_lr_abt), /* r14 */
- REG_OFFSET(pc)
- },
-
- /* UND Registers */
- {
- USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
- USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
- USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
- USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
- USR_REG_OFFSET(12),
- REG_OFFSET(compat_sp_und), /* r13 */
- REG_OFFSET(compat_lr_und), /* r14 */
- REG_OFFSET(pc)
- },
-};
-
-/*
- * Return a pointer to the register number valid in the current mode of
- * the virtual CPU.
- */
-unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num)
-{
- unsigned long *reg_array = (unsigned long *)&vcpu->arch.ctxt.regs;
- unsigned long mode = *vcpu_cpsr(vcpu) & PSR_AA32_MODE_MASK;
-
- switch (mode) {
- case PSR_AA32_MODE_USR ... PSR_AA32_MODE_SVC:
- mode &= ~PSR_MODE32_BIT; /* 0 ... 3 */
- break;
-
- case PSR_AA32_MODE_ABT:
- mode = 4;
- break;
-
- case PSR_AA32_MODE_UND:
- mode = 5;
- break;
-
- case PSR_AA32_MODE_SYS:
- mode = 0; /* SYS maps to USR */
- break;
-
- default:
- BUG();
- }
-
- return reg_array + vcpu_reg_offsets[mode][reg_num];
-}
-
-/*
- * Return the SPSR for the current mode of the virtual CPU.
- */
-static int vcpu_spsr32_mode(const struct kvm_vcpu *vcpu)
-{
- unsigned long mode = *vcpu_cpsr(vcpu) & PSR_AA32_MODE_MASK;
- switch (mode) {
- case PSR_AA32_MODE_SVC: return KVM_SPSR_SVC;
- case PSR_AA32_MODE_ABT: return KVM_SPSR_ABT;
- case PSR_AA32_MODE_UND: return KVM_SPSR_UND;
- case PSR_AA32_MODE_IRQ: return KVM_SPSR_IRQ;
- case PSR_AA32_MODE_FIQ: return KVM_SPSR_FIQ;
- default: BUG();
- }
-}
-
-unsigned long vcpu_read_spsr32(const struct kvm_vcpu *vcpu)
-{
- int spsr_idx = vcpu_spsr32_mode(vcpu);
-
- if (!vcpu->arch.sysregs_loaded_on_cpu) {
- switch (spsr_idx) {
- case KVM_SPSR_SVC:
- return __vcpu_sys_reg(vcpu, SPSR_EL1);
- case KVM_SPSR_ABT:
- return vcpu->arch.ctxt.spsr_abt;
- case KVM_SPSR_UND:
- return vcpu->arch.ctxt.spsr_und;
- case KVM_SPSR_IRQ:
- return vcpu->arch.ctxt.spsr_irq;
- case KVM_SPSR_FIQ:
- return vcpu->arch.ctxt.spsr_fiq;
- }
- }
-
- switch (spsr_idx) {
- case KVM_SPSR_SVC:
- return read_sysreg_el1(SYS_SPSR);
- case KVM_SPSR_ABT:
- return read_sysreg(spsr_abt);
- case KVM_SPSR_UND:
- return read_sysreg(spsr_und);
- case KVM_SPSR_IRQ:
- return read_sysreg(spsr_irq);
- case KVM_SPSR_FIQ:
- return read_sysreg(spsr_fiq);
- default:
- BUG();
- }
-}
-
-void vcpu_write_spsr32(struct kvm_vcpu *vcpu, unsigned long v)
-{
- int spsr_idx = vcpu_spsr32_mode(vcpu);
-
- if (!vcpu->arch.sysregs_loaded_on_cpu) {
- switch (spsr_idx) {
- case KVM_SPSR_SVC:
- __vcpu_sys_reg(vcpu, SPSR_EL1) = v;
- break;
- case KVM_SPSR_ABT:
- vcpu->arch.ctxt.spsr_abt = v;
- break;
- case KVM_SPSR_UND:
- vcpu->arch.ctxt.spsr_und = v;
- break;
- case KVM_SPSR_IRQ:
- vcpu->arch.ctxt.spsr_irq = v;
- break;
- case KVM_SPSR_FIQ:
- vcpu->arch.ctxt.spsr_fiq = v;
- break;
- }
-
- return;
- }
-
- switch (spsr_idx) {
- case KVM_SPSR_SVC:
- write_sysreg_el1(v, SYS_SPSR);
- break;
- case KVM_SPSR_ABT:
- write_sysreg(v, spsr_abt);
- break;
- case KVM_SPSR_UND:
- write_sysreg(v, spsr_und);
- break;
- case KVM_SPSR_IRQ:
- write_sysreg(v, spsr_irq);
- break;
- case KVM_SPSR_FIQ:
- write_sysreg(v, spsr_fiq);
- break;
- }
-}
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index c7985f4607a9..47f3f035f3ea 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -25,7 +25,6 @@
#include <asm/ptrace.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
-#include <asm/kvm_coproc.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_mmu.h>
#include <asm/virt.h>
@@ -234,6 +233,10 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
pstate = VCPU_RESET_PSTATE_EL1;
}
+ if (kvm_vcpu_has_pmu(vcpu) && !kvm_arm_support_pmu_v3()) {
+ ret = -EINVAL;
+ goto out;
+ }
break;
}
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index fb12d3ef423a..3313dedfa505 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -20,7 +20,6 @@
#include <asm/debug-monitors.h>
#include <asm/esr.h>
#include <asm/kvm_arm.h>
-#include <asm/kvm_coproc.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
@@ -64,87 +63,6 @@ static bool write_to_read_only(struct kvm_vcpu *vcpu,
return false;
}
-static bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val)
-{
- /*
- * System registers listed in the switch are not saved on every
- * exit from the guest but are only saved on vcpu_put.
- *
- * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
- * should never be listed below, because the guest cannot modify its
- * own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's
- * thread when emulating cross-VCPU communication.
- */
- switch (reg) {
- case CSSELR_EL1: *val = read_sysreg_s(SYS_CSSELR_EL1); break;
- case SCTLR_EL1: *val = read_sysreg_s(SYS_SCTLR_EL12); break;
- case CPACR_EL1: *val = read_sysreg_s(SYS_CPACR_EL12); break;
- case TTBR0_EL1: *val = read_sysreg_s(SYS_TTBR0_EL12); break;
- case TTBR1_EL1: *val = read_sysreg_s(SYS_TTBR1_EL12); break;
- case TCR_EL1: *val = read_sysreg_s(SYS_TCR_EL12); break;
- case ESR_EL1: *val = read_sysreg_s(SYS_ESR_EL12); break;
- case AFSR0_EL1: *val = read_sysreg_s(SYS_AFSR0_EL12); break;
- case AFSR1_EL1: *val = read_sysreg_s(SYS_AFSR1_EL12); break;
- case FAR_EL1: *val = read_sysreg_s(SYS_FAR_EL12); break;
- case MAIR_EL1: *val = read_sysreg_s(SYS_MAIR_EL12); break;
- case VBAR_EL1: *val = read_sysreg_s(SYS_VBAR_EL12); break;
- case CONTEXTIDR_EL1: *val = read_sysreg_s(SYS_CONTEXTIDR_EL12);break;
- case TPIDR_EL0: *val = read_sysreg_s(SYS_TPIDR_EL0); break;
- case TPIDRRO_EL0: *val = read_sysreg_s(SYS_TPIDRRO_EL0); break;
- case TPIDR_EL1: *val = read_sysreg_s(SYS_TPIDR_EL1); break;
- case AMAIR_EL1: *val = read_sysreg_s(SYS_AMAIR_EL12); break;
- case CNTKCTL_EL1: *val = read_sysreg_s(SYS_CNTKCTL_EL12); break;
- case ELR_EL1: *val = read_sysreg_s(SYS_ELR_EL12); break;
- case PAR_EL1: *val = read_sysreg_par(); break;
- case DACR32_EL2: *val = read_sysreg_s(SYS_DACR32_EL2); break;
- case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break;
- case DBGVCR32_EL2: *val = read_sysreg_s(SYS_DBGVCR32_EL2); break;
- default: return false;
- }
-
- return true;
-}
-
-static bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg)
-{
- /*
- * System registers listed in the switch are not restored on every
- * entry to the guest but are only restored on vcpu_load.
- *
- * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
- * should never be listed below, because the MPIDR should only be set
- * once, before running the VCPU, and never changed later.
- */
- switch (reg) {
- case CSSELR_EL1: write_sysreg_s(val, SYS_CSSELR_EL1); break;
- case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); break;
- case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); break;
- case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); break;
- case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); break;
- case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); break;
- case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); break;
- case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); break;
- case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); break;
- case FAR_EL1: write_sysreg_s(val, SYS_FAR_EL12); break;
- case MAIR_EL1: write_sysreg_s(val, SYS_MAIR_EL12); break;
- case VBAR_EL1: write_sysreg_s(val, SYS_VBAR_EL12); break;
- case CONTEXTIDR_EL1: write_sysreg_s(val, SYS_CONTEXTIDR_EL12);break;
- case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); break;
- case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); break;
- case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); break;
- case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); break;
- case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); break;
- case ELR_EL1: write_sysreg_s(val, SYS_ELR_EL12); break;
- case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); break;
- case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); break;
- case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break;
- case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); break;
- default: return false;
- }
-
- return true;
-}
-
u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
{
u64 val = 0x8badf00d8badf00d;
@@ -169,7 +87,7 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
static u32 cache_levels;
/* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */
-#define CSSELR_MAX 12
+#define CSSELR_MAX 14
/* Which cache CCSIDR represents depends on CSSELR value. */
static u32 get_ccsidr(u32 csselr)
@@ -209,6 +127,24 @@ static bool access_dcsw(struct kvm_vcpu *vcpu,
return true;
}
+static void get_access_mask(const struct sys_reg_desc *r, u64 *mask, u64 *shift)
+{
+ switch (r->aarch32_map) {
+ case AA32_LO:
+ *mask = GENMASK_ULL(31, 0);
+ *shift = 0;
+ break;
+ case AA32_HI:
+ *mask = GENMASK_ULL(63, 32);
+ *shift = 32;
+ break;
+ default:
+ *mask = GENMASK_ULL(63, 0);
+ *shift = 0;
+ break;
+ }
+}
+
/*
* Generic accessor for VM registers. Only called as long as HCR_TVM
* is set. If the guest enables the MMU, we stop trapping the VM
@@ -219,26 +155,21 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *r)
{
bool was_enabled = vcpu_has_cache_enabled(vcpu);
- u64 val;
- int reg = r->reg;
+ u64 val, mask, shift;
BUG_ON(!p->is_write);
- /* See the 32bit mapping in kvm_host.h */
- if (p->is_aarch32)
- reg = r->reg / 2;
+ get_access_mask(r, &mask, &shift);
- if (!p->is_aarch32 || !p->is_32bit) {
- val = p->regval;
+ if (~mask) {
+ val = vcpu_read_sys_reg(vcpu, r->reg);
+ val &= ~mask;
} else {
- val = vcpu_read_sys_reg(vcpu, reg);
- if (r->reg % 2)
- val = (p->regval << 32) | (u64)lower_32_bits(val);
- else
- val = ((u64)upper_32_bits(val) << 32) |
- lower_32_bits(p->regval);
+ val = 0;
}
- vcpu_write_sys_reg(vcpu, val, reg);
+
+ val |= (p->regval & (mask >> shift)) << shift;
+ vcpu_write_sys_reg(vcpu, val, r->reg);
kvm_toggle_cache(vcpu, was_enabled);
return true;
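
The mask/shift pair computed by get_access_mask() turns every AArch32 LO/HI alias into the same read-modify-write, replacing the old reg/2 and upper/lower_32_bits juggling. A minimal standalone sketch of the merge (user-space C, helper and variable names hypothetical):

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for the kernel's aarch32_map values. */
enum aa32_map { AA32_ZEROHIGH, AA32_LO, AA32_HI };

static void get_mask(enum aa32_map map, uint64_t *mask, uint64_t *shift)
{
        switch (map) {
        case AA32_LO: *mask = 0x00000000ffffffffULL; *shift = 0;  break;
        case AA32_HI: *mask = 0xffffffff00000000ULL; *shift = 32; break;
        default:      *mask = ~0ULL;                 *shift = 0;  break;
        }
}

int main(void)
{
        uint64_t sysreg = 0x1111111122222222ULL; /* current 64bit value */
        uint32_t wval   = 0xdeadbeef;            /* 32bit guest write   */
        uint64_t mask, shift, val;

        get_mask(AA32_HI, &mask, &shift);        /* e.g. TTBCR2 -> TCR_EL1[63:32] */

        val = ~mask ? sysreg & ~mask : 0;        /* keep the untouched half */
        val |= ((uint64_t)wval & (mask >> shift)) << shift;

        printf("%016llx\n", (unsigned long long)val); /* deadbeef22222222 */
        return 0;
}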
@@ -248,17 +179,13 @@ static bool access_actlr(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
+ u64 mask, shift;
+
if (p->is_write)
return ignore_write(vcpu, p);
- p->regval = vcpu_read_sys_reg(vcpu, ACTLR_EL1);
-
- if (p->is_aarch32) {
- if (r->Op2 & 2)
- p->regval = upper_32_bits(p->regval);
- else
- p->regval = lower_32_bits(p->regval);
- }
+ get_access_mask(r, &mask, &shift);
+ p->regval = (vcpu_read_sys_reg(vcpu, r->reg) & mask) >> shift;
return true;
}
@@ -285,7 +212,7 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu,
* equivalent to ICC_SGI0R_EL1, as there is no "alternative" secure
* group.
*/
- if (p->is_aarch32) {
+ if (p->Op0 == 0) { /* AArch32 */
switch (p->Op1) {
default: /* Keep GCC quiet */
case 0: /* ICC_SGI1R */
@@ -296,7 +223,7 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu,
g1 = false;
break;
}
- } else {
+ } else { /* AArch64 */
switch (p->Op2) {
default: /* Keep GCC quiet */
case 5: /* ICC_SGI1R_EL1 */
@@ -438,26 +365,30 @@ static bool trap_debug_regs(struct kvm_vcpu *vcpu,
*/
static void reg_to_dbg(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
+ const struct sys_reg_desc *rd,
u64 *dbg_reg)
{
- u64 val = p->regval;
+ u64 mask, shift, val;
- if (p->is_32bit) {
- val &= 0xffffffffUL;
- val |= ((*dbg_reg >> 32) << 32);
- }
+ get_access_mask(rd, &mask, &shift);
+ val = *dbg_reg;
+ val &= ~mask;
+ val |= (p->regval & (mask >> shift)) << shift;
*dbg_reg = val;
+
vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY;
}
static void dbg_to_reg(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
+ const struct sys_reg_desc *rd,
u64 *dbg_reg)
{
- p->regval = *dbg_reg;
- if (p->is_32bit)
- p->regval &= 0xffffffffUL;
+ u64 mask, shift;
+
+ get_access_mask(rd, &mask, &shift);
+ p->regval = (*dbg_reg & mask) >> shift;
}
static bool trap_bvr(struct kvm_vcpu *vcpu,
@@ -467,9 +398,9 @@ static bool trap_bvr(struct kvm_vcpu *vcpu,
u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg];
if (p->is_write)
- reg_to_dbg(vcpu, p, dbg_reg);
+ reg_to_dbg(vcpu, p, rd, dbg_reg);
else
- dbg_to_reg(vcpu, p, dbg_reg);
+ dbg_to_reg(vcpu, p, rd, dbg_reg);
trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg);
@@ -509,9 +440,9 @@ static bool trap_bcr(struct kvm_vcpu *vcpu,
u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg];
if (p->is_write)
- reg_to_dbg(vcpu, p, dbg_reg);
+ reg_to_dbg(vcpu, p, rd, dbg_reg);
else
- dbg_to_reg(vcpu, p, dbg_reg);
+ dbg_to_reg(vcpu, p, rd, dbg_reg);
trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg);
@@ -552,9 +483,9 @@ static bool trap_wvr(struct kvm_vcpu *vcpu,
u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg];
if (p->is_write)
- reg_to_dbg(vcpu, p, dbg_reg);
+ reg_to_dbg(vcpu, p, rd, dbg_reg);
else
- dbg_to_reg(vcpu, p, dbg_reg);
+ dbg_to_reg(vcpu, p, rd, dbg_reg);
trace_trap_reg(__func__, rd->reg, p->is_write,
vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]);
@@ -595,9 +526,9 @@ static bool trap_wcr(struct kvm_vcpu *vcpu,
u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg];
if (p->is_write)
- reg_to_dbg(vcpu, p, dbg_reg);
+ reg_to_dbg(vcpu, p, rd, dbg_reg);
else
- dbg_to_reg(vcpu, p, dbg_reg);
+ dbg_to_reg(vcpu, p, rd, dbg_reg);
trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg);
@@ -678,8 +609,9 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
static bool check_pmu_access_disabled(struct kvm_vcpu *vcpu, u64 flags)
{
u64 reg = __vcpu_sys_reg(vcpu, PMUSERENR_EL0);
- bool enabled = (reg & flags) || vcpu_mode_priv(vcpu);
+ bool enabled = kvm_vcpu_has_pmu(vcpu);
+ enabled &= (reg & flags) || vcpu_mode_priv(vcpu);
if (!enabled)
kvm_inject_undefined(vcpu);
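
Folding kvm_vcpu_has_pmu() into the predicate means a guest configured without a PMU now takes the UNDEF path for every PMU access, rather than the old RAZ/WI behaviour. A hedged sketch of the combined gate (all names hypothetical):

#include <stdbool.h>
#include <stdint.h>

/* Shape of the reworked gate: no PMU means never enabled, so every
 * access is rejected and UNDEFs. */
static bool pmu_access_disabled(bool has_pmu, uint64_t pmuserenr,
                                uint64_t flags, bool privileged)
{
        bool enabled = has_pmu;

        enabled &= (pmuserenr & flags) || privileged;
        return !enabled;
}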
@@ -711,9 +643,6 @@ static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
{
u64 val;
- if (!kvm_arm_pmu_v3_ready(vcpu))
- return trap_raz_wi(vcpu, p, r);
-
if (pmu_access_el0_disabled(vcpu))
return false;
@@ -740,9 +669,6 @@ static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
static bool access_pmselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
- if (!kvm_arm_pmu_v3_ready(vcpu))
- return trap_raz_wi(vcpu, p, r);
-
if (pmu_access_event_counter_el0_disabled(vcpu))
return false;
@@ -761,9 +687,6 @@ static bool access_pmceid(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
{
u64 pmceid;
- if (!kvm_arm_pmu_v3_ready(vcpu))
- return trap_raz_wi(vcpu, p, r);
-
BUG_ON(p->is_write);
if (pmu_access_el0_disabled(vcpu))
@@ -794,10 +717,7 @@ static bool access_pmu_evcntr(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
- u64 idx;
-
- if (!kvm_arm_pmu_v3_ready(vcpu))
- return trap_raz_wi(vcpu, p, r);
+ u64 idx = ~0UL;
if (r->CRn == 9 && r->CRm == 13) {
if (r->Op2 == 2) {
@@ -813,8 +733,6 @@ static bool access_pmu_evcntr(struct kvm_vcpu *vcpu,
return false;
idx = ARMV8_PMU_CYCLE_IDX;
- } else {
- return false;
}
} else if (r->CRn == 0 && r->CRm == 9) {
/* PMCCNTR */
@@ -828,10 +746,11 @@ static bool access_pmu_evcntr(struct kvm_vcpu *vcpu,
return false;
idx = ((r->CRm & 3) << 3) | (r->Op2 & 7);
- } else {
- return false;
}
+ /* Catch any decoding mistake */
+ WARN_ON(idx == ~0UL);
+
if (!pmu_counter_idx_valid(vcpu, idx))
return false;
@@ -852,9 +771,6 @@ static bool access_pmu_evtyper(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
{
u64 idx, reg;
- if (!kvm_arm_pmu_v3_ready(vcpu))
- return trap_raz_wi(vcpu, p, r);
-
if (pmu_access_el0_disabled(vcpu))
return false;
@@ -892,9 +808,6 @@ static bool access_pmcnten(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
{
u64 val, mask;
- if (!kvm_arm_pmu_v3_ready(vcpu))
- return trap_raz_wi(vcpu, p, r);
-
if (pmu_access_el0_disabled(vcpu))
return false;
@@ -923,13 +836,8 @@ static bool access_pminten(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
{
u64 mask = kvm_pmu_valid_counter_mask(vcpu);
- if (!kvm_arm_pmu_v3_ready(vcpu))
- return trap_raz_wi(vcpu, p, r);
-
- if (!vcpu_mode_priv(vcpu)) {
- kvm_inject_undefined(vcpu);
+ if (check_pmu_access_disabled(vcpu, 0))
return false;
- }
if (p->is_write) {
u64 val = p->regval & mask;
@@ -952,9 +860,6 @@ static bool access_pmovs(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
{
u64 mask = kvm_pmu_valid_counter_mask(vcpu);
- if (!kvm_arm_pmu_v3_ready(vcpu))
- return trap_raz_wi(vcpu, p, r);
-
if (pmu_access_el0_disabled(vcpu))
return false;
@@ -977,9 +882,6 @@ static bool access_pmswinc(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
{
u64 mask;
- if (!kvm_arm_pmu_v3_ready(vcpu))
- return trap_raz_wi(vcpu, p, r);
-
if (!p->is_write)
return read_from_write_only(vcpu, p, r);
@@ -994,8 +896,10 @@ static bool access_pmswinc(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
- if (!kvm_arm_pmu_v3_ready(vcpu))
- return trap_raz_wi(vcpu, p, r);
+ if (!kvm_vcpu_has_pmu(vcpu)) {
+ kvm_inject_undefined(vcpu);
+ return false;
+ }
if (p->is_write) {
if (!vcpu_mode_priv(vcpu)) {
@@ -1038,8 +942,8 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
{ SYS_DESC(SYS_PMEVTYPERn_EL0(n)), \
access_pmu_evtyper, reset_unknown, (PMEVTYPER0_EL0 + n), }
-static bool access_amu(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
- const struct sys_reg_desc *r)
+static bool undef_access(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
{
kvm_inject_undefined(vcpu);
@@ -1047,33 +951,25 @@ static bool access_amu(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
}
/* Macro to expand the AMU counter and type registers */
-#define AMU_AMEVCNTR0_EL0(n) { SYS_DESC(SYS_AMEVCNTR0_EL0(n)), access_amu }
-#define AMU_AMEVTYPER0_EL0(n) { SYS_DESC(SYS_AMEVTYPER0_EL0(n)), access_amu }
-#define AMU_AMEVCNTR1_EL0(n) { SYS_DESC(SYS_AMEVCNTR1_EL0(n)), access_amu }
-#define AMU_AMEVTYPER1_EL0(n) { SYS_DESC(SYS_AMEVTYPER1_EL0(n)), access_amu }
-
-static bool trap_ptrauth(struct kvm_vcpu *vcpu,
- struct sys_reg_params *p,
- const struct sys_reg_desc *rd)
-{
- /*
- * If we land here, that is because we didn't fixup the access on exit
- * by allowing the PtrAuth sysregs. The only way this happens is when
- * the guest does not have PtrAuth support enabled.
- */
- kvm_inject_undefined(vcpu);
-
- return false;
-}
+#define AMU_AMEVCNTR0_EL0(n) { SYS_DESC(SYS_AMEVCNTR0_EL0(n)), undef_access }
+#define AMU_AMEVTYPER0_EL0(n) { SYS_DESC(SYS_AMEVTYPER0_EL0(n)), undef_access }
+#define AMU_AMEVCNTR1_EL0(n) { SYS_DESC(SYS_AMEVCNTR1_EL0(n)), undef_access }
+#define AMU_AMEVTYPER1_EL0(n) { SYS_DESC(SYS_AMEVTYPER1_EL0(n)), undef_access }
static unsigned int ptrauth_visibility(const struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd)
{
- return vcpu_has_ptrauth(vcpu) ? 0 : REG_HIDDEN_USER | REG_HIDDEN_GUEST;
+ return vcpu_has_ptrauth(vcpu) ? 0 : REG_HIDDEN;
}
+/*
+ * If we land here on a PtrAuth access, that is because we didn't
+ * fixup the access on exit by allowing the PtrAuth sysregs. The only
+ * way this happens is when the guest does not have PtrAuth support
+ * enabled.
+ */
#define __PTRAUTH_KEY(k) \
- { SYS_DESC(SYS_## k), trap_ptrauth, reset_unknown, k, \
+ { SYS_DESC(SYS_## k), undef_access, reset_unknown, k, \
.visibility = ptrauth_visibility}
#define PTRAUTH_KEY(k) \
@@ -1128,9 +1024,10 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,
if (!vcpu_has_sve(vcpu))
val &= ~(0xfUL << ID_AA64PFR0_SVE_SHIFT);
val &= ~(0xfUL << ID_AA64PFR0_AMU_SHIFT);
- if (!(val & (0xfUL << ID_AA64PFR0_CSV2_SHIFT)) &&
- arm64_get_spectre_v2_state() == SPECTRE_UNAFFECTED)
- val |= (1UL << ID_AA64PFR0_CSV2_SHIFT);
+ val &= ~(0xfUL << ID_AA64PFR0_CSV2_SHIFT);
+ val |= ((u64)vcpu->kvm->arch.pfr0_csv2 << ID_AA64PFR0_CSV2_SHIFT);
+ val &= ~(0xfUL << ID_AA64PFR0_CSV3_SHIFT);
+ val |= ((u64)vcpu->kvm->arch.pfr0_csv3 << ID_AA64PFR0_CSV3_SHIFT);
} else if (id == SYS_ID_AA64PFR1_EL1) {
val &= ~(0xfUL << ID_AA64PFR1_MTE_SHIFT);
} else if (id == SYS_ID_AA64ISAR1_EL1 && !vcpu_has_ptrauth(vcpu)) {
@@ -1139,10 +1036,15 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,
(0xfUL << ID_AA64ISAR1_GPA_SHIFT) |
(0xfUL << ID_AA64ISAR1_GPI_SHIFT));
} else if (id == SYS_ID_AA64DFR0_EL1) {
+ u64 cap = 0;
+
/* Limit guests to PMUv3 for ARMv8.1 */
+ if (kvm_vcpu_has_pmu(vcpu))
+ cap = ID_AA64DFR0_PMUVER_8_1;
+
val = cpuid_feature_cap_perfmon_field(val,
ID_AA64DFR0_PMUVER_SHIFT,
- ID_AA64DFR0_PMUVER_8_1);
+ cap);
} else if (id == SYS_ID_DFR0_EL1) {
/* Limit guests to PMUv3 for ARMv8.1 */
val = cpuid_feature_cap_perfmon_field(val,
@@ -1153,6 +1055,22 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,
return val;
}
+static unsigned int id_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *r)
+{
+ u32 id = sys_reg((u32)r->Op0, (u32)r->Op1,
+ (u32)r->CRn, (u32)r->CRm, (u32)r->Op2);
+
+ switch (id) {
+ case SYS_ID_AA64ZFR0_EL1:
+ if (!vcpu_has_sve(vcpu))
+ return REG_RAZ;
+ break;
+ }
+
+ return 0;
+}
+
/* cpufeature ID register access trap handlers */
static bool __access_id_reg(struct kvm_vcpu *vcpu,
@@ -1171,7 +1089,9 @@ static bool access_id_reg(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
- return __access_id_reg(vcpu, p, r, false);
+ bool raz = sysreg_visible_as_raz(vcpu, r);
+
+ return __access_id_reg(vcpu, p, r, raz);
}
static bool access_raz_id_reg(struct kvm_vcpu *vcpu,
@@ -1192,71 +1112,48 @@ static unsigned int sve_visibility(const struct kvm_vcpu *vcpu,
if (vcpu_has_sve(vcpu))
return 0;
- return REG_HIDDEN_USER | REG_HIDDEN_GUEST;
+ return REG_HIDDEN;
}
-/* Visibility overrides for SVE-specific ID registers */
-static unsigned int sve_id_visibility(const struct kvm_vcpu *vcpu,
- const struct sys_reg_desc *rd)
-{
- if (vcpu_has_sve(vcpu))
- return 0;
-
- return REG_HIDDEN_USER;
-}
-
-/* Generate the emulated ID_AA64ZFR0_EL1 value exposed to the guest */
-static u64 guest_id_aa64zfr0_el1(const struct kvm_vcpu *vcpu)
-{
- if (!vcpu_has_sve(vcpu))
- return 0;
-
- return read_sanitised_ftr_reg(SYS_ID_AA64ZFR0_EL1);
-}
-
-static bool access_id_aa64zfr0_el1(struct kvm_vcpu *vcpu,
- struct sys_reg_params *p,
- const struct sys_reg_desc *rd)
-{
- if (p->is_write)
- return write_to_read_only(vcpu, p, rd);
-
- p->regval = guest_id_aa64zfr0_el1(vcpu);
- return true;
-}
-
-static int get_id_aa64zfr0_el1(struct kvm_vcpu *vcpu,
- const struct sys_reg_desc *rd,
- const struct kvm_one_reg *reg, void __user *uaddr)
-{
- u64 val;
-
- if (WARN_ON(!vcpu_has_sve(vcpu)))
- return -ENOENT;
-
- val = guest_id_aa64zfr0_el1(vcpu);
- return reg_to_user(uaddr, &val, reg->id);
-}
-
-static int set_id_aa64zfr0_el1(struct kvm_vcpu *vcpu,
- const struct sys_reg_desc *rd,
- const struct kvm_one_reg *reg, void __user *uaddr)
+static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd,
+ const struct kvm_one_reg *reg, void __user *uaddr)
{
const u64 id = sys_reg_to_index(rd);
+ u8 csv2, csv3;
int err;
u64 val;
- if (WARN_ON(!vcpu_has_sve(vcpu)))
- return -ENOENT;
-
err = reg_from_user(&val, uaddr, id);
if (err)
return err;
- /* This is what we mean by invariant: you can't change it. */
- if (val != guest_id_aa64zfr0_el1(vcpu))
+ /*
+ * Allow AA64PFR0_EL1.CSV2 to be set from userspace as long as
+ * it doesn't promise more than what is actually provided (the
+ * guest could otherwise be covered in ectoplasmic residue).
+ */
+ csv2 = cpuid_feature_extract_unsigned_field(val, ID_AA64PFR0_CSV2_SHIFT);
+ if (csv2 > 1 ||
+ (csv2 && arm64_get_spectre_v2_state() != SPECTRE_UNAFFECTED))
return -EINVAL;
+ /* Same thing for CSV3 */
+ csv3 = cpuid_feature_extract_unsigned_field(val, ID_AA64PFR0_CSV3_SHIFT);
+ if (csv3 > 1 ||
+ (csv3 && arm64_get_meltdown_state() != SPECTRE_UNAFFECTED))
+ return -EINVAL;
+
+ /* We can only differ with CSV[23], and anything else is an error */
+ val ^= read_id_reg(vcpu, rd, false);
+ val &= ~((0xFUL << ID_AA64PFR0_CSV2_SHIFT) |
+ (0xFUL << ID_AA64PFR0_CSV3_SHIFT));
+ if (val)
+ return -EINVAL;
+
+ vcpu->kvm->arch.pfr0_csv2 = csv2;
+ vcpu->kvm->arch.pfr0_csv3 = csv3;
+
return 0;
}
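
The XOR trick above is a compact way to enforce "only these fields may change": differing bits survive the XOR, the writable fields are masked out, and any remainder is an illegal modification. A small illustration (the writable mask is illustrative, not the real ID_AA64PFR0_EL1 layout):

#include <stdbool.h>
#include <stdint.h>

/* "Only these fields may differ" check, as used for CSV2/CSV3 above. */
static bool change_is_legal(uint64_t cur, uint64_t new, uint64_t writable)
{
        uint64_t diff = cur ^ new;      /* bits the caller wants to flip */

        diff &= ~writable;              /* drop fields allowed to change */
        return diff == 0;               /* anything left is illegal */
}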
@@ -1299,13 +1196,17 @@ static int __set_id_reg(const struct kvm_vcpu *vcpu,
static int get_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
const struct kvm_one_reg *reg, void __user *uaddr)
{
- return __get_id_reg(vcpu, rd, uaddr, false);
+ bool raz = sysreg_visible_as_raz(vcpu, rd);
+
+ return __get_id_reg(vcpu, rd, uaddr, raz);
}
static int set_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
const struct kvm_one_reg *reg, void __user *uaddr)
{
- return __set_id_reg(vcpu, rd, uaddr, false);
+ bool raz = sysreg_visible_as_raz(vcpu, rd);
+
+ return __set_id_reg(vcpu, rd, uaddr, raz);
}
static int get_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
@@ -1345,10 +1246,6 @@ static bool access_csselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
{
int reg = r->reg;
- /* See the 32bit mapping in kvm_host.h */
- if (p->is_aarch32)
- reg = r->reg / 2;
-
if (p->is_write)
vcpu_write_sys_reg(vcpu, p->regval, reg);
else
@@ -1384,19 +1281,13 @@ static bool access_ccsidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
return true;
}
-static bool access_mte_regs(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
- const struct sys_reg_desc *r)
-{
- kvm_inject_undefined(vcpu);
- return false;
-}
-
/* sys_reg_desc initialiser for known cpufeature ID registers */
#define ID_SANITISED(name) { \
SYS_DESC(SYS_##name), \
.access = access_id_reg, \
.get_user = get_id_reg, \
.set_user = set_id_reg, \
+ .visibility = id_visibility, \
}
/*
@@ -1514,11 +1405,12 @@ static const struct sys_reg_desc sys_reg_descs[] = {
/* AArch64 ID registers */
/* CRm=4 */
- ID_SANITISED(ID_AA64PFR0_EL1),
+ { SYS_DESC(SYS_ID_AA64PFR0_EL1), .access = access_id_reg,
+ .get_user = get_id_reg, .set_user = set_id_aa64pfr0_el1, },
ID_SANITISED(ID_AA64PFR1_EL1),
ID_UNALLOCATED(4,2),
ID_UNALLOCATED(4,3),
- { SYS_DESC(SYS_ID_AA64ZFR0_EL1), access_id_aa64zfr0_el1, .get_user = get_id_aa64zfr0_el1, .set_user = set_id_aa64zfr0_el1, .visibility = sve_id_visibility },
+ ID_SANITISED(ID_AA64ZFR0_EL1),
ID_UNALLOCATED(4,5),
ID_UNALLOCATED(4,6),
ID_UNALLOCATED(4,7),
@@ -1557,8 +1449,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_ACTLR_EL1), access_actlr, reset_actlr, ACTLR_EL1 },
{ SYS_DESC(SYS_CPACR_EL1), NULL, reset_val, CPACR_EL1, 0 },
- { SYS_DESC(SYS_RGSR_EL1), access_mte_regs },
- { SYS_DESC(SYS_GCR_EL1), access_mte_regs },
+ { SYS_DESC(SYS_RGSR_EL1), undef_access },
+ { SYS_DESC(SYS_GCR_EL1), undef_access },
{ SYS_DESC(SYS_ZCR_EL1), NULL, reset_val, ZCR_EL1, 0, .visibility = sve_visibility },
{ SYS_DESC(SYS_TTBR0_EL1), access_vm_reg, reset_unknown, TTBR0_EL1 },
@@ -1584,8 +1476,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_ERXMISC0_EL1), trap_raz_wi },
{ SYS_DESC(SYS_ERXMISC1_EL1), trap_raz_wi },
- { SYS_DESC(SYS_TFSR_EL1), access_mte_regs },
- { SYS_DESC(SYS_TFSRE0_EL1), access_mte_regs },
+ { SYS_DESC(SYS_TFSR_EL1), undef_access },
+ { SYS_DESC(SYS_TFSRE0_EL1), undef_access },
{ SYS_DESC(SYS_FAR_EL1), access_vm_reg, reset_unknown, FAR_EL1 },
{ SYS_DESC(SYS_PAR_EL1), NULL, reset_unknown, PAR_EL1 },
@@ -1621,6 +1513,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_CONTEXTIDR_EL1), access_vm_reg, reset_val, CONTEXTIDR_EL1, 0 },
{ SYS_DESC(SYS_TPIDR_EL1), NULL, reset_unknown, TPIDR_EL1 },
+ { SYS_DESC(SYS_SCXTNUM_EL1), undef_access },
+
{ SYS_DESC(SYS_CNTKCTL_EL1), NULL, reset_val, CNTKCTL_EL1, 0},
{ SYS_DESC(SYS_CCSIDR_EL1), access_ccsidr },
@@ -1649,14 +1543,16 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_TPIDR_EL0), NULL, reset_unknown, TPIDR_EL0 },
{ SYS_DESC(SYS_TPIDRRO_EL0), NULL, reset_unknown, TPIDRRO_EL0 },
- { SYS_DESC(SYS_AMCR_EL0), access_amu },
- { SYS_DESC(SYS_AMCFGR_EL0), access_amu },
- { SYS_DESC(SYS_AMCGCR_EL0), access_amu },
- { SYS_DESC(SYS_AMUSERENR_EL0), access_amu },
- { SYS_DESC(SYS_AMCNTENCLR0_EL0), access_amu },
- { SYS_DESC(SYS_AMCNTENSET0_EL0), access_amu },
- { SYS_DESC(SYS_AMCNTENCLR1_EL0), access_amu },
- { SYS_DESC(SYS_AMCNTENSET1_EL0), access_amu },
+ { SYS_DESC(SYS_SCXTNUM_EL0), undef_access },
+
+ { SYS_DESC(SYS_AMCR_EL0), undef_access },
+ { SYS_DESC(SYS_AMCFGR_EL0), undef_access },
+ { SYS_DESC(SYS_AMCGCR_EL0), undef_access },
+ { SYS_DESC(SYS_AMUSERENR_EL0), undef_access },
+ { SYS_DESC(SYS_AMCNTENCLR0_EL0), undef_access },
+ { SYS_DESC(SYS_AMCNTENSET0_EL0), undef_access },
+ { SYS_DESC(SYS_AMCNTENCLR1_EL0), undef_access },
+ { SYS_DESC(SYS_AMCNTENSET1_EL0), undef_access },
AMU_AMEVCNTR0_EL0(0),
AMU_AMEVCNTR0_EL0(1),
AMU_AMEVCNTR0_EL0(2),
@@ -1820,66 +1716,27 @@ static bool trap_dbgidr(struct kvm_vcpu *vcpu,
}
}
-static bool trap_debug32(struct kvm_vcpu *vcpu,
- struct sys_reg_params *p,
- const struct sys_reg_desc *r)
-{
- if (p->is_write) {
- vcpu_cp14(vcpu, r->reg) = p->regval;
- vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY;
- } else {
- p->regval = vcpu_cp14(vcpu, r->reg);
- }
-
- return true;
-}
-
-/* AArch32 debug register mappings
+/*
+ * AArch32 debug register mappings
*
* AArch32 DBGBVRn is mapped to DBGBVRn_EL1[31:0]
* AArch32 DBGBXVRn is mapped to DBGBVRn_EL1[63:32]
*
- * All control registers and watchpoint value registers are mapped to
- * the lower 32 bits of their AArch64 equivalents. We share the trap
- * handlers with the above AArch64 code which checks what mode the
- * system is in.
+ * None of the other registers share their location, so treat them as
+ * if they were 64bit.
*/
-
-static bool trap_xvr(struct kvm_vcpu *vcpu,
- struct sys_reg_params *p,
- const struct sys_reg_desc *rd)
-{
- u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg];
-
- if (p->is_write) {
- u64 val = *dbg_reg;
-
- val &= 0xffffffffUL;
- val |= p->regval << 32;
- *dbg_reg = val;
-
- vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY;
- } else {
- p->regval = *dbg_reg >> 32;
- }
-
- trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg);
-
- return true;
-}
-
-#define DBG_BCR_BVR_WCR_WVR(n) \
- /* DBGBVRn */ \
- { Op1( 0), CRn( 0), CRm((n)), Op2( 4), trap_bvr, NULL, n }, \
- /* DBGBCRn */ \
- { Op1( 0), CRn( 0), CRm((n)), Op2( 5), trap_bcr, NULL, n }, \
- /* DBGWVRn */ \
- { Op1( 0), CRn( 0), CRm((n)), Op2( 6), trap_wvr, NULL, n }, \
- /* DBGWCRn */ \
+#define DBG_BCR_BVR_WCR_WVR(n) \
+ /* DBGBVRn */ \
+ { AA32(LO), Op1( 0), CRn( 0), CRm((n)), Op2( 4), trap_bvr, NULL, n }, \
+ /* DBGBCRn */ \
+ { Op1( 0), CRn( 0), CRm((n)), Op2( 5), trap_bcr, NULL, n }, \
+ /* DBGWVRn */ \
+ { Op1( 0), CRn( 0), CRm((n)), Op2( 6), trap_wvr, NULL, n }, \
+ /* DBGWCRn */ \
{ Op1( 0), CRn( 0), CRm((n)), Op2( 7), trap_wcr, NULL, n }
-#define DBGBXVR(n) \
- { Op1( 0), CRn( 1), CRm((n)), Op2( 1), trap_xvr, NULL, n }
+#define DBGBXVR(n) \
+ { AA32(HI), Op1( 0), CRn( 1), CRm((n)), Op2( 1), trap_bvr, NULL, n }
/*
* Trapped cp14 registers. We generally ignore most of the external
@@ -1897,9 +1754,9 @@ static const struct sys_reg_desc cp14_regs[] = {
{ Op1( 0), CRn( 0), CRm( 1), Op2( 0), trap_raz_wi },
DBG_BCR_BVR_WCR_WVR(1),
/* DBGDCCINT */
- { Op1( 0), CRn( 0), CRm( 2), Op2( 0), trap_debug32, NULL, cp14_DBGDCCINT },
+ { Op1( 0), CRn( 0), CRm( 2), Op2( 0), trap_debug_regs, NULL, MDCCINT_EL1 },
/* DBGDSCRext */
- { Op1( 0), CRn( 0), CRm( 2), Op2( 2), trap_debug32, NULL, cp14_DBGDSCRext },
+ { Op1( 0), CRn( 0), CRm( 2), Op2( 2), trap_debug_regs, NULL, MDSCR_EL1 },
DBG_BCR_BVR_WCR_WVR(2),
/* DBGDTR[RT]Xint */
{ Op1( 0), CRn( 0), CRm( 3), Op2( 0), trap_raz_wi },
@@ -1914,7 +1771,7 @@ static const struct sys_reg_desc cp14_regs[] = {
{ Op1( 0), CRn( 0), CRm( 6), Op2( 2), trap_raz_wi },
DBG_BCR_BVR_WCR_WVR(6),
/* DBGVCR */
- { Op1( 0), CRn( 0), CRm( 7), Op2( 0), trap_debug32, NULL, cp14_DBGVCR },
+ { Op1( 0), CRn( 0), CRm( 7), Op2( 0), trap_debug_regs, NULL, DBGVCR32_EL2 },
DBG_BCR_BVR_WCR_WVR(7),
DBG_BCR_BVR_WCR_WVR(8),
DBG_BCR_BVR_WCR_WVR(9),
@@ -2000,19 +1857,29 @@ static const struct sys_reg_desc cp14_64_regs[] = {
*/
static const struct sys_reg_desc cp15_regs[] = {
{ Op1( 0), CRn( 0), CRm( 0), Op2( 1), access_ctr },
- { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_vm_reg, NULL, c1_SCTLR },
- { Op1( 0), CRn( 1), CRm( 0), Op2( 1), access_actlr },
- { Op1( 0), CRn( 1), CRm( 0), Op2( 3), access_actlr },
- { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
- { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 },
- { Op1( 0), CRn( 2), CRm( 0), Op2( 2), access_vm_reg, NULL, c2_TTBCR },
- { Op1( 0), CRn( 3), CRm( 0), Op2( 0), access_vm_reg, NULL, c3_DACR },
- { Op1( 0), CRn( 5), CRm( 0), Op2( 0), access_vm_reg, NULL, c5_DFSR },
- { Op1( 0), CRn( 5), CRm( 0), Op2( 1), access_vm_reg, NULL, c5_IFSR },
- { Op1( 0), CRn( 5), CRm( 1), Op2( 0), access_vm_reg, NULL, c5_ADFSR },
- { Op1( 0), CRn( 5), CRm( 1), Op2( 1), access_vm_reg, NULL, c5_AIFSR },
- { Op1( 0), CRn( 6), CRm( 0), Op2( 0), access_vm_reg, NULL, c6_DFAR },
- { Op1( 0), CRn( 6), CRm( 0), Op2( 2), access_vm_reg, NULL, c6_IFAR },
+ { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_vm_reg, NULL, SCTLR_EL1 },
+ /* ACTLR */
+ { AA32(LO), Op1( 0), CRn( 1), CRm( 0), Op2( 1), access_actlr, NULL, ACTLR_EL1 },
+ /* ACTLR2 */
+ { AA32(HI), Op1( 0), CRn( 1), CRm( 0), Op2( 3), access_actlr, NULL, ACTLR_EL1 },
+ { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, TTBR0_EL1 },
+ { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, TTBR1_EL1 },
+ /* TTBCR */
+ { AA32(LO), Op1( 0), CRn( 2), CRm( 0), Op2( 2), access_vm_reg, NULL, TCR_EL1 },
+ /* TTBCR2 */
+ { AA32(HI), Op1( 0), CRn( 2), CRm( 0), Op2( 3), access_vm_reg, NULL, TCR_EL1 },
+ { Op1( 0), CRn( 3), CRm( 0), Op2( 0), access_vm_reg, NULL, DACR32_EL2 },
+ /* DFSR */
+ { Op1( 0), CRn( 5), CRm( 0), Op2( 0), access_vm_reg, NULL, ESR_EL1 },
+ { Op1( 0), CRn( 5), CRm( 0), Op2( 1), access_vm_reg, NULL, IFSR32_EL2 },
+ /* ADFSR */
+ { Op1( 0), CRn( 5), CRm( 1), Op2( 0), access_vm_reg, NULL, AFSR0_EL1 },
+ /* AIFSR */
+ { Op1( 0), CRn( 5), CRm( 1), Op2( 1), access_vm_reg, NULL, AFSR1_EL1 },
+ /* DFAR */
+ { AA32(LO), Op1( 0), CRn( 6), CRm( 0), Op2( 0), access_vm_reg, NULL, FAR_EL1 },
+ /* IFAR */
+ { AA32(HI), Op1( 0), CRn( 6), CRm( 0), Op2( 2), access_vm_reg, NULL, FAR_EL1 },
/*
* DC{C,I,CI}SW operations:
@@ -2038,15 +1905,19 @@ static const struct sys_reg_desc cp15_regs[] = {
{ Op1( 0), CRn( 9), CRm(14), Op2( 2), access_pminten },
{ Op1( 0), CRn( 9), CRm(14), Op2( 3), access_pmovs },
- { Op1( 0), CRn(10), CRm( 2), Op2( 0), access_vm_reg, NULL, c10_PRRR },
- { Op1( 0), CRn(10), CRm( 2), Op2( 1), access_vm_reg, NULL, c10_NMRR },
- { Op1( 0), CRn(10), CRm( 3), Op2( 0), access_vm_reg, NULL, c10_AMAIR0 },
- { Op1( 0), CRn(10), CRm( 3), Op2( 1), access_vm_reg, NULL, c10_AMAIR1 },
+ /* PRRR/MAIR0 */
+ { AA32(LO), Op1( 0), CRn(10), CRm( 2), Op2( 0), access_vm_reg, NULL, MAIR_EL1 },
+ /* NMRR/MAIR1 */
+ { AA32(HI), Op1( 0), CRn(10), CRm( 2), Op2( 1), access_vm_reg, NULL, MAIR_EL1 },
+ /* AMAIR0 */
+ { AA32(LO), Op1( 0), CRn(10), CRm( 3), Op2( 0), access_vm_reg, NULL, AMAIR_EL1 },
+ /* AMAIR1 */
+ { AA32(HI), Op1( 0), CRn(10), CRm( 3), Op2( 1), access_vm_reg, NULL, AMAIR_EL1 },
/* ICC_SRE */
{ Op1( 0), CRn(12), CRm(12), Op2( 5), access_gic_sre },
- { Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID },
+ { Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, CONTEXTIDR_EL1 },
/* Arch Timers */
{ SYS_DESC(SYS_AARCH32_CNTP_TVAL), access_arch_timer },
@@ -2121,14 +1992,14 @@ static const struct sys_reg_desc cp15_regs[] = {
{ Op1(1), CRn( 0), CRm( 0), Op2(0), access_ccsidr },
{ Op1(1), CRn( 0), CRm( 0), Op2(1), access_clidr },
- { Op1(2), CRn( 0), CRm( 0), Op2(0), access_csselr, NULL, c0_CSSELR },
+ { Op1(2), CRn( 0), CRm( 0), Op2(0), access_csselr, NULL, CSSELR_EL1 },
};
static const struct sys_reg_desc cp15_64_regs[] = {
- { Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
+ { Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, TTBR0_EL1 },
{ Op1( 0), CRn( 0), CRm( 9), Op2( 0), access_pmu_evcntr },
{ Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI1R */
- { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 },
+ { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, TTBR1_EL1 },
{ Op1( 1), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_ASGI1R */
{ Op1( 2), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI0R */
{ SYS_DESC(SYS_AARCH32_CNTP_CVAL), access_arch_timer },
@@ -2185,7 +2056,7 @@ static void perform_access(struct kvm_vcpu *vcpu,
trace_kvm_sys_access(*vcpu_pc(vcpu), params, r);
/* Check for regs disabled by runtime config */
- if (sysreg_hidden_from_guest(vcpu, r)) {
+ if (sysreg_hidden(vcpu, r)) {
kvm_inject_undefined(vcpu);
return;
}
@@ -2199,7 +2070,7 @@ static void perform_access(struct kvm_vcpu *vcpu,
/* Skip instruction if instructed so */
if (likely(r->access(vcpu, params, r)))
- kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+ kvm_incr_pc(vcpu);
}
/*
@@ -2272,8 +2143,6 @@ static int kvm_handle_cp_64(struct kvm_vcpu *vcpu,
int Rt = kvm_vcpu_sys_get_rt(vcpu);
int Rt2 = (esr >> 10) & 0x1f;
- params.is_aarch32 = true;
- params.is_32bit = false;
params.CRm = (esr >> 1) & 0xf;
params.is_write = ((esr & 1) == 0);
@@ -2323,8 +2192,6 @@ static int kvm_handle_cp_32(struct kvm_vcpu *vcpu,
u32 esr = kvm_vcpu_get_esr(vcpu);
int Rt = kvm_vcpu_sys_get_rt(vcpu);
- params.is_aarch32 = true;
- params.is_32bit = true;
params.CRm = (esr >> 1) & 0xf;
params.regval = vcpu_get_reg(vcpu, Rt);
params.is_write = ((esr & 1) == 0);
@@ -2418,8 +2285,6 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu)
trace_kvm_handle_sys_reg(esr);
- params.is_aarch32 = false;
- params.is_32bit = false;
params.Op0 = (esr >> 20) & 3;
params.Op1 = (esr >> 14) & 0x7;
params.CRn = (esr >> 10) & 0xf;
@@ -2684,7 +2549,7 @@ int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg
return get_invariant_sys_reg(reg->id, uaddr);
/* Check for regs disabled by runtime config */
- if (sysreg_hidden_from_user(vcpu, r))
+ if (sysreg_hidden(vcpu, r))
return -ENOENT;
if (r->get_user)
@@ -2709,7 +2574,7 @@ int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg
return set_invariant_sys_reg(reg->id, uaddr);
/* Check for regs disabled by runtime config */
- if (sysreg_hidden_from_user(vcpu, r))
+ if (sysreg_hidden(vcpu, r))
return -ENOENT;
if (r->set_user)
@@ -2780,7 +2645,7 @@ static int walk_one_sys_reg(const struct kvm_vcpu *vcpu,
if (!(rd->reg || rd->get_user))
return 0;
- if (sysreg_hidden_from_user(vcpu, rd))
+ if (sysreg_hidden(vcpu, rd))
return 0;
if (!copy_reg_to_user(rd, uind))
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
index 5a6fc30f5989..9d0621417c2a 100644
--- a/arch/arm64/kvm/sys_regs.h
+++ b/arch/arm64/kvm/sys_regs.h
@@ -19,14 +19,18 @@ struct sys_reg_params {
u8 Op2;
u64 regval;
bool is_write;
- bool is_aarch32;
- bool is_32bit; /* Only valid if is_aarch32 is true */
};
struct sys_reg_desc {
/* Sysreg string for debug */
const char *name;
+ enum {
+ AA32_ZEROHIGH,
+ AA32_LO,
+ AA32_HI,
+ } aarch32_map;
+
/* MRS/MSR instruction which accesses it. */
u8 Op0;
u8 Op1;
@@ -59,8 +63,8 @@ struct sys_reg_desc {
const struct sys_reg_desc *rd);
};
-#define REG_HIDDEN_USER (1 << 0) /* hidden from userspace ioctls */
-#define REG_HIDDEN_GUEST (1 << 1) /* hidden from guest */
+#define REG_HIDDEN (1 << 0) /* hidden from userspace and guest */
+#define REG_RAZ (1 << 1) /* RAZ from userspace and guest */
static __printf(2, 3)
inline void print_sys_reg_msg(const struct sys_reg_params *p,
@@ -111,22 +115,22 @@ static inline void reset_val(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r
__vcpu_sys_reg(vcpu, r->reg) = r->val;
}
-static inline bool sysreg_hidden_from_guest(const struct kvm_vcpu *vcpu,
- const struct sys_reg_desc *r)
+static inline bool sysreg_hidden(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *r)
{
if (likely(!r->visibility))
return false;
- return r->visibility(vcpu, r) & REG_HIDDEN_GUEST;
+ return r->visibility(vcpu, r) & REG_HIDDEN;
}
-static inline bool sysreg_hidden_from_user(const struct kvm_vcpu *vcpu,
- const struct sys_reg_desc *r)
+static inline bool sysreg_visible_as_raz(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *r)
{
if (likely(!r->visibility))
return false;
- return r->visibility(vcpu, r) & REG_HIDDEN_USER;
+ return r->visibility(vcpu, r) & REG_RAZ;
}
static inline int cmp_sys_reg(const struct sys_reg_desc *i1,
@@ -153,6 +157,7 @@ const struct sys_reg_desc *find_reg_by_id(u64 id,
const struct sys_reg_desc table[],
unsigned int num);
+#define AA32(_x) .aarch32_map = AA32_##_x
#define Op0(_x) .Op0 = _x
#define Op1(_x) .Op1 = _x
#define CRn(_x) .CRn = _x
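
The REG_HIDDEN/REG_RAZ split encodes two different guest-visible outcomes: an UNDEF injection versus a read that succeeds and returns zero. A simplified sketch of the resulting dispatch (not the kernel's exact control flow):

#define REG_HIDDEN (1 << 0)  /* hidden from userspace and guest */
#define REG_RAZ    (1 << 1)  /* RAZ from userspace and guest */

enum outcome { EMULATE, UNDEF, READ_AS_ZERO };

static enum outcome classify(unsigned int vis)
{
        if (vis & REG_HIDDEN)           /* e.g. ZCR_EL1 on a non-SVE vcpu */
                return UNDEF;
        if (vis & REG_RAZ)              /* e.g. ID_AA64ZFR0_EL1 without SVE */
                return READ_AS_ZERO;
        return EMULATE;
}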
diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c
index e0404bcab019..4130b72e6891 100644
--- a/arch/arm64/kvm/va_layout.c
+++ b/arch/arm64/kvm/va_layout.c
@@ -11,6 +11,7 @@
#include <asm/debug-monitors.h>
#include <asm/insn.h>
#include <asm/kvm_mmu.h>
+#include <asm/memory.h>
/*
* The LSB of the HYP VA tag
@@ -131,21 +132,16 @@ void __init kvm_update_va_mask(struct alt_instr *alt,
}
}
-void *__kvm_bp_vect_base;
-int __kvm_harden_el2_vector_slot;
-
void kvm_patch_vector_branch(struct alt_instr *alt,
__le32 *origptr, __le32 *updptr, int nr_inst)
{
u64 addr;
u32 insn;
- BUG_ON(nr_inst != 5);
+ BUG_ON(nr_inst != 4);
- if (has_vhe() || !cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS)) {
- WARN_ON_ONCE(cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS));
+ if (!cpus_have_const_cap(ARM64_SPECTRE_V3A) || WARN_ON_ONCE(has_vhe()))
return;
- }
/*
* Compute HYP VA by using the same computation as kern_hyp_va()
@@ -163,15 +159,6 @@ void kvm_patch_vector_branch(struct alt_instr *alt,
*/
addr += KVM_VECTOR_PREAMBLE;
- /* stp x0, x1, [sp, #-16]! */
- insn = aarch64_insn_gen_load_store_pair(AARCH64_INSN_REG_0,
- AARCH64_INSN_REG_1,
- AARCH64_INSN_REG_SP,
- -16,
- AARCH64_INSN_VARIANT_64BIT,
- AARCH64_INSN_LDST_STORE_PAIR_PRE_INDEX);
- *updptr++ = cpu_to_le32(insn);
-
/* movz x0, #(addr & 0xffff) */
insn = aarch64_insn_gen_movewide(AARCH64_INSN_REG_0,
(u16)addr,
@@ -201,3 +188,58 @@ void kvm_patch_vector_branch(struct alt_instr *alt,
AARCH64_INSN_BRANCH_NOLINK);
*updptr++ = cpu_to_le32(insn);
}
+
+static void generate_mov_q(u64 val, __le32 *origptr, __le32 *updptr, int nr_inst)
+{
+ u32 insn, oinsn, rd;
+
+ BUG_ON(nr_inst != 4);
+
+ /* Compute target register */
+ oinsn = le32_to_cpu(*origptr);
+ rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, oinsn);
+
+ /* movz rd, #(val & 0xffff) */
+ insn = aarch64_insn_gen_movewide(rd,
+ (u16)val,
+ 0,
+ AARCH64_INSN_VARIANT_64BIT,
+ AARCH64_INSN_MOVEWIDE_ZERO);
+ *updptr++ = cpu_to_le32(insn);
+
+ /* movk rd, #((val >> 16) & 0xffff), lsl #16 */
+ insn = aarch64_insn_gen_movewide(rd,
+ (u16)(val >> 16),
+ 16,
+ AARCH64_INSN_VARIANT_64BIT,
+ AARCH64_INSN_MOVEWIDE_KEEP);
+ *updptr++ = cpu_to_le32(insn);
+
+ /* movk rd, #((val >> 32) & 0xffff), lsl #32 */
+ insn = aarch64_insn_gen_movewide(rd,
+ (u16)(val >> 32),
+ 32,
+ AARCH64_INSN_VARIANT_64BIT,
+ AARCH64_INSN_MOVEWIDE_KEEP);
+ *updptr++ = cpu_to_le32(insn);
+
+ /* movk rd, #((val >> 48) & 0xffff), lsl #48 */
+ insn = aarch64_insn_gen_movewide(rd,
+ (u16)(val >> 48),
+ 48,
+ AARCH64_INSN_VARIANT_64BIT,
+ AARCH64_INSN_MOVEWIDE_KEEP);
+ *updptr++ = cpu_to_le32(insn);
+}
+
+void kvm_update_kimg_phys_offset(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst)
+{
+ generate_mov_q(kimage_voffset + PHYS_OFFSET, origptr, updptr, nr_inst);
+}
+
+void kvm_get_kimage_voffset(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst)
+{
+ generate_mov_q(kimage_voffset, origptr, updptr, nr_inst);
+}
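
generate_mov_q() relies on the fact that one movz plus three movk instructions can materialise any 64bit constant, 16 bits at a time. The arithmetic is easy to check in plain C:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t val = 0x0123456789abcdefULL, out = 0;
        int shift;

        for (shift = 0; shift < 64; shift += 16) {
                uint16_t imm16 = (uint16_t)(val >> shift); /* movz/movk immediate */
                out |= (uint64_t)imm16 << shift;           /* movk keeps other bits */
        }

        printf("%d\n", out == val); /* prints 1 */
        return 0;
}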
diff --git a/arch/arm64/kvm/vgic-sys-reg-v3.c b/arch/arm64/kvm/vgic-sys-reg-v3.c
index 2f92bdcb1188..07d5271e9f05 100644
--- a/arch/arm64/kvm/vgic-sys-reg-v3.c
+++ b/arch/arm64/kvm/vgic-sys-reg-v3.c
@@ -268,8 +268,6 @@ int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, bool is_write, u64 id,
params.regval = *reg;
params.is_write = is_write;
- params.is_aarch32 = false;
- params.is_32bit = false;
if (find_reg_by_id(sysreg, &params, gic_v3_icc_reg_descs,
ARRAY_SIZE(gic_v3_icc_reg_descs)))
@@ -288,8 +286,6 @@ int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu, bool is_write, u64 id,
if (is_write)
params.regval = *reg;
params.is_write = is_write;
- params.is_aarch32 = false;
- params.is_32bit = false;
r = find_reg_by_id(sysreg, &params, gic_v3_icc_reg_descs,
ARRAY_SIZE(gic_v3_icc_reg_descs));
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 1c0f3e02f731..ca692a815731 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -1444,11 +1444,28 @@ static void __remove_pgd_mapping(pgd_t *pgdir, unsigned long start, u64 size)
free_empty_tables(start, end, PAGE_OFFSET, PAGE_END);
}
+static bool inside_linear_region(u64 start, u64 size)
+{
+ /*
+ * The linear mapping region is the range [PAGE_OFFSET..(PAGE_END - 1)],
+ * covering both of its end points but excluding PAGE_END. The maximum
+ * physical range that can be mapped inside this linear mapping range
+ * must likewise be derived from those end points.
+ */
+ return start >= __pa(_PAGE_OFFSET(vabits_actual)) &&
+ (start + size - 1) <= __pa(PAGE_END - 1);
+}
+
int arch_add_memory(int nid, u64 start, u64 size,
struct mhp_params *params)
{
int ret, flags = 0;
+ if (!inside_linear_region(start, size)) {
+ pr_err("[%llx %llx] is outside linear mapping region\n", start, start + size);
+ return -EINVAL;
+ }
+
if (rodata_full || debug_pagealloc_enabled())
flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
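
The new check is end-inclusive on purpose: comparing against start + size - 1 keeps the last byte of the hotplugged range in bounds without overflowing past the top of the region. The shape of the predicate, with the bounds as parameters (a sketch, not the kernel helper):

#include <stdbool.h>
#include <stdint.h>

/* lo_pa/hi_pa stand in for __pa(_PAGE_OFFSET(vabits_actual))
 * and __pa(PAGE_END - 1). */
static bool inside(uint64_t start, uint64_t size,
                   uint64_t lo_pa, uint64_t hi_pa)
{
        /* start + size - 1 addresses the last byte of the range,
         * so the comparison never wraps past hi_pa. */
        return start >= lo_pa && (start + size - 1) <= hi_pa;
}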
diff --git a/arch/csky/kernel/perf_regs.c b/arch/csky/kernel/perf_regs.c
index eb32838b8210..09b7f88a2d6a 100644
--- a/arch/csky/kernel/perf_regs.c
+++ b/arch/csky/kernel/perf_regs.c
@@ -32,8 +32,7 @@ u64 perf_reg_abi(struct task_struct *task)
}
void perf_get_regs_user(struct perf_regs *regs_user,
- struct pt_regs *regs,
- struct pt_regs *regs_user_copy)
+ struct pt_regs *regs)
{
regs_user->regs = task_pt_regs(current);
regs_user->abi = perf_reg_abi(current);
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 9ed4fcccf8a9..7b25548ec42b 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -1336,7 +1336,7 @@ static void dump_trace_imc_data(struct perf_event *event)
/* If this is a valid record, create the sample */
struct perf_output_handle handle;
- if (perf_output_begin(&handle, event, header.size))
+ if (perf_output_begin(&handle, &data, event, header.size))
return;
perf_output_sample(&handle, &header, &data, event);
diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c
index 8e53f2fc3fe0..6f681b105eec 100644
--- a/arch/powerpc/perf/perf_regs.c
+++ b/arch/powerpc/perf/perf_regs.c
@@ -144,8 +144,7 @@ u64 perf_reg_abi(struct task_struct *task)
}
void perf_get_regs_user(struct perf_regs *regs_user,
- struct pt_regs *regs,
- struct pt_regs *regs_user_copy)
+ struct pt_regs *regs)
{
regs_user->regs = task_pt_regs(current);
regs_user->abi = (regs_user->regs) ? perf_reg_abi(current) :
diff --git a/arch/riscv/kernel/perf_regs.c b/arch/riscv/kernel/perf_regs.c
index 04a38fbeb9c7..fd304a248de6 100644
--- a/arch/riscv/kernel/perf_regs.c
+++ b/arch/riscv/kernel/perf_regs.c
@@ -36,8 +36,7 @@ u64 perf_reg_abi(struct task_struct *task)
}
void perf_get_regs_user(struct perf_regs *regs_user,
- struct pt_regs *regs,
- struct pt_regs *regs_user_copy)
+ struct pt_regs *regs)
{
regs_user->regs = task_pt_regs(current);
regs_user->abi = perf_reg_abi(current);
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 4f9e4626df55..00255ae3979d 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -672,7 +672,7 @@ static void cpumsf_output_event_pid(struct perf_event *event,
rcu_read_lock();
perf_prepare_sample(&header, data, event, regs);
- if (perf_output_begin(&handle, event, header.size))
+ if (perf_output_begin(&handle, data, event, header.size))
goto out;
/* Update the process ID (see also kernel/events/core.c) */
diff --git a/arch/s390/kernel/perf_regs.c b/arch/s390/kernel/perf_regs.c
index 4352a504f235..6e9e5d5e927e 100644
--- a/arch/s390/kernel/perf_regs.c
+++ b/arch/s390/kernel/perf_regs.c
@@ -53,8 +53,7 @@ u64 perf_reg_abi(struct task_struct *task)
}
void perf_get_regs_user(struct perf_regs *regs_user,
- struct pt_regs *regs,
- struct pt_regs *regs_user_copy)
+ struct pt_regs *regs)
{
/*
* Use the regs from the first interruption and let
diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h
index 5393e13e07e0..2bbf28cf3aa9 100644
--- a/arch/um/include/asm/pgalloc.h
+++ b/arch/um/include/asm/pgalloc.h
@@ -33,7 +33,13 @@ do { \
} while (0)
#ifdef CONFIG_3_LEVEL_PGTABLES
-#define __pmd_free_tlb(tlb,x, address) tlb_remove_page((tlb),virt_to_page(x))
+
+#define __pmd_free_tlb(tlb, pmd, address) \
+do { \
+ pgtable_pmd_page_dtor(virt_to_page(pmd)); \
+ tlb_remove_page((tlb),virt_to_page(pmd)); \
+} while (0)
+
#endif
#endif
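
The point of the change is symmetry: pgtable_pmd_page_ctor() runs when the PMD page is allocated, so __pmd_free_tlb() must run the matching dtor before handing the page to the TLB batch. A toy model of the invariant being restored (names hypothetical):

#include <assert.h>

/* Every constructed PMD page must be destructed before it is freed. */
static int live;

static void pmd_ctor(void) { live++; } /* at page-table allocation */
static void pmd_dtor(void) { live--; } /* now called by __pmd_free_tlb */

int main(void)
{
        pmd_ctor();
        pmd_dtor();             /* the call the old macro was missing */
        assert(live == 0);      /* balanced again */
        return 0;
}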
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index f1926e9f2143..af457f8cb29d 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2630,7 +2630,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
u64 pebs_enabled = cpuc->pebs_enabled;
handled++;
- x86_pmu.drain_pebs(regs);
+ x86_pmu.drain_pebs(regs, &data);
status &= x86_pmu.intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI;
/*
@@ -4987,6 +4987,12 @@ __init int intel_pmu_init(void)
x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */
+ if (version >= 5) {
+ x86_pmu.intel_cap.anythread_deprecated = edx.split.anythread_deprecated;
+ if (x86_pmu.intel_cap.anythread_deprecated)
+ pr_cont(" AnyThread deprecated, ");
+ }
+
/*
* Install the hw-cache-events table:
*/
@@ -5512,6 +5518,10 @@ __init int intel_pmu_init(void)
x86_pmu.intel_ctrl |=
((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED;
+ /* AnyThread may be deprecated on arch perfmon v5 or later */
+ if (x86_pmu.intel_cap.anythread_deprecated)
+ x86_pmu.format_attrs = intel_arch_formats_attr;
+
if (x86_pmu.event_constraints) {
/*
* event on fixed counter2 (REF_CYCLES) only works on this
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 404315df1e16..b47cc4226934 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -642,8 +642,8 @@ int intel_pmu_drain_bts_buffer(void)
rcu_read_lock();
perf_prepare_sample(&header, &data, event, &regs);
- if (perf_output_begin(&handle, event, header.size *
- (top - base - skip)))
+ if (perf_output_begin(&handle, &data, event,
+ header.size * (top - base - skip)))
goto unlock;
for (at = base; at < top; at++) {
@@ -670,7 +670,9 @@ unlock:
static inline void intel_pmu_drain_pebs_buffer(void)
{
- x86_pmu.drain_pebs(NULL);
+ struct perf_sample_data data;
+
+ x86_pmu.drain_pebs(NULL, &data);
}
/*
@@ -1719,23 +1721,24 @@ intel_pmu_save_and_restart_reload(struct perf_event *event, int count)
return 0;
}
-static void __intel_pmu_pebs_event(struct perf_event *event,
- struct pt_regs *iregs,
- void *base, void *top,
- int bit, int count,
- void (*setup_sample)(struct perf_event *,
- struct pt_regs *,
- void *,
- struct perf_sample_data *,
- struct pt_regs *))
+static __always_inline void
+__intel_pmu_pebs_event(struct perf_event *event,
+ struct pt_regs *iregs,
+ struct perf_sample_data *data,
+ void *base, void *top,
+ int bit, int count,
+ void (*setup_sample)(struct perf_event *,
+ struct pt_regs *,
+ void *,
+ struct perf_sample_data *,
+ struct pt_regs *))
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
- struct perf_sample_data data;
struct x86_perf_regs perf_regs;
struct pt_regs *regs = &perf_regs.regs;
void *at = get_next_pebs_record_by_bit(base, top, bit);
- struct pt_regs dummy_iregs;
+ static struct pt_regs dummy_iregs;
if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
/*
@@ -1752,14 +1755,14 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
iregs = &dummy_iregs;
while (count > 1) {
- setup_sample(event, iregs, at, &data, regs);
- perf_event_output(event, &data, regs);
+ setup_sample(event, iregs, at, data, regs);
+ perf_event_output(event, data, regs);
at += cpuc->pebs_record_size;
at = get_next_pebs_record_by_bit(at, top, bit);
count--;
}
- setup_sample(event, iregs, at, &data, regs);
+ setup_sample(event, iregs, at, data, regs);
if (iregs == &dummy_iregs) {
/*
* The PEBS records may be drained in the non-overflow context,
@@ -1767,18 +1770,18 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
* last record the same as other PEBS records, and doesn't
* invoke the generic overflow handler.
*/
- perf_event_output(event, &data, regs);
+ perf_event_output(event, data, regs);
} else {
/*
* All but the last records are processed.
* The last one is left to be able to call the overflow handler.
*/
- if (perf_event_overflow(event, &data, regs))
+ if (perf_event_overflow(event, data, regs))
x86_pmu_stop(event, 0);
}
}
-static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
+static void intel_pmu_drain_pebs_core(struct pt_regs *iregs, struct perf_sample_data *data)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct debug_store *ds = cpuc->ds;
@@ -1812,7 +1815,7 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
return;
}
- __intel_pmu_pebs_event(event, iregs, at, top, 0, n,
+ __intel_pmu_pebs_event(event, iregs, data, at, top, 0, n,
setup_pebs_fixed_sample_data);
}
@@ -1835,7 +1838,7 @@ static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, int
}
}
-static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
+static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_data *data)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct debug_store *ds = cpuc->ds;
@@ -1942,14 +1945,14 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
}
if (counts[bit]) {
- __intel_pmu_pebs_event(event, iregs, base,
+ __intel_pmu_pebs_event(event, iregs, data, base,
top, bit, counts[bit],
setup_pebs_fixed_sample_data);
}
}
}
-static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs)
+static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_data *data)
{
short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
@@ -1997,7 +2000,7 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs)
if (WARN_ON_ONCE(!event->attr.precise_ip))
continue;
- __intel_pmu_pebs_event(event, iregs, base,
+ __intel_pmu_pebs_event(event, iregs, data, base,
top, bit, counts[bit],
setup_pebs_adaptive_sample_data);
}
diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
index 39e632ed6ca9..bbd1120ae161 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -475,7 +475,7 @@ enum perf_snb_uncore_imc_freerunning_types {
static struct freerunning_counters snb_uncore_imc_freerunning[] = {
[SNB_PCI_UNCORE_IMC_DATA_READS] = { SNB_UNCORE_PCI_IMC_DATA_READS_BASE,
0x0, 0x0, 1, 32 },
- [SNB_PCI_UNCORE_IMC_DATA_READS] = { SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE,
+ [SNB_PCI_UNCORE_IMC_DATA_WRITES] = { SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE,
0x0, 0x0, 1, 32 },
[SNB_PCI_UNCORE_IMC_GT_REQUESTS] = { SNB_UNCORE_PCI_IMC_GT_REQUESTS_BASE,
0x0, 0x0, 1, 32 },
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index ee2b9b9fc2a5..6a8edfe59b09 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -585,6 +585,7 @@ union perf_capabilities {
u64 pebs_baseline:1;
u64 perf_metrics:1;
u64 pebs_output_pt_available:1;
+ u64 anythread_deprecated:1;
};
u64 capabilities;
};
@@ -727,7 +728,7 @@ struct x86_pmu {
int pebs_record_size;
int pebs_buffer_size;
int max_pebs_events;
- void (*drain_pebs)(struct pt_regs *regs);
+ void (*drain_pebs)(struct pt_regs *regs, struct perf_sample_data *data);
struct event_constraint *pebs_constraints;
void (*pebs_aliases)(struct perf_event *event);
unsigned long large_pebs_flags;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d44858b69353..324ddd7fd0aa 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -639,6 +639,7 @@ struct kvm_vcpu_arch {
int cpuid_nent;
struct kvm_cpuid_entry2 *cpuid_entries;
+ unsigned long cr3_lm_rsvd_bits;
int maxphyaddr;
int max_tdp_level;
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 6960cd6d1f23..b9a7fd0a27e2 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -137,7 +137,9 @@ union cpuid10_edx {
struct {
unsigned int num_counters_fixed:5;
unsigned int bit_width_fixed:8;
- unsigned int reserved:19;
+ unsigned int reserved1:2;
+ unsigned int anythread_deprecated:1;
+ unsigned int reserved2:16;
} split;
unsigned int full;
};
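
With the reserved field split, AnyThread deprecation is advertised in CPUID.0xA EDX bit 15. Decoding it from the raw register mirrors the new bitfield layout:

#include <stdbool.h>
#include <stdint.h>

/* bits 4:0 num_counters_fixed, 12:5 bit_width_fixed,
 * 14:13 reserved1, bit 15 anythread_deprecated */
static bool anythread_deprecated(uint32_t edx)
{
        return (edx >> 15) & 1;
}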
diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h
index 172d3e4a9e4b..648eb23fe7f0 100644
--- a/arch/x86/include/asm/uv/uv.h
+++ b/arch/x86/include/asm/uv/uv.h
@@ -2,14 +2,8 @@
#ifndef _ASM_X86_UV_UV_H
#define _ASM_X86_UV_UV_H
-#include <asm/tlbflush.h>
-
enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC};
-struct cpumask;
-struct mm_struct;
-struct flush_tlb_info;
-
#ifdef CONFIG_X86_UV
#include <linux/efi.h>
@@ -44,10 +38,6 @@ static inline int is_uv_system(void) { return 0; }
static inline int is_uv_hubbed(int uv) { return 0; }
static inline void uv_cpu_init(void) { }
static inline void uv_system_init(void) { }
-static inline const struct cpumask *
-uv_flush_tlb_others(const struct cpumask *cpumask,
- const struct flush_tlb_info *info)
-{ return cpumask; }
#endif /* X86_UV */
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 3115caa7d7d0..1b98f8c12b96 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -33,7 +33,7 @@ static union uvh_apicid uvh_apicid;
static int uv_node_id;
/* Unpack AT/OEM/TABLE IDs to be NULL-terminated strings */
-static u8 uv_archtype[UV_AT_SIZE];
+static u8 uv_archtype[UV_AT_SIZE + 1];
static u8 oem_id[ACPI_OEM_ID_SIZE + 1];
static u8 oem_table_id[ACPI_OEM_TABLE_ID_SIZE + 1];
@@ -320,7 +320,7 @@ static int __init decode_arch_type(unsigned long ptr)
if (n > 0 && n < sizeof(uv_ate->archtype)) {
pr_info("UV: UVarchtype received from BIOS\n");
- uv_stringify(UV_AT_SIZE, uv_archtype, uv_ate->archtype);
+ uv_stringify(sizeof(uv_archtype), uv_archtype, uv_ate->archtype);
return 1;
}
return 0;
@@ -378,7 +378,7 @@ static int __init uv_set_system_type(char *_oem_id, char *_oem_table_id)
if (!early_get_arch_type())
/* If not found, use OEM ID for UVarchtype */
- uv_stringify(UV_AT_SIZE, uv_archtype, _oem_id);
+ uv_stringify(sizeof(uv_archtype), uv_archtype, oem_id);
/* Check if not hubbed */
if (strncmp(uv_archtype, "SGI", 3) != 0) {
diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c
index bb7e1132290b..f9e5352b3bef 100644
--- a/arch/x86/kernel/perf_regs.c
+++ b/arch/x86/kernel/perf_regs.c
@@ -101,8 +101,7 @@ u64 perf_reg_abi(struct task_struct *task)
}
void perf_get_regs_user(struct perf_regs *regs_user,
- struct pt_regs *regs,
- struct pt_regs *regs_user_copy)
+ struct pt_regs *regs)
{
regs_user->regs = task_pt_regs(current);
regs_user->abi = perf_reg_abi(current);
@@ -129,12 +128,20 @@ u64 perf_reg_abi(struct task_struct *task)
return PERF_SAMPLE_REGS_ABI_64;
}
+static DEFINE_PER_CPU(struct pt_regs, nmi_user_regs);
+
void perf_get_regs_user(struct perf_regs *regs_user,
- struct pt_regs *regs,
- struct pt_regs *regs_user_copy)
+ struct pt_regs *regs)
{
+ struct pt_regs *regs_user_copy = this_cpu_ptr(&nmi_user_regs);
struct pt_regs *user_regs = task_pt_regs(current);
+ if (!in_nmi()) {
+ regs_user->regs = user_regs;
+ regs_user->abi = perf_reg_abi(current);
+ return;
+ }
+
/*
* If we're in an NMI that interrupted task_pt_regs setup, then
* we can't sample user regs at all. This check isn't really
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 06a278b3701d..83637a2ff605 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -90,6 +90,20 @@ static int kvm_check_cpuid(struct kvm_cpuid_entry2 *entries, int nent)
return 0;
}
+void kvm_update_pv_runtime(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *best;
+
+ best = kvm_find_cpuid_entry(vcpu, KVM_CPUID_FEATURES, 0);
+
+ /*
+ * save the feature bitmap to avoid cpuid lookup for every PV
+ * operation
+ */
+ if (best)
+ vcpu->arch.pv_cpuid.features = best->eax;
+}
+
void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
@@ -124,13 +138,6 @@ void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
(best->eax & (1 << KVM_FEATURE_PV_UNHALT)))
best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);
- /*
- * save the feature bitmap to avoid cpuid lookup for every PV
- * operation
- */
- if (best)
- vcpu->arch.pv_cpuid.features = best->eax;
-
if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)) {
best = kvm_find_cpuid_entry(vcpu, 0x1, 0);
if (best)
@@ -162,6 +169,8 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
vcpu->arch.guest_supported_xcr0 =
(best->eax | ((u64)best->edx << 32)) & supported_xcr0;
+ kvm_update_pv_runtime(vcpu);
+
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
kvm_mmu_reset_context(vcpu);
@@ -169,6 +178,8 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
vcpu->arch.cr4_guest_rsvd_bits =
__cr4_reserved_bits(guest_cpuid_has, vcpu);
+ vcpu->arch.cr3_lm_rsvd_bits = rsvd_bits(cpuid_maxphyaddr(vcpu), 63);
+
/* Invoke the vendor callback only after the above state is updated. */
kvm_x86_ops.vcpu_after_set_cpuid(vcpu);
}
@@ -672,7 +683,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
edx.split.num_counters_fixed = min(cap.num_counters_fixed, MAX_FIXED_COUNTERS);
edx.split.bit_width_fixed = cap.bit_width_fixed;
- edx.split.reserved = 0;
+ edx.split.anythread_deprecated = 1;
+ edx.split.reserved1 = 0;
+ edx.split.reserved2 = 0;
entry->eax = eax.full;
entry->ebx = cap.events_mask;
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index bf8577947ed2..f7a6e8f83783 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -11,6 +11,7 @@ extern u32 kvm_cpu_caps[NCAPINTS] __read_mostly;
void kvm_set_cpu_caps(void);
void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu);
+void kvm_update_pv_runtime(struct kvm_vcpu *vcpu);
struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
u32 function, u32 index);
int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 0d917eb70319..56cae1ff9e3f 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -4046,6 +4046,12 @@ static int em_clflush(struct x86_emulate_ctxt *ctxt)
return X86EMUL_CONTINUE;
}
+static int em_clflushopt(struct x86_emulate_ctxt *ctxt)
+{
+ /* emulating clflushopt regardless of cpuid */
+ return X86EMUL_CONTINUE;
+}
+
static int em_movsxd(struct x86_emulate_ctxt *ctxt)
{
ctxt->dst.val = (s32) ctxt->src.val;
@@ -4585,7 +4591,7 @@ static const struct opcode group11[] = {
};
static const struct gprefix pfx_0f_ae_7 = {
- I(SrcMem | ByteOp, em_clflush), N, N, N,
+ I(SrcMem | ByteOp, em_clflush), I(SrcMem | ByteOp, em_clflushopt), N, N,
};
static const struct group_dual group15 = { {
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 1f96adff8dc4..5bb1939b65d8 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -856,12 +856,14 @@ static int pte_list_add(struct kvm_vcpu *vcpu, u64 *spte,
} else {
rmap_printk("pte_list_add: %p %llx many->many\n", spte, *spte);
desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
- while (desc->sptes[PTE_LIST_EXT-1] && desc->more) {
- desc = desc->more;
+ while (desc->sptes[PTE_LIST_EXT-1]) {
count += PTE_LIST_EXT;
- }
- if (desc->sptes[PTE_LIST_EXT-1]) {
- desc->more = mmu_alloc_pte_list_desc(vcpu);
+
+ if (!desc->more) {
+ desc->more = mmu_alloc_pte_list_desc(vcpu);
+ desc = desc->more;
+ break;
+ }
desc = desc->more;
}
for (i = 0; desc->sptes[i]; ++i)
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 27e381c9da6c..ff28a5c6abd6 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -49,7 +49,14 @@ bool is_tdp_mmu_root(struct kvm *kvm, hpa_t hpa)
{
struct kvm_mmu_page *sp;
+ if (!kvm->arch.tdp_mmu_enabled)
+ return false;
+ if (WARN_ON(!VALID_PAGE(hpa)))
+ return false;
+
sp = to_shadow_page(hpa);
+ if (WARN_ON(!sp))
+ return false;
return sp->tdp_mmu_page && sp->root_count;
}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 2f32fd09e259..1e81cfebd491 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3741,6 +3741,7 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ struct kvm_cpuid_entry2 *best;
vcpu->arch.xsaves_enabled = guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
boot_cpu_has(X86_FEATURE_XSAVE) &&
@@ -3753,6 +3754,13 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
/* Check again whether INVPCID interception is required */
svm_check_invpcid(svm);
+ /* For sev guests, the memory encryption bit is not reserved in CR3. */
+ if (sev_guest(vcpu->kvm)) {
+ best = kvm_find_cpuid_entry(vcpu, 0x8000001F, 0);
+ if (best)
+ vcpu->arch.cr3_lm_rsvd_bits &= ~(1UL << (best->ebx & 0x3f));
+ }
+
if (!kvm_vcpu_apicv_active(vcpu))
return;
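
For SEV guests the C-bit, whose position is reported in CPUID 0x8000001F EBX[5:0], is a memory-encryption attribute rather than part of the physical address, so it must not be treated as reserved in CR3. The adjustment reduces to one mask operation (helper name hypothetical):

#include <stdint.h>

/* Drop the C-bit from the cached CR3 reserved-bit mask. */
static uint64_t unreserve_cbit(uint64_t cr3_rsvd_bits, uint32_t ebx)
{
        return cr3_rsvd_bits & ~(1ULL << (ebx & 0x3f));
}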
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f5ede41bf9e6..078a39d489fe 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -255,11 +255,10 @@ static struct kmem_cache *x86_emulator_cache;
/*
* When called, it means the previous get/set msr reached an invalid msr.
- * Return 0 if we want to ignore/silent this failed msr access, or 1 if we want
- * to fail the caller.
+ * Return true if we want to ignore/silent this failed msr access.
*/
-static int kvm_msr_ignored_check(struct kvm_vcpu *vcpu, u32 msr,
- u64 data, bool write)
+static bool kvm_msr_ignored_check(struct kvm_vcpu *vcpu, u32 msr,
+ u64 data, bool write)
{
const char *op = write ? "wrmsr" : "rdmsr";
@@ -268,11 +267,11 @@ static int kvm_msr_ignored_check(struct kvm_vcpu *vcpu, u32 msr,
kvm_pr_unimpl("ignored %s: 0x%x data 0x%llx\n",
op, msr, data);
/* Mask the error */
- return 0;
+ return true;
} else {
kvm_debug_ratelimited("unhandled %s: 0x%x data 0x%llx\n",
op, msr, data);
- return -ENOENT;
+ return false;
}
}
@@ -1042,7 +1041,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
}
if (is_long_mode(vcpu) &&
- (cr3 & rsvd_bits(cpuid_maxphyaddr(vcpu), 63)))
+ (cr3 & vcpu->arch.cr3_lm_rsvd_bits))
return 1;
else if (is_pae_paging(vcpu) &&
!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
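
Caching cr3_lm_rsvd_bits at CPUID-update time turns the long-mode CR3 check into a single AND against a precomputed rsvd_bits(maxphyaddr, 63) mask. A sketch of how such a mask is built (simplified, assumes 0 <= lo <= hi <= 63; not the kernel's exact helper):

#include <stdint.h>

/* Build a mask with bits lo..hi set, as rsvd_bits(maxphyaddr, 63)
 * does for the cached CR3 mask. */
static uint64_t rsvd_bits(int lo, int hi)
{
        return (~0ULL >> (63 - (hi - lo))) << lo;
}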
@@ -1416,7 +1415,8 @@ static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
if (r == KVM_MSR_RET_INVALID) {
/* Unconditionally clear the output for simplicity */
*data = 0;
- r = kvm_msr_ignored_check(vcpu, index, 0, false);
+ if (kvm_msr_ignored_check(vcpu, index, 0, false))
+ r = 0;
}
if (r)
@@ -1540,7 +1540,7 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
struct msr_data msr;
if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_WRITE))
- return -EPERM;
+ return KVM_MSR_RET_FILTERED;
switch (index) {
case MSR_FS_BASE:
@@ -1581,7 +1581,8 @@ static int kvm_set_msr_ignored_check(struct kvm_vcpu *vcpu,
int ret = __kvm_set_msr(vcpu, index, data, host_initiated);
if (ret == KVM_MSR_RET_INVALID)
- ret = kvm_msr_ignored_check(vcpu, index, data, true);
+ if (kvm_msr_ignored_check(vcpu, index, data, true))
+ ret = 0;
return ret;
}
@@ -1599,7 +1600,7 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
int ret;
if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ))
- return -EPERM;
+ return KVM_MSR_RET_FILTERED;
msr.index = index;
msr.host_initiated = host_initiated;
@@ -1618,7 +1619,8 @@ static int kvm_get_msr_ignored_check(struct kvm_vcpu *vcpu,
if (ret == KVM_MSR_RET_INVALID) {
/* Unconditionally clear *data for simplicity */
*data = 0;
- ret = kvm_msr_ignored_check(vcpu, index, 0, false);
+ if (kvm_msr_ignored_check(vcpu, index, 0, false))
+ ret = 0;
}
return ret;
@@ -1662,9 +1664,9 @@ static int complete_emulated_wrmsr(struct kvm_vcpu *vcpu)
static u64 kvm_msr_reason(int r)
{
switch (r) {
- case -ENOENT:
+ case KVM_MSR_RET_INVALID:
return KVM_MSR_EXIT_REASON_UNKNOWN;
- case -EPERM:
+ case KVM_MSR_RET_FILTERED:
return KVM_MSR_EXIT_REASON_FILTER;
default:
return KVM_MSR_EXIT_REASON_INVAL;
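
kvm_msr_reason() translates the internal codes into the reason field of the KVM_EXIT_X86_RDMSR/KVM_EXIT_X86_WRMSR userspace exits. A minimal sketch of the consuming side in a VMM, assuming the msr exit layout from the KVM UAPI headers:

	switch (run->exit_reason) {
	case KVM_EXIT_X86_RDMSR:
		if (run->msr.reason == KVM_MSR_EXIT_REASON_FILTER) {
			/* An MSR we filtered ourselves: make KVM inject #GP. */
			run->msr.error = 1;
		} else {
			run->msr.data  = 0;	/* emulate as read-as-zero */
			run->msr.error = 0;
		}
		break;
	}
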
@@ -1965,7 +1967,7 @@ static void kvm_write_system_time(struct kvm_vcpu *vcpu, gpa_t system_time,
struct kvm_arch *ka = &vcpu->kvm->arch;
if (vcpu->vcpu_id == 0 && !host_initiated) {
- if (ka->boot_vcpu_runs_old_kvmclock && old_msr)
+ if (ka->boot_vcpu_runs_old_kvmclock != old_msr)
kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
ka->boot_vcpu_runs_old_kvmclock = old_msr;
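
The old && condition fired only when the boot vCPU was already using the legacy MSR and wrote it again, i.e. exactly when nothing changed; the != form requests a masterclock update precisely when the guest switches between the legacy and new kvmclock MSRs, in either direction:

	/*
	 * boot_vcpu_runs_old_kvmclock  old_msr | old (&&)  fixed (!=)
	 *            false             false  |   no          no
	 *            false             true   |   no          yes  (new -> old)
	 *            true              false  |   no          yes  (old -> new)
	 *            true              true   |   yes         no   (no switch)
	 */
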
@@ -3063,9 +3065,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
/* Values other than LBR and BTF are vendor-specific,
thus reserved and should throw a #GP */
return 1;
- }
- vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
- __func__, data);
+ } else if (report_ignored_msrs)
+ vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
+ __func__, data);
break;
case 0x200 ... 0x2ff:
return kvm_mtrr_set_msr(vcpu, msr, data);
@@ -3463,29 +3465,63 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = vcpu->arch.efer;
break;
case MSR_KVM_WALL_CLOCK:
+ if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE))
+ return 1;
+
+ msr_info->data = vcpu->kvm->arch.wall_clock;
+ break;
case MSR_KVM_WALL_CLOCK_NEW:
+ if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2))
+ return 1;
+
msr_info->data = vcpu->kvm->arch.wall_clock;
break;
case MSR_KVM_SYSTEM_TIME:
+ if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE))
+ return 1;
+
+ msr_info->data = vcpu->arch.time;
+ break;
case MSR_KVM_SYSTEM_TIME_NEW:
+ if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2))
+ return 1;
+
msr_info->data = vcpu->arch.time;
break;
case MSR_KVM_ASYNC_PF_EN:
+ if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF))
+ return 1;
+
msr_info->data = vcpu->arch.apf.msr_en_val;
break;
case MSR_KVM_ASYNC_PF_INT:
+ if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT))
+ return 1;
+
msr_info->data = vcpu->arch.apf.msr_int_val;
break;
case MSR_KVM_ASYNC_PF_ACK:
+ if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF))
+ return 1;
+
msr_info->data = 0;
break;
case MSR_KVM_STEAL_TIME:
+ if (!guest_pv_has(vcpu, KVM_FEATURE_STEAL_TIME))
+ return 1;
+
msr_info->data = vcpu->arch.st.msr_val;
break;
case MSR_KVM_PV_EOI_EN:
+ if (!guest_pv_has(vcpu, KVM_FEATURE_PV_EOI))
+ return 1;
+
msr_info->data = vcpu->arch.pv_eoi.msr_val;
break;
case MSR_KVM_POLL_CONTROL:
+ if (!guest_pv_has(vcpu, KVM_FEATURE_POLL_CONTROL))
+ return 1;
+
msr_info->data = vcpu->arch.msr_kvm_poll_control;
break;
case MSR_IA32_P5_MC_ADDR:
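
Each paravirtual MSR read is now gated on the matching KVM_FEATURE_* CPUID bit, so a guest whose CPUID does not advertise a PV feature takes a #GP instead of reading state it was never offered. A sketch of the gating helper's semantics (the real guest_pv_has() is introduced elsewhere in the series; the body here is illustrative only):

	static bool guest_pv_has(struct kvm_vcpu *vcpu, unsigned int kvm_feature)
	{
		struct kvm_cpuid_entry2 *best;

		/* Enforcement is opt-in: legacy guests keep the old behavior. */
		if (!vcpu->arch.pv_cpuid.enforce)
			return true;

		best = kvm_find_cpuid_entry(vcpu, KVM_CPUID_FEATURES, 0);
		return best && (best->eax & (1u << kvm_feature));
	}
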
@@ -4575,6 +4611,8 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
case KVM_CAP_ENFORCE_PV_FEATURE_CPUID:
vcpu->arch.pv_cpuid.enforce = cap->args[0];
+ if (vcpu->arch.pv_cpuid.enforce)
+ kvm_update_pv_runtime(vcpu);
return 0;
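
Enforcement is flipped on per vCPU through KVM_ENABLE_CAP, and doing so now also refreshes the PV runtime state via kvm_update_pv_runtime() so already-granted features are re-evaluated against the guest's CPUID. A minimal userspace sketch, assuming an already-created vCPU fd:

	struct kvm_enable_cap cap = {
		.cap     = KVM_CAP_ENFORCE_PV_FEATURE_CPUID,
		.args[0] = 1,	/* enable strict PV feature gating */
	};

	if (ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap) < 0)
		err(1, "KVM_ENABLE_CAP");
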
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 3900ab0c6004..e7ca622a468f 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -376,7 +376,13 @@ int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r,
int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva);
bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type);
-#define KVM_MSR_RET_INVALID 2
+/*
+ * Internal error codes used to indicate that MSR emulation encountered
+ * an error that should result in a #GP in the guest, unless userspace
+ * handles it.
+ */
+#define KVM_MSR_RET_INVALID 2 /* in-kernel MSR emulation #GP condition */
+#define KVM_MSR_RET_FILTERED 3 /* #GP due to userspace MSR filter */
#define __cr4_reserved_bits(__cpu_has, __c) \
({ \