diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-08-22 13:52:44 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-08-22 13:52:44 -0700 |
commit | b372115311942202346d93849991f07382783ef1 (patch) | |
tree | 14d52554acb0cdba1774be95d3877c47fda8bbff /arch | |
parent | 5bed49adfe899667887db0739830190309c9011b (diff) | |
parent | 0027ff2a75f9dcf0537ac0a65c5840b0e21a4950 (diff) | |
download | lwn-b372115311942202346d93849991f07382783ef1.tar.gz lwn-b372115311942202346d93849991f07382783ef1.zip |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull second set of KVM updates from Paolo Bonzini:
"ARM:
- Support for Group0 interrupts in guests
- Cache management optimizations for ARMv8.4 systems
- Userspace interface for RAS
- Fault path optimization
- Emulated physical timer fixes
- Random cleanups
x86:
- fixes for L1TF
- a new test case
- non-support for SGX (inject the right exception in the guest)
- fix lockdep false positive"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (49 commits)
KVM: VMX: fixes for vmentry_l1d_flush module parameter
kvm: selftest: add dirty logging test
kvm: selftest: pass in extra memory when create vm
kvm: selftest: include the tools headers
kvm: selftest: unify the guest port macros
tools: introduce test_and_clear_bit
KVM: x86: SVM: Call x86_spec_ctrl_set_guest/host() with interrupts disabled
KVM: vmx: Inject #UD for SGX ENCLS instruction in guest
KVM: vmx: Add defines for SGX ENCLS exiting
x86/kvm/vmx: Fix coding style in vmx_setup_l1d_flush()
x86: kvm: avoid unused variable warning
KVM: Documentation: rename the capability of KVM_CAP_ARM_SET_SERROR_ESR
KVM: arm/arm64: Skip updating PTE entry if no change
KVM: arm/arm64: Skip updating PMD entry if no change
KVM: arm: Use true and false for boolean values
KVM: arm/arm64: vgic: Do not use spin_lock_irqsave/restore with irq disabled
KVM: arm/arm64: vgic: Move DEBUG_SPINLOCK_BUG_ON to vgic.h
KVM: arm: vgic-v3: Add support for ICC_SGI0R and ICC_ASGI1R accesses
KVM: arm64: vgic-v3: Add support for ICC_SGI0R_EL1 and ICC_ASGI1R_EL1 accesses
KVM: arm/arm64: vgic-v3: Add core support for Group0 SGIs
...
Diffstat (limited to 'arch')
26 files changed, 359 insertions, 79 deletions
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index fe2fb1ddd771..77121b713bef 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -107,9 +107,19 @@ static inline unsigned long *vcpu_hcr(const struct kvm_vcpu *vcpu) return (unsigned long *)&vcpu->arch.hcr; } +static inline void vcpu_clear_wfe_traps(struct kvm_vcpu *vcpu) +{ + vcpu->arch.hcr &= ~HCR_TWE; +} + +static inline void vcpu_set_wfe_traps(struct kvm_vcpu *vcpu) +{ + vcpu->arch.hcr |= HCR_TWE; +} + static inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu) { - return 1; + return true; } static inline unsigned long *vcpu_pc(struct kvm_vcpu *vcpu) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 1f1fe4109b02..79906cecb091 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -216,6 +216,11 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); unsigned long kvm_call_hyp(void *hypfn, ...); void force_vm_exit(const cpumask_t *mask); +int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu, + struct kvm_vcpu_events *events); + +int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, + struct kvm_vcpu_events *events); #define KVM_ARCH_WANT_MMU_NOTIFIER int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 8553d68b7c8a..265ea9cf7df7 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -75,17 +75,9 @@ phys_addr_t kvm_get_idmap_vector(void); int kvm_mmu_init(void); void kvm_clear_hyp_idmap(void); -static inline void kvm_set_pmd(pmd_t *pmd, pmd_t new_pmd) -{ - *pmd = new_pmd; - dsb(ishst); -} - -static inline void kvm_set_pte(pte_t *pte, pte_t new_pte) -{ - *pte = new_pte; - dsb(ishst); -} +#define kvm_mk_pmd(ptep) __pmd(__pa(ptep) | PMD_TYPE_TABLE) +#define kvm_mk_pud(pmdp) __pud(__pa(pmdp) | PMD_TYPE_TABLE) +#define kvm_mk_pgd(pudp) ({ BUILD_BUG(); 0; }) static inline pte_t kvm_s2pte_mkwrite(pte_t pte) { diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index 16e006f708ca..4602464ebdfb 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -27,6 +27,7 @@ #define __KVM_HAVE_GUEST_DEBUG #define __KVM_HAVE_IRQ_LINE #define __KVM_HAVE_READONLY_MEM +#define __KVM_HAVE_VCPU_EVENTS #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 @@ -125,6 +126,18 @@ struct kvm_sync_regs { struct kvm_arch_memory_slot { }; +/* for KVM_GET/SET_VCPU_EVENTS */ +struct kvm_vcpu_events { + struct { + __u8 serror_pending; + __u8 serror_has_esr; + /* Align it to 8 bytes */ + __u8 pad[6]; + __u64 serror_esr; + } exception; + __u32 reserved[12]; +}; + /* If you need to interpret the index values, here is the key: */ #define KVM_REG_ARM_COPROC_MASK 0x000000000FFF0000 #define KVM_REG_ARM_COPROC_SHIFT 16 diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index 3a02e76699a6..450c7a4fbc8a 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -246,6 +246,7 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu, const struct coproc_reg *r) { u64 reg; + bool g1; if (!p->is_write) return read_from_write_only(vcpu, p); @@ -253,7 +254,25 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu, reg = (u64)*vcpu_reg(vcpu, p->Rt2) << 32; reg |= *vcpu_reg(vcpu, p->Rt1) ; - vgic_v3_dispatch_sgi(vcpu, reg); + /* + * In a system where GICD_CTLR.DS=1, a ICC_SGI0R access generates + * Group0 SGIs only, while ICC_SGI1R can generate either group, + * depending on the SGI configuration. ICC_ASGI1R is effectively + * equivalent to ICC_SGI0R, as there is no "alternative" secure + * group. + */ + switch (p->Op1) { + default: /* Keep GCC quiet */ + case 0: /* ICC_SGI1R */ + g1 = true; + break; + case 1: /* ICC_ASGI1R */ + case 2: /* ICC_SGI0R */ + g1 = false; + break; + } + + vgic_v3_dispatch_sgi(vcpu, reg, g1); return true; } @@ -459,6 +478,10 @@ static const struct coproc_reg cp15_regs[] = { /* ICC_SGI1R */ { CRm64(12), Op1( 0), is64, access_gic_sgi}, + /* ICC_ASGI1R */ + { CRm64(12), Op1( 1), is64, access_gic_sgi}, + /* ICC_SGI0R */ + { CRm64(12), Op1( 2), is64, access_gic_sgi}, /* VBAR: swapped by interrupt.S. */ { CRn(12), CRm( 0), Op1( 0), Op2( 0), is32, diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index a18f33edc471..2b8de885b2bf 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -261,6 +261,29 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, return -EINVAL; } + +int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu, + struct kvm_vcpu_events *events) +{ + events->exception.serror_pending = !!(*vcpu_hcr(vcpu) & HCR_VA); + + return 0; +} + +int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, + struct kvm_vcpu_events *events) +{ + bool serror_pending = events->exception.serror_pending; + bool has_esr = events->exception.serror_has_esr; + + if (serror_pending && has_esr) + return -EINVAL; + else if (serror_pending) + kvm_inject_vabt(vcpu); + + return 0; +} + int __attribute_const__ kvm_target_cpu(void) { switch (read_cpuid_part()) { diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index be3bf3d08916..ae1f70450fb2 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -50,7 +50,8 @@ #define ARM64_HW_DBM 29 #define ARM64_SSBD 30 #define ARM64_MISMATCHED_CACHE_TYPE 31 +#define ARM64_HAS_STAGE2_FWB 32 -#define ARM64_NCAPS 32 +#define ARM64_NCAPS 33 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 6dd285e979c9..aa45df752a16 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -23,6 +23,7 @@ #include <asm/types.h> /* Hyp Configuration Register (HCR) bits */ +#define HCR_FWB (UL(1) << 46) #define HCR_TEA (UL(1) << 37) #define HCR_TERR (UL(1) << 36) #define HCR_TLOR (UL(1) << 35) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 0c97e45d1dc3..6106a85ae0be 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -63,6 +63,8 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) /* trap error record accesses */ vcpu->arch.hcr_el2 |= HCR_TERR; } + if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) + vcpu->arch.hcr_el2 |= HCR_FWB; if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) vcpu->arch.hcr_el2 &= ~HCR_RW; @@ -81,6 +83,21 @@ static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu) return (unsigned long *)&vcpu->arch.hcr_el2; } +static inline void vcpu_clear_wfe_traps(struct kvm_vcpu *vcpu) +{ + vcpu->arch.hcr_el2 &= ~HCR_TWE; +} + +static inline void vcpu_set_wfe_traps(struct kvm_vcpu *vcpu) +{ + vcpu->arch.hcr_el2 |= HCR_TWE; +} + +static inline unsigned long vcpu_get_vsesr(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.vsesr_el2; +} + static inline void vcpu_set_vsesr(struct kvm_vcpu *vcpu, u64 vsesr) { vcpu->arch.vsesr_el2 = vsesr; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index fe8777b12f86..f26055f2306e 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -350,6 +350,11 @@ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); +int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu, + struct kvm_vcpu_events *events); + +int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, + struct kvm_vcpu_events *events); #define KVM_ARCH_WANT_MMU_NOTIFIER int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); @@ -378,16 +383,23 @@ void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run, int kvm_perf_init(void); int kvm_perf_teardown(void); +void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 syndrome); + struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); -void __kvm_set_tpidr_el2(u64 tpidr_el2); DECLARE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state); static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, unsigned long hyp_stack_ptr, unsigned long vector_ptr) { - u64 tpidr_el2; + /* + * Calculate the raw per-cpu offset without a translation from the + * kernel's mapping to the linear mapping, and store it in tpidr_el2 + * so that we can use adr_l to access per-cpu variables in EL2. + */ + u64 tpidr_el2 = ((u64)this_cpu_ptr(&kvm_host_cpu_state) - + (u64)kvm_ksym_ref(kvm_host_cpu_state)); /* * Call initialization code, and switch to the full blown HYP code. @@ -396,17 +408,7 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, * cpus_have_const_cap() wrapper. */ BUG_ON(!static_branch_likely(&arm64_const_caps_ready)); - __kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr); - - /* - * Calculate the raw per-cpu offset without a translation from the - * kernel's mapping to the linear mapping, and store it in tpidr_el2 - * so that we can use adr_l to access per-cpu variables in EL2. - */ - tpidr_el2 = (u64)this_cpu_ptr(&kvm_host_cpu_state) - - (u64)kvm_ksym_ref(kvm_host_cpu_state); - - kvm_call_hyp(__kvm_set_tpidr_el2, tpidr_el2); + __kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr, tpidr_el2); } static inline bool kvm_arch_check_sve_has_vhe(void) diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index fb9a7127bb75..d6fff7de5539 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -169,8 +169,12 @@ phys_addr_t kvm_get_idmap_vector(void); int kvm_mmu_init(void); void kvm_clear_hyp_idmap(void); -#define kvm_set_pte(ptep, pte) set_pte(ptep, pte) -#define kvm_set_pmd(pmdp, pmd) set_pmd(pmdp, pmd) +#define kvm_mk_pmd(ptep) \ + __pmd(__phys_to_pmd_val(__pa(ptep)) | PMD_TYPE_TABLE) +#define kvm_mk_pud(pmdp) \ + __pud(__phys_to_pud_val(__pa(pmdp)) | PMD_TYPE_TABLE) +#define kvm_mk_pgd(pudp) \ + __pgd(__phys_to_pgd_val(__pa(pudp)) | PUD_TYPE_TABLE) static inline pte_t kvm_s2pte_mkwrite(pte_t pte) { @@ -267,6 +271,15 @@ static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size) { void *va = page_address(pfn_to_page(pfn)); + /* + * With FWB, we ensure that the guest always accesses memory using + * cacheable attributes, and we don't have to clean to PoC when + * faulting in pages. Furthermore, FWB implies IDC, so cleaning to + * PoU is not required either in this case. + */ + if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) + return; + kvm_flush_dcache_to_poc(va, size); } @@ -287,20 +300,26 @@ static inline void __invalidate_icache_guest_page(kvm_pfn_t pfn, static inline void __kvm_flush_dcache_pte(pte_t pte) { - struct page *page = pte_page(pte); - kvm_flush_dcache_to_poc(page_address(page), PAGE_SIZE); + if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) { + struct page *page = pte_page(pte); + kvm_flush_dcache_to_poc(page_address(page), PAGE_SIZE); + } } static inline void __kvm_flush_dcache_pmd(pmd_t pmd) { - struct page *page = pmd_page(pmd); - kvm_flush_dcache_to_poc(page_address(page), PMD_SIZE); + if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) { + struct page *page = pmd_page(pmd); + kvm_flush_dcache_to_poc(page_address(page), PMD_SIZE); + } } static inline void __kvm_flush_dcache_pud(pud_t pud) { - struct page *page = pud_page(pud); - kvm_flush_dcache_to_poc(page_address(page), PUD_SIZE); + if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) { + struct page *page = pud_page(pud); + kvm_flush_dcache_to_poc(page_address(page), PUD_SIZE); + } } #define kvm_virt_to_phys(x) __pa_symbol(x) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 49d99214f43c..b96442960aea 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -155,6 +155,13 @@ #define MT_S2_NORMAL 0xf #define MT_S2_DEVICE_nGnRE 0x1 +/* + * Memory types for Stage-2 translation when ID_AA64MMFR2_EL1.FWB is 0001 + * Stage-2 enforces Normal-WB and Device-nGnRE + */ +#define MT_S2_FWB_NORMAL 6 +#define MT_S2_FWB_DEVICE_nGnRE 1 + #ifdef CONFIG_ARM64_4K_PAGES #define IOREMAP_MAX_ORDER (PUD_SHIFT) #else diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 108ecad7acc5..78b942c1bea4 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -67,8 +67,28 @@ #define PAGE_HYP_RO __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY | PTE_HYP_XN) #define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP) -#define PAGE_S2 __pgprot(_PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY | PTE_S2_XN) -#define PAGE_S2_DEVICE __pgprot(_PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_S2_XN) +#define PAGE_S2_MEMATTR(attr) \ + ({ \ + u64 __val; \ + if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) \ + __val = PTE_S2_MEMATTR(MT_S2_FWB_ ## attr); \ + else \ + __val = PTE_S2_MEMATTR(MT_S2_ ## attr); \ + __val; \ + }) + +#define PAGE_S2_XN \ + ({ \ + u64 __val; \ + if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC)) \ + __val = 0; \ + else \ + __val = PTE_S2_XN; \ + __val; \ + }) + +#define PAGE_S2 __pgprot(_PROT_DEFAULT | PAGE_S2_MEMATTR(NORMAL) | PTE_S2_RDONLY | PAGE_S2_XN) +#define PAGE_S2_DEVICE __pgprot(_PROT_DEFAULT | PAGE_S2_MEMATTR(DEVICE_nGnRE) | PTE_S2_RDONLY | PAGE_S2_XN) #define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) #define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index e205ec8489e9..c1470931b897 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -314,6 +314,8 @@ #define SYS_ICC_DIR_EL1 sys_reg(3, 0, 12, 11, 1) #define SYS_ICC_RPR_EL1 sys_reg(3, 0, 12, 11, 3) #define SYS_ICC_SGI1R_EL1 sys_reg(3, 0, 12, 11, 5) +#define SYS_ICC_ASGI1R_EL1 sys_reg(3, 0, 12, 11, 6) +#define SYS_ICC_SGI0R_EL1 sys_reg(3, 0, 12, 11, 7) #define SYS_ICC_IAR1_EL1 sys_reg(3, 0, 12, 12, 0) #define SYS_ICC_EOIR1_EL1 sys_reg(3, 0, 12, 12, 1) #define SYS_ICC_HPPIR1_EL1 sys_reg(3, 0, 12, 12, 2) @@ -579,6 +581,7 @@ #define ID_AA64MMFR1_VMIDBITS_16 2 /* id_aa64mmfr2 */ +#define ID_AA64MMFR2_FWB_SHIFT 40 #define ID_AA64MMFR2_AT_SHIFT 32 #define ID_AA64MMFR2_LVA_SHIFT 16 #define ID_AA64MMFR2_IESB_SHIFT 12 diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 4e76630dd655..97c3478ee6e7 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -39,6 +39,7 @@ #define __KVM_HAVE_GUEST_DEBUG #define __KVM_HAVE_IRQ_LINE #define __KVM_HAVE_READONLY_MEM +#define __KVM_HAVE_VCPU_EVENTS #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 @@ -154,6 +155,18 @@ struct kvm_sync_regs { struct kvm_arch_memory_slot { }; +/* for KVM_GET/SET_VCPU_EVENTS */ +struct kvm_vcpu_events { + struct { + __u8 serror_pending; + __u8 serror_has_esr; + /* Align it to 8 bytes */ + __u8 pad[6]; + __u64 serror_esr; + } exception; + __u32 reserved[12]; +}; + /* If you need to interpret the index values, here is the key: */ #define KVM_REG_ARM_COPROC_MASK 0x000000000FFF0000 #define KVM_REG_ARM_COPROC_SHIFT 16 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 611e8921c3d4..e238b7932096 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -192,6 +192,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = { }; static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_FWB_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_AT_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LVA_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_IESB_SHIFT, 4, 0), @@ -1026,6 +1027,14 @@ static void cpu_copy_el2regs(const struct arm64_cpu_capabilities *__unused) } #endif +static void cpu_has_fwb(const struct arm64_cpu_capabilities *__unused) +{ + u64 val = read_sysreg_s(SYS_CLIDR_EL1); + + /* Check that CLIDR_EL1.LOU{U,IS} are both 0 */ + WARN_ON(val & (7 << 27 | 7 << 21)); +} + static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "GIC system register CPU interface", @@ -1182,6 +1191,17 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cache_dic, }, + { + .desc = "Stage-2 Force Write-Back", + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .capability = ARM64_HAS_STAGE2_FWB, + .sys_reg = SYS_ID_AA64MMFR2_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64MMFR2_FWB_SHIFT, + .min_field_value = 1, + .matches = has_cpuid_feature, + .cpu_enable = cpu_has_fwb, + }, #ifdef CONFIG_ARM64_HW_AFDBM { /* diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index cdd4d9d6d575..07256b08226c 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -289,6 +289,39 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, return -EINVAL; } +int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu, + struct kvm_vcpu_events *events) +{ + events->exception.serror_pending = !!(vcpu->arch.hcr_el2 & HCR_VSE); + events->exception.serror_has_esr = cpus_have_const_cap(ARM64_HAS_RAS_EXTN); + + if (events->exception.serror_pending && events->exception.serror_has_esr) + events->exception.serror_esr = vcpu_get_vsesr(vcpu); + + return 0; +} + +int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, + struct kvm_vcpu_events *events) +{ + bool serror_pending = events->exception.serror_pending; + bool has_esr = events->exception.serror_has_esr; + + if (serror_pending && has_esr) { + if (!cpus_have_const_cap(ARM64_HAS_RAS_EXTN)) + return -EINVAL; + + if (!((events->exception.serror_esr) & ~ESR_ELx_ISS_MASK)) + kvm_set_sei_esr(vcpu, events->exception.serror_esr); + else + return -EINVAL; + } else if (serror_pending) { + kvm_inject_vabt(vcpu); + } + + return 0; +} + int __attribute_const__ kvm_target_cpu(void) { unsigned long implementor = read_cpuid_implementor(); diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S index 6fd91b31a131..ea9225160786 100644 --- a/arch/arm64/kvm/hyp-init.S +++ b/arch/arm64/kvm/hyp-init.S @@ -57,6 +57,7 @@ __invalid: * x0: HYP pgd * x1: HYP stack * x2: HYP vectors + * x3: per-CPU offset */ __do_hyp_init: /* Check for a stub HVC call */ @@ -119,9 +120,8 @@ CPU_BE( orr x4, x4, #SCTLR_ELx_EE) mov sp, x1 msr vbar_el2, x2 - /* copy tpidr_el1 into tpidr_el2 for use by HYP */ - mrs x1, tpidr_el1 - msr tpidr_el2, x1 + /* Set tpidr_el2 for use by HYP */ + msr tpidr_el2, x3 /* Hello, World! */ eret diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c index 35bc16832efe..9ce223944983 100644 --- a/arch/arm64/kvm/hyp/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/sysreg-sr.c @@ -288,8 +288,3 @@ void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu) vcpu->arch.sysregs_loaded_on_cpu = false; } - -void __hyp_text __kvm_set_tpidr_el2(u64 tpidr_el2) -{ - asm("msr tpidr_el2, %0": : "r" (tpidr_el2)); -} diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index d8e71659ba7e..a55e91dfcf8f 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -164,9 +164,9 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu) inject_undef64(vcpu); } -static void pend_guest_serror(struct kvm_vcpu *vcpu, u64 esr) +void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 esr) { - vcpu_set_vsesr(vcpu, esr); + vcpu_set_vsesr(vcpu, esr & ESR_ELx_ISS_MASK); *vcpu_hcr(vcpu) |= HCR_VSE; } @@ -184,5 +184,5 @@ static void pend_guest_serror(struct kvm_vcpu *vcpu, u64 esr) */ void kvm_inject_vabt(struct kvm_vcpu *vcpu) { - pend_guest_serror(vcpu, ESR_ELx_ISV); + kvm_set_sei_esr(vcpu, ESR_ELx_ISV); } diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 4e4aedaf7ab7..e37c78bbe1ca 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -77,8 +77,12 @@ int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_ARM_PMU_V3: r = kvm_arm_support_pmu_v3(); break; + case KVM_CAP_ARM_INJECT_SERROR_ESR: + r = cpus_have_const_cap(ARM64_HAS_RAS_EXTN); + break; case KVM_CAP_SET_GUEST_DEBUG: case KVM_CAP_VCPU_ATTRIBUTES: + case KVM_CAP_VCPU_EVENTS: r = 1; break; default: diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index a4363735d3f8..22fbbdbece3c 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -194,7 +194,16 @@ static bool access_dcsw(struct kvm_vcpu *vcpu, if (!p->is_write) return read_from_write_only(vcpu, p, r); - kvm_set_way_flush(vcpu); + /* + * Only track S/W ops if we don't have FWB. It still indicates + * that the guest is a bit broken (S/W operations should only + * be done by firmware, knowing that there is only a single + * CPU left in the system, and certainly not from non-secure + * software). + */ + if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) + kvm_set_way_flush(vcpu); + return true; } @@ -243,10 +252,43 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { + bool g1; + if (!p->is_write) return read_from_write_only(vcpu, p, r); - vgic_v3_dispatch_sgi(vcpu, p->regval); + /* + * In a system where GICD_CTLR.DS=1, a ICC_SGI0R_EL1 access generates + * Group0 SGIs only, while ICC_SGI1R_EL1 can generate either group, + * depending on the SGI configuration. ICC_ASGI1R_EL1 is effectively + * equivalent to ICC_SGI0R_EL1, as there is no "alternative" secure + * group. + */ + if (p->is_aarch32) { + switch (p->Op1) { + default: /* Keep GCC quiet */ + case 0: /* ICC_SGI1R */ + g1 = true; + break; + case 1: /* ICC_ASGI1R */ + case 2: /* ICC_SGI0R */ + g1 = false; + break; + } + } else { + switch (p->Op2) { + default: /* Keep GCC quiet */ + case 5: /* ICC_SGI1R_EL1 */ + g1 = true; + break; + case 6: /* ICC_ASGI1R_EL1 */ + case 7: /* ICC_SGI0R_EL1 */ + g1 = false; + break; + } + } + + vgic_v3_dispatch_sgi(vcpu, p->regval, g1); return true; } @@ -1303,6 +1345,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_ICC_DIR_EL1), read_from_write_only }, { SYS_DESC(SYS_ICC_RPR_EL1), write_to_read_only }, { SYS_DESC(SYS_ICC_SGI1R_EL1), access_gic_sgi }, + { SYS_DESC(SYS_ICC_ASGI1R_EL1), access_gic_sgi }, + { SYS_DESC(SYS_ICC_SGI0R_EL1), access_gic_sgi }, { SYS_DESC(SYS_ICC_IAR1_EL1), write_to_read_only }, { SYS_DESC(SYS_ICC_EOIR1_EL1), read_from_write_only }, { SYS_DESC(SYS_ICC_HPPIR1_EL1), write_to_read_only }, @@ -1613,8 +1657,6 @@ static const struct sys_reg_desc cp14_64_regs[] = { * register). */ static const struct sys_reg_desc cp15_regs[] = { - { Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, - { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_vm_reg, NULL, c1_SCTLR }, { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 }, @@ -1737,8 +1779,10 @@ static const struct sys_reg_desc cp15_regs[] = { static const struct sys_reg_desc cp15_64_regs[] = { { Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, { Op1( 0), CRn( 0), CRm( 9), Op2( 0), access_pmu_evcntr }, - { Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, + { Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI1R */ { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 }, + { Op1( 1), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_ASGI1R */ + { Op1( 2), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI0R */ { Op1( 2), CRn( 0), CRm(14), Op2( 0), access_cntp_cval }, }; diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 95f9107449bf..9527ba5d62da 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -74,6 +74,7 @@ #define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 #define SECONDARY_EXEC_ENABLE_VMFUNC 0x00002000 #define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 +#define SECONDARY_EXEC_ENCLS_EXITING 0x00008000 #define SECONDARY_EXEC_RDSEED_EXITING 0x00010000 #define SECONDARY_EXEC_ENABLE_PML 0x00020000 #define SECONDARY_EXEC_XSAVES 0x00100000 @@ -213,6 +214,8 @@ enum vmcs_field { VMWRITE_BITMAP_HIGH = 0x00002029, XSS_EXIT_BITMAP = 0x0000202C, XSS_EXIT_BITMAP_HIGH = 0x0000202D, + ENCLS_EXITING_BITMAP = 0x0000202E, + ENCLS_EXITING_BITMAP_HIGH = 0x0000202F, TSC_MULTIPLIER = 0x00002032, TSC_MULTIPLIER_HIGH = 0x00002033, GUEST_PHYSICAL_ADDRESS = 0x00002400, diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 73e27a98456f..6276140044d0 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -5586,8 +5586,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) clgi(); - local_irq_enable(); - /* * If this vCPU has touched SPEC_CTRL, restore the guest's value if * it's non-zero. Since vmentry is serialising on affected CPUs, there @@ -5596,6 +5594,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) */ x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl); + local_irq_enable(); + asm volatile ( "push %%" _ASM_BP "; \n\t" "mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t" @@ -5718,12 +5718,12 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); - x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl); - reload_tss(vcpu); local_irq_disable(); + x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl); + vcpu->arch.cr2 = svm->vmcb->save.cr2; vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax; vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 1519f030fd73..8dae47e7267a 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -198,12 +198,14 @@ static enum vmx_l1d_flush_state __read_mostly vmentry_l1d_flush_param = VMENTER_ static const struct { const char *option; - enum vmx_l1d_flush_state cmd; + bool for_parse; } vmentry_l1d_param[] = { - {"auto", VMENTER_L1D_FLUSH_AUTO}, - {"never", VMENTER_L1D_FLUSH_NEVER}, - {"cond", VMENTER_L1D_FLUSH_COND}, - {"always", VMENTER_L1D_FLUSH_ALWAYS}, + [VMENTER_L1D_FLUSH_AUTO] = {"auto", true}, + [VMENTER_L1D_FLUSH_NEVER] = {"never", true}, + [VMENTER_L1D_FLUSH_COND] = {"cond", true}, + [VMENTER_L1D_FLUSH_ALWAYS] = {"always", true}, + [VMENTER_L1D_FLUSH_EPT_DISABLED] = {"EPT disabled", false}, + [VMENTER_L1D_FLUSH_NOT_REQUIRED] = {"not required", false}, }; #define L1D_CACHE_ORDER 4 @@ -219,15 +221,15 @@ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf) return 0; } - if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) { - u64 msr; + if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) { + u64 msr; - rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr); - if (msr & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) { - l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED; - return 0; - } - } + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr); + if (msr & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) { + l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED; + return 0; + } + } /* If set to auto use the default l1tf mitigation method */ if (l1tf == VMENTER_L1D_FLUSH_AUTO) { @@ -287,8 +289,9 @@ static int vmentry_l1d_flush_parse(const char *s) if (s) { for (i = 0; i < ARRAY_SIZE(vmentry_l1d_param); i++) { - if (sysfs_streq(s, vmentry_l1d_param[i].option)) - return vmentry_l1d_param[i].cmd; + if (vmentry_l1d_param[i].for_parse && + sysfs_streq(s, vmentry_l1d_param[i].option)) + return i; } } return -EINVAL; @@ -298,13 +301,13 @@ static int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp) { int l1tf, ret; - if (!boot_cpu_has(X86_BUG_L1TF)) - return 0; - l1tf = vmentry_l1d_flush_parse(s); if (l1tf < 0) return l1tf; + if (!boot_cpu_has(X86_BUG_L1TF)) + return 0; + /* * Has vmx_init() run already? If not then this is the pre init * parameter parsing. In that case just store the value and let @@ -324,6 +327,9 @@ static int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp) static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp) { + if (WARN_ON_ONCE(l1tf_vmx_mitigation >= ARRAY_SIZE(vmentry_l1d_param))) + return sprintf(s, "???\n"); + return sprintf(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option); } @@ -1684,6 +1690,12 @@ static inline bool cpu_has_vmx_virtual_intr_delivery(void) SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY; } +static inline bool cpu_has_vmx_encls_vmexit(void) +{ + return vmcs_config.cpu_based_2nd_exec_ctrl & + SECONDARY_EXEC_ENCLS_EXITING; +} + /* * Comment's format: document - errata name - stepping - processor name. * Refer from @@ -4551,7 +4563,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) SECONDARY_EXEC_RDRAND_EXITING | SECONDARY_EXEC_ENABLE_PML | SECONDARY_EXEC_TSC_SCALING | - SECONDARY_EXEC_ENABLE_VMFUNC; + SECONDARY_EXEC_ENABLE_VMFUNC | + SECONDARY_EXEC_ENCLS_EXITING; if (adjust_vmx_controls(min2, opt2, MSR_IA32_VMX_PROCBASED_CTLS2, &_cpu_based_2nd_exec_control) < 0) @@ -6648,6 +6661,9 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg)); vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1); } + + if (cpu_has_vmx_encls_vmexit()) + vmcs_write64(ENCLS_EXITING_BITMAP, -1ull); } static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) @@ -9314,6 +9330,17 @@ fail: return 1; } +static int handle_encls(struct kvm_vcpu *vcpu) +{ + /* + * SGX virtualization is not yet supported. There is no software + * enable bit for SGX, so we have to trap ENCLS and inject a #UD + * to prevent the guest from executing ENCLS. + */ + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; +} + /* * The exit handlers return 1 if the exit was handled fully and guest execution * may resume. Otherwise they set the kvm_run parameter to indicate what needs @@ -9371,6 +9398,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { [EXIT_REASON_INVPCID] = handle_invpcid, [EXIT_REASON_VMFUNC] = handle_vmfunc, [EXIT_REASON_PREEMPTION_TIMER] = handle_preemption_timer, + [EXIT_REASON_ENCLS] = handle_encls, }; static const int kvm_vmx_max_exit_handlers = @@ -9741,6 +9769,9 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) case EXIT_REASON_VMFUNC: /* VM functions are emulated through L2->L0 vmexits. */ return false; + case EXIT_REASON_ENCLS: + /* SGX is never exposed to L1 */ + return false; default: return true; } @@ -12101,6 +12132,9 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, if (exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) vmcs_write64(APIC_ACCESS_ADDR, -1ull); + if (exec_control & SECONDARY_EXEC_ENCLS_EXITING) + vmcs_write64(ENCLS_EXITING_BITMAP, -1ull); + vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4a74a7cf0a8b..506bd2b4b8bb 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6576,14 +6576,12 @@ static void kvm_set_mmio_spte_mask(void) /* Set the present bit. */ mask |= 1ull; -#ifdef CONFIG_X86_64 /* * If reserved bit is not supported, clear the present bit to disable * mmio page fault. */ - if (maxphyaddr == 52) + if (IS_ENABLED(CONFIG_X86_64) && maxphyaddr == 52) mask &= ~1ull; -#endif kvm_mmu_set_mmio_spte_mask(mask, mask); } |