diff options
Diffstat (limited to 'arch/x86/kvm/svm.c')
-rw-r--r-- | arch/x86/kvm/svm.c | 510 |
1 files changed, 247 insertions, 263 deletions
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index a3e32d61d60c..851e9cc79930 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -48,6 +48,7 @@ #include <asm/kvm_para.h> #include <asm/irq_remapping.h> #include <asm/spec-ctrl.h> +#include <asm/cpu_device_id.h> #include <asm/virtext.h> #include "trace.h" @@ -57,11 +58,13 @@ MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); +#ifdef MODULE static const struct x86_cpu_id svm_cpu_id[] = { - X86_FEATURE_MATCH(X86_FEATURE_SVM), + X86_MATCH_FEATURE(X86_FEATURE_SVM, NULL), {} }; MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id); +#endif #define IOPM_ALLOC_ORDER 2 #define MSRPM_ALLOC_ORDER 1 @@ -519,10 +522,31 @@ static void recalc_intercepts(struct vcpu_svm *svm) h = &svm->nested.hsave->control; g = &svm->nested; - c->intercept_cr = h->intercept_cr | g->intercept_cr; - c->intercept_dr = h->intercept_dr | g->intercept_dr; - c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions; - c->intercept = h->intercept | g->intercept; + c->intercept_cr = h->intercept_cr; + c->intercept_dr = h->intercept_dr; + c->intercept_exceptions = h->intercept_exceptions; + c->intercept = h->intercept; + + if (svm->vcpu.arch.hflags & HF_VINTR_MASK) { + /* We only want the cr8 intercept bits of L1 */ + c->intercept_cr &= ~(1U << INTERCEPT_CR8_READ); + c->intercept_cr &= ~(1U << INTERCEPT_CR8_WRITE); + + /* + * Once running L2 with HF_VINTR_MASK, EFLAGS.IF does not + * affect any interrupt we may want to inject; therefore, + * interrupt window vmexits are irrelevant to L0. + */ + c->intercept &= ~(1ULL << INTERCEPT_VINTR); + } + + /* We don't want to see VMMCALLs from a nested guest */ + c->intercept &= ~(1ULL << INTERCEPT_VMMCALL); + + c->intercept_cr |= g->intercept_cr; + c->intercept_dr |= g->intercept_dr; + c->intercept_exceptions |= g->intercept_exceptions; + c->intercept |= g->intercept; } static inline struct vmcb *get_host_vmcb(struct vcpu_svm *svm) @@ -627,6 +651,11 @@ static inline void clr_intercept(struct vcpu_svm *svm, int bit) recalc_intercepts(svm); } +static inline bool is_intercept(struct vcpu_svm *svm, int bit) +{ + return (svm->vmcb->control.intercept & (1ULL << bit)) != 0; +} + static inline bool vgif_enabled(struct vcpu_svm *svm) { return !!(svm->vmcb->control.int_ctl & V_GIF_ENABLE_MASK); @@ -1005,33 +1034,32 @@ static void svm_cpu_uninit(int cpu) static int svm_cpu_init(int cpu) { struct svm_cpu_data *sd; - int r; sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL); if (!sd) return -ENOMEM; sd->cpu = cpu; - r = -ENOMEM; sd->save_area = alloc_page(GFP_KERNEL); if (!sd->save_area) - goto err_1; + goto free_cpu_data; if (svm_sev_enabled()) { - r = -ENOMEM; sd->sev_vmcbs = kmalloc_array(max_sev_asid + 1, sizeof(void *), GFP_KERNEL); if (!sd->sev_vmcbs) - goto err_1; + goto free_save_area; } per_cpu(svm_data, cpu) = sd; return 0; -err_1: +free_save_area: + __free_page(sd->save_area); +free_cpu_data: kfree(sd); - return r; + return -ENOMEM; } @@ -1207,6 +1235,7 @@ static int avic_ga_log_notifier(u32 ga_tag) u32 vcpu_id = AVIC_GATAG_TO_VCPUID(ga_tag); pr_debug("SVM: %s: vm_id=%#x, vcpu_id=%#x\n", __func__, vm_id, vcpu_id); + trace_kvm_avic_ga_log(vm_id, vcpu_id); spin_lock_irqsave(&svm_vm_data_hash_lock, flags); hash_for_each_possible(svm_vm_data_hash, kvm_svm, hnode, vm_id) { @@ -1350,6 +1379,47 @@ static __init void svm_adjust_mmio_mask(void) kvm_mmu_set_mmio_spte_mask(mask, mask, PT_WRITABLE_MASK | PT_USER_MASK); } +static void svm_hardware_teardown(void) +{ + int cpu; + + if (svm_sev_enabled()) { + bitmap_free(sev_asid_bitmap); + bitmap_free(sev_reclaim_asid_bitmap); + + sev_flush_asids(); + } + + for_each_possible_cpu(cpu) + svm_cpu_uninit(cpu); + + __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER); + iopm_base = 0; +} + +static __init void svm_set_cpu_caps(void) +{ + kvm_set_cpu_caps(); + + supported_xss = 0; + + /* CPUID 0x80000001 and 0x8000000A (SVM features) */ + if (nested) { + kvm_cpu_cap_set(X86_FEATURE_SVM); + + if (nrips) + kvm_cpu_cap_set(X86_FEATURE_NRIPS); + + if (npt_enabled) + kvm_cpu_cap_set(X86_FEATURE_NPT); + } + + /* CPUID 0x80000008 */ + if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) || + boot_cpu_has(X86_FEATURE_AMD_SSBD)) + kvm_cpu_cap_set(X86_FEATURE_VIRT_SSBD); +} + static __init int svm_hardware_setup(void) { int cpu; @@ -1368,6 +1438,8 @@ static __init int svm_hardware_setup(void) init_msrpm_offsets(); + supported_xcr0 &= ~(XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR); + if (boot_cpu_has(X86_FEATURE_NX)) kvm_enable_efer_bits(EFER_NX); @@ -1415,16 +1487,11 @@ static __init int svm_hardware_setup(void) if (!boot_cpu_has(X86_FEATURE_NPT)) npt_enabled = false; - if (npt_enabled && !npt) { - printk(KERN_INFO "kvm: Nested Paging disabled\n"); + if (npt_enabled && !npt) npt_enabled = false; - } - if (npt_enabled) { - printk(KERN_INFO "kvm: Nested Paging enabled\n"); - kvm_enable_tdp(); - } else - kvm_disable_tdp(); + kvm_configure_mmu(npt_enabled, PT_PDPE_LEVEL); + pr_info("kvm: Nested Paging %sabled\n", npt_enabled ? "en" : "dis"); if (nrips) { if (!boot_cpu_has(X86_FEATURE_NRIPS)) @@ -1460,32 +1527,15 @@ static __init int svm_hardware_setup(void) pr_info("Virtual GIF supported\n"); } + svm_set_cpu_caps(); + return 0; err: - __free_pages(iopm_pages, IOPM_ALLOC_ORDER); - iopm_base = 0; + svm_hardware_teardown(); return r; } -static __exit void svm_hardware_unsetup(void) -{ - int cpu; - - if (svm_sev_enabled()) { - bitmap_free(sev_asid_bitmap); - bitmap_free(sev_reclaim_asid_bitmap); - - sev_flush_asids(); - } - - for_each_possible_cpu(cpu) - svm_cpu_uninit(cpu); - - __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER); - iopm_base = 0; -} - static void init_seg(struct vmcb_seg *seg) { seg->selector = 0; @@ -1933,32 +1983,11 @@ static void sev_clflush_pages(struct page *pages[], unsigned long npages) static void __unregister_enc_region_locked(struct kvm *kvm, struct enc_region *region) { - /* - * The guest may change the memory encryption attribute from C=0 -> C=1 - * or vice versa for this memory range. Lets make sure caches are - * flushed to ensure that guest data gets written into memory with - * correct C-bit. - */ - sev_clflush_pages(region->pages, region->npages); - sev_unpin_memory(kvm, region->pages, region->npages); list_del(®ion->list); kfree(region); } -static struct kvm *svm_vm_alloc(void) -{ - struct kvm_svm *kvm_svm = __vmalloc(sizeof(struct kvm_svm), - GFP_KERNEL_ACCOUNT | __GFP_ZERO, - PAGE_KERNEL); - return &kvm_svm->kvm; -} - -static void svm_vm_free(struct kvm *kvm) -{ - vfree(to_kvm_svm(kvm)); -} - static void sev_vm_destroy(struct kvm *kvm) { struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; @@ -1971,6 +2000,13 @@ static void sev_vm_destroy(struct kvm *kvm) mutex_lock(&kvm->lock); /* + * Ensure that all guest tagged cache entries are flushed before + * releasing the pages back to the system for use. CLFLUSH will + * not do this, so issue a WBINVD. + */ + wbinvd_on_all_cpus(); + + /* * if userspace was terminated before unregistering the memory regions * then lets unpin all the registered memory. */ @@ -2175,7 +2211,6 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) u32 dummy; u32 eax = 1; - vcpu->arch.microcode_version = 0x01000065; svm->spec_ctrl = 0; svm->virt_spec_ctrl = 0; @@ -2187,7 +2222,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) } init_vmcb(svm); - kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, true); + kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, false); kvm_rdx_write(vcpu, eax); if (kvm_vcpu_apicv_active(vcpu) && !init_event) @@ -2197,8 +2232,9 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) static int avic_init_vcpu(struct vcpu_svm *svm) { int ret; + struct kvm_vcpu *vcpu = &svm->vcpu; - if (!kvm_vcpu_apicv_active(&svm->vcpu)) + if (!avic || !irqchip_in_kernel(vcpu->kvm)) return 0; ret = avic_init_backing_page(&svm->vcpu); @@ -2266,6 +2302,7 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) init_vmcb(svm); svm_init_osvw(vcpu); + vcpu->arch.microcode_version = 0x01000065; return 0; @@ -2419,14 +2456,38 @@ static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) } } +static inline void svm_enable_vintr(struct vcpu_svm *svm) +{ + struct vmcb_control_area *control; + + /* The following fields are ignored when AVIC is enabled */ + WARN_ON(kvm_vcpu_apicv_active(&svm->vcpu)); + + /* + * This is just a dummy VINTR to actually cause a vmexit to happen. + * Actual injection of virtual interrupts happens through EVENTINJ. + */ + control = &svm->vmcb->control; + control->int_vector = 0x0; + control->int_ctl &= ~V_INTR_PRIO_MASK; + control->int_ctl |= V_IRQ_MASK | + ((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT); + mark_dirty(svm->vmcb, VMCB_INTR); +} + static void svm_set_vintr(struct vcpu_svm *svm) { set_intercept(svm, INTERCEPT_VINTR); + if (is_intercept(svm, INTERCEPT_VINTR)) + svm_enable_vintr(svm); } static void svm_clear_vintr(struct vcpu_svm *svm) { clr_intercept(svm, INTERCEPT_VINTR); + + svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; + mark_dirty(svm->vmcb, VMCB_INTR); } static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg) @@ -2982,15 +3043,6 @@ static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index) return pdpte; } -static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu, - unsigned long root) -{ - struct vcpu_svm *svm = to_svm(vcpu); - - svm->vmcb->control.nested_cr3 = __sme_set(root); - mark_dirty(svm->vmcb, VMCB_NPT); -} - static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu, struct x86_exception *fault) { @@ -3026,8 +3078,7 @@ static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu) vcpu->arch.mmu = &vcpu->arch.guest_mmu; kvm_init_shadow_mmu(vcpu); - vcpu->arch.mmu->set_cr3 = nested_svm_set_tdp_cr3; - vcpu->arch.mmu->get_cr3 = nested_svm_get_tdp_cr3; + vcpu->arch.mmu->get_guest_pgd = nested_svm_get_tdp_cr3; vcpu->arch.mmu->get_pdptr = nested_svm_get_tdp_pdptr; vcpu->arch.mmu->inject_page_fault = nested_svm_inject_npf_exit; vcpu->arch.mmu->shadow_root_level = get_npt_level(vcpu); @@ -3088,43 +3139,36 @@ static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, return vmexit; } -/* This function returns true if it is save to enable the irq window */ -static inline bool nested_svm_intr(struct vcpu_svm *svm) +static void nested_svm_intr(struct vcpu_svm *svm) { - if (!is_guest_mode(&svm->vcpu)) - return true; - - if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK)) - return true; - - if (!(svm->vcpu.arch.hflags & HF_HIF_MASK)) - return false; - - /* - * if vmexit was already requested (by intercepted exception - * for instance) do not overwrite it with "external interrupt" - * vmexit. - */ - if (svm->nested.exit_required) - return false; - svm->vmcb->control.exit_code = SVM_EXIT_INTR; svm->vmcb->control.exit_info_1 = 0; svm->vmcb->control.exit_info_2 = 0; - if (svm->nested.intercept & 1ULL) { - /* - * The #vmexit can't be emulated here directly because this - * code path runs with irqs and preemption disabled. A - * #vmexit emulation might sleep. Only signal request for - * the #vmexit here. - */ - svm->nested.exit_required = true; - trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip); - return false; + /* nested_svm_vmexit this gets called afterwards from handle_exit */ + svm->nested.exit_required = true; + trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip); +} + +static bool nested_exit_on_intr(struct vcpu_svm *svm) +{ + return (svm->nested.intercept & 1ULL); +} + +static int svm_check_nested_events(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + bool block_nested_events = + kvm_event_needs_reinjection(vcpu) || svm->nested.exit_required; + + if (kvm_cpu_has_interrupt(vcpu) && nested_exit_on_intr(svm)) { + if (block_nested_events) + return -EBUSY; + nested_svm_intr(svm); + return 0; } - return true; + return 0; } /* This function returns true if it is save to enable the nmi window */ @@ -3243,9 +3287,6 @@ static int nested_svm_exit_special(struct vcpu_svm *svm) return NESTED_EXIT_CONTINUE; } -/* - * If this function returns true, this #vmexit was already handled - */ static int nested_svm_intercept(struct vcpu_svm *svm) { u32 exit_code = svm->vmcb->control.exit_code; @@ -3520,6 +3561,9 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) static bool nested_vmcb_checks(struct vmcb *vmcb) { + if ((vmcb->save.efer & EFER_SVME) == 0) + return false; + if ((vmcb->control.intercept & (1ULL << INTERCEPT_VMRUN)) == 0) return false; @@ -3536,6 +3580,10 @@ static bool nested_vmcb_checks(struct vmcb *vmcb) static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa, struct vmcb *nested_vmcb, struct kvm_host_map *map) { + bool evaluate_pending_interrupts = + is_intercept(svm, INTERCEPT_VINTR) || + is_intercept(svm, INTERCEPT_IRET); + if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF) svm->vcpu.arch.hflags |= HF_HIF_MASK; else @@ -3595,15 +3643,6 @@ static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa, else svm->vcpu.arch.hflags &= ~HF_VINTR_MASK; - if (svm->vcpu.arch.hflags & HF_VINTR_MASK) { - /* We only want the cr8 intercept bits of the guest */ - clr_cr_intercept(svm, INTERCEPT_CR8_READ); - clr_cr_intercept(svm, INTERCEPT_CR8_WRITE); - } - - /* We don't want to see VMMCALLs from a nested guest */ - clr_intercept(svm, INTERCEPT_VMMCALL); - svm->vcpu.arch.tsc_offset += nested_vmcb->control.tsc_offset; svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset; @@ -3631,7 +3670,21 @@ static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa, svm->nested.vmcb = vmcb_gpa; + /* + * If L1 had a pending IRQ/NMI before executing VMRUN, + * which wasn't delivered because it was disallowed (e.g. + * interrupts disabled), L0 needs to evaluate if this pending + * event should cause an exit from L2 to L1 or be delivered + * directly to L2. + * + * Usually this would be handled by the processor noticing an + * IRQ/NMI window request. However, VMRUN can unblock interrupts + * by implicitly setting GIF, so force L0 to perform pending event + * evaluation by requesting a KVM_REQ_EVENT. + */ enable_gif(svm); + if (unlikely(evaluate_pending_interrupts)) + kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); mark_all_dirty(svm->vmcb); } @@ -3833,11 +3886,8 @@ static int clgi_interception(struct vcpu_svm *svm) disable_gif(svm); /* After a CLGI no interrupts should come */ - if (!kvm_vcpu_apicv_active(&svm->vcpu)) { + if (!kvm_vcpu_apicv_active(&svm->vcpu)) svm_clear_vintr(svm); - svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; - mark_dirty(svm->vmcb, VMCB_INTR); - } return ret; } @@ -5123,19 +5173,6 @@ static void svm_inject_nmi(struct kvm_vcpu *vcpu) ++vcpu->stat.nmi_injections; } -static inline void svm_inject_irq(struct vcpu_svm *svm, int irq) -{ - struct vmcb_control_area *control; - - /* The following fields are ignored when AVIC is enabled */ - control = &svm->vmcb->control; - control->int_vector = irq; - control->int_ctl &= ~V_INTR_PRIO_MASK; - control->int_ctl |= V_IRQ_MASK | - ((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT); - mark_dirty(svm->vmcb, VMCB_INTR); -} - static void svm_set_irq(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -5232,6 +5269,9 @@ static void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) struct vmcb *vmcb = svm->vmcb; bool activated = kvm_vcpu_apicv_active(vcpu); + if (!avic) + return; + if (activated) { /** * During AVIC temporary deactivation, guest could update @@ -5255,8 +5295,11 @@ static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) return; } -static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec) +static int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec) { + if (!vcpu->arch.apicv_active) + return -1; + kvm_lapic_set_irr(vec, vcpu->arch.apic); smp_mb__after_atomic(); @@ -5268,6 +5311,8 @@ static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec) put_cpu(); } else kvm_vcpu_wake_up(vcpu); + + return 0; } static bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu) @@ -5516,18 +5561,15 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); struct vmcb *vmcb = svm->vmcb; - int ret; if (!gif_set(svm) || (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)) return 0; - ret = !!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF); - - if (is_guest_mode(vcpu)) - return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK); - - return ret; + if (is_guest_mode(vcpu) && (svm->vcpu.arch.hflags & HF_VINTR_MASK)) + return !!(svm->vcpu.arch.hflags & HF_HIF_MASK); + else + return !!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF); } static void enable_irq_window(struct kvm_vcpu *vcpu) @@ -5542,7 +5584,7 @@ static void enable_irq_window(struct kvm_vcpu *vcpu) * enabled, the STGI interception will not occur. Enable the irq * window under the assumption that the hardware will set the GIF. */ - if ((vgif_enabled(svm) || gif_set(svm)) && nested_svm_intr(svm)) { + if (vgif_enabled(svm) || gif_set(svm)) { /* * IRQ window is not needed when AVIC is enabled, * unless we have pending ExtINT since it cannot be injected @@ -5551,7 +5593,6 @@ static void enable_irq_window(struct kvm_vcpu *vcpu) */ svm_toggle_avic_for_irq_window(vcpu, false); svm_set_vintr(svm); - svm_inject_irq(svm, 0x0); } } @@ -5937,24 +5978,30 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) } STACK_FRAME_NON_STANDARD(svm_vcpu_run); -static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) +static void svm_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long root) { struct vcpu_svm *svm = to_svm(vcpu); + bool update_guest_cr3 = true; + unsigned long cr3; - svm->vmcb->save.cr3 = __sme_set(root); - mark_dirty(svm->vmcb, VMCB_CR); -} - -static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root) -{ - struct vcpu_svm *svm = to_svm(vcpu); + cr3 = __sme_set(root); + if (npt_enabled) { + svm->vmcb->control.nested_cr3 = cr3; + mark_dirty(svm->vmcb, VMCB_NPT); - svm->vmcb->control.nested_cr3 = __sme_set(root); - mark_dirty(svm->vmcb, VMCB_NPT); + /* Loading L2's CR3 is handled by enter_svm_guest_mode. */ + if (is_guest_mode(vcpu)) + update_guest_cr3 = false; + else if (test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail)) + cr3 = vcpu->arch.cr3; + else /* CR3 is already up-to-date. */ + update_guest_cr3 = false; + } - /* Also sync guest cr3 here in case we live migrate */ - svm->vmcb->save.cr3 = kvm_read_cr3(vcpu); - mark_dirty(svm->vmcb, VMCB_CR); + if (update_guest_cr3) { + svm->vmcb->save.cr3 = cr3; + mark_dirty(svm->vmcb, VMCB_CR); + } } static int is_disabled(void) @@ -6016,12 +6063,19 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu) boot_cpu_has(X86_FEATURE_XSAVES); /* Update nrips enabled cache */ - svm->nrips_enabled = !!guest_cpuid_has(&svm->vcpu, X86_FEATURE_NRIPS); + svm->nrips_enabled = kvm_cpu_cap_has(X86_FEATURE_NRIPS) && + guest_cpuid_has(&svm->vcpu, X86_FEATURE_NRIPS); if (!kvm_vcpu_apicv_active(vcpu)) return; - guest_cpuid_clear(vcpu, X86_FEATURE_X2APIC); + /* + * AVIC does not work with an x2APIC mode guest. If the X2APIC feature + * is exposed to the guest, disable AVIC. + */ + if (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC)) + kvm_request_apicv_update(vcpu->kvm, false, + APICV_INHIBIT_REASON_X2APIC); /* * Currently, AVIC does not work with nested virtualization. @@ -6032,88 +6086,11 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu) APICV_INHIBIT_REASON_NESTED); } -#define F feature_bit - -static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) -{ - switch (func) { - case 0x1: - if (avic) - entry->ecx &= ~F(X2APIC); - break; - case 0x80000001: - if (nested) - entry->ecx |= (1 << 2); /* Set SVM bit */ - break; - case 0x80000008: - if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) || - boot_cpu_has(X86_FEATURE_AMD_SSBD)) - entry->ebx |= F(VIRT_SSBD); - break; - case 0x8000000A: - entry->eax = 1; /* SVM revision 1 */ - entry->ebx = 8; /* Lets support 8 ASIDs in case we add proper - ASID emulation to nested SVM */ - entry->ecx = 0; /* Reserved */ - entry->edx = 0; /* Per default do not support any - additional features */ - - /* Support next_rip if host supports it */ - if (boot_cpu_has(X86_FEATURE_NRIPS)) - entry->edx |= F(NRIPS); - - /* Support NPT for the guest if enabled */ - if (npt_enabled) - entry->edx |= F(NPT); - - } -} - -static int svm_get_lpage_level(void) -{ - return PT_PDPE_LEVEL; -} - -static bool svm_rdtscp_supported(void) -{ - return boot_cpu_has(X86_FEATURE_RDTSCP); -} - -static bool svm_invpcid_supported(void) -{ - return false; -} - -static bool svm_mpx_supported(void) -{ - return false; -} - -static bool svm_xsaves_supported(void) -{ - return boot_cpu_has(X86_FEATURE_XSAVES); -} - -static bool svm_umip_emulated(void) -{ - return false; -} - -static bool svm_pt_supported(void) -{ - return false; -} - static bool svm_has_wbinvd_exit(void) { return true; } -static bool svm_pku_supported(void) -{ - return false; -} - #define PRE_EX(exit) { .exit_code = (exit), \ .stage = X86_ICPT_PRE_EXCEPT, } #define POST_EX(exit) { .exit_code = (exit), \ @@ -6180,7 +6157,8 @@ static const struct __x86_intercept { static int svm_check_intercept(struct kvm_vcpu *vcpu, struct x86_instruction_info *info, - enum x86_intercept_stage stage) + enum x86_intercept_stage stage, + struct x86_exception *exception) { struct vcpu_svm *svm = to_svm(vcpu); int vmexit, ret = X86EMUL_CONTINUE; @@ -6303,7 +6281,8 @@ static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu, enum exit_fastpath_completion *exit_fastpath) { if (!is_guest_mode(vcpu) && - to_svm(vcpu)->vmcb->control.exit_code == EXIT_REASON_MSR_WRITE) + to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR && + to_svm(vcpu)->vmcb->control.exit_info_1) *exit_fastpath = handle_fastpath_set_msr_irqoff(vcpu); } @@ -7148,6 +7127,9 @@ static int svm_mem_enc_op(struct kvm *kvm, void __user *argp) if (!svm_sev_enabled()) return -ENOTTY; + if (!argp) + return 0; + if (copy_from_user(&sev_cmd, argp, sizeof(struct kvm_sev_cmd))) return -EFAULT; @@ -7275,6 +7257,13 @@ static int svm_unregister_enc_region(struct kvm *kvm, goto failed; } + /* + * Ensure that all guest tagged cache entries are flushed before + * releasing the pages back to the system for use. CLFLUSH will + * not do this, so issue a WBINVD. + */ + wbinvd_on_all_cpus(); + __unregister_enc_region_locked(kvm, region); mutex_unlock(&kvm->lock); @@ -7351,7 +7340,7 @@ static bool svm_apic_init_signal_blocked(struct kvm_vcpu *vcpu) * TODO: Last condition latch INIT signals on vCPU when * vCPU is in guest-mode and vmcb12 defines intercept on INIT. * To properly emulate the INIT intercept, SVM should implement - * kvm_x86_ops->check_nested_events() and call nested_svm_vmexit() + * kvm_x86_ops.check_nested_events() and call nested_svm_vmexit() * there if an INIT signal is pending. */ return !gif_set(svm) || @@ -7364,7 +7353,8 @@ static bool svm_check_apicv_inhibit_reasons(ulong bit) BIT(APICV_INHIBIT_REASON_HYPERV) | BIT(APICV_INHIBIT_REASON_NESTED) | BIT(APICV_INHIBIT_REASON_IRQWIN) | - BIT(APICV_INHIBIT_REASON_PIT_REINJ); + BIT(APICV_INHIBIT_REASON_PIT_REINJ) | + BIT(APICV_INHIBIT_REASON_X2APIC); return supported & BIT(bit); } @@ -7374,12 +7364,8 @@ static void svm_pre_update_apicv_exec_ctrl(struct kvm *kvm, bool activate) avic_update_access_page(kvm, activate); } -static struct kvm_x86_ops svm_x86_ops __ro_after_init = { - .cpu_has_kvm_support = has_svm, - .disabled_by_bios = is_disabled, - .hardware_setup = svm_hardware_setup, - .hardware_unsetup = svm_hardware_unsetup, - .check_processor_compatibility = svm_check_processor_compat, +static struct kvm_x86_ops svm_x86_ops __initdata = { + .hardware_unsetup = svm_hardware_teardown, .hardware_enable = svm_hardware_enable, .hardware_disable = svm_hardware_disable, .cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr, @@ -7389,8 +7375,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .vcpu_free = svm_free_vcpu, .vcpu_reset = svm_vcpu_reset, - .vm_alloc = svm_vm_alloc, - .vm_free = svm_vm_free, + .vm_size = sizeof(struct kvm_svm), .vm_init = svm_vm_init, .vm_destroy = svm_vm_destroy, @@ -7412,7 +7397,6 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .decache_cr0_guest_bits = svm_decache_cr0_guest_bits, .decache_cr4_guest_bits = svm_decache_cr4_guest_bits, .set_cr0 = svm_set_cr0, - .set_cr3 = svm_set_cr3, .set_cr4 = svm_set_cr4, .set_efer = svm_set_efer, .get_idt = svm_get_idt, @@ -7433,6 +7417,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .run = svm_vcpu_run, .handle_exit = handle_exit, .skip_emulated_instruction = skip_emulated_instruction, + .update_emulated_instruction = NULL, .set_interrupt_shadow = svm_set_interrupt_shadow, .get_interrupt_shadow = svm_get_interrupt_shadow, .patch_hypercall = svm_patch_hypercall, @@ -7464,26 +7449,14 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .get_exit_info = svm_get_exit_info, - .get_lpage_level = svm_get_lpage_level, - .cpuid_update = svm_cpuid_update, - .rdtscp_supported = svm_rdtscp_supported, - .invpcid_supported = svm_invpcid_supported, - .mpx_supported = svm_mpx_supported, - .xsaves_supported = svm_xsaves_supported, - .umip_emulated = svm_umip_emulated, - .pt_supported = svm_pt_supported, - .pku_supported = svm_pku_supported, - - .set_supported_cpuid = svm_set_supported_cpuid, - .has_wbinvd_exit = svm_has_wbinvd_exit, .read_l1_tsc_offset = svm_read_l1_tsc_offset, .write_l1_tsc_offset = svm_write_l1_tsc_offset, - .set_tdp_cr3 = set_tdp_cr3, + .load_mmu_pgd = svm_load_mmu_pgd, .check_intercept = svm_check_intercept, .handle_exit_irqoff = svm_handle_exit_irqoff, @@ -7513,11 +7486,22 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .need_emulation_on_page_fault = svm_need_emulation_on_page_fault, .apic_init_signal_blocked = svm_apic_init_signal_blocked, + + .check_nested_events = svm_check_nested_events, +}; + +static struct kvm_x86_init_ops svm_init_ops __initdata = { + .cpu_has_kvm_support = has_svm, + .disabled_by_bios = is_disabled, + .hardware_setup = svm_hardware_setup, + .check_processor_compatibility = svm_check_processor_compat, + + .runtime_ops = &svm_x86_ops, }; static int __init svm_init(void) { - return kvm_init(&svm_x86_ops, sizeof(struct vcpu_svm), + return kvm_init(&svm_init_ops, sizeof(struct vcpu_svm), __alignof__(struct vcpu_svm), THIS_MODULE); } |