Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r-- | arch/x86/kvm/x86.c | 214 |
1 file changed, 169 insertions(+), 45 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index efc7a82ab140..cebdaa1e3cf5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -75,6 +75,7 @@
 #include <asm/tlbflush.h>
 #include <asm/intel_pt.h>
 #include <asm/emulate_prefix.h>
+#include <asm/sgx.h>
 #include <clocksource/hyperv_timer.h>
 
 #define CREATE_TRACE_POINTS
@@ -245,6 +246,9 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	VCPU_STAT("l1d_flush", l1d_flush),
 	VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
 	VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
+	VCPU_STAT("nested_run", nested_run),
+	VCPU_STAT("directed_yield_attempted", directed_yield_attempted),
+	VCPU_STAT("directed_yield_successful", directed_yield_successful),
 	VM_STAT("mmu_shadow_zapped", mmu_shadow_zapped),
 	VM_STAT("mmu_pte_write", mmu_pte_write),
 	VM_STAT("mmu_pde_zapped", mmu_pde_zapped),
@@ -543,8 +547,6 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
 
 	if (!vcpu->arch.exception.pending && !vcpu->arch.exception.injected) {
 	queue:
-		if (has_error && !is_protmode(vcpu))
-			has_error = false;
 		if (reinject) {
 			/*
 			 * On vmentry, vcpu->arch.exception.pending is only
@@ -983,14 +985,17 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
 	return 0;
 }
 
-int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
+int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu)
 {
-	if (static_call(kvm_x86_get_cpl)(vcpu) == 0)
-		return __kvm_set_xcr(vcpu, index, xcr);
+	if (static_call(kvm_x86_get_cpl)(vcpu) != 0 ||
+	    __kvm_set_xcr(vcpu, kvm_rcx_read(vcpu), kvm_read_edx_eax(vcpu))) {
+		kvm_inject_gp(vcpu, 0);
+		return 1;
+	}
 
-	return 1;
+	return kvm_skip_emulated_instruction(vcpu);
 }
-EXPORT_SYMBOL_GPL(kvm_set_xcr);
+EXPORT_SYMBOL_GPL(kvm_emulate_xsetbv);
 
 bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
@@ -1072,10 +1077,15 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 		return 0;
 	}
 
-	if (is_long_mode(vcpu) && kvm_vcpu_is_illegal_gpa(vcpu, cr3))
+	/*
+	 * Do not condition the GPA check on long mode, this helper is used to
+	 * stuff CR3, e.g. for RSM emulation, and there is no guarantee that
+	 * the current vCPU mode is accurate.
+	 */
+	if (kvm_vcpu_is_illegal_gpa(vcpu, cr3))
 		return 1;
-	else if (is_pae_paging(vcpu) &&
-		 !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
+
+	if (is_pae_paging(vcpu) && !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
 		return 1;
 
 	kvm_mmu_new_pgd(vcpu, cr3, skip_tlb_flush, skip_tlb_flush);
@@ -1191,20 +1201,21 @@ void kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
 }
 EXPORT_SYMBOL_GPL(kvm_get_dr);
 
-bool kvm_rdpmc(struct kvm_vcpu *vcpu)
+int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu)
 {
 	u32 ecx = kvm_rcx_read(vcpu);
 	u64 data;
-	int err;
 
-	err = kvm_pmu_rdpmc(vcpu, ecx, &data);
-	if (err)
-		return err;
+	if (kvm_pmu_rdpmc(vcpu, ecx, &data)) {
+		kvm_inject_gp(vcpu, 0);
+		return 1;
+	}
+
 	kvm_rax_write(vcpu, (u32)data);
 	kvm_rdx_write(vcpu, data >> 32);
-	return err;
+	return kvm_skip_emulated_instruction(vcpu);
 }
-EXPORT_SYMBOL_GPL(kvm_rdpmc);
+EXPORT_SYMBOL_GPL(kvm_emulate_rdpmc);
 
 /*
  * List of msr numbers which we expose to userspace through KVM_GET_MSRS
@@ -1791,6 +1802,40 @@ int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr);
 
+int kvm_emulate_as_nop(struct kvm_vcpu *vcpu)
+{
+	return kvm_skip_emulated_instruction(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvm_emulate_as_nop);
+
+int kvm_emulate_invd(struct kvm_vcpu *vcpu)
+{
+	/* Treat an INVD instruction as a NOP and just skip it. */
+	return kvm_emulate_as_nop(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvm_emulate_invd);
+
+int kvm_emulate_mwait(struct kvm_vcpu *vcpu)
+{
+	pr_warn_once("kvm: MWAIT instruction emulated as NOP!\n");
+	return kvm_emulate_as_nop(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvm_emulate_mwait);
+
+int kvm_handle_invalid_op(struct kvm_vcpu *vcpu)
+{
+	kvm_queue_exception(vcpu, UD_VECTOR);
+	return 1;
+}
+EXPORT_SYMBOL_GPL(kvm_handle_invalid_op);
+
+int kvm_emulate_monitor(struct kvm_vcpu *vcpu)
+{
+	pr_warn_once("kvm: MONITOR instruction emulated as NOP!\n");
+	return kvm_emulate_as_nop(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvm_emulate_monitor);
+
 static inline bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu)
 {
 	xfer_to_guest_mode_prepare();
@@ -3382,6 +3427,12 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		msr_info->data = 0;
 		break;
 	case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5:
+		if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
+			return kvm_pmu_get_msr(vcpu, msr_info);
+		if (!msr_info->host_initiated)
+			return 1;
+		msr_info->data = 0;
+		break;
 	case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
 	case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
 	case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
@@ -3771,8 +3822,14 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_X86_USER_SPACE_MSR:
 	case KVM_CAP_X86_MSR_FILTER:
 	case KVM_CAP_ENFORCE_PV_FEATURE_CPUID:
+#ifdef CONFIG_X86_SGX_KVM
+	case KVM_CAP_SGX_ATTRIBUTE:
+#endif
+	case KVM_CAP_VM_COPY_ENC_CONTEXT_FROM:
 		r = 1;
 		break;
+	case KVM_CAP_SET_GUEST_DEBUG2:
+		return KVM_GUESTDBG_VALID_MASK;
 #ifdef CONFIG_KVM_XEN
 	case KVM_CAP_XEN_HVM:
 		r = KVM_XEN_HVM_CONFIG_HYPERCALL_MSR |
@@ -4673,7 +4730,6 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 		kvm_update_pv_runtime(vcpu);
 
 		return 0;
-
 	default:
 		return -EINVAL;
 	}
@@ -5355,6 +5411,28 @@ split_irqchip_unlock:
 		kvm->arch.bus_lock_detection_enabled = true;
 		r = 0;
 		break;
+#ifdef CONFIG_X86_SGX_KVM
+	case KVM_CAP_SGX_ATTRIBUTE: {
+		unsigned long allowed_attributes = 0;
+
+		r = sgx_set_attribute(&allowed_attributes, cap->args[0]);
+		if (r)
+			break;
+
+		/* KVM only supports the PROVISIONKEY privileged attribute. */
+		if ((allowed_attributes & SGX_ATTR_PROVISIONKEY) &&
+		    !(allowed_attributes & ~SGX_ATTR_PROVISIONKEY))
+			kvm->arch.sgx_provisioning_allowed = true;
+		else
+			r = -EINVAL;
+		break;
+	}
+#endif
+	case KVM_CAP_VM_COPY_ENC_CONTEXT_FROM:
+		r = -EINVAL;
+		if (kvm_x86_ops.vm_copy_enc_context_from)
+			r = kvm_x86_ops.vm_copy_enc_context_from(kvm, cap->args[0]);
+		return r;
 	default:
 		r = -EINVAL;
 		break;
@@ -5999,6 +6077,7 @@ gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
 	u32 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0;
 	return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
 }
+EXPORT_SYMBOL_GPL(kvm_mmu_gva_to_gpa_read);
 
 gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva,
 			       struct x86_exception *exception)
@@ -6015,6 +6094,7 @@ gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
 	access |= PFERR_WRITE_MASK;
 	return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
 }
+EXPORT_SYMBOL_GPL(kvm_mmu_gva_to_gpa_write);
 
 /* uses this to access any guest's mapped memory without checking CPL */
 gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
@@ -6934,12 +7014,12 @@ static bool emulator_guest_has_fxsr(struct x86_emulate_ctxt *ctxt)
 
 static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg)
 {
-	return kvm_register_read(emul_to_vcpu(ctxt), reg);
+	return kvm_register_read_raw(emul_to_vcpu(ctxt), reg);
 }
 
 static void emulator_write_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg, ulong val)
 {
-	kvm_register_write(emul_to_vcpu(ctxt), reg, val);
+	kvm_register_write_raw(emul_to_vcpu(ctxt), reg, val);
 }
 
 static void emulator_set_nmi_mask(struct x86_emulate_ctxt *ctxt, bool masked)
@@ -8043,9 +8123,6 @@ int kvm_arch_init(void *opaque)
 	if (r)
 		goto out_free_percpu;
 
-	kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
-			PT_DIRTY_MASK, PT64_NX_MASK, 0,
-			PT_PRESENT_MASK, 0, sme_me_mask);
 	kvm_timer_init();
 
 	perf_register_guest_info_callbacks(&kvm_guest_cbs);
@@ -8205,21 +8282,35 @@ void kvm_apicv_init(struct kvm *kvm, bool enable)
 }
 EXPORT_SYMBOL_GPL(kvm_apicv_init);
 
-static void kvm_sched_yield(struct kvm *kvm, unsigned long dest_id)
+static void kvm_sched_yield(struct kvm_vcpu *vcpu, unsigned long dest_id)
 {
 	struct kvm_vcpu *target = NULL;
 	struct kvm_apic_map *map;
 
+	vcpu->stat.directed_yield_attempted++;
+
 	rcu_read_lock();
-	map = rcu_dereference(kvm->arch.apic_map);
+	map = rcu_dereference(vcpu->kvm->arch.apic_map);
 
 	if (likely(map) && dest_id <= map->max_apic_id && map->phys_map[dest_id])
 		target = map->phys_map[dest_id]->vcpu;
 
 	rcu_read_unlock();
 
-	if (target && READ_ONCE(target->ready))
-		kvm_vcpu_yield_to(target);
+	if (!target || !READ_ONCE(target->ready))
+		goto no_yield;
+
+	/* Ignore requests to yield to self */
+	if (vcpu == target)
+		goto no_yield;
+
+	if (kvm_vcpu_yield_to(target) <= 0)
+		goto no_yield;
+
+	vcpu->stat.directed_yield_successful++;
+
+no_yield:
+	return;
 }
 
 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
@@ -8266,7 +8357,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 			break;
 
 		kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1);
-		kvm_sched_yield(vcpu->kvm, a1);
+		kvm_sched_yield(vcpu, a1);
 		ret = 0;
 		break;
 #ifdef CONFIG_X86_64
@@ -8284,7 +8375,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 		if (!guest_pv_has(vcpu, KVM_FEATURE_PV_SCHED_YIELD))
 			break;
 
-		kvm_sched_yield(vcpu->kvm, a0);
+		kvm_sched_yield(vcpu, a0);
 		ret = 0;
 		break;
 	default:
@@ -8367,6 +8458,27 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
 	static_call(kvm_x86_update_cr8_intercept)(vcpu, tpr, max_irr);
 }
 
+int kvm_check_nested_events(struct kvm_vcpu *vcpu)
+{
+	if (WARN_ON_ONCE(!is_guest_mode(vcpu)))
+		return -EIO;
+
+	if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
+		kvm_x86_ops.nested_ops->triple_fault(vcpu);
+		return 1;
+	}
+
+	return kvm_x86_ops.nested_ops->check_events(vcpu);
+}
+
+static void kvm_inject_exception(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->arch.exception.error_code && !is_protmode(vcpu))
+		vcpu->arch.exception.error_code = false;
+	static_call(kvm_x86_queue_exception)(vcpu);
+}
+
 static void inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit)
 {
 	int r;
@@ -8375,7 +8487,7 @@ static void inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit
 
 	/* try to reinject previous events if any */
 	if (vcpu->arch.exception.injected) {
-		static_call(kvm_x86_queue_exception)(vcpu);
+		kvm_inject_exception(vcpu);
 		can_inject = false;
 	}
 	/*
@@ -8412,7 +8524,7 @@ static void inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit
 	 * from L2 to L1.
 	 */
 	if (is_guest_mode(vcpu)) {
-		r = kvm_x86_ops.nested_ops->check_events(vcpu);
+		r = kvm_check_nested_events(vcpu);
 		if (r < 0)
 			goto busy;
 	}
@@ -8438,7 +8550,7 @@ static void inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit
 			}
 		}
 
-		static_call(kvm_x86_queue_exception)(vcpu);
+		kvm_inject_exception(vcpu);
 		can_inject = false;
 	}
@@ -8587,7 +8699,7 @@ static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf)
 	put_smstate(u32, buf, 0x7ff0, kvm_rip_read(vcpu));
 
 	for (i = 0; i < 8; i++)
-		put_smstate(u32, buf, 0x7fd0 + i * 4, kvm_register_read(vcpu, i));
+		put_smstate(u32, buf, 0x7fd0 + i * 4, kvm_register_read_raw(vcpu, i));
 
 	kvm_get_dr(vcpu, 6, &val);
 	put_smstate(u32, buf, 0x7fcc, (u32)val);
@@ -8633,7 +8745,7 @@ static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
 	int i;
 
 	for (i = 0; i < 16; i++)
-		put_smstate(u64, buf, 0x7ff8 - i * 8, kvm_register_read(vcpu, i));
+		put_smstate(u64, buf, 0x7ff8 - i * 8, kvm_register_read_raw(vcpu, i));
 
 	put_smstate(u64, buf, 0x7f78, kvm_rip_read(vcpu));
 	put_smstate(u32, buf, 0x7f70, kvm_get_rflags(vcpu));
@@ -8975,10 +9087,14 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			goto out;
 		}
 		if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
-			vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
-			vcpu->mmio_needed = 0;
-			r = 0;
-			goto out;
+			if (is_guest_mode(vcpu)) {
+				kvm_x86_ops.nested_ops->triple_fault(vcpu);
+			} else {
+				vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
+				vcpu->mmio_needed = 0;
+				r = 0;
+				goto out;
+			}
 		}
 		if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) {
 			/* Page is swapped out. Do synthetic halt */
@@ -9276,7 +9392,7 @@ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
 static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu)
 {
 	if (is_guest_mode(vcpu))
-		kvm_x86_ops.nested_ops->check_events(vcpu);
+		kvm_check_nested_events(vcpu);
 
 	return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
 		!vcpu->arch.apf.halted);
@@ -11002,6 +11118,14 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 	return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu);
 }
 
+bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->arch.apicv_active && static_call(kvm_x86_dy_apicv_has_pending_interrupt)(vcpu))
+		return true;
+
+	return false;
+}
+
 bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu)
 {
 	if (READ_ONCE(vcpu->arch.pv.pv_unhalted))
@@ -11012,14 +11136,14 @@ bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu)
 		 kvm_test_request(KVM_REQ_EVENT, vcpu))
 		return true;
 
-	if (vcpu->arch.apicv_active && static_call(kvm_x86_dy_apicv_has_pending_interrupt)(vcpu))
-		return true;
-
-	return false;
+	return kvm_arch_dy_has_pending_interrupt(vcpu);
 }
 
 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
 {
+	if (vcpu->arch.guest_state_protected)
+		return true;
+
 	return vcpu->arch.preempted_in_kernel;
 }
@@ -11290,7 +11414,7 @@ bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
 	if (!kvm_pv_async_pf_enabled(vcpu))
 		return true;
 	else
-		return apf_pageready_slot_free(vcpu);
+		return kvm_lapic_enabled(vcpu) && apf_pageready_slot_free(vcpu);
 }
 
 void kvm_arch_start_assignment(struct kvm *kvm)
@@ -11539,7 +11663,7 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva)
 
 		fallthrough;
 	case INVPCID_TYPE_ALL_INCL_GLOBAL:
-		kvm_mmu_unload(vcpu);
+		kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
 		return kvm_skip_emulated_instruction(vcpu);
 
 	default:
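
For reference, the KVM_CAP_SGX_ATTRIBUTE capability handled in the kvm_vm_ioctl_enable_cap() hunk above is enabled from userspace with KVM_ENABLE_CAP on the VM file descriptor, passing a file descriptor for the host SGX attribute file in args[0]. A minimal sketch follows; the helper name, vm_fd, and the lack of error handling are illustrative and not part of this commit.

/*
 * Sketch: allow the guest to use the SGX PROVISIONKEY attribute.
 * Assumes vm_fd came from KVM_CREATE_VM and the host exposes
 * /dev/sgx_provision; the kernel side calls sgx_set_attribute() on the
 * fd in args[0] and then sets kvm->arch.sgx_provisioning_allowed.
 */
#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int enable_sgx_provisioning(int vm_fd)
{
	struct kvm_enable_cap cap = { .cap = KVM_CAP_SGX_ATTRIBUTE };
	int sgx_fd = open("/dev/sgx_provision", O_RDONLY);

	if (sgx_fd < 0)
		return -1;

	cap.args[0] = sgx_fd;
	return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}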