summaryrefslogtreecommitdiff
path: root/arch/x86/kvm/vmx/nested.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm/vmx/nested.c')
-rw-r--r--arch/x86/kvm/vmx/nested.c113
1 files changed, 70 insertions, 43 deletions
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index aa78b6f38dfe..ed8a3cb53961 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -257,7 +257,7 @@ static bool nested_evmcs_handle_vmclear(struct kvm_vcpu *vcpu, gpa_t vmptr)
* state. It is possible that the area will stay mapped as
* vmx->nested.hv_evmcs but this shouldn't be a problem.
*/
- if (!guest_cpuid_has_evmcs(vcpu) ||
+ if (!guest_cpu_cap_has_evmcs(vcpu) ||
!evmptr_is_valid(nested_get_evmptr(vcpu)))
return false;
@@ -2089,7 +2089,7 @@ static enum nested_evmptrld_status nested_vmx_handle_enlightened_vmptrld(
bool evmcs_gpa_changed = false;
u64 evmcs_gpa;
- if (likely(!guest_cpuid_has_evmcs(vcpu)))
+ if (likely(!guest_cpu_cap_has_evmcs(vcpu)))
return EVMPTRLD_DISABLED;
evmcs_gpa = nested_get_evmptr(vcpu);
@@ -2992,7 +2992,7 @@ static int nested_vmx_check_controls(struct kvm_vcpu *vcpu,
return -EINVAL;
#ifdef CONFIG_KVM_HYPERV
- if (guest_cpuid_has_evmcs(vcpu))
+ if (guest_cpu_cap_has_evmcs(vcpu))
return nested_evmcs_check_controls(vmcs12);
#endif
@@ -3287,7 +3287,7 @@ static bool nested_get_evmcs_page(struct kvm_vcpu *vcpu)
* L2 was running), map it here to make sure vmcs12 changes are
* properly reflected.
*/
- if (guest_cpuid_has_evmcs(vcpu) &&
+ if (guest_cpu_cap_has_evmcs(vcpu) &&
vmx->nested.hv_evmcs_vmptr == EVMPTR_MAP_PENDING) {
enum nested_evmptrld_status evmptrld_status =
nested_vmx_handle_enlightened_vmptrld(vcpu, false);
@@ -3442,7 +3442,7 @@ static int nested_vmx_write_pml_buffer(struct kvm_vcpu *vcpu, gpa_t gpa)
if (!nested_cpu_has_pml(vmcs12))
return 0;
- if (vmcs12->guest_pml_index >= PML_ENTITY_NUM) {
+ if (vmcs12->guest_pml_index >= PML_LOG_NR_ENTRIES) {
vmx->nested.pml_full = true;
return 1;
}
@@ -3481,14 +3481,6 @@ static int nested_vmx_check_permission(struct kvm_vcpu *vcpu)
return 1;
}
-static u8 vmx_has_apicv_interrupt(struct kvm_vcpu *vcpu)
-{
- u8 rvi = vmx_get_rvi();
- u8 vppr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_PROCPRI);
-
- return ((rvi & 0xf0) > (vppr & 0xf0));
-}
-
static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12);
@@ -3508,7 +3500,6 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
enum vm_entry_failure_code entry_failure_code;
- bool evaluate_pending_interrupts;
union vmx_exit_reason exit_reason = {
.basic = EXIT_REASON_INVALID_STATE,
.failed_vmentry = 1,
@@ -3527,13 +3518,6 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
kvm_service_local_tlb_flush_requests(vcpu);
- evaluate_pending_interrupts = exec_controls_get(vmx) &
- (CPU_BASED_INTR_WINDOW_EXITING | CPU_BASED_NMI_WINDOW_EXITING);
- if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu))
- evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu);
- if (!evaluate_pending_interrupts)
- evaluate_pending_interrupts |= kvm_apic_has_pending_init_or_sipi(vcpu);
-
if (!vmx->nested.nested_run_pending ||
!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
vmx->nested.pre_vmenter_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
@@ -3616,9 +3600,13 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
* Re-evaluate pending events if L1 had a pending IRQ/NMI/INIT/SIPI
* when it executed VMLAUNCH/VMRESUME, as entering non-root mode can
* effectively unblock various events, e.g. INIT/SIPI cause VM-Exit
- * unconditionally.
+ * unconditionally. Take care to pull data from vmcs01 as appropriate,
+ * e.g. when checking for interrupt windows, as vmcs02 is now loaded.
*/
- if (unlikely(evaluate_pending_interrupts))
+ if ((__exec_controls_get(&vmx->vmcs01) & (CPU_BASED_INTR_WINDOW_EXITING |
+ CPU_BASED_NMI_WINDOW_EXITING)) ||
+ kvm_apic_has_pending_init_or_sipi(vcpu) ||
+ kvm_apic_has_interrupt(vcpu))
kvm_make_request(KVM_REQ_EVENT, vcpu);
/*
@@ -3751,14 +3739,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
if (unlikely(status != NVMX_VMENTRY_SUCCESS))
goto vmentry_failed;
- /* Emulate processing of posted interrupts on VM-Enter. */
- if (nested_cpu_has_posted_intr(vmcs12) &&
- kvm_apic_has_interrupt(vcpu) == vmx->nested.posted_intr_nv) {
- vmx->nested.pi_pending = true;
- kvm_make_request(KVM_REQ_EVENT, vcpu);
- kvm_apic_clear_irr(vcpu, vmx->nested.posted_intr_nv);
- }
-
/* Hide L1D cache contents from the nested guest. */
vmx->vcpu.arch.l1tf_flush_l1d = true;
@@ -4220,13 +4200,25 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
*/
bool block_nested_exceptions = vmx->nested.nested_run_pending;
/*
- * New events (not exceptions) are only recognized at instruction
+ * Events that don't require injection, i.e. that are virtualized by
+ * hardware, aren't blocked by a pending VM-Enter as KVM doesn't need
+ * to regain control in order to deliver the event, and hardware will
+ * handle event ordering, e.g. with respect to injected exceptions.
+ *
+ * But, new events (not exceptions) are only recognized at instruction
* boundaries. If an event needs reinjection, then KVM is handling a
- * VM-Exit that occurred _during_ instruction execution; new events are
- * blocked until the instruction completes.
+ * VM-Exit that occurred _during_ instruction execution; new events,
+ * irrespective of whether or not they're injected, are blocked until
+ * the instruction completes.
+ */
+ bool block_non_injected_events = kvm_event_needs_reinjection(vcpu);
+ /*
+ * Inject events are blocked by nested VM-Enter, as KVM is responsible
+ * for managing priority between concurrent events, i.e. KVM needs to
+ * wait until after VM-Enter completes to deliver injected events.
*/
bool block_nested_events = block_nested_exceptions ||
- kvm_event_needs_reinjection(vcpu);
+ block_non_injected_events;
if (lapic_in_kernel(vcpu) &&
test_bit(KVM_APIC_INIT, &apic->pending_events)) {
@@ -4338,18 +4330,26 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
if (kvm_cpu_has_interrupt(vcpu) && !vmx_interrupt_blocked(vcpu)) {
int irq;
- if (block_nested_events)
- return -EBUSY;
- if (!nested_exit_on_intr(vcpu))
+ if (!nested_exit_on_intr(vcpu)) {
+ if (block_nested_events)
+ return -EBUSY;
+
goto no_vmexit;
+ }
if (!nested_exit_intr_ack_set(vcpu)) {
+ if (block_nested_events)
+ return -EBUSY;
+
nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0);
return 0;
}
irq = kvm_cpu_get_extint(vcpu);
if (irq != -1) {
+ if (block_nested_events)
+ return -EBUSY;
+
nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT,
INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR | irq, 0);
return 0;
@@ -4368,11 +4368,22 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
* and enabling posted interrupts requires ACK-on-exit.
*/
if (irq == vmx->nested.posted_intr_nv) {
+ /*
+ * Nested posted interrupts are delivered via RVI, i.e.
+ * aren't injected by KVM, and so can be queued even if
+ * manual event injection is disallowed.
+ */
+ if (block_non_injected_events)
+ return -EBUSY;
+
vmx->nested.pi_pending = true;
kvm_apic_clear_irr(vcpu, irq);
goto no_vmexit;
}
+ if (block_nested_events)
+ return -EBUSY;
+
nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT,
INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR | irq, 0);
@@ -5015,7 +5026,7 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
* doesn't isolate different VMCSs, i.e. in this case, doesn't provide
* separate modes for L2 vs L1.
*/
- if (guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_SPEC_CTRL))
indirect_branch_prediction_barrier();
/* Update any VMCS fields that might have changed while L2 ran */
@@ -5050,6 +5061,11 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
}
+ if (vmx->nested.update_vmcs01_hwapic_isr) {
+ vmx->nested.update_vmcs01_hwapic_isr = false;
+ kvm_apic_update_hwapic_isr(vcpu);
+ }
+
if ((vm_exit_reason != -1) &&
(enable_shadow_vmcs || nested_vmx_is_evmptr12_valid(vmx)))
vmx->nested.need_vmcs12_to_shadow_sync = true;
@@ -5068,6 +5084,17 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
load_vmcs12_host_state(vcpu, vmcs12);
+ /*
+ * Process events if an injectable IRQ or NMI is pending, even
+ * if the event is blocked (RFLAGS.IF is cleared on VM-Exit).
+ * If an event became pending while L2 was active, KVM needs to
+ * either inject the event or request an IRQ/NMI window. SMIs
+ * don't need to be processed as SMM is mutually exclusive with
+ * non-root mode. INIT/SIPI don't need to be checked as INIT
+ * is blocked post-VMXON, and SIPIs are ignored.
+ */
+ if (kvm_cpu_has_injectable_intr(vcpu) || vcpu->arch.nmi_pending)
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
return;
}
@@ -6279,7 +6306,7 @@ static bool nested_vmx_exit_handled_encls(struct kvm_vcpu *vcpu,
{
u32 encls_leaf;
- if (!guest_cpuid_has(vcpu, X86_FEATURE_SGX) ||
+ if (!guest_cpu_cap_has(vcpu, X86_FEATURE_SGX) ||
!nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENCLS_EXITING))
return false;
@@ -6617,7 +6644,7 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
vmx = to_vmx(vcpu);
vmcs12 = get_vmcs12(vcpu);
- if (guest_can_use(vcpu, X86_FEATURE_VMX) &&
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_VMX) &&
(vmx->nested.vmxon || vmx->nested.smm.vmxon)) {
kvm_state.hdr.vmx.vmxon_pa = vmx->nested.vmxon_ptr;
kvm_state.hdr.vmx.vmcs12_pa = vmx->nested.current_vmptr;
@@ -6758,7 +6785,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
if (kvm_state->flags & ~KVM_STATE_NESTED_EVMCS)
return -EINVAL;
} else {
- if (!guest_can_use(vcpu, X86_FEATURE_VMX))
+ if (!guest_cpu_cap_has(vcpu, X86_FEATURE_VMX))
return -EINVAL;
if (!page_address_valid(vcpu, kvm_state->hdr.vmx.vmxon_pa))
@@ -6792,7 +6819,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
return -EINVAL;
if ((kvm_state->flags & KVM_STATE_NESTED_EVMCS) &&
- (!guest_can_use(vcpu, X86_FEATURE_VMX) ||
+ (!guest_cpu_cap_has(vcpu, X86_FEATURE_VMX) ||
!vmx->nested.enlightened_vmcs_enabled))
return -EINVAL;