diff options
-rw-r--r-- | arch/x86/kvm/vmx/nested.c | 140 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/vmx.h | 7 |
2 files changed, 127 insertions, 20 deletions
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 0d9a239b9df9..879f14fa384e 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -3376,20 +3376,57 @@ static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu) return value >> VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE; } -/* - * Update the guest state fields of vmcs12 to reflect changes that - * occurred while L2 was running. (The "IA-32e mode guest" bit of the - * VM-entry controls is also updated, since this is really a guest - * state bit.) - */ -static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) +static bool is_vmcs12_ext_field(unsigned long field) { - vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12); - vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12); + switch (field) { + case GUEST_ES_SELECTOR: + case GUEST_CS_SELECTOR: + case GUEST_SS_SELECTOR: + case GUEST_DS_SELECTOR: + case GUEST_FS_SELECTOR: + case GUEST_GS_SELECTOR: + case GUEST_LDTR_SELECTOR: + case GUEST_TR_SELECTOR: + case GUEST_ES_LIMIT: + case GUEST_CS_LIMIT: + case GUEST_SS_LIMIT: + case GUEST_DS_LIMIT: + case GUEST_FS_LIMIT: + case GUEST_GS_LIMIT: + case GUEST_LDTR_LIMIT: + case GUEST_TR_LIMIT: + case GUEST_GDTR_LIMIT: + case GUEST_IDTR_LIMIT: + case GUEST_ES_AR_BYTES: + case GUEST_DS_AR_BYTES: + case GUEST_FS_AR_BYTES: + case GUEST_GS_AR_BYTES: + case GUEST_LDTR_AR_BYTES: + case GUEST_TR_AR_BYTES: + case GUEST_ES_BASE: + case GUEST_CS_BASE: + case GUEST_SS_BASE: + case GUEST_DS_BASE: + case GUEST_FS_BASE: + case GUEST_GS_BASE: + case GUEST_LDTR_BASE: + case GUEST_TR_BASE: + case GUEST_GDTR_BASE: + case GUEST_IDTR_BASE: + case GUEST_PENDING_DBG_EXCEPTIONS: + case GUEST_BNDCFGS: + return true; + default: + break; + } - vmcs12->guest_rsp = kvm_rsp_read(vcpu); - vmcs12->guest_rip = kvm_rip_read(vcpu); - vmcs12->guest_rflags = vmcs_readl(GUEST_RFLAGS); + return false; +} + +static void sync_vmcs02_to_vmcs12_rare(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); vmcs12->guest_es_selector = vmcs_read16(GUEST_ES_SELECTOR); vmcs12->guest_cs_selector = vmcs_read16(GUEST_CS_SELECTOR); @@ -3410,8 +3447,6 @@ static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmcs12->guest_gdtr_limit = vmcs_read32(GUEST_GDTR_LIMIT); vmcs12->guest_idtr_limit = vmcs_read32(GUEST_IDTR_LIMIT); vmcs12->guest_es_ar_bytes = vmcs_read32(GUEST_ES_AR_BYTES); - vmcs12->guest_cs_ar_bytes = vmcs_read32(GUEST_CS_AR_BYTES); - vmcs12->guest_ss_ar_bytes = vmcs_read32(GUEST_SS_AR_BYTES); vmcs12->guest_ds_ar_bytes = vmcs_read32(GUEST_DS_AR_BYTES); vmcs12->guest_fs_ar_bytes = vmcs_read32(GUEST_FS_AR_BYTES); vmcs12->guest_gs_ar_bytes = vmcs_read32(GUEST_GS_AR_BYTES); @@ -3427,11 +3462,65 @@ static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmcs12->guest_tr_base = vmcs_readl(GUEST_TR_BASE); vmcs12->guest_gdtr_base = vmcs_readl(GUEST_GDTR_BASE); vmcs12->guest_idtr_base = vmcs_readl(GUEST_IDTR_BASE); + vmcs12->guest_pending_dbg_exceptions = + vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); + if (kvm_mpx_supported()) + vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS); + + vmx->nested.need_sync_vmcs02_to_vmcs12_rare = false; +} + +static void copy_vmcs02_to_vmcs12_rare(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + int cpu; + + if (!vmx->nested.need_sync_vmcs02_to_vmcs12_rare) + return; + + + WARN_ON_ONCE(vmx->loaded_vmcs != &vmx->vmcs01); + + cpu = get_cpu(); + vmx->loaded_vmcs = &vmx->nested.vmcs02; + vmx_vcpu_load(&vmx->vcpu, cpu); + + sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12); + + vmx->loaded_vmcs = &vmx->vmcs01; + vmx_vcpu_load(&vmx->vcpu, cpu); + put_cpu(); +} + +/* + * Update the guest state fields of vmcs12 to reflect changes that + * occurred while L2 was running. (The "IA-32e mode guest" bit of the + * VM-entry controls is also updated, since this is really a guest + * state bit.) + */ +static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (vmx->nested.hv_evmcs) + sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12); + + vmx->nested.need_sync_vmcs02_to_vmcs12_rare = !vmx->nested.hv_evmcs; + + vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12); + vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12); + + vmcs12->guest_rsp = kvm_rsp_read(vcpu); + vmcs12->guest_rip = kvm_rip_read(vcpu); + vmcs12->guest_rflags = vmcs_readl(GUEST_RFLAGS); + + vmcs12->guest_cs_ar_bytes = vmcs_read32(GUEST_CS_AR_BYTES); + vmcs12->guest_ss_ar_bytes = vmcs_read32(GUEST_SS_AR_BYTES); vmcs12->guest_interruptibility_info = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); - vmcs12->guest_pending_dbg_exceptions = - vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); + if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) vmcs12->guest_activity_state = GUEST_ACTIVITY_HLT; else @@ -3481,8 +3570,6 @@ static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS); vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP); vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP); - if (kvm_mpx_supported()) - vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS); } /* @@ -4280,6 +4367,8 @@ static inline void nested_release_vmcs12(struct kvm_vcpu *vcpu) if (vmx->nested.current_vmptr == -1ull) return; + copy_vmcs02_to_vmcs12_rare(vcpu, get_vmcs12(vcpu)); + if (enable_shadow_vmcs) { /* copy to memory all shadowed fields in case they were modified */ @@ -4397,6 +4486,9 @@ static int handle_vmread(struct kvm_vcpu *vcpu) return nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); + if (!is_guest_mode(vcpu) && is_vmcs12_ext_field(field)) + copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12); + /* Read the field, zero-extended to a u64 field_value */ field_value = vmcs12_read_any(vmcs12, field, offset); @@ -4495,9 +4587,16 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) return nested_vmx_failValid(vcpu, VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT); - if (!is_guest_mode(vcpu)) + if (!is_guest_mode(vcpu)) { vmcs12 = get_vmcs12(vcpu); - else { + + /* + * Ensure vmcs12 is up-to-date before any VMWRITE that dirties + * vmcs12, else we may crush a field or consume a stale value. + */ + if (!is_shadow_field_rw(field)) + copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12); + } else { /* * When vmcs->vmcs_link_pointer is -1ull, any VMWRITE * to shadowed-field sets the ALU flags for VMfailInvalid. @@ -5317,6 +5416,7 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu, */ if (is_guest_mode(vcpu)) { sync_vmcs02_to_vmcs12(vcpu, vmcs12); + sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12); } else if (!vmx->nested.need_vmcs12_to_shadow_sync) { if (vmx->nested.hv_evmcs) copy_enlightened_to_vmcs12(vmx); diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index f4448292df0f..f03af64e9934 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -109,6 +109,7 @@ struct nested_vmx { * to guest memory during VM exit. */ struct vmcs12 *cached_shadow_vmcs12; + /* * Indicates if the shadow vmcs or enlightened vmcs must be updated * with the data held by struct vmcs12. @@ -117,6 +118,12 @@ struct nested_vmx { bool dirty_vmcs12; /* + * Indicates lazily loaded guest state has not yet been decached from + * vmcs02. + */ + bool need_sync_vmcs02_to_vmcs12_rare; + + /* * vmcs02 has been initialized, i.e. state that is constant for * vmcs02 has been written to the backing VMCS. Initialization * is delayed until L1 actually attempts to run a nested VM. |