diff options
author | Sean Christopherson <seanjc@google.com> | 2021-02-12 16:50:07 -0800 |
---|---|---|
committer | Paolo Bonzini <pbonzini@redhat.com> | 2021-02-19 03:08:32 -0500 |
commit | c3bb9a20834ffe72d3031afe460ff03d3b3b6e90 (patch) | |
tree | be142b09834d49462fde1ffa10d942fa181ace6c | |
parent | 9eba50f8d7fcb61774f160890f98239fa3ab68a6 (diff) | |
download | lwn-c3bb9a20834ffe72d3031afe460ff03d3b3b6e90.tar.gz lwn-c3bb9a20834ffe72d3031afe460ff03d3b3b6e90.zip |
KVM: nVMX: Disable PML in hardware when running L2
Unconditionally disable PML in vmcs02, KVM emulates PML purely in the
MMU, e.g. vmx_flush_pml_buffer() doesn't even try to copy the L2 GPAs
from vmcs02's buffer to vmcs12. At best, enabling PML is a nop. At
worst, it will cause vmx_flush_pml_buffer() to record bogus GFNs in the
dirty logs.
Initialize vmcs02.GUEST_PML_INDEX such that PML writes would trigger
VM-Exit if PML was somehow enabled, skip flushing the buffer for guest
mode since the index is bogus, and freak out if a PML full exit occurs
when L2 is active.
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210213005015.1651772-7-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-rw-r--r-- | arch/x86/kvm/vmx/nested.c | 29 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/vmx.c | 12 |
2 files changed, 25 insertions, 16 deletions
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 066156ff9c7c..486c7ac6d813 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2167,15 +2167,13 @@ static void prepare_vmcs02_constant_state(struct vcpu_vmx *vmx) vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap)); /* - * The PML address never changes, so it is constant in vmcs02. - * Conceptually we want to copy the PML index from vmcs01 here, - * and then back to vmcs01 on nested vmexit. But since we flush - * the log and reset GUEST_PML_INDEX on each vmexit, the PML - * index is also effectively constant in vmcs02. + * PML is emulated for L2, but never enabled in hardware as the MMU + * handles A/D emulation. Disabling PML for L2 also avoids having to + * deal with filtering out L2 GPAs from the buffer. */ if (enable_pml) { - vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg)); - vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1); + vmcs_write64(PML_ADDRESS, 0); + vmcs_write16(GUEST_PML_INDEX, -1); } if (cpu_has_vmx_encls_vmexit()) @@ -2210,7 +2208,7 @@ static void prepare_vmcs02_early_rare(struct vcpu_vmx *vmx, static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) { - u32 exec_control, vmcs12_exec_ctrl; + u32 exec_control; u64 guest_efer = nested_vmx_calc_efer(vmx, vmcs12); if (vmx->nested.dirty_vmcs12 || vmx->nested.hv_evmcs) @@ -2284,11 +2282,11 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_ENABLE_VMFUNC); if (nested_cpu_has(vmcs12, - CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) { - vmcs12_exec_ctrl = vmcs12->secondary_vm_exec_control & - ~SECONDARY_EXEC_ENABLE_PML; - exec_control |= vmcs12_exec_ctrl; - } + CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) + exec_control |= vmcs12->secondary_vm_exec_control; + + /* PML is emulated and never enabled in hardware for L2. */ + exec_control &= ~SECONDARY_EXEC_ENABLE_PML; /* VMCS shadowing for L2 is emulated for now */ exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS; @@ -5790,7 +5788,10 @@ static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu, case EXIT_REASON_PREEMPTION_TIMER: return true; case EXIT_REASON_PML_FULL: - /* We emulate PML support to L1. */ + /* + * PML is emulated for an L1 VMM and should never be enabled in + * vmcs02, always "handle" PML_FULL by exiting to userspace. + */ return true; case EXIT_REASON_VMFUNC: /* VM functions are emulated through L2->L0 vmexits. */ diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 5ddaf7e4f601..559a0f16263e 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -5966,9 +5966,10 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) * updated. Another good is, in kvm_vm_ioctl_get_dirty_log, before * querying dirty_bitmap, we only need to kick all vcpus out of guest * mode as if vcpus is in root mode, the PML buffer must has been - * flushed already. + * flushed already. Note, PML is never enabled in hardware while + * running L2. */ - if (enable_pml) + if (enable_pml && !is_guest_mode(vcpu)) vmx_flush_pml_buffer(vcpu); /* @@ -5985,6 +5986,13 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) if (is_guest_mode(vcpu)) { /* + * PML is never enabled when running L2, bail immediately if a + * PML full exit occurs as something is horribly wrong. + */ + if (exit_reason.basic == EXIT_REASON_PML_FULL) + goto unexpected_vmexit; + + /* * The host physical addresses of some pages of guest memory * are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC * Page). The CPU may write to these pages via their host |