diff options
author | Sean Christopherson <sean.j.christopherson@intel.com> | 2019-12-06 15:57:14 -0800 |
---|---|---|
committer | Paolo Bonzini <pbonzini@redhat.com> | 2020-01-08 18:16:02 +0100 |
commit | 736c291c9f36b07f8889c61764c28edce20e715d (patch) | |
tree | 4cd8d7f851dc796efe60fbae2b4ef692e40e8acc /arch/x86/kvm/mmu/mmu.c | |
parent | 95145c25a78cc0a9d3cbc75708abde432310c5a1 (diff) | |
download | lwn-736c291c9f36b07f8889c61764c28edce20e715d.tar.gz lwn-736c291c9f36b07f8889c61764c28edce20e715d.zip |
KVM: x86: Use gpa_t for cr2/gpa to fix TDP support on 32-bit KVM
Convert a plethora of parameters and variables in the MMU and page fault
flows from type gva_t to gpa_t to properly handle TDP on 32-bit KVM.
Thanks to PSE and PAE paging, 32-bit kernels can access 64-bit physical
addresses. When TDP is enabled, the fault address is a guest physical
address and thus can be a 64-bit value, even when both KVM and its guest
are using 32-bit virtual addressing, e.g. VMX's VMCS.GUEST_PHYSICAL is a
64-bit field, not a natural width field.
Using a gva_t for the fault address means KVM will incorrectly drop the
upper 32-bits of the GPA. Ditto for gva_to_gpa() when it is used to
translate L2 GPAs to L1 GPAs.
Opportunistically rename variables and parameters to better reflect the
dual address modes, e.g. use "cr2_or_gpa" for fault addresses and plain
"addr" instead of "vaddr" when the address may be either a GVA or an L2
GPA. Similarly, use "gpa" in the nonpaging_page_fault() flows to avoid
a confusing "gpa_t gva" declaration; this also sets the stage for a
future patch to combing nonpaging_page_fault() and tdp_page_fault() with
minimal churn.
Sprinkle in a few comments to document flows where an address is known
to be a GVA and thus can be safely truncated to a 32-bit value. Add
WARNs in kvm_handle_page_fault() and FNAME(gva_to_gpa_nested)() to help
document such cases and detect bugs.
Cc: stable@vger.kernel.org
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Diffstat (limited to 'arch/x86/kvm/mmu/mmu.c')
-rw-r--r-- | arch/x86/kvm/mmu/mmu.c | 69 |
1 files changed, 40 insertions, 29 deletions
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index c19f3ccaace3..2cb199817837 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3532,7 +3532,7 @@ static bool is_access_allowed(u32 fault_err_code, u64 spte) * - true: let the vcpu to access on the same address again. * - false: let the real page fault path to fix it. */ -static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, +static bool fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, int level, u32 error_code) { struct kvm_shadow_walk_iterator iterator; @@ -3552,7 +3552,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, do { u64 new_spte; - for_each_shadow_entry_lockless(vcpu, gva, iterator, spte) + for_each_shadow_entry_lockless(vcpu, cr2_or_gpa, iterator, spte) if (!is_shadow_present_pte(spte) || iterator.level < level) break; @@ -3630,7 +3630,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, } while (true); - trace_fast_page_fault(vcpu, gva, error_code, iterator.sptep, + trace_fast_page_fault(vcpu, cr2_or_gpa, error_code, iterator.sptep, spte, fault_handled); walk_shadow_page_lockless_end(vcpu); @@ -3638,10 +3638,11 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, } static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, - gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable); + gpa_t cr2_or_gpa, kvm_pfn_t *pfn, bool write, + bool *writable); static int make_mmu_pages_available(struct kvm_vcpu *vcpu); -static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, +static int nonpaging_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, gfn_t gfn, bool prefault) { int r; @@ -3667,16 +3668,16 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1); } - if (fast_page_fault(vcpu, v, level, error_code)) + if (fast_page_fault(vcpu, gpa, level, error_code)) return RET_PF_RETRY; mmu_seq = vcpu->kvm->mmu_notifier_seq; smp_rmb(); - if (try_async_pf(vcpu, prefault, gfn, v, &pfn, write, &map_writable)) + if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, write, &map_writable)) return RET_PF_RETRY; - if (handle_abnormal_pfn(vcpu, v, gfn, pfn, ACC_ALL, &r)) + if (handle_abnormal_pfn(vcpu, gpa, gfn, pfn, ACC_ALL, &r)) return r; r = RET_PF_RETRY; @@ -3687,7 +3688,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, goto out_unlock; if (likely(!force_pt_level)) transparent_hugepage_adjust(vcpu, gfn, &pfn, &level); - r = __direct_map(vcpu, v, write, map_writable, level, pfn, + r = __direct_map(vcpu, gpa, write, map_writable, level, pfn, prefault, false); out_unlock: spin_unlock(&vcpu->kvm->mmu_lock); @@ -3985,7 +3986,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_mmu_sync_roots); -static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr, +static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gpa_t vaddr, u32 access, struct x86_exception *exception) { if (exception) @@ -3993,7 +3994,7 @@ static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr, return vaddr; } -static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr, +static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gpa_t vaddr, u32 access, struct x86_exception *exception) { @@ -4153,13 +4154,14 @@ static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr) walk_shadow_page_lockless_end(vcpu); } -static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, +static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, bool prefault) { - gfn_t gfn = gva >> PAGE_SHIFT; + gfn_t gfn = gpa >> PAGE_SHIFT; int r; - pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code); + /* Note, paging is disabled, ergo gva == gpa. */ + pgprintk("%s: gva %lx error %x\n", __func__, gpa, error_code); if (page_fault_handle_page_track(vcpu, error_code, gfn)) return RET_PF_EMULATE; @@ -4171,11 +4173,12 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa)); - return nonpaging_map(vcpu, gva & PAGE_MASK, + return nonpaging_map(vcpu, gpa & PAGE_MASK, error_code, gfn, prefault); } -static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn) +static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, + gfn_t gfn) { struct kvm_arch_async_pf arch; @@ -4184,11 +4187,13 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn) arch.direct_map = vcpu->arch.mmu->direct_map; arch.cr3 = vcpu->arch.mmu->get_cr3(vcpu); - return kvm_setup_async_pf(vcpu, gva, kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch); + return kvm_setup_async_pf(vcpu, cr2_or_gpa, + kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch); } static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, - gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable) + gpa_t cr2_or_gpa, kvm_pfn_t *pfn, bool write, + bool *writable) { struct kvm_memory_slot *slot; bool async; @@ -4208,12 +4213,12 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, return false; /* *pfn has correct page already */ if (!prefault && kvm_can_do_async_pf(vcpu)) { - trace_kvm_try_async_get_page(gva, gfn); + trace_kvm_try_async_get_page(cr2_or_gpa, gfn); if (kvm_find_async_pf_gfn(vcpu, gfn)) { - trace_kvm_async_pf_doublefault(gva, gfn); + trace_kvm_async_pf_doublefault(cr2_or_gpa, gfn); kvm_make_request(KVM_REQ_APF_HALT, vcpu); return true; - } else if (kvm_arch_setup_async_pf(vcpu, gva, gfn)) + } else if (kvm_arch_setup_async_pf(vcpu, cr2_or_gpa, gfn)) return true; } @@ -4226,6 +4231,12 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code, { int r = 1; +#ifndef CONFIG_X86_64 + /* A 64-bit CR2 should be impossible on 32-bit KVM. */ + if (WARN_ON_ONCE(fault_address >> 32)) + return -EFAULT; +#endif + vcpu->arch.l1tf_flush_l1d = true; switch (vcpu->arch.apf.host_apf_reason) { default: @@ -4263,7 +4274,7 @@ check_hugepage_cache_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, int level) return kvm_mtrr_check_gfn_range_consistency(vcpu, gfn, page_num); } -static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, +static int tdp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, bool prefault) { kvm_pfn_t pfn; @@ -5520,7 +5531,7 @@ static int make_mmu_pages_available(struct kvm_vcpu *vcpu) return 0; } -int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, +int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, void *insn, int insn_len) { int r, emulation_type = 0; @@ -5529,18 +5540,18 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, /* With shadow page tables, fault_address contains a GVA or nGPA. */ if (vcpu->arch.mmu->direct_map) { vcpu->arch.gpa_available = true; - vcpu->arch.gpa_val = cr2; + vcpu->arch.gpa_val = cr2_or_gpa; } r = RET_PF_INVALID; if (unlikely(error_code & PFERR_RSVD_MASK)) { - r = handle_mmio_page_fault(vcpu, cr2, direct); + r = handle_mmio_page_fault(vcpu, cr2_or_gpa, direct); if (r == RET_PF_EMULATE) goto emulate; } if (r == RET_PF_INVALID) { - r = vcpu->arch.mmu->page_fault(vcpu, cr2, + r = vcpu->arch.mmu->page_fault(vcpu, cr2_or_gpa, lower_32_bits(error_code), false); WARN_ON(r == RET_PF_INVALID); @@ -5560,7 +5571,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, */ if (vcpu->arch.mmu->direct_map && (error_code & PFERR_NESTED_GUEST_PAGE) == PFERR_NESTED_GUEST_PAGE) { - kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2)); + kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2_or_gpa)); return 1; } @@ -5575,7 +5586,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, * explicitly shadowing L1's page tables, i.e. unprotecting something * for L1 isn't going to magically fix whatever issue cause L2 to fail. */ - if (!mmio_info_in_cache(vcpu, cr2, direct) && !is_guest_mode(vcpu)) + if (!mmio_info_in_cache(vcpu, cr2_or_gpa, direct) && !is_guest_mode(vcpu)) emulation_type = EMULTYPE_ALLOW_RETRY; emulate: /* @@ -5590,7 +5601,7 @@ emulate: return 1; } - return x86_emulate_instruction(vcpu, cr2, emulation_type, insn, + return x86_emulate_instruction(vcpu, cr2_or_gpa, emulation_type, insn, insn_len); } EXPORT_SYMBOL_GPL(kvm_mmu_page_fault); |