diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-02-24 13:07:18 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-02-24 13:07:18 -0800 |
commit | 89f883372fa60f604d136924baf3e89ff1870e9e (patch) | |
tree | cb69b0a14957945ba00d3d392bf9ccbbef56f3b8 /arch/x86/kvm/x86.c | |
parent | 9e2d59ad580d590134285f361a0e80f0e98c0207 (diff) | |
parent | 6b73a96065e89dc9fa75ba4f78b1aa3a3bbd0470 (diff) | |
download | lwn-89f883372fa60f604d136924baf3e89ff1870e9e.tar.gz lwn-89f883372fa60f604d136924baf3e89ff1870e9e.zip |
Merge tag 'kvm-3.9-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Marcelo Tosatti:
"KVM updates for the 3.9 merge window, including x86 real mode
emulation fixes, stronger memory slot interface restrictions, mmu_lock
spinlock hold time reduction, improved handling of large page faults
on shadow, initial APICv HW acceleration support, s390 channel IO
based virtio, amongst others"
* tag 'kvm-3.9-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (143 commits)
Revert "KVM: MMU: lazily drop large spte"
x86: pvclock kvm: align allocation size to page size
KVM: nVMX: Remove redundant get_vmcs12 from nested_vmx_exit_handled_msr
x86 emulator: fix parity calculation for AAD instruction
KVM: PPC: BookE: Handle alignment interrupts
booke: Added DBCR4 SPR number
KVM: PPC: booke: Allow multiple exception types
KVM: PPC: booke: use vcpu reference from thread_struct
KVM: Remove user_alloc from struct kvm_memory_slot
KVM: VMX: disable apicv by default
KVM: s390: Fix handling of iscs.
KVM: MMU: cleanup __direct_map
KVM: MMU: remove pt_access in mmu_set_spte
KVM: MMU: cleanup mapping-level
KVM: MMU: lazily drop large spte
KVM: VMX: cleanup vmx_set_cr0().
KVM: VMX: add missing exit names to VMX_EXIT_REASONS array
KVM: VMX: disable SMEP feature when guest is in non-paging mode
KVM: Remove duplicate text in api.txt
Revert "KVM: MMU: split kvm_mmu_free_page"
...
Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r-- | arch/x86/kvm/x86.c | 168 |
1 files changed, 111 insertions, 57 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 37040079cd6b..f71500af1f81 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -872,8 +872,6 @@ static int set_efer(struct kvm_vcpu *vcpu, u64 efer) kvm_x86_ops->set_efer(vcpu, efer); - vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; - /* Update reserved bits */ if ((efer ^ old_efer) & EFER_NX) kvm_mmu_reset_context(vcpu); @@ -2522,7 +2520,7 @@ int kvm_dev_ioctl_check_extension(long ext) r = KVM_MAX_VCPUS; break; case KVM_CAP_NR_MEMSLOTS: - r = KVM_MEMORY_SLOTS; + r = KVM_USER_MEM_SLOTS; break; case KVM_CAP_PV_MMU: /* obsolete */ r = 0; @@ -3274,12 +3272,10 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm, return -EINVAL; mutex_lock(&kvm->slots_lock); - spin_lock(&kvm->mmu_lock); kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages); kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages; - spin_unlock(&kvm->mmu_lock); mutex_unlock(&kvm->slots_lock); return 0; } @@ -3439,7 +3435,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) mutex_lock(&kvm->slots_lock); r = -EINVAL; - if (log->slot >= KVM_MEMORY_SLOTS) + if (log->slot >= KVM_USER_MEM_SLOTS) goto out; memslot = id_to_memslot(kvm->memslots, log->slot); @@ -4495,8 +4491,10 @@ static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector, kvm_get_segment(emul_to_vcpu(ctxt), &var, seg); *selector = var.selector; - if (var.unusable) + if (var.unusable) { + memset(desc, 0, sizeof(*desc)); return false; + } if (var.g) var.limit >>= 12; @@ -4757,26 +4755,26 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu) return r; } -static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva) +static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, + bool write_fault_to_shadow_pgtable) { - gpa_t gpa; + gpa_t gpa = cr2; pfn_t pfn; - if (tdp_enabled) - return false; - - /* - * if emulation was due to access to shadowed page table - * and it failed try to unshadow page and re-enter the - * guest to let CPU execute the instruction. - */ - if (kvm_mmu_unprotect_page_virt(vcpu, gva)) - return true; - - gpa = kvm_mmu_gva_to_gpa_system(vcpu, gva, NULL); + if (!vcpu->arch.mmu.direct_map) { + /* + * Write permission should be allowed since only + * write access need to be emulated. + */ + gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL); - if (gpa == UNMAPPED_GVA) - return true; /* let cpu generate fault */ + /* + * If the mapping is invalid in guest, let cpu retry + * it to generate fault. + */ + if (gpa == UNMAPPED_GVA) + return true; + } /* * Do not retry the unhandleable instruction if it faults on the @@ -4785,12 +4783,43 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva) * instruction -> ... */ pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa)); - if (!is_error_noslot_pfn(pfn)) { - kvm_release_pfn_clean(pfn); + + /* + * If the instruction failed on the error pfn, it can not be fixed, + * report the error to userspace. + */ + if (is_error_noslot_pfn(pfn)) + return false; + + kvm_release_pfn_clean(pfn); + + /* The instructions are well-emulated on direct mmu. */ + if (vcpu->arch.mmu.direct_map) { + unsigned int indirect_shadow_pages; + + spin_lock(&vcpu->kvm->mmu_lock); + indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages; + spin_unlock(&vcpu->kvm->mmu_lock); + + if (indirect_shadow_pages) + kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)); + return true; } - return false; + /* + * if emulation was due to access to shadowed page table + * and it failed try to unshadow page and re-enter the + * guest to let CPU execute the instruction. + */ + kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)); + + /* + * If the access faults on its page table, it can not + * be fixed by unprotecting shadow page and it should + * be reported to userspace. + */ + return !write_fault_to_shadow_pgtable; } static bool retry_instruction(struct x86_emulate_ctxt *ctxt, @@ -4832,7 +4861,7 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt, if (!vcpu->arch.mmu.direct_map) gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL); - kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT); + kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)); return true; } @@ -4849,7 +4878,13 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, int r; struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; bool writeback = true; + bool write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable; + /* + * Clear write_fault_to_shadow_pgtable here to ensure it is + * never reused. + */ + vcpu->arch.write_fault_to_shadow_pgtable = false; kvm_clear_exception_queue(vcpu); if (!(emulation_type & EMULTYPE_NO_DECODE)) { @@ -4868,7 +4903,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, if (r != EMULATION_OK) { if (emulation_type & EMULTYPE_TRAP_UD) return EMULATE_FAIL; - if (reexecute_instruction(vcpu, cr2)) + if (reexecute_instruction(vcpu, cr2, + write_fault_to_spt)) return EMULATE_DONE; if (emulation_type & EMULTYPE_SKIP) return EMULATE_FAIL; @@ -4898,7 +4934,7 @@ restart: return EMULATE_DONE; if (r == EMULATION_FAILED) { - if (reexecute_instruction(vcpu, cr2)) + if (reexecute_instruction(vcpu, cr2, write_fault_to_spt)) return EMULATE_DONE; return handle_emulation_failure(vcpu); @@ -5541,7 +5577,7 @@ static void inject_pending_event(struct kvm_vcpu *vcpu) vcpu->arch.nmi_injected = true; kvm_x86_ops->set_nmi(vcpu); } - } else if (kvm_cpu_has_interrupt(vcpu)) { + } else if (kvm_cpu_has_injectable_intr(vcpu)) { if (kvm_x86_ops->interrupt_allowed(vcpu)) { kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), false); @@ -5609,6 +5645,16 @@ static void kvm_gen_update_masterclock(struct kvm *kvm) #endif } +static void update_eoi_exitmap(struct kvm_vcpu *vcpu) +{ + u64 eoi_exit_bitmap[4]; + + memset(eoi_exit_bitmap, 0, 32); + + kvm_ioapic_calculate_eoi_exitmap(vcpu, eoi_exit_bitmap); + kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap); +} + static int vcpu_enter_guest(struct kvm_vcpu *vcpu) { int r; @@ -5662,6 +5708,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_handle_pmu_event(vcpu); if (kvm_check_request(KVM_REQ_PMI, vcpu)) kvm_deliver_pmi(vcpu); + if (kvm_check_request(KVM_REQ_EOIBITMAP, vcpu)) + update_eoi_exitmap(vcpu); } if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { @@ -5670,10 +5718,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) /* enable NMI/IRQ window open exits if needed */ if (vcpu->arch.nmi_pending) kvm_x86_ops->enable_nmi_window(vcpu); - else if (kvm_cpu_has_interrupt(vcpu) || req_int_win) + else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win) kvm_x86_ops->enable_irq_window(vcpu); if (kvm_lapic_enabled(vcpu)) { + /* + * Update architecture specific hints for APIC + * virtual interrupt delivery. + */ + if (kvm_x86_ops->hwapic_irr_update) + kvm_x86_ops->hwapic_irr_update(vcpu, + kvm_lapic_find_highest_irr(vcpu)); update_cr8_intercept(vcpu); kvm_lapic_sync_to_vapic(vcpu); } @@ -6853,48 +6908,43 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_memory_slot old, struct kvm_userspace_memory_region *mem, - int user_alloc) + bool user_alloc) { int npages = memslot->npages; - int map_flags = MAP_PRIVATE | MAP_ANONYMOUS; - /* Prevent internal slot pages from being moved by fork()/COW. */ - if (memslot->id >= KVM_MEMORY_SLOTS) - map_flags = MAP_SHARED | MAP_ANONYMOUS; - - /*To keep backward compatibility with older userspace, - *x86 needs to handle !user_alloc case. + /* + * Only private memory slots need to be mapped here since + * KVM_SET_MEMORY_REGION ioctl is no longer supported. */ - if (!user_alloc) { - if (npages && !old.npages) { - unsigned long userspace_addr; + if ((memslot->id >= KVM_USER_MEM_SLOTS) && npages && !old.npages) { + unsigned long userspace_addr; - userspace_addr = vm_mmap(NULL, 0, - npages * PAGE_SIZE, - PROT_READ | PROT_WRITE, - map_flags, - 0); + /* + * MAP_SHARED to prevent internal slot pages from being moved + * by fork()/COW. + */ + userspace_addr = vm_mmap(NULL, 0, npages * PAGE_SIZE, + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, 0); - if (IS_ERR((void *)userspace_addr)) - return PTR_ERR((void *)userspace_addr); + if (IS_ERR((void *)userspace_addr)) + return PTR_ERR((void *)userspace_addr); - memslot->userspace_addr = userspace_addr; - } + memslot->userspace_addr = userspace_addr; } - return 0; } void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, struct kvm_memory_slot old, - int user_alloc) + bool user_alloc) { int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT; - if (!user_alloc && !old.user_alloc && old.npages && !npages) { + if ((mem->slot >= KVM_USER_MEM_SLOTS) && old.npages && !npages) { int ret; ret = vm_munmap(old.userspace_addr, @@ -6908,11 +6958,15 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, if (!kvm->arch.n_requested_mmu_pages) nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm); - spin_lock(&kvm->mmu_lock); if (nr_mmu_pages) kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages); - kvm_mmu_slot_remove_write_access(kvm, mem->slot); - spin_unlock(&kvm->mmu_lock); + /* + * Write protect all pages for dirty logging. + * Existing largepage mappings are destroyed here and new ones will + * not be created until the end of the logging. + */ + if (npages && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES)) + kvm_mmu_slot_remove_write_access(kvm, mem->slot); /* * If memory slot is created, or moved, we need to clear all * mmio sptes. |