diff options
author | Xiao Guangrong <xiaoguangrong@cn.fujitsu.com> | 2010-11-23 11:13:00 +0800 |
---|---|---|
committer | Avi Kivity <avi@redhat.com> | 2011-01-12 11:29:51 +0200 |
commit | a4ee1ca4a36e7857d90ae8c2b85f1bde9a042c10 (patch) | |
tree | 29707dd004ef14df318ac35321b95ac62570fc99 | |
parent | 407c61c6bd6a51b56d02f8bbad8aadf19db8c7b5 (diff) | |
download | lwn-a4ee1ca4a36e7857d90ae8c2b85f1bde9a042c10.tar.gz lwn-a4ee1ca4a36e7857d90ae8c2b85f1bde9a042c10.zip |
KVM: MMU: delay flush all tlbs on sync_page path
Quote from Avi:
| I don't think we need to flush immediately; set a "tlb dirty" bit somewhere
| that is cleareded when we flush the tlb. kvm_mmu_notifier_invalidate_page()
| can consult the bit and force a flush if set.
Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
-rw-r--r-- | arch/x86/kvm/paging_tmpl.h | 12 | ||||
-rw-r--r-- | include/linux/kvm_host.h | 2 | ||||
-rw-r--r-- | virt/kvm/kvm_main.c | 7 |
3 files changed, 18 insertions, 3 deletions
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index a43f4ccd30bb..2b3d66c7b68d 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -746,6 +746,14 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu, * Using the cached information from sp->gfns is safe because: * - The spte has a reference to the struct page, so the pfn for a given gfn * can't change unless all sptes pointing to it are nuked first. + * + * Note: + * We should flush all tlbs if spte is dropped even though guest is + * responsible for it. Since if we don't, kvm_mmu_notifier_invalidate_page + * and kvm_mmu_notifier_invalidate_range_start detect the mapping page isn't + * used by guest then tlbs are not flushed, so guest is allowed to access the + * freed pages. + * And we increase kvm->tlbs_dirty to delay tlbs flush in this case. */ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) { @@ -781,14 +789,14 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) gfn = gpte_to_gfn(gpte); if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) { - kvm_flush_remote_tlbs(vcpu->kvm); + vcpu->kvm->tlbs_dirty++; continue; } if (gfn != sp->gfns[i]) { drop_spte(vcpu->kvm, &sp->spt[i], shadow_trap_nonpresent_pte); - kvm_flush_remote_tlbs(vcpu->kvm); + vcpu->kvm->tlbs_dirty++; continue; } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index da0794f707f6..ac4e83a1a10d 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -254,6 +254,7 @@ struct kvm { struct mmu_notifier mmu_notifier; unsigned long mmu_notifier_seq; long mmu_notifier_count; + long tlbs_dirty; #endif }; @@ -382,6 +383,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu); void kvm_resched(struct kvm_vcpu *vcpu); void kvm_load_guest_fpu(struct kvm_vcpu *vcpu); void kvm_put_guest_fpu(struct kvm_vcpu *vcpu); + void kvm_flush_remote_tlbs(struct kvm *kvm); void kvm_reload_remote_mmus(struct kvm *kvm); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 5156d458a84d..ee99b77e4451 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -168,8 +168,12 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req) void kvm_flush_remote_tlbs(struct kvm *kvm) { + int dirty_count = kvm->tlbs_dirty; + + smp_mb(); if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH)) ++kvm->stat.remote_tlb_flush; + cmpxchg(&kvm->tlbs_dirty, dirty_count, 0); } void kvm_reload_remote_mmus(struct kvm *kvm) @@ -249,7 +253,7 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn, idx = srcu_read_lock(&kvm->srcu); spin_lock(&kvm->mmu_lock); kvm->mmu_notifier_seq++; - need_tlb_flush = kvm_unmap_hva(kvm, address); + need_tlb_flush = kvm_unmap_hva(kvm, address) | kvm->tlbs_dirty; spin_unlock(&kvm->mmu_lock); srcu_read_unlock(&kvm->srcu, idx); @@ -293,6 +297,7 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, kvm->mmu_notifier_count++; for (; start < end; start += PAGE_SIZE) need_tlb_flush |= kvm_unmap_hva(kvm, start); + need_tlb_flush |= kvm->tlbs_dirty; spin_unlock(&kvm->mmu_lock); srcu_read_unlock(&kvm->srcu, idx); |