summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
authorXiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>2013-05-31 08:36:29 +0800
committerGleb Natapov <gleb@redhat.com>2013-06-05 12:33:33 +0300
commit365c886860c4ba670d245e762b23987c912c129a (patch)
tree81f59edf5ba5e4c944d5590a631bbd23419e2cd8 /arch
parentf34d251d66ba263c077ed9d2bbd1874339a4c887 (diff)
downloadlwn-365c886860c4ba670d245e762b23987c912c129a.tar.gz
lwn-365c886860c4ba670d245e762b23987c912c129a.zip
KVM: MMU: reclaim the zapped-obsolete page first
As Marcelo pointed out that | "(retention of large number of pages while zapping) | can be fatal, it can lead to OOM and host crash" We introduce a list, kvm->arch.zapped_obsolete_pages, to link all the pages which are deleted from the mmu cache but not actually freed. When page reclaiming is needed, we always zap this kind of pages first. Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com> Reviewed-by: Marcelo Tosatti <mtosatti@redhat.com> Signed-off-by: Gleb Natapov <gleb@redhat.com>
Diffstat (limited to 'arch')
-rw-r--r--arch/x86/include/asm/kvm_host.h2
-rw-r--r--arch/x86/kvm/mmu.c21
-rw-r--r--arch/x86/kvm/x86.c1
3 files changed, 20 insertions, 4 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index bff7d464a6ae..1f98c1bb5b7a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -536,6 +536,8 @@ struct kvm_arch {
* Hash table of struct kvm_mmu_page.
*/
struct list_head active_mmu_pages;
+ struct list_head zapped_obsolete_pages;
+
struct list_head assigned_dev_head;
struct iommu_domain *iommu_domain;
int iommu_flags;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 674c0442ac89..79af88ab2f1d 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4211,7 +4211,6 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
static void kvm_zap_obsolete_pages(struct kvm *kvm)
{
struct kvm_mmu_page *sp, *node;
- LIST_HEAD(invalid_list);
int batch = 0;
restart:
@@ -4244,7 +4243,8 @@ restart:
goto restart;
}
- ret = kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
+ ret = kvm_mmu_prepare_zap_page(kvm, sp,
+ &kvm->arch.zapped_obsolete_pages);
batch += ret;
if (ret)
@@ -4255,7 +4255,7 @@ restart:
* Should flush tlb before free page tables since lockless-walking
* may use the pages.
*/
- kvm_mmu_commit_zap_page(kvm, &invalid_list);
+ kvm_mmu_commit_zap_page(kvm, &kvm->arch.zapped_obsolete_pages);
}
/*
@@ -4306,6 +4306,11 @@ restart:
spin_unlock(&kvm->mmu_lock);
}
+static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm)
+{
+ return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages));
+}
+
static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)
{
struct kvm *kvm;
@@ -4334,15 +4339,23 @@ static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)
* want to shrink a VM that only started to populate its MMU
* anyway.
*/
- if (!kvm->arch.n_used_mmu_pages)
+ if (!kvm->arch.n_used_mmu_pages &&
+ !kvm_has_zapped_obsolete_pages(kvm))
continue;
idx = srcu_read_lock(&kvm->srcu);
spin_lock(&kvm->mmu_lock);
+ if (kvm_has_zapped_obsolete_pages(kvm)) {
+ kvm_mmu_commit_zap_page(kvm,
+ &kvm->arch.zapped_obsolete_pages);
+ goto unlock;
+ }
+
prepare_zap_oldest_mmu_page(kvm, &invalid_list);
kvm_mmu_commit_zap_page(kvm, &invalid_list);
+unlock:
spin_unlock(&kvm->mmu_lock);
srcu_read_unlock(&kvm->srcu, idx);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 15e10f7e68ac..6402951d5f3b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6832,6 +6832,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
return -EINVAL;
INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
+ INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */