diff options
author | Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com> | 2013-06-19 17:09:20 +0800 |
---|---|---|
committer | Gleb Natapov <gleb@redhat.com> | 2013-06-27 14:20:42 +0300 |
commit | accaefe07ddbeb12c0de4cec1d62dba6a0ea1605 (patch) | |
tree | 9363694003f6263cc15b8fcf2c5351d065a2a0a4 /arch/x86 | |
parent | a8eca9dcc656a405a28ffba43f3d86a1ff0eb331 (diff) | |
download | lwn-accaefe07ddbeb12c0de4cec1d62dba6a0ea1605.tar.gz lwn-accaefe07ddbeb12c0de4cec1d62dba6a0ea1605.zip |
KVM: MMU: document clear_spte_count
Document it to Documentation/virtual/kvm/mmu.txt
Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/include/asm/kvm_host.h | 4 | ||||
-rw-r--r-- | arch/x86/kvm/mmu.c | 17 |
2 files changed, 18 insertions, 3 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 966f2650b6ab..5d28c11d5e21 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -226,6 +226,10 @@ struct kvm_mmu_page { DECLARE_BITMAP(unsync_child_bitmap, 512); #ifdef CONFIG_X86_32 + /* + * Used out of the mmu-lock to avoid reading spte values while an + * update is in progress; see the comments in __get_spte_lockless(). + */ int clear_spte_count; #endif diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 7113a0fb544c..f385a4cf4bfd 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -466,9 +466,20 @@ static u64 __update_clear_spte_slow(u64 *sptep, u64 spte) /* * The idea using the light way get the spte on x86_32 guest is from * gup_get_pte(arch/x86/mm/gup.c). - * The difference is we can not catch the spte tlb flush if we leave - * guest mode, so we emulate it by increase clear_spte_count when spte - * is cleared. + * + * An spte tlb flush may be pending, because kvm_set_pte_rmapp + * coalesces them and we are running out of the MMU lock. Therefore + * we need to protect against in-progress updates of the spte. + * + * Reading the spte while an update is in progress may get the old value + * for the high part of the spte. The race is fine for a present->non-present + * change (because the high part of the spte is ignored for non-present spte), + * but for a present->present change we must reread the spte. + * + * All such changes are done in two steps (present->non-present and + * non-present->present), hence it is enough to count the number of + * present->non-present updates: if it changed while reading the spte, + * we might have hit the race. This is done using clear_spte_count. */ static u64 __get_spte_lockless(u64 *sptep) { |