author     Paolo Bonzini <pbonzini@redhat.com>    2023-07-01 07:18:30 -0400
committer  Paolo Bonzini <pbonzini@redhat.com>    2023-07-01 07:18:30 -0400
commit     88de4b94801aa8d3a82fd89148330155acb904af
tree       f7d91a622a45c4fc79a61120c7c3f949c21f18da /arch/x86/kvm/mmu
parent     36b68d360a7a893ba126d6ea6c1799e0b0726362
parent     0b210faf337314e4bc88e796218bc70c72a51209
Merge tag 'kvm-x86-mmu-6.5' of https://github.com/kvm-x86/linux into HEAD
KVM x86/mmu changes for 6.5:
- Add back a comment about the subtle side effect of try_cmpxchg64() in
tdp_mmu_set_spte_atomic()
- Add an assertion in __kvm_mmu_invalidate_addr() to verify that the target
KVM MMU is the current MMU
- Add a "never" option to effectively avoid creating NX hugepage recovery
  threads (see the usage sketch after this list)
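
For context on the last item: "never" is written to the existing nx_huge_pages
module parameter. Below is a minimal, illustrative userspace sketch (not part of
this merge) that assumes the usual sysfs location,
/sys/module/kvm/parameters/nx_huge_pages, and simply exercises the semantics the
patch adds: the write succeeds only while no VMs exist, and once it succeeds the
mitigation and its per-VM recovery threads stay off for the life of the module.

/*
 * Illustrative only -- not part of this merge.  Writes "never" to the
 * nx_huge_pages module parameter; the path below is the usual sysfs
 * location for KVM module parameters.
 */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/sys/module/kvm/parameters/nx_huge_pages";
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/*
	 * Per the new set_nx_huge_pages() logic: fails with EBUSY while any
	 * VM exists, and with EPERM once "never" has already been applied;
	 * on success, reads of the parameter report "never" and new VMs skip
	 * the kvm-nx-lpage-recovery worker thread.
	 */
	if (write(fd, "never", strlen("never")) < 0)
		fprintf(stderr, "write: %s\n", strerror(errno));

	close(fd);
	return 0;
}
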
Diffstat (limited to 'arch/x86/kvm/mmu')
-rw-r--r--  arch/x86/kvm/mmu/mmu.c      | 49
-rw-r--r--  arch/x86/kvm/mmu/tdp_mmu.c  |  5
2 files changed, 48 insertions(+), 6 deletions(-)
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 6eaa3d6994ae..03ff06cd65b3 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -58,6 +58,8 @@
 
 extern bool itlb_multihit_kvm_mitigation;
 
+static bool nx_hugepage_mitigation_hard_disabled;
+
 int __read_mostly nx_huge_pages = -1;
 static uint __read_mostly nx_huge_pages_recovery_period_ms;
 #ifdef CONFIG_PREEMPT_RT
@@ -67,12 +69,13 @@ static uint __read_mostly nx_huge_pages_recovery_ratio = 0;
 static uint __read_mostly nx_huge_pages_recovery_ratio = 60;
 #endif
 
+static int get_nx_huge_pages(char *buffer, const struct kernel_param *kp);
 static int set_nx_huge_pages(const char *val, const struct kernel_param *kp);
 static int set_nx_huge_pages_recovery_param(const char *val, const struct kernel_param *kp);
 
 static const struct kernel_param_ops nx_huge_pages_ops = {
 	.set = set_nx_huge_pages,
-	.get = param_get_bool,
+	.get = get_nx_huge_pages,
 };
 
 static const struct kernel_param_ops nx_huge_pages_recovery_param_ops = {
@@ -5797,6 +5800,14 @@ static void __kvm_mmu_invalidate_addr(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu
 
 	vcpu_clear_mmio_info(vcpu, addr);
 
+	/*
+	 * Walking and synchronizing SPTEs both assume they are operating in
+	 * the context of the current MMU, and would need to be reworked if
+	 * this is ever used to sync the guest_mmu, e.g. to emulate INVEPT.
+	 */
+	if (WARN_ON_ONCE(mmu != vcpu->arch.mmu))
+		return;
+
 	if (!VALID_PAGE(root_hpa))
 		return;
 
@@ -6844,6 +6855,14 @@ static void mmu_destroy_caches(void)
 	kmem_cache_destroy(mmu_page_header_cache);
 }
 
+static int get_nx_huge_pages(char *buffer, const struct kernel_param *kp)
+{
+	if (nx_hugepage_mitigation_hard_disabled)
+		return sprintf(buffer, "never\n");
+
+	return param_get_bool(buffer, kp);
+}
+
 static bool get_nx_auto_mode(void)
 {
 	/* Return true when CPU has the bug, and mitigations are ON */
@@ -6860,15 +6879,29 @@ static int set_nx_huge_pages(const char *val, const struct kernel_param *kp)
 	bool old_val = nx_huge_pages;
 	bool new_val;
 
+	if (nx_hugepage_mitigation_hard_disabled)
+		return -EPERM;
+
 	/* In "auto" mode deploy workaround only if CPU has the bug. */
-	if (sysfs_streq(val, "off"))
+	if (sysfs_streq(val, "off")) {
 		new_val = 0;
-	else if (sysfs_streq(val, "force"))
+	} else if (sysfs_streq(val, "force")) {
 		new_val = 1;
-	else if (sysfs_streq(val, "auto"))
+	} else if (sysfs_streq(val, "auto")) {
 		new_val = get_nx_auto_mode();
-	else if (kstrtobool(val, &new_val) < 0)
+	} else if (sysfs_streq(val, "never")) {
+		new_val = 0;
+
+		mutex_lock(&kvm_lock);
+		if (!list_empty(&vm_list)) {
+			mutex_unlock(&kvm_lock);
+			return -EBUSY;
+		}
+		nx_hugepage_mitigation_hard_disabled = true;
+		mutex_unlock(&kvm_lock);
+	} else if (kstrtobool(val, &new_val) < 0) {
 		return -EINVAL;
+	}
 
 	__set_nx_huge_pages(new_val);
 
@@ -7006,6 +7039,9 @@ static int set_nx_huge_pages_recovery_param(const char *val, const struct kernel
 	uint old_period, new_period;
 	int err;
 
+	if (nx_hugepage_mitigation_hard_disabled)
+		return -EPERM;
+
 	was_recovery_enabled = calc_nx_huge_pages_recovery_period(&old_period);
 
 	err = param_set_uint(val, kp);
@@ -7164,6 +7200,9 @@ int kvm_mmu_post_init_vm(struct kvm *kvm)
 {
 	int err;
 
+	if (nx_hugepage_mitigation_hard_disabled)
+		return 0;
+
 	err = kvm_vm_create_worker_thread(kvm, kvm_nx_huge_page_recovery_worker, 0,
 					  "kvm-nx-lpage-recovery",
 					  &kvm->arch.nx_huge_page_recovery_thread);
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 08340219c35a..512163d52194 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -592,7 +592,10 @@ static inline int tdp_mmu_set_spte_atomic(struct kvm *kvm,
 
 	/*
 	 * Note, fast_pf_fix_direct_spte() can also modify TDP MMU SPTEs and
-	 * does not hold the mmu_lock.
+	 * does not hold the mmu_lock.  On failure, i.e. if a different logical
+	 * CPU modified the SPTE, try_cmpxchg64() updates iter->old_spte with
+	 * the current value, so the caller operates on fresh data, e.g. if it
+	 * retries tdp_mmu_set_spte_atomic()
 	 */
 	if (!try_cmpxchg64(sptep, &iter->old_spte, new_spte))
 		return -EBUSY;
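
As an aside on the restored tdp_mmu.c comment: the property being documented is
that try_cmpxchg64(), like a C11 strong compare-exchange, writes the current
memory value back into the caller's expected-value variable when it fails, so a
retry naturally operates on fresh data. The sketch below is an illustrative
stand-in using C11 atomics, not kernel code; spte and set_spte_with_retry() are
invented names for the demonstration.

/*
 * Illustrative stand-in, not kernel code: atomic_compare_exchange_strong()
 * has the same "refresh the expected value on failure" contract that the
 * restored comment attributes to try_cmpxchg64().
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic uint64_t spte;	/* stands in for the SPTE being updated */

static void set_spte_with_retry(uint64_t new_spte)
{
	uint64_t old_spte = atomic_load(&spte);

	/*
	 * On failure, the current value is written back into old_spte
	 * (just as try_cmpxchg64() refreshes iter->old_spte), so each retry
	 * compares against what is actually in memory, not a stale snapshot.
	 */
	while (!atomic_compare_exchange_strong(&spte, &old_spte, new_spte))
		;
}

int main(void)
{
	atomic_store(&spte, 0x1);
	set_spte_with_retry(0x3);
	printf("SPTE is now %#llx\n", (unsigned long long)atomic_load(&spte));
	return 0;
}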