diff options
author | Kai Huang <kai.huang@intel.com> | 2022-04-19 23:17:03 +1200 |
---|---|---|
committer | Paolo Bonzini <pbonzini@redhat.com> | 2022-05-12 09:51:44 -0400 |
commit | e54f1ff244ac96c919049838a5a1f03087793594 (patch) | |
tree | 54a3b9ad2938cdcba07e93537f8b4c76288e8394 /arch/x86/kvm | |
parent | c919e881ba83e2912354ffa035980f62c78cc2f3 (diff) | |
download | lwn-e54f1ff244ac96c919049838a5a1f03087793594.tar.gz lwn-e54f1ff244ac96c919049838a5a1f03087793594.zip |
KVM: x86/mmu: Add shadow_me_value and repurpose shadow_me_mask
Intel Multi-Key Total Memory Encryption (MKTME) repurposes a couple of
the high physical address bits as 'KeyID' bits. Intel Trust Domain
Extensions (TDX) further steals part of the MKTME KeyID bits as TDX private
KeyID bits. TDX private KeyID bits cannot be set in any mapping in the
host kernel since they can only be accessed by software running inside a
new CPU isolated mode. And unlike AMD's SME, the host kernel doesn't set
any legacy MKTME KeyID bits in any mapping either. Therefore, it's not
legitimate for KVM to set any KeyID bits in an SPTE which maps guest
memory.
KVM maintains shadow_zero_check bits to represent which bits must be
zero in an SPTE which maps guest memory. MKTME KeyID bits should be set
in shadow_zero_check. Currently, shadow_me_mask is used by AMD to set
the sme_me_mask in the SPTE, and shadow_me_mask is excluded from
shadow_zero_check. So initializing shadow_me_mask to represent all
MKTME KeyID bits doesn't work for VMX (on the contrary, they must be set
in shadow_zero_check).
Introduce a new 'shadow_me_value' to replace existing shadow_me_mask,
and repurpose shadow_me_mask as 'all possible memory encryption bits'.
The new semantics of the two will be:
- shadow_me_value: the memory encryption bit(s) that will be set to the
SPTE (the original shadow_me_mask).
- shadow_me_mask: all possible memory encryption bits (which is a
superset of shadow_me_value).
- For now, shadow_me_value is supposed to be set by SVM and VMX
respectively, and it is constant during KVM's lifetime. This
perhaps doesn't fit MKTME, but for now the host kernel doesn't support it
(and perhaps never will).
- Bits in shadow_me_mask are set to shadow_zero_check, except the bits
in shadow_me_value.
Introduce a new helper kvm_mmu_set_me_spte_mask() to initialize them.
Replace shadow_me_mask with shadow_me_value in almost all code paths,
except the one in PT64_PERM_MASK, which is used by need_remote_flush()
to determine whether remote TLB flush is needed. This should still use
shadow_me_mask as any encryption bit change should need a TLB flush.
And for AMD, move initializing shadow_me_value/shadow_me_mask from
kvm_mmu_reset_all_pte_masks() to svm_hardware_setup().
Signed-off-by: Kai Huang <kai.huang@intel.com>
Message-Id: <f90964b93a3398b1cf1c56f510f3281e0709e2ab.1650363789.git.kai.huang@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r-- | arch/x86/kvm/mmu.h | 1 | ||||
-rw-r--r-- | arch/x86/kvm/mmu/mmu.c | 16 | ||||
-rw-r--r-- | arch/x86/kvm/mmu/spte.c | 23 | ||||
-rw-r--r-- | arch/x86/kvm/mmu/spte.h | 1 | ||||
-rw-r--r-- | arch/x86/kvm/svm/svm.c | 3 |
5 files changed, 34 insertions, 10 deletions
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index da5ed1430d7e..1db11cd6b177 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -90,6 +90,7 @@ static inline gfn_t kvm_mmu_max_gfn(void) } void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask); +void kvm_mmu_set_me_spte_mask(u64 me_value, u64 me_mask); void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only); void kvm_init_mmu(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index b91d2af46d90..ea4bc085bbf5 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3546,7 +3546,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) * or a PAE 3-level page table. In either case we need to be aware that * the shadow page table may be a PAE or a long mode page table. */ - pm_mask = PT_PRESENT_MASK | shadow_me_mask; + pm_mask = PT_PRESENT_MASK | shadow_me_value; if (mmu->root_role.level >= PT64_ROOT_4LEVEL) { pm_mask |= PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK; @@ -4531,8 +4531,16 @@ static void reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, return; for (i = context->root_role.level; --i >= 0;) { - shadow_zero_check->rsvd_bits_mask[0][i] &= ~shadow_me_mask; - shadow_zero_check->rsvd_bits_mask[1][i] &= ~shadow_me_mask; + /* + * So far shadow_me_value is a constant during KVM's life + * time. Bits in shadow_me_value are allowed to be set. + * Bits in shadow_me_mask but not in shadow_me_value are + * not allowed to be set. 
+ */ + shadow_zero_check->rsvd_bits_mask[0][i] |= shadow_me_mask; + shadow_zero_check->rsvd_bits_mask[1][i] |= shadow_me_mask; + shadow_zero_check->rsvd_bits_mask[0][i] &= ~shadow_me_value; + shadow_zero_check->rsvd_bits_mask[1][i] &= ~shadow_me_value; } } @@ -5624,7 +5632,7 @@ static int __kvm_mmu_create(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu) if (!tdp_enabled) set_memory_decrypted((unsigned long)mmu->pae_root, 1); else - WARN_ON_ONCE(shadow_me_mask); + WARN_ON_ONCE(shadow_me_value); for (i = 0; i < 4; ++i) mmu->pae_root[i] = INVALID_PAE_ROOT; diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c index 800b857b3a53..12b9b3f8bd05 100644 --- a/arch/x86/kvm/mmu/spte.c +++ b/arch/x86/kvm/mmu/spte.c @@ -33,6 +33,7 @@ u64 __read_mostly shadow_mmio_value; u64 __read_mostly shadow_mmio_mask; u64 __read_mostly shadow_mmio_access_mask; u64 __read_mostly shadow_present_mask; +u64 __read_mostly shadow_me_value; u64 __read_mostly shadow_me_mask; u64 __read_mostly shadow_acc_track_mask; @@ -167,8 +168,8 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, else pte_access &= ~ACC_WRITE_MASK; - if (shadow_me_mask && !kvm_is_mmio_pfn(pfn)) - spte |= shadow_me_mask; + if (shadow_me_value && !kvm_is_mmio_pfn(pfn)) + spte |= shadow_me_value; spte |= (u64)pfn << PAGE_SHIFT; @@ -284,7 +285,7 @@ u64 make_nonleaf_spte(u64 *child_pt, bool ad_disabled) u64 spte = SPTE_MMU_PRESENT_MASK; spte |= __pa(child_pt) | shadow_present_mask | PT_WRITABLE_MASK | - shadow_user_mask | shadow_x_mask | shadow_me_mask; + shadow_user_mask | shadow_x_mask | shadow_me_value; if (ad_disabled) spte |= SPTE_TDP_AD_DISABLED_MASK; @@ -388,6 +389,17 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask) } EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask); +void kvm_mmu_set_me_spte_mask(u64 me_value, u64 me_mask) +{ + /* shadow_me_value must be a subset of shadow_me_mask */ + if (WARN_ON(me_value & ~me_mask)) + me_value = me_mask = 0; + + shadow_me_value = me_value; 
+ shadow_me_mask = me_mask; +} +EXPORT_SYMBOL_GPL(kvm_mmu_set_me_spte_mask); + void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only) { shadow_user_mask = VMX_EPT_READABLE_MASK; @@ -397,8 +409,6 @@ void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only) shadow_x_mask = VMX_EPT_EXECUTABLE_MASK; shadow_present_mask = has_exec_only ? 0ull : VMX_EPT_READABLE_MASK; shadow_acc_track_mask = VMX_EPT_RWX_MASK; - shadow_me_mask = 0ull; - shadow_host_writable_mask = EPT_SPTE_HOST_WRITABLE; shadow_mmu_writable_mask = EPT_SPTE_MMU_WRITABLE; @@ -449,7 +459,8 @@ void kvm_mmu_reset_all_pte_masks(void) shadow_x_mask = 0; shadow_present_mask = PT_PRESENT_MASK; shadow_acc_track_mask = 0; - shadow_me_mask = sme_me_mask; + shadow_me_mask = 0; + shadow_me_value = 0; shadow_host_writable_mask = DEFAULT_SPTE_HOST_WRITABLE; shadow_mmu_writable_mask = DEFAULT_SPTE_MMU_WRITABLE; diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h index 43f7924bc7f0..0127bb6e3c7d 100644 --- a/arch/x86/kvm/mmu/spte.h +++ b/arch/x86/kvm/mmu/spte.h @@ -151,6 +151,7 @@ extern u64 __read_mostly shadow_mmio_value; extern u64 __read_mostly shadow_mmio_mask; extern u64 __read_mostly shadow_mmio_access_mask; extern u64 __read_mostly shadow_present_mask; +extern u64 __read_mostly shadow_me_value; extern u64 __read_mostly shadow_me_mask; /* diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 75b4f3ac8b1a..3b49337998ec 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4892,6 +4892,9 @@ static __init int svm_hardware_setup(void) get_npt_level(), PG_LEVEL_1G); pr_info("kvm: Nested Paging %sabled\n", npt_enabled ? "en" : "dis"); + /* Setup shadow_me_value and shadow_me_mask */ + kvm_mmu_set_me_spte_mask(sme_me_mask, sme_me_mask); + /* Note, SEV setup consumes npt_enabled. */ sev_hardware_setup(); |