summaryrefslogtreecommitdiff
path: root/arch/x86/kvm/cpuid.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm/cpuid.c')
-rw-r--r--arch/x86/kvm/cpuid.c1042
1 files changed, 707 insertions, 335 deletions
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index ae0b438a2c99..5e4d4934c0d3 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -58,39 +58,29 @@ void __init kvm_init_xstate_sizes(void)
u32 xstate_required_size(u64 xstate_bv, bool compacted)
{
- int feature_bit = 0;
u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
+ int i;
xstate_bv &= XFEATURE_MASK_EXTEND;
- while (xstate_bv) {
- if (xstate_bv & 0x1) {
- struct cpuid_xstate_sizes *xs = &xstate_sizes[feature_bit];
- u32 offset;
-
- /* ECX[1]: 64B alignment in compacted form */
- if (compacted)
- offset = (xs->ecx & 0x2) ? ALIGN(ret, 64) : ret;
- else
- offset = xs->ebx;
- ret = max(ret, offset + xs->eax);
- }
+ for (i = XFEATURE_YMM; i < ARRAY_SIZE(xstate_sizes) && xstate_bv; i++) {
+ struct cpuid_xstate_sizes *xs = &xstate_sizes[i];
+ u32 offset;
- xstate_bv >>= 1;
- feature_bit++;
+ if (!(xstate_bv & BIT_ULL(i)))
+ continue;
+
+ /* ECX[1]: 64B alignment in compacted form */
+ if (compacted)
+ offset = (xs->ecx & 0x2) ? ALIGN(ret, 64) : ret;
+ else
+ offset = xs->ebx;
+ ret = max(ret, offset + xs->eax);
+ xstate_bv &= ~BIT_ULL(i);
}
return ret;
}
-#define F feature_bit
-
-/* Scattered Flag - For features that are scattered by cpufeatures.h. */
-#define SF(name) \
-({ \
- BUILD_BUG_ON(X86_FEATURE_##name >= MAX_CPU_FEATURES); \
- (boot_cpu_has(X86_FEATURE_##name) ? F(name) : 0); \
-})
-
/*
* Magic value used by KVM when querying userspace-provided CPUID entries and
* doesn't care about the CPIUD index because the index of the function in
@@ -100,8 +90,8 @@ u32 xstate_required_size(u64 xstate_bv, bool compacted)
*/
#define KVM_CPUID_INDEX_NOT_SIGNIFICANT -1ull
-static inline struct kvm_cpuid_entry2 *cpuid_entry2_find(
- struct kvm_cpuid_entry2 *entries, int nent, u32 function, u64 index)
+static struct kvm_cpuid_entry2 *cpuid_entry2_find(struct kvm_vcpu *vcpu,
+ u32 function, u64 index)
{
struct kvm_cpuid_entry2 *e;
int i;
@@ -118,8 +108,8 @@ static inline struct kvm_cpuid_entry2 *cpuid_entry2_find(
*/
lockdep_assert_irqs_enabled();
- for (i = 0; i < nent; i++) {
- e = &entries[i];
+ for (i = 0; i < vcpu->arch.cpuid_nent; i++) {
+ e = &vcpu->arch.cpuid_entries[i];
if (e->function != function)
continue;
@@ -151,9 +141,27 @@ static inline struct kvm_cpuid_entry2 *cpuid_entry2_find(
return NULL;
}
-static int kvm_check_cpuid(struct kvm_vcpu *vcpu,
- struct kvm_cpuid_entry2 *entries,
- int nent)
+struct kvm_cpuid_entry2 *kvm_find_cpuid_entry_index(struct kvm_vcpu *vcpu,
+ u32 function, u32 index)
+{
+ return cpuid_entry2_find(vcpu, function, index);
+}
+EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry_index);
+
+struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
+ u32 function)
+{
+ return cpuid_entry2_find(vcpu, function, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
+}
+EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry);
+
+/*
+ * cpuid_entry2_find() and KVM_CPUID_INDEX_NOT_SIGNIFICANT should never be used
+ * directly outside of kvm_find_cpuid_entry() and kvm_find_cpuid_entry_index().
+ */
+#undef KVM_CPUID_INDEX_NOT_SIGNIFICANT
+
+static int kvm_check_cpuid(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
u64 xfeatures;
@@ -162,8 +170,7 @@ static int kvm_check_cpuid(struct kvm_vcpu *vcpu,
* The existing code assumes virtual address is 48-bit or 57-bit in the
* canonical address checks; exit if it is ever changed.
*/
- best = cpuid_entry2_find(entries, nent, 0x80000008,
- KVM_CPUID_INDEX_NOT_SIGNIFICANT);
+ best = kvm_find_cpuid_entry(vcpu, 0x80000008);
if (best) {
int vaddr_bits = (best->eax & 0xff00) >> 8;
@@ -175,7 +182,7 @@ static int kvm_check_cpuid(struct kvm_vcpu *vcpu,
* Exposing dynamic xfeatures to the guest requires additional
* enabling in the FPU, e.g. to expand the guest XSAVE state size.
*/
- best = cpuid_entry2_find(entries, nent, 0xd, 0);
+ best = kvm_find_cpuid_entry_index(vcpu, 0xd, 0);
if (!best)
return 0;
@@ -187,6 +194,9 @@ static int kvm_check_cpuid(struct kvm_vcpu *vcpu,
return fpu_enable_guest_xfd_features(&vcpu->arch.guest_fpu, xfeatures);
}
+static u32 kvm_apply_cpuid_pv_features_quirk(struct kvm_vcpu *vcpu);
+static void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu);
+
/* Check whether the supplied CPUID data is equal to what is already set for the vCPU. */
static int kvm_cpuid_check_equal(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
int nent)
@@ -194,6 +204,15 @@ static int kvm_cpuid_check_equal(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2
struct kvm_cpuid_entry2 *orig;
int i;
+ /*
+ * Apply runtime CPUID updates to the incoming CPUID entries to avoid
+ * false positives due mismatches on KVM-owned feature flags.
+ *
+ * Note! @e2 and @nent track the _old_ CPUID entries!
+ */
+ kvm_update_cpuid_runtime(vcpu);
+ kvm_apply_cpuid_pv_features_quirk(vcpu);
+
if (nent != vcpu->arch.cpuid_nent)
return -EINVAL;
@@ -210,15 +229,15 @@ static int kvm_cpuid_check_equal(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2
return 0;
}
-static struct kvm_hypervisor_cpuid __kvm_get_hypervisor_cpuid(struct kvm_cpuid_entry2 *entries,
- int nent, const char *sig)
+static struct kvm_hypervisor_cpuid kvm_get_hypervisor_cpuid(struct kvm_vcpu *vcpu,
+ const char *sig)
{
struct kvm_hypervisor_cpuid cpuid = {};
struct kvm_cpuid_entry2 *entry;
u32 base;
for_each_possible_hypervisor_cpuid_base(base) {
- entry = cpuid_entry2_find(entries, nent, base, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
+ entry = kvm_find_cpuid_entry(vcpu, base);
if (entry) {
u32 signature[3];
@@ -238,118 +257,91 @@ static struct kvm_hypervisor_cpuid __kvm_get_hypervisor_cpuid(struct kvm_cpuid_e
return cpuid;
}
-static struct kvm_hypervisor_cpuid kvm_get_hypervisor_cpuid(struct kvm_vcpu *vcpu,
- const char *sig)
-{
- return __kvm_get_hypervisor_cpuid(vcpu->arch.cpuid_entries,
- vcpu->arch.cpuid_nent, sig);
-}
-
-static struct kvm_cpuid_entry2 *__kvm_find_kvm_cpuid_features(struct kvm_cpuid_entry2 *entries,
- int nent, u32 kvm_cpuid_base)
+static u32 kvm_apply_cpuid_pv_features_quirk(struct kvm_vcpu *vcpu)
{
- return cpuid_entry2_find(entries, nent, kvm_cpuid_base | KVM_CPUID_FEATURES,
- KVM_CPUID_INDEX_NOT_SIGNIFICANT);
-}
-
-static struct kvm_cpuid_entry2 *kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu)
-{
- u32 base = vcpu->arch.kvm_cpuid.base;
+ struct kvm_hypervisor_cpuid kvm_cpuid;
+ struct kvm_cpuid_entry2 *best;
- if (!base)
- return NULL;
+ kvm_cpuid = kvm_get_hypervisor_cpuid(vcpu, KVM_SIGNATURE);
+ if (!kvm_cpuid.base)
+ return 0;
- return __kvm_find_kvm_cpuid_features(vcpu->arch.cpuid_entries,
- vcpu->arch.cpuid_nent, base);
-}
+ best = kvm_find_cpuid_entry(vcpu, kvm_cpuid.base | KVM_CPUID_FEATURES);
+ if (!best)
+ return 0;
-void kvm_update_pv_runtime(struct kvm_vcpu *vcpu)
-{
- struct kvm_cpuid_entry2 *best = kvm_find_kvm_cpuid_features(vcpu);
+ if (kvm_hlt_in_guest(vcpu->kvm))
+ best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);
- /*
- * save the feature bitmap to avoid cpuid lookup for every PV
- * operation
- */
- if (best)
- vcpu->arch.pv_cpuid.features = best->eax;
+ return best->eax;
}
/*
* Calculate guest's supported XCR0 taking into account guest CPUID data and
* KVM's supported XCR0 (comprised of host's XCR0 and KVM_SUPPORTED_XCR0).
*/
-static u64 cpuid_get_supported_xcr0(struct kvm_cpuid_entry2 *entries, int nent)
+static u64 cpuid_get_supported_xcr0(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
- best = cpuid_entry2_find(entries, nent, 0xd, 0);
+ best = kvm_find_cpuid_entry_index(vcpu, 0xd, 0);
if (!best)
return 0;
return (best->eax | ((u64)best->edx << 32)) & kvm_caps.supported_xcr0;
}
-static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entries,
- int nent)
+static __always_inline void kvm_update_feature_runtime(struct kvm_vcpu *vcpu,
+ struct kvm_cpuid_entry2 *entry,
+ unsigned int x86_feature,
+ bool has_feature)
+{
+ cpuid_entry_change(entry, x86_feature, has_feature);
+ guest_cpu_cap_change(vcpu, x86_feature, has_feature);
+}
+
+static void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
- struct kvm_hypervisor_cpuid kvm_cpuid;
- best = cpuid_entry2_find(entries, nent, 1, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
+ vcpu->arch.cpuid_dynamic_bits_dirty = false;
+
+ best = kvm_find_cpuid_entry(vcpu, 1);
if (best) {
- /* Update OSXSAVE bit */
- if (boot_cpu_has(X86_FEATURE_XSAVE))
- cpuid_entry_change(best, X86_FEATURE_OSXSAVE,
+ kvm_update_feature_runtime(vcpu, best, X86_FEATURE_OSXSAVE,
kvm_is_cr4_bit_set(vcpu, X86_CR4_OSXSAVE));
- cpuid_entry_change(best, X86_FEATURE_APIC,
- vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
+ kvm_update_feature_runtime(vcpu, best, X86_FEATURE_APIC,
+ vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
+
+ if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT))
+ kvm_update_feature_runtime(vcpu, best, X86_FEATURE_MWAIT,
+ vcpu->arch.ia32_misc_enable_msr &
+ MSR_IA32_MISC_ENABLE_MWAIT);
}
- best = cpuid_entry2_find(entries, nent, 7, 0);
- if (best && boot_cpu_has(X86_FEATURE_PKU) && best->function == 0x7)
- cpuid_entry_change(best, X86_FEATURE_OSPKE,
- kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE));
+ best = kvm_find_cpuid_entry_index(vcpu, 7, 0);
+ if (best)
+ kvm_update_feature_runtime(vcpu, best, X86_FEATURE_OSPKE,
+ kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE));
+
- best = cpuid_entry2_find(entries, nent, 0xD, 0);
+ best = kvm_find_cpuid_entry_index(vcpu, 0xD, 0);
if (best)
best->ebx = xstate_required_size(vcpu->arch.xcr0, false);
- best = cpuid_entry2_find(entries, nent, 0xD, 1);
+ best = kvm_find_cpuid_entry_index(vcpu, 0xD, 1);
if (best && (cpuid_entry_has(best, X86_FEATURE_XSAVES) ||
cpuid_entry_has(best, X86_FEATURE_XSAVEC)))
best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
-
- kvm_cpuid = __kvm_get_hypervisor_cpuid(entries, nent, KVM_SIGNATURE);
- if (kvm_cpuid.base) {
- best = __kvm_find_kvm_cpuid_features(entries, nent, kvm_cpuid.base);
- if (kvm_hlt_in_guest(vcpu->kvm) && best)
- best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);
- }
-
- if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)) {
- best = cpuid_entry2_find(entries, nent, 0x1, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
- if (best)
- cpuid_entry_change(best, X86_FEATURE_MWAIT,
- vcpu->arch.ia32_misc_enable_msr &
- MSR_IA32_MISC_ENABLE_MWAIT);
- }
}
-void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
-{
- __kvm_update_cpuid_runtime(vcpu, vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent);
-}
-EXPORT_SYMBOL_GPL(kvm_update_cpuid_runtime);
-
-static bool kvm_cpuid_has_hyperv(struct kvm_cpuid_entry2 *entries, int nent)
+static bool kvm_cpuid_has_hyperv(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_KVM_HYPERV
struct kvm_cpuid_entry2 *entry;
- entry = cpuid_entry2_find(entries, nent, HYPERV_CPUID_INTERFACE,
- KVM_CPUID_INDEX_NOT_SIGNIFICANT);
+ entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_INTERFACE);
return entry && entry->eax == HYPERV_CPUID_SIGNATURE_EAX;
#else
return false;
@@ -368,15 +360,71 @@ static bool guest_cpuid_is_amd_or_hygon(struct kvm_vcpu *vcpu)
is_guest_vendor_hygon(entry->ebx, entry->ecx, entry->edx);
}
-static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
+/*
+ * This isn't truly "unsafe", but except for the cpu_caps initialization code,
+ * all register lookups should use __cpuid_entry_get_reg(), which provides
+ * compile-time validation of the input.
+ */
+static u32 cpuid_get_reg_unsafe(struct kvm_cpuid_entry2 *entry, u32 reg)
+{
+ switch (reg) {
+ case CPUID_EAX:
+ return entry->eax;
+ case CPUID_EBX:
+ return entry->ebx;
+ case CPUID_ECX:
+ return entry->ecx;
+ case CPUID_EDX:
+ return entry->edx;
+ default:
+ WARN_ON_ONCE(1);
+ return 0;
+ }
+}
+
+static int cpuid_func_emulated(struct kvm_cpuid_entry2 *entry, u32 func,
+ bool include_partially_emulated);
+
+void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
struct kvm_cpuid_entry2 *best;
+ struct kvm_cpuid_entry2 *entry;
bool allow_gbpages;
+ int i;
- BUILD_BUG_ON(KVM_NR_GOVERNED_FEATURES > KVM_MAX_NR_GOVERNED_FEATURES);
- bitmap_zero(vcpu->arch.governed_features.enabled,
- KVM_MAX_NR_GOVERNED_FEATURES);
+ memset(vcpu->arch.cpu_caps, 0, sizeof(vcpu->arch.cpu_caps));
+ BUILD_BUG_ON(ARRAY_SIZE(reverse_cpuid) != NR_KVM_CPU_CAPS);
+
+ /*
+ * Reset guest capabilities to userspace's guest CPUID definition, i.e.
+ * honor userspace's definition for features that don't require KVM or
+ * hardware management/support (or that KVM simply doesn't care about).
+ */
+ for (i = 0; i < NR_KVM_CPU_CAPS; i++) {
+ const struct cpuid_reg cpuid = reverse_cpuid[i];
+ struct kvm_cpuid_entry2 emulated;
+
+ if (!cpuid.function)
+ continue;
+
+ entry = kvm_find_cpuid_entry_index(vcpu, cpuid.function, cpuid.index);
+ if (!entry)
+ continue;
+
+ cpuid_func_emulated(&emulated, cpuid.function, true);
+
+ /*
+ * A vCPU has a feature if it's supported by KVM and is enabled
+ * in guest CPUID. Note, this includes features that are
+ * supported by KVM but aren't advertised to userspace!
+ */
+ vcpu->arch.cpu_caps[i] = kvm_cpu_caps[i] |
+ cpuid_get_reg_unsafe(&emulated, cpuid.reg);
+ vcpu->arch.cpu_caps[i] &= cpuid_get_reg_unsafe(entry, cpuid.reg);
+ }
+
+ kvm_update_cpuid_runtime(vcpu);
/*
* If TDP is enabled, let the guest use GBPAGES if they're supported in
@@ -390,9 +438,8 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
* and can install smaller shadow pages if the host lacks 1GiB support.
*/
allow_gbpages = tdp_enabled ? boot_cpu_has(X86_FEATURE_GBPAGES) :
- guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES);
- if (allow_gbpages)
- kvm_governed_feature_set(vcpu, X86_FEATURE_GBPAGES);
+ guest_cpu_cap_has(vcpu, X86_FEATURE_GBPAGES);
+ guest_cpu_cap_change(vcpu, X86_FEATURE_GBPAGES, allow_gbpages);
best = kvm_find_cpuid_entry(vcpu, 1);
if (best && apic) {
@@ -404,21 +451,22 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
kvm_apic_set_version(vcpu);
}
- vcpu->arch.guest_supported_xcr0 =
- cpuid_get_supported_xcr0(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent);
+ vcpu->arch.guest_supported_xcr0 = cpuid_get_supported_xcr0(vcpu);
- kvm_update_pv_runtime(vcpu);
+ vcpu->arch.pv_cpuid.features = kvm_apply_cpuid_pv_features_quirk(vcpu);
vcpu->arch.is_amd_compatible = guest_cpuid_is_amd_or_hygon(vcpu);
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
vcpu->arch.reserved_gpa_bits = kvm_vcpu_reserved_gpa_bits_raw(vcpu);
kvm_pmu_refresh(vcpu);
- vcpu->arch.cr4_guest_rsvd_bits =
- __cr4_reserved_bits(guest_cpuid_has, vcpu);
- kvm_hv_set_cpuid(vcpu, kvm_cpuid_has_hyperv(vcpu->arch.cpuid_entries,
- vcpu->arch.cpuid_nent));
+#define __kvm_cpu_cap_has(UNUSED_, f) kvm_cpu_cap_has(f)
+ vcpu->arch.cr4_guest_rsvd_bits = __cr4_reserved_bits(__kvm_cpu_cap_has, UNUSED_) |
+ __cr4_reserved_bits(guest_cpu_cap_has, vcpu);
+#undef __kvm_cpu_cap_has
+
+ kvm_hv_set_cpuid(vcpu, kvm_cpuid_has_hyperv(vcpu));
/* Invoke the vendor callback only after the above state is updated. */
kvm_x86_call(vcpu_after_set_cpuid)(vcpu);
@@ -457,9 +505,25 @@ u64 kvm_vcpu_reserved_gpa_bits_raw(struct kvm_vcpu *vcpu)
static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
int nent)
{
+ u32 vcpu_caps[NR_KVM_CPU_CAPS];
int r;
- __kvm_update_cpuid_runtime(vcpu, e2, nent);
+ /*
+ * Swap the existing (old) entries with the incoming (new) entries in
+ * order to massage the new entries, e.g. to account for dynamic bits
+ * that KVM controls, without clobbering the current guest CPUID, which
+ * KVM needs to preserve in order to unwind on failure.
+ *
+ * Similarly, save the vCPU's current cpu_caps so that the capabilities
+ * can be updated alongside the CPUID entries when performing runtime
+ * updates. Full initialization is done if and only if the vCPU hasn't
+ * run, i.e. only if userspace is potentially changing CPUID features.
+ */
+ swap(vcpu->arch.cpuid_entries, e2);
+ swap(vcpu->arch.cpuid_nent, nent);
+
+ memcpy(vcpu_caps, vcpu->arch.cpu_caps, sizeof(vcpu_caps));
+ BUILD_BUG_ON(sizeof(vcpu_caps) != sizeof(vcpu->arch.cpu_caps));
/*
* KVM does not correctly handle changing guest CPUID after KVM_RUN, as
@@ -475,35 +539,36 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
if (kvm_vcpu_has_run(vcpu)) {
r = kvm_cpuid_check_equal(vcpu, e2, nent);
if (r)
- return r;
-
- kvfree(e2);
- return 0;
+ goto err;
+ goto success;
}
#ifdef CONFIG_KVM_HYPERV
- if (kvm_cpuid_has_hyperv(e2, nent)) {
+ if (kvm_cpuid_has_hyperv(vcpu)) {
r = kvm_hv_vcpu_init(vcpu);
if (r)
- return r;
+ goto err;
}
#endif
- r = kvm_check_cpuid(vcpu, e2, nent);
+ r = kvm_check_cpuid(vcpu);
if (r)
- return r;
-
- kvfree(vcpu->arch.cpuid_entries);
- vcpu->arch.cpuid_entries = e2;
- vcpu->arch.cpuid_nent = nent;
+ goto err;
- vcpu->arch.kvm_cpuid = kvm_get_hypervisor_cpuid(vcpu, KVM_SIGNATURE);
#ifdef CONFIG_KVM_XEN
vcpu->arch.xen.cpuid = kvm_get_hypervisor_cpuid(vcpu, XEN_SIGNATURE);
#endif
kvm_vcpu_after_set_cpuid(vcpu);
+success:
+ kvfree(e2);
return 0;
+
+err:
+ memcpy(vcpu->arch.cpu_caps, vcpu_caps, sizeof(vcpu_caps));
+ swap(vcpu->arch.cpuid_entries, e2);
+ swap(vcpu->arch.cpuid_nent, nent);
+ return r;
}
/* when an old userspace process fills a new kernel module */
@@ -582,6 +647,9 @@ int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
if (cpuid->nent < vcpu->arch.cpuid_nent)
return -E2BIG;
+ if (vcpu->arch.cpuid_dynamic_bits_dirty)
+ kvm_update_cpuid_runtime(vcpu);
+
if (copy_to_user(entries, vcpu->arch.cpuid_entries,
vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
return -EFAULT;
@@ -590,107 +658,294 @@ int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
return 0;
}
-/* Mask kvm_cpu_caps for @leaf with the raw CPUID capabilities of this CPU. */
-static __always_inline void __kvm_cpu_cap_mask(unsigned int leaf)
+static __always_inline u32 raw_cpuid_get(struct cpuid_reg cpuid)
{
- const struct cpuid_reg cpuid = x86_feature_cpuid(leaf * 32);
struct kvm_cpuid_entry2 entry;
+ u32 base;
- reverse_cpuid_check(leaf);
+ /*
+ * KVM only supports features defined by Intel (0x0), AMD (0x80000000),
+ * and Centaur (0xc0000000). WARN if a feature for new vendor base is
+ * defined, as this and other code would need to be updated.
+ */
+ base = cpuid.function & 0xffff0000;
+ if (WARN_ON_ONCE(base && base != 0x80000000 && base != 0xc0000000))
+ return 0;
+
+ if (cpuid_eax(base) < cpuid.function)
+ return 0;
cpuid_count(cpuid.function, cpuid.index,
&entry.eax, &entry.ebx, &entry.ecx, &entry.edx);
- kvm_cpu_caps[leaf] &= *__cpuid_entry_get_reg(&entry, cpuid.reg);
+ return *__cpuid_entry_get_reg(&entry, cpuid.reg);
}
-static __always_inline
-void kvm_cpu_cap_init_kvm_defined(enum kvm_only_cpuid_leafs leaf, u32 mask)
-{
- /* Use kvm_cpu_cap_mask for leafs that aren't KVM-only. */
- BUILD_BUG_ON(leaf < NCAPINTS);
+/*
+ * For kernel-defined leafs, mask KVM's supported feature set with the kernel's
+ * capabilities as well as raw CPUID. For KVM-defined leafs, consult only raw
+ * CPUID, as KVM is the one and only authority (in the kernel).
+ */
+#define kvm_cpu_cap_init(leaf, feature_initializers...) \
+do { \
+ const struct cpuid_reg cpuid = x86_feature_cpuid(leaf * 32); \
+ const u32 __maybe_unused kvm_cpu_cap_init_in_progress = leaf; \
+ const u32 *kernel_cpu_caps = boot_cpu_data.x86_capability; \
+ u32 kvm_cpu_cap_passthrough = 0; \
+ u32 kvm_cpu_cap_synthesized = 0; \
+ u32 kvm_cpu_cap_emulated = 0; \
+ u32 kvm_cpu_cap_features = 0; \
+ \
+ feature_initializers \
+ \
+ kvm_cpu_caps[leaf] = kvm_cpu_cap_features; \
+ \
+ if (leaf < NCAPINTS) \
+ kvm_cpu_caps[leaf] &= kernel_cpu_caps[leaf]; \
+ \
+ kvm_cpu_caps[leaf] |= kvm_cpu_cap_passthrough; \
+ kvm_cpu_caps[leaf] &= (raw_cpuid_get(cpuid) | \
+ kvm_cpu_cap_synthesized); \
+ kvm_cpu_caps[leaf] |= kvm_cpu_cap_emulated; \
+} while (0)
- kvm_cpu_caps[leaf] = mask;
+/*
+ * Assert that the feature bit being declared, e.g. via F(), is in the CPUID
+ * word that's being initialized. Exempt 0x8000_0001.EDX usage of 0x1.EDX
+ * features, as AMD duplicated many 0x1.EDX features into 0x8000_0001.EDX.
+ */
+#define KVM_VALIDATE_CPU_CAP_USAGE(name) \
+do { \
+ u32 __leaf = __feature_leaf(X86_FEATURE_##name); \
+ \
+ BUILD_BUG_ON(__leaf != kvm_cpu_cap_init_in_progress); \
+} while (0)
+
+#define F(name) \
+({ \
+ KVM_VALIDATE_CPU_CAP_USAGE(name); \
+ kvm_cpu_cap_features |= feature_bit(name); \
+})
- __kvm_cpu_cap_mask(leaf);
-}
+/* Scattered Flag - For features that are scattered by cpufeatures.h. */
+#define SCATTERED_F(name) \
+({ \
+ BUILD_BUG_ON(X86_FEATURE_##name >= MAX_CPU_FEATURES); \
+ KVM_VALIDATE_CPU_CAP_USAGE(name); \
+ if (boot_cpu_has(X86_FEATURE_##name)) \
+ F(name); \
+})
-static __always_inline void kvm_cpu_cap_mask(enum cpuid_leafs leaf, u32 mask)
-{
- /* Use kvm_cpu_cap_init_kvm_defined for KVM-only leafs. */
- BUILD_BUG_ON(leaf >= NCAPINTS);
+/* Features that KVM supports only on 64-bit kernels. */
+#define X86_64_F(name) \
+({ \
+ KVM_VALIDATE_CPU_CAP_USAGE(name); \
+ if (IS_ENABLED(CONFIG_X86_64)) \
+ F(name); \
+})
- kvm_cpu_caps[leaf] &= mask;
+/*
+ * Emulated Feature - For features that KVM emulates in software irrespective
+ * of host CPU/kernel support.
+ */
+#define EMULATED_F(name) \
+({ \
+ kvm_cpu_cap_emulated |= feature_bit(name); \
+ F(name); \
+})
- __kvm_cpu_cap_mask(leaf);
-}
+/*
+ * Synthesized Feature - For features that are synthesized into boot_cpu_data,
+ * i.e. may not be present in the raw CPUID, but can still be advertised to
+ * userspace. Primarily used for mitigation related feature flags.
+ */
+#define SYNTHESIZED_F(name) \
+({ \
+ kvm_cpu_cap_synthesized |= feature_bit(name); \
+ F(name); \
+})
+
+/*
+ * Passthrough Feature - For features that KVM supports based purely on raw
+ * hardware CPUID, i.e. that KVM virtualizes even if the host kernel doesn't
+ * use the feature. Simply force set the feature in KVM's capabilities, raw
+ * CPUID support will be factored in by kvm_cpu_cap_mask().
+ */
+#define PASSTHROUGH_F(name) \
+({ \
+ kvm_cpu_cap_passthrough |= feature_bit(name); \
+ F(name); \
+})
+
+/*
+ * Aliased Features - For features in 0x8000_0001.EDX that are duplicates of
+ * identical 0x1.EDX features, and thus are aliased from 0x1 to 0x8000_0001.
+ */
+#define ALIASED_1_EDX_F(name) \
+({ \
+ BUILD_BUG_ON(__feature_leaf(X86_FEATURE_##name) != CPUID_1_EDX); \
+ BUILD_BUG_ON(kvm_cpu_cap_init_in_progress != CPUID_8000_0001_EDX); \
+ kvm_cpu_cap_features |= feature_bit(name); \
+})
+
+/*
+ * Vendor Features - For features that KVM supports, but are added in later
+ * because they require additional vendor enabling.
+ */
+#define VENDOR_F(name) \
+({ \
+ KVM_VALIDATE_CPU_CAP_USAGE(name); \
+})
+
+/*
+ * Runtime Features - For features that KVM dynamically sets/clears at runtime,
+ * e.g. when CR4 changes, but which are never advertised to userspace.
+ */
+#define RUNTIME_F(name) \
+({ \
+ KVM_VALIDATE_CPU_CAP_USAGE(name); \
+})
+
+/*
+ * Undefine the MSR bit macro to avoid token concatenation issues when
+ * processing X86_FEATURE_SPEC_CTRL_SSBD.
+ */
+#undef SPEC_CTRL_SSBD
+
+/* DS is defined by ptrace-abi.h on 32-bit builds. */
+#undef DS
void kvm_set_cpu_caps(void)
{
-#ifdef CONFIG_X86_64
- unsigned int f_gbpages = F(GBPAGES);
- unsigned int f_lm = F(LM);
- unsigned int f_xfd = F(XFD);
-#else
- unsigned int f_gbpages = 0;
- unsigned int f_lm = 0;
- unsigned int f_xfd = 0;
-#endif
memset(kvm_cpu_caps, 0, sizeof(kvm_cpu_caps));
BUILD_BUG_ON(sizeof(kvm_cpu_caps) - (NKVMCAPINTS * sizeof(*kvm_cpu_caps)) >
sizeof(boot_cpu_data.x86_capability));
- memcpy(&kvm_cpu_caps, &boot_cpu_data.x86_capability,
- sizeof(kvm_cpu_caps) - (NKVMCAPINTS * sizeof(*kvm_cpu_caps)));
-
- kvm_cpu_cap_mask(CPUID_1_ECX,
+ kvm_cpu_cap_init(CPUID_1_ECX,
+ F(XMM3),
+ F(PCLMULQDQ),
+ VENDOR_F(DTES64),
/*
* NOTE: MONITOR (and MWAIT) are emulated as NOP, but *not*
- * advertised to guests via CPUID!
+ * advertised to guests via CPUID! MWAIT is also technically a
+ * runtime flag thanks to IA32_MISC_ENABLES; mark it as such so
+ * that KVM is aware that it's a known, unadvertised flag.
*/
- F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ |
- 0 /* DS-CPL, VMX, SMX, EST */ |
- 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
- F(FMA) | F(CX16) | 0 /* xTPR Update */ | F(PDCM) |
- F(PCID) | 0 /* Reserved, DCA */ | F(XMM4_1) |
- F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
- 0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) |
- F(F16C) | F(RDRAND)
+ RUNTIME_F(MWAIT),
+ /* DS-CPL */
+ VENDOR_F(VMX),
+ /* SMX, EST */
+ /* TM2 */
+ F(SSSE3),
+ /* CNXT-ID */
+ /* Reserved */
+ F(FMA),
+ F(CX16),
+ /* xTPR Update */
+ F(PDCM),
+ F(PCID),
+ /* Reserved, DCA */
+ F(XMM4_1),
+ F(XMM4_2),
+ EMULATED_F(X2APIC),
+ F(MOVBE),
+ F(POPCNT),
+ EMULATED_F(TSC_DEADLINE_TIMER),
+ F(AES),
+ F(XSAVE),
+ RUNTIME_F(OSXSAVE),
+ F(AVX),
+ F(F16C),
+ F(RDRAND),
+ EMULATED_F(HYPERVISOR),
+ );
+
+ kvm_cpu_cap_init(CPUID_1_EDX,
+ F(FPU),
+ F(VME),
+ F(DE),
+ F(PSE),
+ F(TSC),
+ F(MSR),
+ F(PAE),
+ F(MCE),
+ F(CX8),
+ F(APIC),
+ /* Reserved */
+ F(SEP),
+ F(MTRR),
+ F(PGE),
+ F(MCA),
+ F(CMOV),
+ F(PAT),
+ F(PSE36),
+ /* PSN */
+ F(CLFLUSH),
+ /* Reserved */
+ VENDOR_F(DS),
+ /* ACPI */
+ F(MMX),
+ F(FXSR),
+ F(XMM),
+ F(XMM2),
+ F(SELFSNOOP),
+ /* HTT, TM, Reserved, PBE */
);
- /* KVM emulates x2apic in software irrespective of host support. */
- kvm_cpu_cap_set(X86_FEATURE_X2APIC);
-
- kvm_cpu_cap_mask(CPUID_1_EDX,
- F(FPU) | F(VME) | F(DE) | F(PSE) |
- F(TSC) | F(MSR) | F(PAE) | F(MCE) |
- F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) |
- F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
- F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLUSH) |
- 0 /* Reserved, DS, ACPI */ | F(MMX) |
- F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) |
- 0 /* HTT, TM, Reserved, PBE */
+
+ kvm_cpu_cap_init(CPUID_7_0_EBX,
+ F(FSGSBASE),
+ EMULATED_F(TSC_ADJUST),
+ F(SGX),
+ F(BMI1),
+ F(HLE),
+ F(AVX2),
+ F(FDP_EXCPTN_ONLY),
+ F(SMEP),
+ F(BMI2),
+ F(ERMS),
+ F(INVPCID),
+ F(RTM),
+ F(ZERO_FCS_FDS),
+ VENDOR_F(MPX),
+ F(AVX512F),
+ F(AVX512DQ),
+ F(RDSEED),
+ F(ADX),
+ F(SMAP),
+ F(AVX512IFMA),
+ F(CLFLUSHOPT),
+ F(CLWB),
+ VENDOR_F(INTEL_PT),
+ F(AVX512PF),
+ F(AVX512ER),
+ F(AVX512CD),
+ F(SHA_NI),
+ F(AVX512BW),
+ F(AVX512VL),
);
- kvm_cpu_cap_mask(CPUID_7_0_EBX,
- F(FSGSBASE) | F(SGX) | F(BMI1) | F(HLE) | F(AVX2) |
- F(FDP_EXCPTN_ONLY) | F(SMEP) | F(BMI2) | F(ERMS) | F(INVPCID) |
- F(RTM) | F(ZERO_FCS_FDS) | 0 /*MPX*/ | F(AVX512F) |
- F(AVX512DQ) | F(RDSEED) | F(ADX) | F(SMAP) | F(AVX512IFMA) |
- F(CLFLUSHOPT) | F(CLWB) | 0 /*INTEL_PT*/ | F(AVX512PF) |
- F(AVX512ER) | F(AVX512CD) | F(SHA_NI) | F(AVX512BW) |
- F(AVX512VL));
-
- kvm_cpu_cap_mask(CPUID_7_ECX,
- F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | F(RDPID) |
- F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
- F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
- F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/ |
- F(SGX_LC) | F(BUS_LOCK_DETECT)
+ kvm_cpu_cap_init(CPUID_7_ECX,
+ F(AVX512VBMI),
+ PASSTHROUGH_F(LA57),
+ F(PKU),
+ RUNTIME_F(OSPKE),
+ F(RDPID),
+ F(AVX512_VPOPCNTDQ),
+ F(UMIP),
+ F(AVX512_VBMI2),
+ F(GFNI),
+ F(VAES),
+ F(VPCLMULQDQ),
+ F(AVX512_VNNI),
+ F(AVX512_BITALG),
+ F(CLDEMOTE),
+ F(MOVDIRI),
+ F(MOVDIR64B),
+ VENDOR_F(WAITPKG),
+ F(SGX_LC),
+ F(BUS_LOCK_DETECT),
);
- /* Set LA57 based on hardware capability. */
- if (cpuid_ecx(7) & F(LA57))
- kvm_cpu_cap_set(X86_FEATURE_LA57);
/*
* PKU not yet implemented for shadow paging and requires OSPKE
@@ -699,18 +954,25 @@ void kvm_set_cpu_caps(void)
if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE))
kvm_cpu_cap_clear(X86_FEATURE_PKU);
- kvm_cpu_cap_mask(CPUID_7_EDX,
- F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
- F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
- F(MD_CLEAR) | F(AVX512_VP2INTERSECT) | F(FSRM) |
- F(SERIALIZE) | F(TSXLDTRK) | F(AVX512_FP16) |
- F(AMX_TILE) | F(AMX_INT8) | F(AMX_BF16) | F(FLUSH_L1D)
+ kvm_cpu_cap_init(CPUID_7_EDX,
+ F(AVX512_4VNNIW),
+ F(AVX512_4FMAPS),
+ F(SPEC_CTRL),
+ F(SPEC_CTRL_SSBD),
+ EMULATED_F(ARCH_CAPABILITIES),
+ F(INTEL_STIBP),
+ F(MD_CLEAR),
+ F(AVX512_VP2INTERSECT),
+ F(FSRM),
+ F(SERIALIZE),
+ F(TSXLDTRK),
+ F(AVX512_FP16),
+ F(AMX_TILE),
+ F(AMX_INT8),
+ F(AMX_BF16),
+ F(FLUSH_L1D),
);
- /* TSC_ADJUST and ARCH_CAPABILITIES are emulated in software. */
- kvm_cpu_cap_set(X86_FEATURE_TSC_ADJUST);
- kvm_cpu_cap_set(X86_FEATURE_ARCH_CAPABILITIES);
-
if (boot_cpu_has(X86_FEATURE_AMD_IBPB_RET) &&
boot_cpu_has(X86_FEATURE_AMD_IBPB) &&
boot_cpu_has(X86_FEATURE_AMD_IBRS))
@@ -720,65 +982,133 @@ void kvm_set_cpu_caps(void)
if (boot_cpu_has(X86_FEATURE_AMD_SSBD))
kvm_cpu_cap_set(X86_FEATURE_SPEC_CTRL_SSBD);
- kvm_cpu_cap_mask(CPUID_7_1_EAX,
- F(SHA512) | F(SM3) | F(SM4) | F(AVX_VNNI) | F(AVX512_BF16) |
- F(CMPCCXADD) | F(FZRM) | F(FSRS) | F(FSRC) | F(AMX_FP16) |
- F(AVX_IFMA) | F(LAM)
+ kvm_cpu_cap_init(CPUID_7_1_EAX,
+ F(SHA512),
+ F(SM3),
+ F(SM4),
+ F(AVX_VNNI),
+ F(AVX512_BF16),
+ F(CMPCCXADD),
+ F(FZRM),
+ F(FSRS),
+ F(FSRC),
+ F(AMX_FP16),
+ F(AVX_IFMA),
+ F(LAM),
);
- kvm_cpu_cap_init_kvm_defined(CPUID_7_1_EDX,
- F(AVX_VNNI_INT8) | F(AVX_NE_CONVERT) | F(AMX_COMPLEX) |
- F(AVX_VNNI_INT16) | F(PREFETCHITI) | F(AVX10)
+ kvm_cpu_cap_init(CPUID_7_1_EDX,
+ F(AVX_VNNI_INT8),
+ F(AVX_NE_CONVERT),
+ F(AMX_COMPLEX),
+ F(AVX_VNNI_INT16),
+ F(PREFETCHITI),
+ F(AVX10),
);
- kvm_cpu_cap_init_kvm_defined(CPUID_7_2_EDX,
- F(INTEL_PSFD) | F(IPRED_CTRL) | F(RRSBA_CTRL) | F(DDPD_U) |
- F(BHI_CTRL) | F(MCDT_NO)
+ kvm_cpu_cap_init(CPUID_7_2_EDX,
+ F(INTEL_PSFD),
+ F(IPRED_CTRL),
+ F(RRSBA_CTRL),
+ F(DDPD_U),
+ F(BHI_CTRL),
+ F(MCDT_NO),
);
- kvm_cpu_cap_mask(CPUID_D_1_EAX,
- F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | F(XSAVES) | f_xfd
+ kvm_cpu_cap_init(CPUID_D_1_EAX,
+ F(XSAVEOPT),
+ F(XSAVEC),
+ F(XGETBV1),
+ F(XSAVES),
+ X86_64_F(XFD),
);
- kvm_cpu_cap_init_kvm_defined(CPUID_12_EAX,
- SF(SGX1) | SF(SGX2) | SF(SGX_EDECCSSA)
+ kvm_cpu_cap_init(CPUID_12_EAX,
+ SCATTERED_F(SGX1),
+ SCATTERED_F(SGX2),
+ SCATTERED_F(SGX_EDECCSSA),
);
- kvm_cpu_cap_init_kvm_defined(CPUID_24_0_EBX,
- F(AVX10_128) | F(AVX10_256) | F(AVX10_512)
+ kvm_cpu_cap_init(CPUID_24_0_EBX,
+ F(AVX10_128),
+ F(AVX10_256),
+ F(AVX10_512),
);
- kvm_cpu_cap_mask(CPUID_8000_0001_ECX,
- F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ |
- F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
- F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) |
- 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM) |
- F(TOPOEXT) | 0 /* PERFCTR_CORE */
+ kvm_cpu_cap_init(CPUID_8000_0001_ECX,
+ F(LAHF_LM),
+ F(CMP_LEGACY),
+ VENDOR_F(SVM),
+ /* ExtApicSpace */
+ F(CR8_LEGACY),
+ F(ABM),
+ F(SSE4A),
+ F(MISALIGNSSE),
+ F(3DNOWPREFETCH),
+ F(OSVW),
+ /* IBS */
+ F(XOP),
+ /* SKINIT, WDT, LWP */
+ F(FMA4),
+ F(TBM),
+ F(TOPOEXT),
+ VENDOR_F(PERFCTR_CORE),
);
- kvm_cpu_cap_mask(CPUID_8000_0001_EDX,
- F(FPU) | F(VME) | F(DE) | F(PSE) |
- F(TSC) | F(MSR) | F(PAE) | F(MCE) |
- F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) |
- F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
- F(PAT) | F(PSE36) | 0 /* Reserved */ |
- F(NX) | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
- F(FXSR) | F(FXSR_OPT) | f_gbpages | F(RDTSCP) |
- 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW)
+ kvm_cpu_cap_init(CPUID_8000_0001_EDX,
+ ALIASED_1_EDX_F(FPU),
+ ALIASED_1_EDX_F(VME),
+ ALIASED_1_EDX_F(DE),
+ ALIASED_1_EDX_F(PSE),
+ ALIASED_1_EDX_F(TSC),
+ ALIASED_1_EDX_F(MSR),
+ ALIASED_1_EDX_F(PAE),
+ ALIASED_1_EDX_F(MCE),
+ ALIASED_1_EDX_F(CX8),
+ ALIASED_1_EDX_F(APIC),
+ /* Reserved */
+ F(SYSCALL),
+ ALIASED_1_EDX_F(MTRR),
+ ALIASED_1_EDX_F(PGE),
+ ALIASED_1_EDX_F(MCA),
+ ALIASED_1_EDX_F(CMOV),
+ ALIASED_1_EDX_F(PAT),
+ ALIASED_1_EDX_F(PSE36),
+ /* Reserved */
+ F(NX),
+ /* Reserved */
+ F(MMXEXT),
+ ALIASED_1_EDX_F(MMX),
+ ALIASED_1_EDX_F(FXSR),
+ F(FXSR_OPT),
+ X86_64_F(GBPAGES),
+ F(RDTSCP),
+ /* Reserved */
+ X86_64_F(LM),
+ F(3DNOWEXT),
+ F(3DNOW),
);
if (!tdp_enabled && IS_ENABLED(CONFIG_X86_64))
kvm_cpu_cap_set(X86_FEATURE_GBPAGES);
- kvm_cpu_cap_init_kvm_defined(CPUID_8000_0007_EDX,
- SF(CONSTANT_TSC)
+ kvm_cpu_cap_init(CPUID_8000_0007_EDX,
+ SCATTERED_F(CONSTANT_TSC),
);
- kvm_cpu_cap_mask(CPUID_8000_0008_EBX,
- F(CLZERO) | F(XSAVEERPTR) |
- F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) |
- F(AMD_SSB_NO) | F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON) |
- F(AMD_PSFD) | F(AMD_IBPB_RET)
+ kvm_cpu_cap_init(CPUID_8000_0008_EBX,
+ F(CLZERO),
+ F(XSAVEERPTR),
+ F(WBNOINVD),
+ F(AMD_IBPB),
+ F(AMD_IBRS),
+ F(AMD_SSBD),
+ F(VIRT_SSBD),
+ F(AMD_SSB_NO),
+ F(AMD_STIBP),
+ F(AMD_STIBP_ALWAYS_ON),
+ F(AMD_PSFD),
+ F(AMD_IBPB_RET),
);
/*
@@ -808,50 +1138,73 @@ void kvm_set_cpu_caps(void)
!boot_cpu_has(X86_FEATURE_AMD_SSBD))
kvm_cpu_cap_set(X86_FEATURE_VIRT_SSBD);
- /*
- * Hide all SVM features by default, SVM will set the cap bits for
- * features it emulates and/or exposes for L1.
- */
- kvm_cpu_cap_mask(CPUID_8000_000A_EDX, 0);
-
- kvm_cpu_cap_mask(CPUID_8000_001F_EAX,
- 0 /* SME */ | 0 /* SEV */ | 0 /* VM_PAGE_FLUSH */ | 0 /* SEV_ES */ |
- F(SME_COHERENT));
+ /* All SVM features required additional vendor module enabling. */
+ kvm_cpu_cap_init(CPUID_8000_000A_EDX,
+ VENDOR_F(NPT),
+ VENDOR_F(VMCBCLEAN),
+ VENDOR_F(FLUSHBYASID),
+ VENDOR_F(NRIPS),
+ VENDOR_F(TSCRATEMSR),
+ VENDOR_F(V_VMSAVE_VMLOAD),
+ VENDOR_F(LBRV),
+ VENDOR_F(PAUSEFILTER),
+ VENDOR_F(PFTHRESHOLD),
+ VENDOR_F(VGIF),
+ VENDOR_F(VNMI),
+ VENDOR_F(SVME_ADDR_CHK),
+ );
- kvm_cpu_cap_mask(CPUID_8000_0021_EAX,
- F(NO_NESTED_DATA_BP) | F(LFENCE_RDTSC) | 0 /* SmmPgCfgLock */ |
- F(NULL_SEL_CLR_BASE) | F(AUTOIBRS) | 0 /* PrefetchCtlMsr */ |
- F(WRMSR_XX_BASE_NS)
+ kvm_cpu_cap_init(CPUID_8000_001F_EAX,
+ VENDOR_F(SME),
+ VENDOR_F(SEV),
+ /* VM_PAGE_FLUSH */
+ VENDOR_F(SEV_ES),
+ F(SME_COHERENT),
);
- kvm_cpu_cap_check_and_set(X86_FEATURE_SBPB);
- kvm_cpu_cap_check_and_set(X86_FEATURE_IBPB_BRTYPE);
- kvm_cpu_cap_check_and_set(X86_FEATURE_SRSO_NO);
+ kvm_cpu_cap_init(CPUID_8000_0021_EAX,
+ F(NO_NESTED_DATA_BP),
+ /*
+ * Synthesize "LFENCE is serializing" into the AMD-defined entry
+ * in KVM's supported CPUID, i.e. if the feature is reported as
+ * supported by the kernel. LFENCE_RDTSC was a Linux-defined
+ * synthetic feature long before AMD joined the bandwagon, e.g.
+ * LFENCE is serializing on most CPUs that support SSE2. On
+ * CPUs that don't support AMD's leaf, ANDing with the raw host
+ * CPUID will drop the flags, and reporting support in AMD's
+ * leaf can make it easier for userspace to detect the feature.
+ */
+ SYNTHESIZED_F(LFENCE_RDTSC),
+ /* SmmPgCfgLock */
+ F(NULL_SEL_CLR_BASE),
+ F(AUTOIBRS),
+ EMULATED_F(NO_SMM_CTL_MSR),
+ /* PrefetchCtlMsr */
+ F(WRMSR_XX_BASE_NS),
+ SYNTHESIZED_F(SBPB),
+ SYNTHESIZED_F(IBPB_BRTYPE),
+ SYNTHESIZED_F(SRSO_NO),
+ F(SRSO_USER_KERNEL_NO),
+ );
- kvm_cpu_cap_init_kvm_defined(CPUID_8000_0022_EAX,
- F(PERFMON_V2)
+ kvm_cpu_cap_init(CPUID_8000_0022_EAX,
+ F(PERFMON_V2),
);
- /*
- * Synthesize "LFENCE is serializing" into the AMD-defined entry in
- * KVM's supported CPUID if the feature is reported as supported by the
- * kernel. LFENCE_RDTSC was a Linux-defined synthetic feature long
- * before AMD joined the bandwagon, e.g. LFENCE is serializing on most
- * CPUs that support SSE2. On CPUs that don't support AMD's leaf,
- * kvm_cpu_cap_mask() will unfortunately drop the flag due to ANDing
- * the mask with the raw host CPUID, and reporting support in AMD's
- * leaf can make it easier for userspace to detect the feature.
- */
- if (cpu_feature_enabled(X86_FEATURE_LFENCE_RDTSC))
- kvm_cpu_cap_set(X86_FEATURE_LFENCE_RDTSC);
if (!static_cpu_has_bug(X86_BUG_NULL_SEG))
kvm_cpu_cap_set(X86_FEATURE_NULL_SEL_CLR_BASE);
- kvm_cpu_cap_set(X86_FEATURE_NO_SMM_CTL_MSR);
- kvm_cpu_cap_mask(CPUID_C000_0001_EDX,
- F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) |
- F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |
- F(PMM) | F(PMM_EN)
+ kvm_cpu_cap_init(CPUID_C000_0001_EDX,
+ F(XSTORE),
+ F(XSTORE_EN),
+ F(XCRYPT),
+ F(XCRYPT_EN),
+ F(ACE2),
+ F(ACE2_EN),
+ F(PHE),
+ F(PHE_EN),
+ F(PMM),
+ F(PMM_EN),
);
/*
@@ -871,6 +1224,16 @@ void kvm_set_cpu_caps(void)
}
EXPORT_SYMBOL_GPL(kvm_set_cpu_caps);
+#undef F
+#undef SCATTERED_F
+#undef X86_64_F
+#undef EMULATED_F
+#undef SYNTHESIZED_F
+#undef PASSTHROUGH_F
+#undef ALIASED_1_EDX_F
+#undef VENDOR_F
+#undef RUNTIME_F
+
struct kvm_cpuid_array {
struct kvm_cpuid_entry2 *entries;
int maxnent;
@@ -928,14 +1291,11 @@ static struct kvm_cpuid_entry2 *do_host_cpuid(struct kvm_cpuid_array *array,
return entry;
}
-static int __do_cpuid_func_emulated(struct kvm_cpuid_array *array, u32 func)
+static int cpuid_func_emulated(struct kvm_cpuid_entry2 *entry, u32 func,
+ bool include_partially_emulated)
{
- struct kvm_cpuid_entry2 *entry;
-
- if (array->nent >= array->maxnent)
- return -E2BIG;
+ memset(entry, 0, sizeof(*entry));
- entry = &array->entries[array->nent];
entry->function = func;
entry->index = 0;
entry->flags = 0;
@@ -943,23 +1303,37 @@ static int __do_cpuid_func_emulated(struct kvm_cpuid_array *array, u32 func)
switch (func) {
case 0:
entry->eax = 7;
- ++array->nent;
- break;
+ return 1;
case 1:
- entry->ecx = F(MOVBE);
- ++array->nent;
- break;
+ entry->ecx = feature_bit(MOVBE);
+ /*
+ * KVM allows userspace to enumerate MONITOR+MWAIT support to
+ * the guest, but the MWAIT feature flag is never advertised
+ * to userspace because MONITOR+MWAIT aren't virtualized by
+ * hardware, can't be faithfully emulated in software (KVM
+ * emulates them as NOPs), and allowing the guest to execute
+ * them natively requires enabling a per-VM capability.
+ */
+ if (include_partially_emulated)
+ entry->ecx |= feature_bit(MWAIT);
+ return 1;
case 7:
entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
entry->eax = 0;
if (kvm_cpu_cap_has(X86_FEATURE_RDTSCP))
- entry->ecx = F(RDPID);
- ++array->nent;
- break;
+ entry->ecx = feature_bit(RDPID);
+ return 1;
default:
- break;
+ return 0;
}
+}
+
+static int __do_cpuid_func_emulated(struct kvm_cpuid_array *array, u32 func)
+{
+ if (array->nent >= array->maxnent)
+ return -E2BIG;
+ array->nent += cpuid_func_emulated(&array->entries[array->nent], func, false);
return 0;
}
@@ -1103,7 +1477,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
goto out;
cpuid_entry_override(entry, CPUID_D_1_EAX);
- if (entry->eax & (F(XSAVES)|F(XSAVEC)))
+ if (entry->eax & (feature_bit(XSAVES) | feature_bit(XSAVEC)))
entry->ebx = xstate_required_size(permitted_xcr0 | permitted_xss,
true);
else {
@@ -1334,7 +1708,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
phys_as = entry->eax & 0xff;
g_phys_as = phys_as;
if (kvm_mmu_get_max_tdp_level() < 5)
- g_phys_as = min(g_phys_as, 48);
+ g_phys_as = min(g_phys_as, 48U);
}
entry->eax = phys_as | (virt_as << 8) | (g_phys_as << 16);
@@ -1393,19 +1767,13 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
entry->ecx = entry->edx = 0;
if (!enable_pmu || !kvm_cpu_cap_has(X86_FEATURE_PERFMON_V2)) {
- entry->eax = entry->ebx;
+ entry->eax = entry->ebx = 0;
break;
}
cpuid_entry_override(entry, CPUID_8000_0022_EAX);
- if (kvm_cpu_cap_has(X86_FEATURE_PERFMON_V2))
- ebx.split.num_core_pmc = kvm_pmu_cap.num_counters_gp;
- else if (kvm_cpu_cap_has(X86_FEATURE_PERFCTR_CORE))
- ebx.split.num_core_pmc = AMD64_NUM_COUNTERS_CORE;
- else
- ebx.split.num_core_pmc = AMD64_NUM_COUNTERS;
-
+ ebx.split.num_core_pmc = kvm_pmu_cap.num_counters_gp;
entry->ebx = ebx.full;
break;
}
@@ -1540,22 +1908,6 @@ out_free:
return r;
}
-struct kvm_cpuid_entry2 *kvm_find_cpuid_entry_index(struct kvm_vcpu *vcpu,
- u32 function, u32 index)
-{
- return cpuid_entry2_find(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent,
- function, index);
-}
-EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry_index);
-
-struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
- u32 function)
-{
- return cpuid_entry2_find(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent,
- function, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
-}
-EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry);
-
/*
* Intel CPUID semantics treats any query for an out-of-range leaf as if the
* highest basic leaf (i.e. CPUID.0H:EAX) were requested. AMD CPUID semantics
@@ -1631,6 +1983,9 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
struct kvm_cpuid_entry2 *entry;
bool exact, used_max_basic = false;
+ if (vcpu->arch.cpuid_dynamic_bits_dirty)
+ kvm_update_cpuid_runtime(vcpu);
+
entry = kvm_find_cpuid_entry_index(vcpu, function, index);
exact = !!entry;
@@ -1646,12 +2001,29 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
*edx = entry->edx;
if (function == 7 && index == 0) {
u64 data;
- if (!__kvm_get_msr(vcpu, MSR_IA32_TSX_CTRL, &data, true) &&
+ if ((*ebx & (feature_bit(RTM) | feature_bit(HLE))) &&
+ !__kvm_get_msr(vcpu, MSR_IA32_TSX_CTRL, &data, true) &&
(data & TSX_CTRL_CPUID_CLEAR))
- *ebx &= ~(F(RTM) | F(HLE));
+ *ebx &= ~(feature_bit(RTM) | feature_bit(HLE));
} else if (function == 0x80000007) {
if (kvm_hv_invtsc_suppressed(vcpu))
- *edx &= ~SF(CONSTANT_TSC);
+ *edx &= ~feature_bit(CONSTANT_TSC);
+ } else if (IS_ENABLED(CONFIG_KVM_XEN) &&
+ kvm_xen_is_tsc_leaf(vcpu, function)) {
+ /*
+ * Update guest TSC frequency information if necessary.
+ * Ignore failures, there is no sane value that can be
+ * provided if KVM can't get the TSC frequency.
+ */
+ if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu))
+ kvm_guest_time_update(vcpu);
+
+ if (index == 1) {
+ *ecx = vcpu->arch.pvclock_tsc_mul;
+ *edx = vcpu->arch.pvclock_tsc_shift;
+ } else if (index == 2) {
+ *eax = vcpu->arch.hw_tsc_khz;
+ }
}
} else {
*eax = *ebx = *ecx = *edx = 0;