diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-05-26 14:20:14 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-05-26 14:20:14 -0700 |
commit | bf9095424d027e942e1d1ee74977e17b7df8e455 (patch) | |
tree | 57659cf68b7df09005bc5ada4d315d66472cebf3 /arch/x86/kvm/vmx/vmx.c | |
parent | 98931dd95fd489fcbfa97da563505a6f071d7c77 (diff) | |
parent | ffd1925a596ce68bed7d81c61cb64bc35f788a9d (diff) | |
download | lwn-bf9095424d027e942e1d1ee74977e17b7df8e455.tar.gz lwn-bf9095424d027e942e1d1ee74977e17b7df8e455.zip |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Paolo Bonzini:
"S390:
- ultravisor communication device driver
- fix TEID on terminating storage key ops
RISC-V:
- Added Sv57x4 support for G-stage page table
- Added range based local HFENCE functions
- Added remote HFENCE functions based on VCPU requests
- Added ISA extension registers in ONE_REG interface
- Updated KVM RISC-V maintainers entry to cover selftests support
ARM:
- Add support for the ARMv8.6 WFxT extension
- Guard pages for the EL2 stacks
- Trap and emulate AArch32 ID registers to hide unsupported features
- Ability to select and save/restore the set of hypercalls exposed to
the guest
- Support for PSCI-initiated suspend in collaboration with userspace
- GICv3 register-based LPI invalidation support
- Move host PMU event merging into the vcpu data structure
- GICv3 ITS save/restore fixes
- The usual set of small-scale cleanups and fixes
x86:
- New ioctls to get/set TSC frequency for a whole VM
- Allow userspace to opt out of hypercall patching
- Only do MSR filtering for MSRs accessed by rdmsr/wrmsr
AMD SEV improvements:
- Add KVM_EXIT_SHUTDOWN metadata for SEV-ES
- V_TSC_AUX support
Nested virtualization improvements for AMD:
- Support for "nested nested" optimizations (nested vVMLOAD/VMSAVE,
nested vGIF)
- Allow AVIC to co-exist with a nested guest running
- Fixes for LBR virtualizations when a nested guest is running, and
nested LBR virtualization support
- PAUSE filtering for nested hypervisors
Guest support:
- Decoupling of vcpu_is_preempted from PV spinlocks"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (199 commits)
KVM: x86: Fix the intel_pt PMI handling wrongly considered from guest
KVM: selftests: x86: Sync the new name of the test case to .gitignore
Documentation: kvm: reorder ARM-specific section about KVM_SYSTEM_EVENT_SUSPEND
x86, kvm: use correct GFP flags for preemption disabled
KVM: LAPIC: Drop pending LAPIC timer injection when canceling the timer
x86/kvm: Alloc dummy async #PF token outside of raw spinlock
KVM: x86: avoid calling x86 emulator without a decoded instruction
KVM: SVM: Use kzalloc for sev ioctl interfaces to prevent kernel data leak
x86/fpu: KVM: Set the base guest FPU uABI size to sizeof(struct kvm_xsave)
s390/uv_uapi: depend on CONFIG_S390
KVM: selftests: x86: Fix test failure on arch lbr capable platforms
KVM: LAPIC: Trace LAPIC timer expiration on every vmentry
KVM: s390: selftest: Test suppression indication on key prot exception
KVM: s390: Don't indicate suppression on dirtying, failing memop
selftests: drivers/s390x: Add uvdevice tests
drivers/s390/char: Add Ultravisor io device
MAINTAINERS: Update KVM RISC-V entry to cover selftests support
RISC-V: KVM: Introduce ISA extension register
RISC-V: KVM: Cleanup stale TLB entries when host CPU changes
RISC-V: KVM: Add remote HFENCE functions based on VCPU requests
...
Diffstat (limited to 'arch/x86/kvm/vmx/vmx.c')
-rw-r--r-- | arch/x86/kvm/vmx/vmx.c | 45 |
1 files changed, 37 insertions, 8 deletions
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 610355b9ccce..f5aeade623d6 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2444,7 +2444,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, &_cpu_based_exec_control) < 0) return -EIO; #ifdef CONFIG_X86_64 - if ((_cpu_based_exec_control & CPU_BASED_TPR_SHADOW)) + if (_cpu_based_exec_control & CPU_BASED_TPR_SHADOW) _cpu_based_exec_control &= ~CPU_BASED_CR8_LOAD_EXITING & ~CPU_BASED_CR8_STORE_EXITING; #endif @@ -2948,7 +2948,7 @@ static void vmx_flush_tlb_current(struct kvm_vcpu *vcpu) if (enable_ept) ept_sync_context(construct_eptp(vcpu, root_hpa, - mmu->shadow_root_level)); + mmu->root_role.level)); else vpid_sync_context(vmx_get_current_vpid(vcpu)); } @@ -4385,7 +4385,7 @@ static void init_vmcs(struct vcpu_vmx *vmx) if (cpu_has_secondary_exec_ctrls()) secondary_exec_controls_set(vmx, vmx_secondary_exec_control(vmx)); - if (kvm_vcpu_apicv_active(&vmx->vcpu)) { + if (enable_apicv && lapic_in_kernel(&vmx->vcpu)) { vmcs_write64(EOI_EXIT_BITMAP0, 0); vmcs_write64(EOI_EXIT_BITMAP1, 0); vmcs_write64(EOI_EXIT_BITMAP2, 0); @@ -5410,9 +5410,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) error_code |= (exit_qualification & EPT_VIOLATION_ACC_INSTR) ? PFERR_FETCH_MASK : 0; /* ept page table entry is present? */ - error_code |= (exit_qualification & - (EPT_VIOLATION_READABLE | EPT_VIOLATION_WRITABLE | - EPT_VIOLATION_EXECUTABLE)) + error_code |= (exit_qualification & EPT_VIOLATION_RWX_MASK) ? PFERR_PRESENT_MASK : 0; error_code |= (exit_qualification & EPT_VIOLATION_GVA_TRANSLATED) != 0 ? @@ -7823,7 +7821,6 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .cpu_dirty_log_size = PML_ENTITY_NUM, .update_cpu_dirty_logging = vmx_update_cpu_dirty_logging, - .pmu_ops = &intel_pmu_ops, .nested_ops = &vmx_nested_ops, .pi_update_irte = vmx_pi_update_irte, @@ -7856,7 +7853,7 @@ static unsigned int vmx_handle_intel_pt_intr(void) struct kvm_vcpu *vcpu = kvm_get_running_vcpu(); /* '0' on failure so that the !PT case can use a RET0 static call. */ - if (!kvm_arch_pmi_in_guest(vcpu)) + if (!vcpu || !kvm_handling_nmi_from_guest(vcpu)) return 0; kvm_make_request(KVM_REQ_PMI, vcpu); @@ -7891,6 +7888,31 @@ static __init void vmx_setup_user_return_msrs(void) kvm_add_user_return_msr(vmx_uret_msrs_list[i]); } +static void __init vmx_setup_me_spte_mask(void) +{ + u64 me_mask = 0; + + /* + * kvm_get_shadow_phys_bits() returns shadow_phys_bits. Use + * the former to avoid exposing shadow_phys_bits. + * + * On pre-MKTME system, boot_cpu_data.x86_phys_bits equals to + * shadow_phys_bits. On MKTME and/or TDX capable systems, + * boot_cpu_data.x86_phys_bits holds the actual physical address + * w/o the KeyID bits, and shadow_phys_bits equals to MAXPHYADDR + * reported by CPUID. Those bits between are KeyID bits. + */ + if (boot_cpu_data.x86_phys_bits != kvm_get_shadow_phys_bits()) + me_mask = rsvd_bits(boot_cpu_data.x86_phys_bits, + kvm_get_shadow_phys_bits() - 1); + /* + * Unlike SME, host kernel doesn't support setting up any + * MKTME KeyID on Intel platforms. No memory encryption + * bits should be included into the SPTE. + */ + kvm_mmu_set_me_spte_mask(0, me_mask); +} + static struct kvm_x86_init_ops vmx_init_ops __initdata; static __init int hardware_setup(void) @@ -7993,6 +8015,12 @@ static __init int hardware_setup(void) kvm_mmu_set_ept_masks(enable_ept_ad_bits, cpu_has_vmx_ept_execute_only()); + /* + * Setup shadow_me_value/shadow_me_mask to include MKTME KeyID + * bits to shadow_zero_check. + */ + vmx_setup_me_spte_mask(); + kvm_configure_mmu(enable_ept, 0, vmx_get_max_tdp_level(), ept_caps_to_lpage_level(vmx_capability.ept)); @@ -8077,6 +8105,7 @@ static struct kvm_x86_init_ops vmx_init_ops __initdata = { .handle_intel_pt_intr = NULL, .runtime_ops = &vmx_x86_ops, + .pmu_ops = &intel_pmu_ops, }; static void vmx_cleanup_l1d_flush(void) |