diff options
Diffstat (limited to 'arch/x86/include/asm/kvm_host.h')
| -rw-r--r-- | arch/x86/include/asm/kvm_host.h | 346 |
1 files changed, 239 insertions, 107 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index b15cde0a9b5c..f14009f25a3b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -27,16 +27,21 @@ #include <linux/kfifo.h> #include <linux/sched/vhost_task.h> #include <linux/call_once.h> +#include <linux/atomic.h> #include <asm/apic.h> #include <asm/pvclock-abi.h> +#include <asm/debugreg.h> #include <asm/desc.h> #include <asm/mtrr.h> #include <asm/msr-index.h> +#include <asm/msr.h> #include <asm/asm.h> +#include <asm/irq_remapping.h> #include <asm/kvm_page_track.h> #include <asm/kvm_vcpu_regs.h> -#include <asm/reboot.h> +#include <asm/virt.h> + #include <hyperv/hvhdk.h> #define __KVM_HAVE_ARCH_VCPU_DEBUGFS @@ -116,14 +121,15 @@ #define KVM_REQ_TLB_FLUSH_GUEST \ KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_APF_READY KVM_ARCH_REQ(28) -#define KVM_REQ_MSR_FILTER_CHANGED KVM_ARCH_REQ(29) +#define KVM_REQ_RECALC_INTERCEPTS KVM_ARCH_REQ(29) #define KVM_REQ_UPDATE_CPU_DIRTY_LOGGING \ KVM_ARCH_REQ_FLAGS(30, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_MMU_FREE_OBSOLETE_ROOTS \ KVM_ARCH_REQ_FLAGS(31, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_HV_TLB_FLUSH \ KVM_ARCH_REQ_FLAGS(32, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) -#define KVM_REQ_UPDATE_PROTECTED_GUEST_STATE KVM_ARCH_REQ(34) +#define KVM_REQ_UPDATE_PROTECTED_GUEST_STATE \ + KVM_ARCH_REQ_FLAGS(34, KVM_REQUEST_WAIT) #define CR0_RESERVED_BITS \ (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ @@ -137,7 +143,7 @@ | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \ | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \ | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP \ - | X86_CR4_LAM_SUP)) + | X86_CR4_LAM_SUP | X86_CR4_CET)) #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) @@ -190,7 +196,15 @@ enum kvm_reg { VCPU_EXREG_PDPTR = NR_VCPU_REGS, VCPU_EXREG_CR0, + /* + * Alias AMD's ERAPS (not a real register) to CR3 so that common code + * can trigger emulation of the RAP (Return Address Predictor) with + * minimal support required in common code. Piggyback CR3 as the RAP + * is cleared on writes to CR3, i.e. marking CR3 dirty will naturally + * mark ERAPS dirty as well. + */ VCPU_EXREG_CR3, + VCPU_EXREG_ERAPS = VCPU_EXREG_CR3, VCPU_EXREG_CR4, VCPU_EXREG_RFLAGS, VCPU_EXREG_SEGMENTS, @@ -245,7 +259,6 @@ enum x86_intercept_stage; #define DR7_BP_EN_MASK 0x000000ff #define DR7_GE (1 << 9) #define DR7_GD (1 << 13) -#define DR7_FIXED_1 0x00000400 #define DR7_VOLATILE 0xffff2bff #define KVM_GUESTDBG_VALID_MASK \ @@ -263,6 +276,7 @@ enum x86_intercept_stage; #define PFERR_RSVD_MASK BIT(3) #define PFERR_FETCH_MASK BIT(4) #define PFERR_PK_MASK BIT(5) +#define PFERR_SS_MASK BIT(6) #define PFERR_SGX_MASK BIT(15) #define PFERR_GUEST_RMP_MASK BIT_ULL(31) #define PFERR_GUEST_FINAL_MASK BIT_ULL(32) @@ -293,6 +307,7 @@ enum x86_intercept_stage; */ #define KVM_APIC_PV_EOI_PENDING 1 +struct kvm_kernel_irqfd; struct kvm_kernel_irq_routing_entry; /* @@ -405,11 +420,10 @@ union kvm_cpu_role { }; struct kvm_rmap_head { - unsigned long val; + atomic_long_t val; }; struct kvm_pio_request { - unsigned long linear_rip; unsigned long count; int in; int port; @@ -524,6 +538,7 @@ struct kvm_pmc { */ u64 emulated_counter; u64 eventsel; + u64 eventsel_hw; struct perf_event *perf_event; struct kvm_vcpu *vcpu; /* @@ -541,10 +556,10 @@ struct kvm_pmc { #define KVM_MAX_NR_GP_COUNTERS KVM_MAX(KVM_MAX_NR_INTEL_GP_COUNTERS, \ KVM_MAX_NR_AMD_GP_COUNTERS) -#define KVM_MAX_NR_INTEL_FIXED_COUTNERS 3 -#define KVM_MAX_NR_AMD_FIXED_COUTNERS 0 -#define KVM_MAX_NR_FIXED_COUNTERS KVM_MAX(KVM_MAX_NR_INTEL_FIXED_COUTNERS, \ - KVM_MAX_NR_AMD_FIXED_COUTNERS) +#define KVM_MAX_NR_INTEL_FIXED_COUNTERS 3 +#define KVM_MAX_NR_AMD_FIXED_COUNTERS 0 +#define KVM_MAX_NR_FIXED_COUNTERS KVM_MAX(KVM_MAX_NR_INTEL_FIXED_COUNTERS, \ + KVM_MAX_NR_AMD_FIXED_COUNTERS) struct kvm_pmu { u8 version; @@ -552,6 +567,7 @@ struct kvm_pmu { unsigned nr_arch_fixed_counters; unsigned available_event_types; u64 fixed_ctr_ctrl; + u64 fixed_ctr_ctrl_hw; u64 fixed_ctr_ctrl_rsvd; u64 global_ctrl; u64 global_status; @@ -575,6 +591,9 @@ struct kvm_pmu { DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX); DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX); + DECLARE_BITMAP(pmc_counting_instructions, X86_PMC_IDX_MAX); + DECLARE_BITMAP(pmc_counting_branches, X86_PMC_IDX_MAX); + u64 ds_area; u64 pebs_enable; u64 pebs_enable_rsvd; @@ -606,8 +625,15 @@ struct kvm_pmu { struct kvm_pmu_ops; enum { - KVM_DEBUGREG_BP_ENABLED = 1, - KVM_DEBUGREG_WONT_EXIT = 2, + KVM_DEBUGREG_BP_ENABLED = BIT(0), + KVM_DEBUGREG_WONT_EXIT = BIT(1), + /* + * Guest debug registers (DR0-3, DR6 and DR7) are saved/restored by + * hardware on exit from or enter to guest. KVM needn't switch them. + * DR0-3, DR6 and DR7 are set to their architectural INIT value on VM + * exit, host values need to be restored. + */ + KVM_DEBUGREG_AUTO_SWITCH = BIT(2), }; struct kvm_mtrr { @@ -690,8 +716,13 @@ struct kvm_vcpu_hv { struct kvm_vcpu_hv_tlb_flush_fifo tlb_flush_fifo[HV_NR_TLB_FLUSH_FIFOS]; - /* Preallocated buffer for handling hypercalls passing sparse vCPU set */ + /* + * Preallocated buffers for handling hypercalls that pass sparse vCPU + * sets (for high vCPU counts, they're too large to comfortably fit on + * the stack). + */ u64 sparse_banks[HV_MAX_SPARSE_VCPU_BANKS]; + DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS); struct hv_vp_assist_page vp_assist_page; @@ -754,6 +785,10 @@ enum kvm_only_cpuid_leafs { CPUID_8000_0022_EAX, CPUID_7_2_EDX, CPUID_24_0_EBX, + CPUID_8000_0021_ECX, + CPUID_7_1_ECX, + CPUID_1E_1_EAX, + CPUID_24_1_ECX, NR_KVM_CPU_CAPS, NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS, @@ -780,6 +815,7 @@ struct kvm_vcpu_arch { u32 pkru; u32 hflags; u64 efer; + u64 host_debugctl; u64 apic_base; struct kvm_lapic *apic; /* kernel irqchip context */ bool load_eoi_exitmap_pending; @@ -793,7 +829,6 @@ struct kvm_vcpu_arch { bool at_instruction_boundary; bool tpr_access_reporting; bool xfd_no_write_intercept; - u64 ia32_xss; u64 microcode_version; u64 arch_capabilities; u64 perf_capabilities; @@ -854,6 +889,8 @@ struct kvm_vcpu_arch { u64 xcr0; u64 guest_supported_xcr0; + u64 ia32_xss; + u64 guest_supported_xss; struct kvm_pio_request pio; void *pio_data; @@ -879,6 +916,7 @@ struct kvm_vcpu_arch { int cpuid_nent; struct kvm_cpuid_entry2 *cpuid_entries; + bool cpuid_dynamic_bits_dirty; bool is_amd_compatible; /* @@ -906,9 +944,12 @@ struct kvm_vcpu_arch { bool emulate_regs_need_sync_to_vcpu; bool emulate_regs_need_sync_from_vcpu; int (*complete_userspace_io)(struct kvm_vcpu *vcpu); + unsigned long cui_linear_rip; + int cui_rdmsr_imm_reg; gpa_t time; - struct pvclock_vcpu_time_info hv_clock; + s8 pvclock_tsc_shift; + u32 pvclock_tsc_mul; unsigned int hw_tsc_khz; struct gfn_to_pfn_cache pv_time; /* set guest stopped flag in pvclock flags field */ @@ -996,8 +1037,8 @@ struct kvm_vcpu_arch { u64 msr_int_val; /* MSR_KVM_ASYNC_PF_INT */ u16 vec; u32 id; - bool send_user_only; u32 host_apf_flags; + bool send_always; bool delivery_as_pf_vmexit; bool pageready_pending; } apf; @@ -1022,13 +1063,11 @@ struct kvm_vcpu_arch { int pending_ioapic_eoi; int pending_external_vector; + int highest_stale_pending_ioapic_eoi; /* be preempted when it's in kernel-mode(cpl=0) */ bool preempted_in_kernel; - /* Flush the L1 Data cache for L1TF mitigation on VMENTER */ - bool l1tf_flush_l1d; - /* Host CPU on which VM-entry was most recently attempted */ int last_vmentry_cpu; @@ -1052,6 +1091,7 @@ struct kvm_vcpu_arch { /* Protected Guests */ bool guest_state_protected; + bool guest_tsc_protected; /* * Set when PDPTS were loaded directly by the userspace without @@ -1059,6 +1099,21 @@ struct kvm_vcpu_arch { */ bool pdptrs_from_userspace; + /* + * Set if an emulated nested VM-Enter to L2 is pending completion. KVM + * must not synthesize a VM-Exit to L1 before entering L2, as VM-Exits + * can only occur at instruction boundaries. The only exception is + * VMX's "notify" exits, which exist in large part to break the CPU out + * of infinite ucode loops, but can corrupt vCPU state in the process! + * + * For all intents and purposes, this is a boolean, but it's tracked as + * a u8 so that KVM can detect when userspace may have stuffed vCPU + * state and generated an architecturally-impossible VM-Exit. + */ +#define KVM_NESTED_RUN_PENDING 1 +#define KVM_NESTED_RUN_PENDING_UNTRUSTED 2 + u8 nested_run_pending; + #if IS_ENABLED(CONFIG_HYPERV) hpa_t hv_root_tdp; #endif @@ -1188,15 +1243,25 @@ struct kvm_xen { struct gfn_to_pfn_cache shinfo_cache; struct idr evtchn_ports; unsigned long poll_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)]; + + struct kvm_xen_hvm_config hvm_config; }; #endif enum kvm_irqchip_mode { KVM_IRQCHIP_NONE, +#ifdef CONFIG_KVM_IOAPIC KVM_IRQCHIP_KERNEL, /* created with KVM_CREATE_IRQCHIP */ +#endif KVM_IRQCHIP_SPLIT, /* created with KVM_CAP_SPLIT_IRQCHIP */ }; +enum kvm_suppress_eoi_broadcast_mode { + KVM_SUPPRESS_EOI_BROADCAST_QUIRKED, /* Legacy behavior */ + KVM_SUPPRESS_EOI_BROADCAST_ENABLED, /* Enable Suppress EOI broadcast */ + KVM_SUPPRESS_EOI_BROADCAST_DISABLED /* Disable Suppress EOI broadcast */ +}; + struct kvm_x86_msr_filter { u8 count; bool default_allow:1; @@ -1212,7 +1277,7 @@ struct kvm_x86_pmu_event_filter { __u32 nr_excludes; __u64 *includes; __u64 *excludes; - __u64 events[]; + __u64 events[] __counted_by(nevents); }; enum kvm_apicv_inhibit { @@ -1296,6 +1361,12 @@ enum kvm_apicv_inhibit { */ APICV_INHIBIT_REASON_LOGICAL_ID_ALIASED, + /* + * AVIC is disabled because the vCPU's APIC ID is beyond the max + * supported by AVIC/x2AVIC, i.e. the vCPU is unaddressable. + */ + APICV_INHIBIT_REASON_PHYSICAL_ID_TOO_BIG, + NR_APICV_INHIBIT_REASONS, }; @@ -1314,20 +1385,10 @@ enum kvm_apicv_inhibit { __APICV_INHIBIT_REASON(IRQWIN), \ __APICV_INHIBIT_REASON(PIT_REINJ), \ __APICV_INHIBIT_REASON(SEV), \ - __APICV_INHIBIT_REASON(LOGICAL_ID_ALIASED) + __APICV_INHIBIT_REASON(LOGICAL_ID_ALIASED), \ + __APICV_INHIBIT_REASON(PHYSICAL_ID_TOO_BIG) -struct kvm_arch { - unsigned long n_used_mmu_pages; - unsigned long n_requested_mmu_pages; - unsigned long n_max_mmu_pages; - unsigned int indirect_shadow_pages; - u8 mmu_valid_gen; - u8 vm_type; - bool has_private_mem; - bool has_protected_state; - bool pre_fault_allowed; - struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; - struct list_head active_mmu_pages; +struct kvm_possible_nx_huge_pages { /* * A list of kvm_mmu_page structs that, if zapped, could possibly be * replaced by an NX huge page. A shadow page is on this list if its @@ -1339,7 +1400,32 @@ struct kvm_arch { * guest attempts to execute from the region then KVM obviously can't * create an NX huge page (without hanging the guest). */ - struct list_head possible_nx_huge_pages; + struct list_head pages; + u64 nr_pages; +}; + +enum kvm_mmu_type { + KVM_SHADOW_MMU, +#ifdef CONFIG_X86_64 + KVM_TDP_MMU, +#endif + KVM_NR_MMU_TYPES, +}; + +struct kvm_arch { + unsigned long n_used_mmu_pages; + unsigned long n_requested_mmu_pages; + unsigned long n_max_mmu_pages; + unsigned int indirect_shadow_pages; + u8 mmu_valid_gen; + u8 vm_type; + bool has_private_mem; + bool has_protected_state; + bool has_protected_eoi; + bool pre_fault_allowed; + struct hlist_head *mmu_page_hash; + struct list_head active_mmu_pages; + struct kvm_possible_nx_huge_pages possible_nx_huge_pages[KVM_NR_MMU_TYPES]; #ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING struct kvm_page_track_notifier_head track_notifier_head; #endif @@ -1353,16 +1439,17 @@ struct kvm_arch { u64 shadow_mmio_value; - struct iommu_domain *iommu_domain; - bool iommu_noncoherent; #define __KVM_HAVE_ARCH_NONCOHERENT_DMA atomic_t noncoherent_dma_count; -#define __KVM_HAVE_ARCH_ASSIGNED_DEVICE - atomic_t assigned_device_count; + unsigned long nr_possible_bypass_irqs; + +#ifdef CONFIG_KVM_IOAPIC struct kvm_pic *vpic; struct kvm_ioapic *vioapic; struct kvm_pit *vpit; +#endif atomic_t vapics_in_nmi_mode; + struct mutex apic_map_lock; struct kvm_apic_map __rcu *apic_map; atomic_t apic_map_dirty; @@ -1370,18 +1457,28 @@ struct kvm_arch { bool apic_access_memslot_enabled; bool apic_access_memslot_inhibited; - /* Protects apicv_inhibit_reasons */ + /* + * Force apicv_update_lock and apicv_nr_irq_window_req to reside in a + * dedicated cacheline. They are write-mostly, whereas most everything + * else in kvm_arch is read-mostly. Note that apicv_inhibit_reasons is + * read-mostly: toggling VM-wide inhibits is rare; _checking_ for + * inhibits is common. + */ + ____cacheline_aligned + /* + * Protects apicv_inhibit_reasons and apicv_nr_irq_window_req (with an + * asterisk, see kvm_inc_or_dec_irq_window_inhibit() for details). + */ struct rw_semaphore apicv_update_lock; + atomic_t apicv_nr_irq_window_req; + ____cacheline_aligned + unsigned long apicv_inhibit_reasons; gpa_t wall_clock; - bool mwait_in_guest; - bool hlt_in_guest; - bool pause_in_guest; - bool cstate_in_guest; + u64 disabled_exits; - unsigned long irq_sources_bitmap; s64 kvmclock_offset; /* @@ -1407,13 +1504,7 @@ struct kvm_arch { bool use_master_clock; u64 master_kernel_ns; u64 master_cycle_now; - struct delayed_work kvmclock_update_work; - struct delayed_work kvmclock_sync_work; - - struct kvm_xen_hvm_config xen_hvm_config; - - /* reads protected by irq_srcu, writes by irq_lock */ - struct hlist_head mask_notifier_list; + struct ratelimit_state kvmclock_update_rs; #ifdef CONFIG_KVM_HYPERV struct kvm_hv hyperv; @@ -1436,7 +1527,9 @@ struct kvm_arch { bool x2apic_format; bool x2apic_broadcast_quirk_disabled; + enum kvm_suppress_eoi_broadcast_mode suppress_eoi_broadcast_mode; + bool has_mapped_host_mmio; bool guest_can_read_msr_platform_info; bool exception_payload_enabled; @@ -1444,6 +1537,7 @@ struct kvm_arch { bool bus_lock_detection_enabled; bool enable_pmu; + bool created_mediated_pmu; u32 notify_window; u32 notify_vmexit_flags; @@ -1469,8 +1563,13 @@ struct kvm_arch { struct once nx_once; #ifdef CONFIG_X86_64 - /* The number of TDP MMU pages across all roots. */ +#ifdef CONFIG_KVM_PROVE_MMU + /* + * The number of TDP MMU pages across all roots. Used only to sanity + * check that KVM isn't leaking TDP MMU pages. + */ atomic64_t tdp_mmu_pages; +#endif /* * List of struct kvm_mmu_pages being used as roots. @@ -1478,6 +1577,7 @@ struct kvm_arch { * tdp_mmu_page set. * * For reads, this list is protected by: + * RCU alone or * the MMU lock in read mode + RCU or * the MMU lock in write mode * @@ -1496,7 +1596,7 @@ struct kvm_arch { * is held in read mode: * - tdp_mmu_roots (above) * - the link field of kvm_mmu_page structs used by the TDP MMU - * - possible_nx_huge_pages; + * - possible_nx_huge_pages[KVM_TDP_MMU]; * - the possible_nx_huge_page_link field of kvm_mmu_page structs used * by the TDP MMU * Because the lock is only taken within the MMU lock, strictly @@ -1560,6 +1660,13 @@ struct kvm_arch { struct kvm_mmu_memory_cache split_desc_cache; gfn_t gfn_direct_bits; + + /* + * Size of the CPU's dirty log buffer, i.e. VMX's PML buffer. A Zero + * value indicates CPU dirty logging is unsupported or disabled in + * current VM. + */ + int cpu_dirty_log_size; }; struct kvm_vm_stat { @@ -1647,6 +1754,12 @@ static inline u16 kvm_lapic_irq_dest_mode(bool dest_mode_logical) return dest_mode_logical ? APIC_DEST_LOGICAL : APIC_DEST_PHYSICAL; } +enum kvm_x86_run_flags { + KVM_RUN_FORCE_IMMEDIATE_EXIT = BIT(0), + KVM_RUN_LOAD_GUEST_DR6 = BIT(1), + KVM_RUN_LOAD_DEBUGCTL = BIT(2), +}; + struct kvm_x86_ops { const char *name; @@ -1663,6 +1776,7 @@ struct kvm_x86_ops { unsigned int vm_size; int (*vm_init)(struct kvm *kvm); void (*vm_destroy)(struct kvm *kvm); + void (*vm_pre_destroy)(struct kvm *kvm); /* Create, but do not attach this VCPU */ int (*vcpu_precreate)(struct kvm *kvm); @@ -1674,6 +1788,12 @@ struct kvm_x86_ops { void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); void (*vcpu_put)(struct kvm_vcpu *vcpu); + /* + * Mask of DEBUGCTL bits that are owned by the host, i.e. that need to + * match the host's value even while the guest is active. + */ + const u64 HOST_OWNED_DEBUGCTL; + void (*update_exception_bitmap)(struct kvm_vcpu *vcpu); int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr); int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr); @@ -1726,7 +1846,7 @@ struct kvm_x86_ops { int (*vcpu_pre_run)(struct kvm_vcpu *vcpu); enum exit_fastpath_completion (*vcpu_run)(struct kvm_vcpu *vcpu, - bool force_immediate_exit); + u64 run_flags); int (*handle_exit)(struct kvm_vcpu *vcpu, enum exit_fastpath_completion exit_fastpath); int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); @@ -1777,15 +1897,15 @@ struct kvm_x86_ops { void *external_spt); /* Update the external page table from spte getting set. */ int (*set_external_spte)(struct kvm *kvm, gfn_t gfn, enum pg_level level, - kvm_pfn_t pfn_for_gfn); + u64 mirror_spte); /* Update external page tables for page table about to be freed. */ int (*free_external_spt)(struct kvm *kvm, gfn_t gfn, enum pg_level level, void *external_spt); /* Update external page table from spte getting removed, and flush TLB. */ - int (*remove_external_spte)(struct kvm *kvm, gfn_t gfn, enum pg_level level, - kvm_pfn_t pfn_for_gfn); + void (*remove_external_spte)(struct kvm *kvm, gfn_t gfn, enum pg_level level, + u64 mirror_spte); bool (*has_wbinvd_exit)(void); @@ -1811,11 +1931,6 @@ struct kvm_x86_ops { struct x86_exception *exception); void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu); - /* - * Size of the CPU's dirty log buffer, i.e. VMX's PML buffer. A zero - * value indicates CPU dirty logging is unsupported or disabled. - */ - int cpu_dirty_log_size; void (*update_cpu_dirty_logging)(struct kvm_vcpu *vcpu); const struct kvm_x86_nested_ops *nested_ops; @@ -1823,12 +1938,14 @@ struct kvm_x86_ops { void (*vcpu_blocking)(struct kvm_vcpu *vcpu); void (*vcpu_unblocking)(struct kvm_vcpu *vcpu); - int (*pi_update_irte)(struct kvm *kvm, unsigned int host_irq, - uint32_t guest_irq, bool set); - void (*pi_start_assignment)(struct kvm *kvm); + int (*pi_update_irte)(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm, + unsigned int host_irq, uint32_t guest_irq, + struct kvm_vcpu *vcpu, u32 vector); + void (*pi_start_bypass)(struct kvm *kvm); void (*apicv_pre_state_restore)(struct kvm_vcpu *vcpu); void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu); bool (*dy_apicv_has_pending_interrupt)(struct kvm_vcpu *vcpu); + bool (*protected_apic_has_interrupt)(struct kvm_vcpu *vcpu); int (*set_hv_timer)(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc, bool *expired); @@ -1845,6 +1962,8 @@ struct kvm_x86_ops { int (*dev_get_attr)(u32 group, u64 attr, u64 *val); int (*mem_enc_ioctl)(struct kvm *kvm, void __user *argp); + int (*vcpu_mem_enc_ioctl)(struct kvm_vcpu *vcpu, void __user *argp); + int (*vcpu_mem_enc_unlocked_ioctl)(struct kvm_vcpu *vcpu, void __user *argp); int (*mem_enc_register_region)(struct kvm *kvm, struct kvm_enc_region *argp); int (*mem_enc_unregister_region)(struct kvm *kvm, struct kvm_enc_region *argp); int (*vm_copy_enc_context_from)(struct kvm *kvm, unsigned int source_fd); @@ -1860,7 +1979,7 @@ struct kvm_x86_ops { int (*enable_l2_tlb_flush)(struct kvm_vcpu *vcpu); void (*migrate_timers)(struct kvm_vcpu *vcpu); - void (*msr_filter_changed)(struct kvm_vcpu *vcpu); + void (*recalc_intercepts)(struct kvm_vcpu *vcpu); int (*complete_emulated_msr)(struct kvm_vcpu *vcpu, int err); void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector); @@ -1874,7 +1993,7 @@ struct kvm_x86_ops { void *(*alloc_apic_backing_page)(struct kvm_vcpu *vcpu); int (*gmem_prepare)(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order); void (*gmem_invalidate)(kvm_pfn_t start, kvm_pfn_t end); - int (*private_max_mapping_level)(struct kvm *kvm, kvm_pfn_t pfn); + int (*gmem_max_mapping_level)(struct kvm *kvm, kvm_pfn_t pfn, bool is_private); }; struct kvm_x86_nested_ops { @@ -1918,6 +2037,8 @@ struct kvm_arch_async_pf { extern u32 __read_mostly kvm_nr_uret_msrs; extern bool __read_mostly allow_smaller_maxphyaddr; extern bool __read_mostly enable_apicv; +extern bool __read_mostly enable_ipiv; +extern bool __read_mostly enable_device_posted_irqs; extern struct kvm_x86_ops kvm_x86_ops; #define kvm_x86_call(func) static_call(kvm_x86_##func) @@ -1935,7 +2056,7 @@ void kvm_x86_vendor_exit(void); #define __KVM_HAVE_ARCH_VM_ALLOC static inline struct kvm *kvm_arch_alloc_vm(void) { - return __vmalloc(kvm_x86_ops.vm_size, GFP_KERNEL_ACCOUNT | __GFP_ZERO); + return kvzalloc(kvm_x86_ops.vm_size, GFP_KERNEL_ACCOUNT); } #define __KVM_HAVE_ARCH_VM_FREE @@ -1980,7 +2101,7 @@ void kvm_mmu_vendor_module_exit(void); void kvm_mmu_destroy(struct kvm_vcpu *vcpu); int kvm_mmu_create(struct kvm_vcpu *vcpu); -void kvm_mmu_init_vm(struct kvm *kvm); +int kvm_mmu_init_vm(struct kvm *kvm); void kvm_mmu_uninit_vm(struct kvm *kvm); void kvm_mmu_init_memslot_memory_attributes(struct kvm *kvm, @@ -2008,22 +2129,6 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end); int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); -int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, - const void *val, int bytes); - -struct kvm_irq_mask_notifier { - void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked); - int irq; - struct hlist_node link; -}; - -void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq, - struct kvm_irq_mask_notifier *kimn); -void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, - struct kvm_irq_mask_notifier *kimn); -void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, - bool mask); - extern bool tdp_enabled; u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu); @@ -2085,6 +2190,11 @@ u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu); * the gfn, i.e. retrying the instruction will hit a * !PRESENT fault, which results in a new shadow page * and sends KVM back to square one. + * + * EMULTYPE_SKIP_SOFT_INT - Set in combination with EMULTYPE_SKIP to only skip + * an instruction if it could generate a given software + * interrupt, which must be encoded via + * EMULTYPE_SET_SOFT_INT_VECTOR(). */ #define EMULTYPE_NO_DECODE (1 << 0) #define EMULTYPE_TRAP_UD (1 << 1) @@ -2095,6 +2205,10 @@ u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu); #define EMULTYPE_PF (1 << 6) #define EMULTYPE_COMPLETE_USER_EXIT (1 << 7) #define EMULTYPE_WRITE_PF_TO_SP (1 << 8) +#define EMULTYPE_SKIP_SOFT_INT (1 << 9) + +#define EMULTYPE_SET_SOFT_INT_VECTOR(v) ((u32)((v) & 0xff) << 16) +#define EMULTYPE_GET_SOFT_INT_VECTOR(e) (((e) >> 16) & 0xff) static inline bool kvm_can_emulate_event_vectoring(int emul_type) { @@ -2109,16 +2223,20 @@ void __kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu, void kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu); void kvm_prepare_event_vectoring_exit(struct kvm_vcpu *vcpu, gpa_t gpa); +void kvm_prepare_unexpected_reason_exit(struct kvm_vcpu *vcpu, u64 exit_reason); void kvm_enable_efer_bits(u64); bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer); -int kvm_get_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 *data); -int kvm_set_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 data); -int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, bool host_initiated); -int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data); -int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data); +int kvm_emulate_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data); +int kvm_emulate_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data); +int __kvm_emulate_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data); +int __kvm_emulate_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data); +int kvm_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data); +int kvm_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data); int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu); +int kvm_emulate_rdmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg); int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu); +int kvm_emulate_wrmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg); int kvm_emulate_as_nop(struct kvm_vcpu *vcpu); int kvm_emulate_invd(struct kvm_vcpu *vcpu); int kvm_emulate_mwait(struct kvm_vcpu *vcpu); @@ -2150,6 +2268,7 @@ int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val); unsigned long kvm_get_dr(struct kvm_vcpu *vcpu, int dr); unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu); void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw); +int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr); int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu); int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr); @@ -2162,8 +2281,8 @@ int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu); void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr); void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr, unsigned long payload); -void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr); -void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); +void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned int nr, + bool has_error_code, u32 error_code); void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault); void kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault); @@ -2182,9 +2301,6 @@ static inline int __kvm_irq_line_state(unsigned long *irq_state, return !!(*irq_state); } -int kvm_pic_set_irq(struct kvm_pic *pic, int irq, int irq_source_id, int level); -void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id); - void kvm_inject_nmi(struct kvm_vcpu *vcpu); int kvm_get_nr_pending_nmis(struct kvm_vcpu *vcpu); @@ -2229,6 +2345,18 @@ static inline void kvm_clear_apicv_inhibit(struct kvm *kvm, kvm_set_or_clear_apicv_inhibit(kvm, reason, false); } +void kvm_inc_or_dec_irq_window_inhibit(struct kvm *kvm, bool inc); + +static inline void kvm_inc_apicv_irq_window_req(struct kvm *kvm) +{ + kvm_inc_or_dec_irq_window_inhibit(kvm, true); +} + +static inline void kvm_dec_apicv_irq_window_req(struct kvm *kvm) +{ + kvm_inc_or_dec_irq_window_inhibit(kvm, false); +} + int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, void *insn, int insn_len); void kvm_mmu_print_sptes(struct kvm_vcpu *vcpu, gpa_t gpa, const char *msg); @@ -2242,10 +2370,8 @@ void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level, int tdp_max_root_level, int tdp_huge_page_level); -#ifdef CONFIG_KVM_PRIVATE_MEM +#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES #define kvm_arch_has_private_mem(kvm) ((kvm)->arch.has_private_mem) -#else -#define kvm_arch_has_private_mem(kvm) false #endif #define kvm_arch_has_readonly_mem(kvm) (!(kvm)->arch.has_protected_state) @@ -2267,7 +2393,7 @@ static inline unsigned long read_msr(unsigned long msr) { u64 value; - rdmsrl(msr, value); + rdmsrq(msr, value); return value; } #endif @@ -2321,6 +2447,7 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low, int kvm_add_user_return_msr(u32 msr); int kvm_find_user_return_msr(u32 msr); int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask); +u64 kvm_get_user_return_msr(unsigned int slot); static inline bool kvm_is_supported_user_return_msr(u32 msr) { @@ -2357,12 +2484,6 @@ void __user *__x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu); bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu); -bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq, - struct kvm_vcpu **dest_vcpu); - -void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, - struct kvm_lapic_irq *irq); - static inline bool kvm_irq_is_postable(struct kvm_lapic_irq *irq) { /* We can only post Fixed and LowPrio IRQs */ @@ -2404,7 +2525,13 @@ int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages); KVM_X86_QUIRK_FIX_HYPERCALL_INSN | \ KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS | \ KVM_X86_QUIRK_SLOT_ZAP_ALL | \ - KVM_X86_QUIRK_STUFF_FEATURE_MSRS) + KVM_X86_QUIRK_STUFF_FEATURE_MSRS | \ + KVM_X86_QUIRK_IGNORE_GUEST_PAT | \ + KVM_X86_QUIRK_VMCS12_ALLOW_FREEZE_IN_SMM) + +#define KVM_X86_CONDITIONAL_QUIRKS \ + (KVM_X86_QUIRK_CD_NW_CLEARED | \ + KVM_X86_QUIRK_IGNORE_GUEST_PAT) /* * KVM previously used a u32 field in kvm_run to indicate the hypercall was @@ -2413,4 +2540,9 @@ int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages); */ #define KVM_EXIT_HYPERCALL_MBZ GENMASK_ULL(31, 1) +static inline bool kvm_arch_has_irq_bypass(void) +{ + return enable_device_posted_irqs; +} + #endif /* _ASM_X86_KVM_HOST_H */ |
