summaryrefslogtreecommitdiff
path: root/arch/x86/kvm/x86.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r--arch/x86/kvm/x86.c342
1 files changed, 175 insertions, 167 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4b64ab350bcd..c841817a914a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -800,9 +800,9 @@ static void kvm_queue_exception_vmexit(struct kvm_vcpu *vcpu, unsigned int vecto
ex->payload = payload;
}
-static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
- unsigned nr, bool has_error, u32 error_code,
- bool has_payload, unsigned long payload, bool reinject)
+static void kvm_multiple_exception(struct kvm_vcpu *vcpu, unsigned int nr,
+ bool has_error, u32 error_code,
+ bool has_payload, unsigned long payload)
{
u32 prev_nr;
int class1, class2;
@@ -810,13 +810,10 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
kvm_make_request(KVM_REQ_EVENT, vcpu);
/*
- * If the exception is destined for L2 and isn't being reinjected,
- * morph it to a VM-Exit if L1 wants to intercept the exception. A
- * previously injected exception is not checked because it was checked
- * when it was original queued, and re-checking is incorrect if _L1_
- * injected the exception, in which case it's exempt from interception.
+ * If the exception is destined for L2, morph it to a VM-Exit if L1
+ * wants to intercept the exception.
*/
- if (!reinject && is_guest_mode(vcpu) &&
+ if (is_guest_mode(vcpu) &&
kvm_x86_ops.nested_ops->is_exception_vmexit(vcpu, nr, error_code)) {
kvm_queue_exception_vmexit(vcpu, nr, has_error, error_code,
has_payload, payload);
@@ -825,28 +822,9 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
if (!vcpu->arch.exception.pending && !vcpu->arch.exception.injected) {
queue:
- if (reinject) {
- /*
- * On VM-Entry, an exception can be pending if and only
- * if event injection was blocked by nested_run_pending.
- * In that case, however, vcpu_enter_guest() requests an
- * immediate exit, and the guest shouldn't proceed far
- * enough to need reinjection.
- */
- WARN_ON_ONCE(kvm_is_exception_pending(vcpu));
- vcpu->arch.exception.injected = true;
- if (WARN_ON_ONCE(has_payload)) {
- /*
- * A reinjected event has already
- * delivered its payload.
- */
- has_payload = false;
- payload = 0;
- }
- } else {
- vcpu->arch.exception.pending = true;
- vcpu->arch.exception.injected = false;
- }
+ vcpu->arch.exception.pending = true;
+ vcpu->arch.exception.injected = false;
+
vcpu->arch.exception.has_error_code = has_error;
vcpu->arch.exception.vector = nr;
vcpu->arch.exception.error_code = error_code;
@@ -887,29 +865,52 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
{
- kvm_multiple_exception(vcpu, nr, false, 0, false, 0, false);
+ kvm_multiple_exception(vcpu, nr, false, 0, false, 0);
}
EXPORT_SYMBOL_GPL(kvm_queue_exception);
-void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr)
-{
- kvm_multiple_exception(vcpu, nr, false, 0, false, 0, true);
-}
-EXPORT_SYMBOL_GPL(kvm_requeue_exception);
void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr,
unsigned long payload)
{
- kvm_multiple_exception(vcpu, nr, false, 0, true, payload, false);
+ kvm_multiple_exception(vcpu, nr, false, 0, true, payload);
}
EXPORT_SYMBOL_GPL(kvm_queue_exception_p);
static void kvm_queue_exception_e_p(struct kvm_vcpu *vcpu, unsigned nr,
u32 error_code, unsigned long payload)
{
- kvm_multiple_exception(vcpu, nr, true, error_code,
- true, payload, false);
+ kvm_multiple_exception(vcpu, nr, true, error_code, true, payload);
+}
+
+void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned int nr,
+ bool has_error_code, u32 error_code)
+{
+
+ /*
+ * On VM-Entry, an exception can be pending if and only if event
+ * injection was blocked by nested_run_pending. In that case, however,
+ * vcpu_enter_guest() requests an immediate exit, and the guest
+ * shouldn't proceed far enough to need reinjection.
+ */
+ WARN_ON_ONCE(kvm_is_exception_pending(vcpu));
+
+ /*
+ * Do not check for interception when injecting an event for L2, as the
+ * exception was checked for intercept when it was original queued, and
+ * re-checking is incorrect if _L1_ injected the exception, in which
+ * case it's exempt from interception.
+ */
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+
+ vcpu->arch.exception.injected = true;
+ vcpu->arch.exception.has_error_code = has_error_code;
+ vcpu->arch.exception.vector = nr;
+ vcpu->arch.exception.error_code = error_code;
+ vcpu->arch.exception.has_payload = false;
+ vcpu->arch.exception.payload = 0;
}
+EXPORT_SYMBOL_GPL(kvm_requeue_exception);
int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
{
@@ -980,16 +981,10 @@ void kvm_inject_nmi(struct kvm_vcpu *vcpu)
void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
{
- kvm_multiple_exception(vcpu, nr, true, error_code, false, 0, false);
+ kvm_multiple_exception(vcpu, nr, true, error_code, false, 0);
}
EXPORT_SYMBOL_GPL(kvm_queue_exception_e);
-void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
-{
- kvm_multiple_exception(vcpu, nr, true, error_code, false, 0, true);
-}
-EXPORT_SYMBOL_GPL(kvm_requeue_exception_e);
-
/*
* Checks if cpl <= required_cpl; if true, return true. Otherwise queue
* a #GP and return false.
@@ -1264,7 +1259,7 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
vcpu->arch.xcr0 = xcr0;
if ((xcr0 ^ old_xcr0) & XFEATURE_MASK_EXTEND)
- kvm_update_cpuid_runtime(vcpu);
+ vcpu->arch.cpuid_dynamic_bits_dirty = true;
return 0;
}
@@ -2080,10 +2075,20 @@ EXPORT_SYMBOL_GPL(kvm_handle_invalid_op);
static int kvm_emulate_monitor_mwait(struct kvm_vcpu *vcpu, const char *insn)
{
- if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS) &&
- !guest_cpu_cap_has(vcpu, X86_FEATURE_MWAIT))
+ bool enabled;
+
+ if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS))
+ goto emulate_as_nop;
+
+ if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT))
+ enabled = guest_cpu_cap_has(vcpu, X86_FEATURE_MWAIT);
+ else
+ enabled = vcpu->arch.ia32_misc_enable_msr & MSR_IA32_MISC_ENABLE_MWAIT;
+
+ if (!enabled)
return kvm_handle_invalid_op(vcpu);
+emulate_as_nop:
pr_warn_once("%s instruction emulated as NOP!\n", insn);
return kvm_emulate_as_nop(vcpu);
}
@@ -2569,6 +2574,9 @@ EXPORT_SYMBOL_GPL(kvm_calc_nested_tsc_multiplier);
static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 l1_offset)
{
+ if (vcpu->arch.guest_tsc_protected)
+ return;
+
trace_kvm_write_tsc_offset(vcpu->vcpu_id,
vcpu->arch.l1_tsc_offset,
l1_offset);
@@ -2626,12 +2634,18 @@ static inline bool kvm_check_tsc_unstable(void)
* participates in.
*/
static void __kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 offset, u64 tsc,
- u64 ns, bool matched)
+ u64 ns, bool matched, bool user_set_tsc)
{
struct kvm *kvm = vcpu->kvm;
lockdep_assert_held(&kvm->arch.tsc_write_lock);
+ if (vcpu->arch.guest_tsc_protected)
+ return;
+
+ if (user_set_tsc)
+ vcpu->kvm->arch.user_set_tsc = true;
+
/*
* We also track th most recent recorded KHZ, write and time to
* allow the matching interval to be extended at each write.
@@ -2717,8 +2731,6 @@ static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 *user_value)
}
}
- if (user_value)
- kvm->arch.user_set_tsc = true;
/*
* For a reliable TSC, we can match TSC offsets, and for an unstable
@@ -2738,7 +2750,7 @@ static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 *user_value)
matched = true;
}
- __kvm_synchronize_tsc(vcpu, offset, data, ns, matched);
+ __kvm_synchronize_tsc(vcpu, offset, data, ns, matched, !!user_value);
raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
}
@@ -3116,15 +3128,17 @@ u64 get_kvmclock_ns(struct kvm *kvm)
return data.clock;
}
-static void kvm_setup_guest_pvclock(struct kvm_vcpu *v,
+static void kvm_setup_guest_pvclock(struct pvclock_vcpu_time_info *ref_hv_clock,
+ struct kvm_vcpu *vcpu,
struct gfn_to_pfn_cache *gpc,
- unsigned int offset,
- bool force_tsc_unstable)
+ unsigned int offset)
{
- struct kvm_vcpu_arch *vcpu = &v->arch;
struct pvclock_vcpu_time_info *guest_hv_clock;
+ struct pvclock_vcpu_time_info hv_clock;
unsigned long flags;
+ memcpy(&hv_clock, ref_hv_clock, sizeof(hv_clock));
+
read_lock_irqsave(&gpc->lock, flags);
while (!kvm_gpc_check(gpc, offset + sizeof(*guest_hv_clock))) {
read_unlock_irqrestore(&gpc->lock, flags);
@@ -3144,52 +3158,34 @@ static void kvm_setup_guest_pvclock(struct kvm_vcpu *v,
* it is consistent.
*/
- guest_hv_clock->version = vcpu->hv_clock.version = (guest_hv_clock->version + 1) | 1;
+ guest_hv_clock->version = hv_clock.version = (guest_hv_clock->version + 1) | 1;
smp_wmb();
/* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
- vcpu->hv_clock.flags |= (guest_hv_clock->flags & PVCLOCK_GUEST_STOPPED);
-
- if (vcpu->pvclock_set_guest_stopped_request) {
- vcpu->hv_clock.flags |= PVCLOCK_GUEST_STOPPED;
- vcpu->pvclock_set_guest_stopped_request = false;
- }
+ hv_clock.flags |= (guest_hv_clock->flags & PVCLOCK_GUEST_STOPPED);
- memcpy(guest_hv_clock, &vcpu->hv_clock, sizeof(*guest_hv_clock));
-
- if (force_tsc_unstable)
- guest_hv_clock->flags &= ~PVCLOCK_TSC_STABLE_BIT;
+ memcpy(guest_hv_clock, &hv_clock, sizeof(*guest_hv_clock));
smp_wmb();
- guest_hv_clock->version = ++vcpu->hv_clock.version;
+ guest_hv_clock->version = ++hv_clock.version;
kvm_gpc_mark_dirty_in_slot(gpc);
read_unlock_irqrestore(&gpc->lock, flags);
- trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock);
+ trace_kvm_pvclock_update(vcpu->vcpu_id, &hv_clock);
}
-static int kvm_guest_time_update(struct kvm_vcpu *v)
+int kvm_guest_time_update(struct kvm_vcpu *v)
{
+ struct pvclock_vcpu_time_info hv_clock = {};
unsigned long flags, tgt_tsc_khz;
unsigned seq;
struct kvm_vcpu_arch *vcpu = &v->arch;
struct kvm_arch *ka = &v->kvm->arch;
s64 kernel_ns;
u64 tsc_timestamp, host_tsc;
- u8 pvclock_flags;
bool use_master_clock;
-#ifdef CONFIG_KVM_XEN
- /*
- * For Xen guests we may need to override PVCLOCK_TSC_STABLE_BIT as unless
- * explicitly told to use TSC as its clocksource Xen will not set this bit.
- * This default behaviour led to bugs in some guest kernels which cause
- * problems if they observe PVCLOCK_TSC_STABLE_BIT in the pvclock flags.
- */
- bool xen_pvclock_tsc_unstable =
- ka->xen_hvm_config.flags & KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE;
-#endif
kernel_ns = 0;
host_tsc = 0;
@@ -3250,35 +3246,57 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
if (unlikely(vcpu->hw_tsc_khz != tgt_tsc_khz)) {
kvm_get_time_scale(NSEC_PER_SEC, tgt_tsc_khz * 1000LL,
- &vcpu->hv_clock.tsc_shift,
- &vcpu->hv_clock.tsc_to_system_mul);
+ &vcpu->pvclock_tsc_shift,
+ &vcpu->pvclock_tsc_mul);
vcpu->hw_tsc_khz = tgt_tsc_khz;
- kvm_xen_update_tsc_info(v);
}
- vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
- vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
+ hv_clock.tsc_shift = vcpu->pvclock_tsc_shift;
+ hv_clock.tsc_to_system_mul = vcpu->pvclock_tsc_mul;
+ hv_clock.tsc_timestamp = tsc_timestamp;
+ hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
vcpu->last_guest_tsc = tsc_timestamp;
/* If the host uses TSC clocksource, then it is stable */
- pvclock_flags = 0;
+ hv_clock.flags = 0;
if (use_master_clock)
- pvclock_flags |= PVCLOCK_TSC_STABLE_BIT;
+ hv_clock.flags |= PVCLOCK_TSC_STABLE_BIT;
- vcpu->hv_clock.flags = pvclock_flags;
+ if (vcpu->pv_time.active) {
+ /*
+ * GUEST_STOPPED is only supported by kvmclock, and KVM's
+ * historic behavior is to only process the request if kvmclock
+ * is active/enabled.
+ */
+ if (vcpu->pvclock_set_guest_stopped_request) {
+ hv_clock.flags |= PVCLOCK_GUEST_STOPPED;
+ vcpu->pvclock_set_guest_stopped_request = false;
+ }
+ kvm_setup_guest_pvclock(&hv_clock, v, &vcpu->pv_time, 0);
+
+ hv_clock.flags &= ~PVCLOCK_GUEST_STOPPED;
+ }
+
+ kvm_hv_setup_tsc_page(v->kvm, &hv_clock);
- if (vcpu->pv_time.active)
- kvm_setup_guest_pvclock(v, &vcpu->pv_time, 0, false);
#ifdef CONFIG_KVM_XEN
+ /*
+ * For Xen guests we may need to override PVCLOCK_TSC_STABLE_BIT as unless
+ * explicitly told to use TSC as its clocksource Xen will not set this bit.
+ * This default behaviour led to bugs in some guest kernels which cause
+ * problems if they observe PVCLOCK_TSC_STABLE_BIT in the pvclock flags.
+ *
+ * Note! Clear TSC_STABLE only for Xen clocks, i.e. the order matters!
+ */
+ if (ka->xen.hvm_config.flags & KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE)
+ hv_clock.flags &= ~PVCLOCK_TSC_STABLE_BIT;
+
if (vcpu->xen.vcpu_info_cache.active)
- kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_info_cache,
- offsetof(struct compat_vcpu_info, time),
- xen_pvclock_tsc_unstable);
+ kvm_setup_guest_pvclock(&hv_clock, v, &vcpu->xen.vcpu_info_cache,
+ offsetof(struct compat_vcpu_info, time));
if (vcpu->xen.vcpu_time_info_cache.active)
- kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_time_info_cache, 0,
- xen_pvclock_tsc_unstable);
+ kvm_setup_guest_pvclock(&hv_clock, v, &vcpu->xen.vcpu_time_info_cache, 0);
#endif
- kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock);
return 0;
}
@@ -3544,7 +3562,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
sizeof(u64)))
return 1;
- vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS);
+ vcpu->arch.apf.send_always = (data & KVM_ASYNC_PF_SEND_ALWAYS);
vcpu->arch.apf.delivery_as_pf_vmexit = data & KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;
kvm_async_pf_wakeup_all(vcpu);
@@ -3733,7 +3751,13 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
u32 msr = msr_info->index;
u64 data = msr_info->data;
- if (msr && msr == vcpu->kvm->arch.xen_hvm_config.msr)
+ /*
+ * Do not allow host-initiated writes to trigger the Xen hypercall
+ * page setup; it could incur locking paths which are not expected
+ * if userspace sets the MSR in an unusual location.
+ */
+ if (kvm_xen_is_hypercall_page_msr(vcpu->kvm, msr) &&
+ !msr_info->host_initiated)
return kvm_xen_write_hypercall_page(vcpu, data);
switch (msr) {
@@ -3889,7 +3913,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (!guest_cpu_cap_has(vcpu, X86_FEATURE_XMM3))
return 1;
vcpu->arch.ia32_misc_enable_msr = data;
- kvm_update_cpuid_runtime(vcpu);
+ vcpu->arch.cpuid_dynamic_bits_dirty = true;
} else {
vcpu->arch.ia32_misc_enable_msr = data;
}
@@ -3906,7 +3930,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_TSC:
if (msr_info->host_initiated) {
kvm_synchronize_tsc(vcpu, &data);
- } else {
+ } else if (!vcpu->arch.guest_tsc_protected) {
u64 adj = kvm_compute_l1_tsc_offset(vcpu, data) - vcpu->arch.l1_tsc_offset;
adjust_tsc_offset_guest(vcpu, adj);
vcpu->arch.ia32_tsc_adjust_msr += adj;
@@ -3924,7 +3948,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (data & ~kvm_caps.supported_xss)
return 1;
vcpu->arch.ia32_xss = data;
- kvm_update_cpuid_runtime(vcpu);
+ vcpu->arch.cpuid_dynamic_bits_dirty = true;
break;
case MSR_SMI_COUNT:
if (!msr_info->host_initiated)
@@ -4573,6 +4597,11 @@ static bool kvm_is_vm_type_supported(unsigned long type)
return type < 32 && (kvm_caps.supported_vm_types & BIT(type));
}
+static inline u32 kvm_sync_valid_fields(struct kvm *kvm)
+{
+ return kvm && kvm->arch.has_protected_state ? 0 : KVM_SYNC_X86_VALID_FIELDS;
+}
+
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
int r = 0;
@@ -4681,7 +4710,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
break;
#endif
case KVM_CAP_SYNC_REGS:
- r = KVM_SYNC_X86_VALID_FIELDS;
+ r = kvm_sync_valid_fields(kvm);
break;
case KVM_CAP_ADJUST_CLOCK:
r = KVM_CLOCK_VALID_FLAGS;
@@ -4986,7 +5015,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
u64 offset = kvm_compute_l1_tsc_offset(vcpu,
vcpu->arch.last_guest_tsc);
kvm_vcpu_write_tsc_offset(vcpu, offset);
- vcpu->arch.tsc_catchup = 1;
+ if (!vcpu->arch.guest_tsc_protected)
+ vcpu->arch.tsc_catchup = 1;
}
if (kvm_lapic_hv_timer_in_use(vcpu))
@@ -5725,8 +5755,7 @@ static int kvm_arch_tsc_set_attr(struct kvm_vcpu *vcpu,
tsc = kvm_scale_tsc(rdtsc(), vcpu->arch.l1_tsc_scaling_ratio) + offset;
ns = get_kvmclock_base_ns();
- kvm->arch.user_set_tsc = true;
- __kvm_synchronize_tsc(vcpu, offset, tsc, ns, matched);
+ __kvm_synchronize_tsc(vcpu, offset, tsc, ns, matched, true);
raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
r = 0;
@@ -6905,23 +6934,15 @@ static int kvm_arch_suspend_notifier(struct kvm *kvm)
{
struct kvm_vcpu *vcpu;
unsigned long i;
- int ret = 0;
-
- mutex_lock(&kvm->lock);
- kvm_for_each_vcpu(i, vcpu, kvm) {
- if (!vcpu->arch.pv_time.active)
- continue;
- ret = kvm_set_guest_paused(vcpu);
- if (ret) {
- kvm_err("Failed to pause guest VCPU%d: %d\n",
- vcpu->vcpu_id, ret);
- break;
- }
- }
- mutex_unlock(&kvm->lock);
+ /*
+ * Ignore the return, marking the guest paused only "fails" if the vCPU
+ * isn't using kvmclock; continuing on is correct and desirable.
+ */
+ kvm_for_each_vcpu(i, vcpu, kvm)
+ (void)kvm_set_guest_paused(vcpu);
- return ret ? NOTIFY_BAD : NOTIFY_DONE;
+ return NOTIFY_DONE;
}
int kvm_arch_pm_notifier(struct kvm *kvm, unsigned long state)
@@ -11220,9 +11241,7 @@ static inline int vcpu_block(struct kvm_vcpu *vcpu)
switch(vcpu->arch.mp_state) {
case KVM_MP_STATE_HALTED:
case KVM_MP_STATE_AP_RESET_HOLD:
- vcpu->arch.pv.pv_unhalted = false;
- vcpu->arch.mp_state =
- KVM_MP_STATE_RUNNABLE;
+ kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE);
fallthrough;
case KVM_MP_STATE_RUNNABLE:
vcpu->arch.apf.halted = false;
@@ -11299,9 +11318,8 @@ static int __kvm_emulate_halt(struct kvm_vcpu *vcpu, int state, int reason)
++vcpu->stat.halt_exits;
if (lapic_in_kernel(vcpu)) {
if (kvm_vcpu_has_events(vcpu))
- vcpu->arch.pv.pv_unhalted = false;
- else
- vcpu->arch.mp_state = state;
+ state = KVM_MP_STATE_RUNNABLE;
+ kvm_set_mp_state(vcpu, state);
return 1;
} else {
vcpu->run->exit_reason = reason;
@@ -11474,6 +11492,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
struct kvm_queued_exception *ex = &vcpu->arch.exception;
struct kvm_run *kvm_run = vcpu->run;
+ u32 sync_valid_fields;
int r;
r = kvm_mmu_post_init_vm(vcpu->kvm);
@@ -11519,8 +11538,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
goto out;
}
- if ((kvm_run->kvm_valid_regs & ~KVM_SYNC_X86_VALID_FIELDS) ||
- (kvm_run->kvm_dirty_regs & ~KVM_SYNC_X86_VALID_FIELDS)) {
+ sync_valid_fields = kvm_sync_valid_fields(vcpu->kvm);
+ if ((kvm_run->kvm_valid_regs & ~sync_valid_fields) ||
+ (kvm_run->kvm_dirty_regs & ~sync_valid_fields)) {
r = -EINVAL;
goto out;
}
@@ -11578,7 +11598,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
out:
kvm_put_guest_fpu(vcpu);
- if (kvm_run->kvm_valid_regs)
+ if (kvm_run->kvm_valid_regs && likely(!vcpu->arch.guest_state_protected))
store_regs(vcpu);
post_kvm_run_save(vcpu);
kvm_vcpu_srcu_read_unlock(vcpu);
@@ -11821,10 +11841,10 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
goto out;
if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) {
- vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
+ kvm_set_mp_state(vcpu, KVM_MP_STATE_INIT_RECEIVED);
set_bit(KVM_APIC_SIPI, &vcpu->arch.apic->pending_events);
} else
- vcpu->arch.mp_state = mp_state->mp_state;
+ kvm_set_mp_state(vcpu, mp_state->mp_state);
kvm_make_request(KVM_REQ_EVENT, vcpu);
ret = 0;
@@ -11951,7 +11971,7 @@ static int __set_sregs_common(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs,
if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 &&
sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
!is_protmode(vcpu))
- vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+ kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE);
return 0;
}
@@ -12254,9 +12274,9 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
kvm_gpc_init(&vcpu->arch.pv_time, vcpu->kvm);
if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu))
- vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+ kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE);
else
- vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
+ kvm_set_mp_state(vcpu, KVM_MP_STATE_UNINITIALIZED);
r = kvm_mmu_create(vcpu);
if (r < 0)
@@ -12363,6 +12383,9 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
int idx;
+ kvm_clear_async_pf_completion_queue(vcpu);
+ kvm_mmu_unload(vcpu);
+
kvmclock_reset(vcpu);
kvm_x86_call(vcpu_free)(vcpu);
@@ -12756,31 +12779,6 @@ out:
return ret;
}
-static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
-{
- vcpu_load(vcpu);
- kvm_mmu_unload(vcpu);
- vcpu_put(vcpu);
-}
-
-static void kvm_unload_vcpu_mmus(struct kvm *kvm)
-{
- unsigned long i;
- struct kvm_vcpu *vcpu;
-
- kvm_for_each_vcpu(i, vcpu, kvm) {
- kvm_clear_async_pf_completion_queue(vcpu);
- kvm_unload_vcpu_mmu(vcpu);
- }
-}
-
-void kvm_arch_sync_events(struct kvm *kvm)
-{
- cancel_delayed_work_sync(&kvm->arch.kvmclock_sync_work);
- cancel_delayed_work_sync(&kvm->arch.kvmclock_update_work);
- kvm_free_pit(kvm);
-}
-
/**
* __x86_set_memory_region: Setup KVM internal memory slot
*
@@ -12859,6 +12857,17 @@ EXPORT_SYMBOL_GPL(__x86_set_memory_region);
void kvm_arch_pre_destroy_vm(struct kvm *kvm)
{
+ /*
+ * Stop all background workers and kthreads before destroying vCPUs, as
+ * iterating over vCPUs in a different task while vCPUs are being freed
+ * is unsafe, i.e. will lead to use-after-free. The PIT also needs to
+ * be stopped before IRQ routing is freed.
+ */
+ cancel_delayed_work_sync(&kvm->arch.kvmclock_sync_work);
+ cancel_delayed_work_sync(&kvm->arch.kvmclock_update_work);
+
+ kvm_free_pit(kvm);
+
kvm_mmu_pre_destroy_vm(kvm);
}
@@ -12878,9 +12887,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
__x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, 0, 0);
mutex_unlock(&kvm->slots_lock);
}
- kvm_unload_vcpu_mmus(kvm);
kvm_destroy_vcpus(kvm);
- kvm_x86_call(vm_destroy)(kvm);
kvm_free_msr_filter(srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1));
kvm_pic_destroy(kvm);
kvm_ioapic_destroy(kvm);
@@ -12890,6 +12897,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm_page_track_cleanup(kvm);
kvm_xen_destroy_vm(kvm);
kvm_hv_destroy_vm(kvm);
+ kvm_x86_call(vm_destroy)(kvm);
}
static void memslot_rmap_free(struct kvm_memory_slot *slot)
@@ -13383,8 +13391,8 @@ static bool kvm_can_deliver_async_pf(struct kvm_vcpu *vcpu)
if (!kvm_pv_async_pf_enabled(vcpu))
return false;
- if (vcpu->arch.apf.send_user_only &&
- kvm_x86_call(get_cpl)(vcpu) == 0)
+ if (!vcpu->arch.apf.send_always &&
+ (vcpu->arch.guest_state_protected || !kvm_x86_call(get_cpl)(vcpu)))
return false;
if (is_guest_mode(vcpu)) {
@@ -13474,7 +13482,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
}
vcpu->arch.apf.halted = false;
- vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+ kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE);
}
void kvm_arch_async_page_present_queued(struct kvm_vcpu *vcpu)