diff options
author | Nicholas Piggin <npiggin@gmail.com> | 2021-05-28 19:07:30 +1000 |
---|---|---|
committer | Michael Ellerman <mpe@ellerman.id.au> | 2021-06-10 22:12:13 +1000 |
commit | 6ffe2c6e6dcefb971e4046f02086c4adadd0b310 (patch) | |
tree | 43d7e2faab8483ebd512150de075fdcc3b1b2b46 | |
parent | 413679e73bdfc2720dc2fa2172b65b7411185fa7 (diff) | |
download | lwn-6ffe2c6e6dcefb971e4046f02086c4adadd0b310.tar.gz lwn-6ffe2c6e6dcefb971e4046f02086c4adadd0b310.zip |
KVM: PPC: Book3S HV P9: Reduce irq_work vs guest decrementer races
irq_work's use of the DEC SPR is racy with guest<->host switch and guest
entry which flips the DEC interrupt to guest, which could lose a host
work interrupt.
This patch closes one race, and attempts to comment another class of
races.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210528090752.3542186-11-npiggin@gmail.com
-rw-r--r-- | arch/powerpc/include/asm/time.h | 12 | ||||
-rw-r--r-- | arch/powerpc/kernel/time.c | 10 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv.c | 15 |
3 files changed, 27 insertions, 10 deletions
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index 8dd3cdb25338..8c2c3dd4ddba 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -97,6 +97,18 @@ extern void div128_by_32(u64 dividend_high, u64 dividend_low, extern void secondary_cpu_time_init(void); extern void __init time_init(void); +#ifdef CONFIG_PPC64 +static inline unsigned long test_irq_work_pending(void) +{ + unsigned long x; + + asm volatile("lbz %0,%1(13)" + : "=r" (x) + : "i" (offsetof(struct paca_struct, irq_work_pending))); + return x; +} +#endif + DECLARE_PER_CPU(u64, decrementers_next_tb); /* Convert timebase ticks to nanoseconds */ diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index b67d93a609a2..da995c5fb97d 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -508,16 +508,6 @@ EXPORT_SYMBOL(profile_pc); * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable... */ #ifdef CONFIG_PPC64 -static inline unsigned long test_irq_work_pending(void) -{ - unsigned long x; - - asm volatile("lbz %0,%1(13)" - : "=r" (x) - : "i" (offsetof(struct paca_struct, irq_work_pending))); - return x; -} - static inline void set_irq_work_pending_flag(void) { asm volatile("stb %0,%1(13)" : : diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 466d62b35b6a..d82ff7fe8ac7 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -3708,6 +3708,18 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, if (!(vcpu->arch.ctrl & 1)) mtspr(SPRN_CTRLT, mfspr(SPRN_CTRLF) & ~1); + /* + * When setting DEC, we must always deal with irq_work_raise via NMI vs + * setting DEC. The problem occurs right as we switch into guest mode + * if a NMI hits and sets pending work and sets DEC, then that will + * apply to the guest and not bring us back to the host. + * + * irq_work_raise could check a flag (or possibly LPCR[HDICE] for + * example) and set HDEC to 1? That wouldn't solve the nested hv + * case which needs to abort the hcall or zero the time limit. + * + * XXX: Another day's problem. + */ mtspr(SPRN_DEC, vcpu->arch.dec_expires - mftb()); if (kvmhv_on_pseries()) { @@ -3822,6 +3834,9 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, vc->in_guest = 0; mtspr(SPRN_DEC, local_paca->kvm_hstate.dec_expires - mftb()); + /* We may have raced with new irq work */ + if (test_irq_work_pending()) + set_dec(1); mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso); kvmhv_load_host_pmu(); |