From 28f6bf9e247fe23d177cfdbf7e709270e8cc7fa6 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 27 Feb 2020 09:51:40 +0100 Subject: arm64: Prepare arch_nmi_enter() for recursion When using nmi_enter() recursively, arch_nmi_enter() must also be recursion safe. In particular, it must be ensured that HCR_TGE is always set while in NMI context when in HYP mode, and be restored to it's former state when done. The current code fails this when interleaved wrong. Notably it overwrites the original hcr state on nesting. Introduce a nesting counter to make sure to store the original value. Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Cc: Will Deacon Cc: Catalin Marinas Link: https://lkml.kernel.org/r/20200505134100.771491291@linutronix.de --- arch/arm64/include/asm/hardirq.h | 78 ++++++++++++++++++++++++++++++---------- 1 file changed, 59 insertions(+), 19 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h index 87ad961f3c97..985493af704b 100644 --- a/arch/arm64/include/asm/hardirq.h +++ b/arch/arm64/include/asm/hardirq.h @@ -32,30 +32,70 @@ u64 smp_irq_stat_cpu(unsigned int cpu); struct nmi_ctx { u64 hcr; + unsigned int cnt; }; DECLARE_PER_CPU(struct nmi_ctx, nmi_contexts); -#define arch_nmi_enter() \ - do { \ - if (is_kernel_in_hyp_mode()) { \ - struct nmi_ctx *nmi_ctx = this_cpu_ptr(&nmi_contexts); \ - nmi_ctx->hcr = read_sysreg(hcr_el2); \ - if (!(nmi_ctx->hcr & HCR_TGE)) { \ - write_sysreg(nmi_ctx->hcr | HCR_TGE, hcr_el2); \ - isb(); \ - } \ - } \ - } while (0) +#define arch_nmi_enter() \ +do { \ + struct nmi_ctx *___ctx; \ + u64 ___hcr; \ + \ + if (!is_kernel_in_hyp_mode()) \ + break; \ + \ + ___ctx = this_cpu_ptr(&nmi_contexts); \ + if (___ctx->cnt) { \ + ___ctx->cnt++; \ + break; \ + } \ + \ + ___hcr = read_sysreg(hcr_el2); \ + if (!(___hcr & HCR_TGE)) { \ + write_sysreg(___hcr | HCR_TGE, hcr_el2); \ + isb(); \ + } \ + /* \ + * Make sure the sysreg write is performed before ___ctx->cnt \ + * is set to 1. NMIs that see cnt == 1 will rely on us. \ + */ \ + barrier(); \ + ___ctx->cnt = 1; \ + /* \ + * Make sure ___ctx->cnt is set before we save ___hcr. We \ + * don't want ___ctx->hcr to be overwritten. \ + */ \ + barrier(); \ + ___ctx->hcr = ___hcr; \ +} while (0) -#define arch_nmi_exit() \ - do { \ - if (is_kernel_in_hyp_mode()) { \ - struct nmi_ctx *nmi_ctx = this_cpu_ptr(&nmi_contexts); \ - if (!(nmi_ctx->hcr & HCR_TGE)) \ - write_sysreg(nmi_ctx->hcr, hcr_el2); \ - } \ - } while (0) +#define arch_nmi_exit() \ +do { \ + struct nmi_ctx *___ctx; \ + u64 ___hcr; \ + \ + if (!is_kernel_in_hyp_mode()) \ + break; \ + \ + ___ctx = this_cpu_ptr(&nmi_contexts); \ + ___hcr = ___ctx->hcr; \ + /* \ + * Make sure we read ___ctx->hcr before we release \ + * ___ctx->cnt as it makes ___ctx->hcr updatable again. \ + */ \ + barrier(); \ + ___ctx->cnt--; \ + /* \ + * Make sure ___ctx->cnt release is visible before we \ + * restore the sysreg. Otherwise a new NMI occurring \ + * right after write_sysreg() can be fooled and think \ + * we secured things for it. \ + */ \ + barrier(); \ + if (!___ctx->cnt && !(___hcr & HCR_TGE)) \ + write_sysreg(___hcr, hcr_el2); \ +} while (0) static inline void ack_bad_irq(unsigned int irq) { -- cgit v1.2.3 From 69ea03b56ed2c7189ccd0b5910ad39f3cad1df21 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 19 Feb 2020 09:46:47 +0100 Subject: hardirq/nmi: Allow nested nmi_enter() Since there are already a number of sites (ARM64, PowerPC) that effectively nest nmi_enter(), make the primitive support this before adding even more. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Acked-by: Marc Zyngier Acked-by: Will Deacon Cc: Michael Ellerman Link: https://lkml.kernel.org/r/20200505134100.864179229@linutronix.de --- arch/arm64/kernel/sdei.c | 14 ++------------ arch/arm64/kernel/traps.c | 8 ++------ arch/powerpc/kernel/traps.c | 22 ++++++---------------- include/linux/hardirq.h | 5 ++++- include/linux/preempt.h | 4 ++-- 5 files changed, 16 insertions(+), 37 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c index d6259dac62b6..e396e69e33a1 100644 --- a/arch/arm64/kernel/sdei.c +++ b/arch/arm64/kernel/sdei.c @@ -251,22 +251,12 @@ asmlinkage __kprobes notrace unsigned long __sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg) { unsigned long ret; - bool do_nmi_exit = false; - /* - * nmi_enter() deals with printk() re-entrance and use of RCU when - * RCU believed this CPU was idle. Because critical events can - * interrupt normal events, we may already be in_nmi(). - */ - if (!in_nmi()) { - nmi_enter(); - do_nmi_exit = true; - } + nmi_enter(); ret = _sdei_handler(regs, arg); - if (do_nmi_exit) - nmi_exit(); + nmi_exit(); return ret; } diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index cf402be5c573..c728f163f329 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -906,17 +906,13 @@ bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned int esr) asmlinkage void do_serror(struct pt_regs *regs, unsigned int esr) { - const bool was_in_nmi = in_nmi(); - - if (!was_in_nmi) - nmi_enter(); + nmi_enter(); /* non-RAS errors are not containable */ if (!arm64_is_ras_serror(esr) || arm64_is_fatal_ras_serror(regs, esr)) arm64_serror_panic(regs, esr); - if (!was_in_nmi) - nmi_exit(); + nmi_exit(); } asmlinkage void enter_from_user_mode(void) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 3fca22276bb1..b44dd75de517 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -441,15 +441,9 @@ nonrecoverable: void system_reset_exception(struct pt_regs *regs) { unsigned long hsrr0, hsrr1; - bool nested = in_nmi(); bool saved_hsrrs = false; - /* - * Avoid crashes in case of nested NMI exceptions. Recoverability - * is determined by RI and in_nmi - */ - if (!nested) - nmi_enter(); + nmi_enter(); /* * System reset can interrupt code where HSRRs are live and MSR[RI]=1. @@ -521,8 +515,7 @@ out: mtspr(SPRN_HSRR1, hsrr1); } - if (!nested) - nmi_exit(); + nmi_exit(); /* What should we do here? We could issue a shutdown or hard reset. */ } @@ -823,9 +816,8 @@ int machine_check_generic(struct pt_regs *regs) void machine_check_exception(struct pt_regs *regs) { int recover = 0; - bool nested = in_nmi(); - if (!nested) - nmi_enter(); + + nmi_enter(); __this_cpu_inc(irq_stat.mce_exceptions); @@ -851,8 +843,7 @@ void machine_check_exception(struct pt_regs *regs) if (check_io_access(regs)) goto bail; - if (!nested) - nmi_exit(); + nmi_exit(); die("Machine check", regs, SIGBUS); @@ -863,8 +854,7 @@ void machine_check_exception(struct pt_regs *regs) return; bail: - if (!nested) - nmi_exit(); + nmi_exit(); } void SMIException(struct pt_regs *regs) diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 7c8b82f69288..a043ad826c67 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -65,13 +65,16 @@ extern void irq_exit(void); #define arch_nmi_exit() do { } while (0) #endif +/* + * nmi_enter() can nest up to 15 times; see NMI_BITS. + */ #define nmi_enter() \ do { \ arch_nmi_enter(); \ printk_nmi_enter(); \ lockdep_off(); \ ftrace_nmi_enter(); \ - BUG_ON(in_nmi()); \ + BUG_ON(in_nmi() == NMI_MASK); \ preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET); \ rcu_nmi_enter(); \ lockdep_hardirq_enter(); \ diff --git a/include/linux/preempt.h b/include/linux/preempt.h index bc3f1aecaa19..7d9c1c0e149c 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -26,13 +26,13 @@ * PREEMPT_MASK: 0x000000ff * SOFTIRQ_MASK: 0x0000ff00 * HARDIRQ_MASK: 0x000f0000 - * NMI_MASK: 0x00100000 + * NMI_MASK: 0x00f00000 * PREEMPT_NEED_RESCHED: 0x80000000 */ #define PREEMPT_BITS 8 #define SOFTIRQ_BITS 8 #define HARDIRQ_BITS 4 -#define NMI_BITS 1 +#define NMI_BITS 4 #define PREEMPT_SHIFT 0 #define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS) -- cgit v1.2.3