From e67198cc05b8ecbb7b8e2d8ef9fb5c8d26821873 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Wed, 8 Jun 2022 16:40:25 +0200
Subject: context_tracking: Take idle eqs entrypoints over RCU

The RCU dynticks counter is going to be merged into the context tracking
subsystem. Start with moving the idle extended quiescent states
entrypoints to context tracking. For now those are dumb redirections to
existing RCU calls.

[ paulmck: Apply kernel test robot feedback. ]

Signed-off-by: Frederic Weisbecker
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: Neeraj Upadhyay
Cc: Uladzislau Rezki
Cc: Joel Fernandes
Cc: Boqun Feng
Cc: Nicolas Saenz Julienne
Cc: Marcelo Tosatti
Cc: Xiongfeng Wang
Cc: Yu Liao
Cc: Phil Auld
Cc: Paul Gortmaker
Cc: Alex Belits
Signed-off-by: Paul E. McKenney
Reviewed-by: Nicolas Saenz Julienne
Tested-by: Nicolas Saenz Julienne
---
 include/linux/rcupdate.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'include/linux/rcupdate.h')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 1a32036c918c..6ebe754501c3 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -128,7 +128,7 @@ static inline void rcu_nocb_flush_deferred_wakeup(void) { }
  * @a: Code that RCU needs to pay attention to.
  *
  * RCU read-side critical sections are forbidden in the inner idle loop,
- * that is, between the rcu_idle_enter() and the rcu_idle_exit() -- RCU
+ * that is, between the ct_idle_enter() and the ct_idle_exit() -- RCU
  * will happily ignore any such read-side critical sections. However,
  * things like powertop need tracepoints in the inner idle loop.
  *
-- cgit v1.2.3

From 6f0e6c1598b1a3d19fc30db86b6e26d6f881b43d Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Wed, 8 Jun 2022 16:40:26 +0200
Subject: context_tracking: Take IRQ eqs entrypoints over RCU

The RCU dynticks counter is going to be merged into the context tracking
subsystem. Prepare with moving the IRQ extended quiescent states
entrypoints to context tracking. For now those are dumb redirections to
existing RCU calls.

[ paulmck: Apply Stephen Rothwell feedback from -next. ]
[ paulmck: Apply Nathan Chancellor feedback. ]

Acked-by: Paul E. McKenney
Signed-off-by: Frederic Weisbecker
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: Neeraj Upadhyay
Cc: Uladzislau Rezki
Cc: Joel Fernandes
Cc: Boqun Feng
Cc: Nicolas Saenz Julienne
Cc: Marcelo Tosatti
Cc: Xiongfeng Wang
Cc: Yu Liao
Cc: Phil Auld
Cc: Paul Gortmaker
Cc: Alex Belits
Signed-off-by: Paul E.
McKenney Reviewed-by: Nicolas Saenz Julienne Tested-by: Nicolas Saenz Julienne --- .../RCU/Design/Requirements/Requirements.rst | 10 ++++----- Documentation/RCU/stallwarn.rst | 4 ++-- arch/Kconfig | 2 +- arch/arm64/kernel/entry-common.c | 6 +++--- arch/x86/mm/fault.c | 2 +- drivers/cpuidle/cpuidle-psci.c | 8 ++++---- drivers/cpuidle/cpuidle-riscv-sbi.c | 8 ++++---- include/linux/context_tracking_irq.h | 17 +++++++++++++++ include/linux/context_tracking_state.h | 1 + include/linux/entry-common.h | 10 ++++----- include/linux/rcupdate.h | 5 +++-- include/linux/tracepoint.h | 4 ++-- kernel/cfi.c | 4 ++-- kernel/context_tracking.c | 24 ++++++++++++++++++++-- kernel/cpu_pm.c | 8 ++++---- kernel/entry/common.c | 12 +++++------ kernel/softirq.c | 4 ++-- kernel/trace/trace.c | 6 +++--- 18 files changed, 87 insertions(+), 48 deletions(-) create mode 100644 include/linux/context_tracking_irq.h (limited to 'include/linux/rcupdate.h') diff --git a/Documentation/RCU/Design/Requirements/Requirements.rst b/Documentation/RCU/Design/Requirements/Requirements.rst index 04ed8bf27a0e..074810c73936 100644 --- a/Documentation/RCU/Design/Requirements/Requirements.rst +++ b/Documentation/RCU/Design/Requirements/Requirements.rst @@ -1844,10 +1844,10 @@ that meets this requirement. Furthermore, NMI handlers can be interrupted by what appear to RCU to be normal interrupts. One way that this can happen is for code that -directly invokes rcu_irq_enter() and rcu_irq_exit() to be called +directly invokes ct_irq_enter() and ct_irq_exit() to be called from an NMI handler. This astonishing fact of life prompted the current -code structure, which has rcu_irq_enter() invoking -rcu_nmi_enter() and rcu_irq_exit() invoking rcu_nmi_exit(). +code structure, which has ct_irq_enter() invoking +rcu_nmi_enter() and ct_irq_exit() invoking rcu_nmi_exit(). And yes, I also learned of this requirement the hard way. Loadable Modules @@ -2195,7 +2195,7 @@ scheduling-clock interrupt be enabled when RCU needs it to be: sections, and RCU believes this CPU to be idle, no problem. This sort of thing is used by some architectures for light-weight exception handlers, which can then avoid the overhead of - rcu_irq_enter() and rcu_irq_exit() at exception entry and + ct_irq_enter() and ct_irq_exit() at exception entry and exit, respectively. Some go further and avoid the entireties of irq_enter() and irq_exit(). Just make very sure you are running some of your tests with @@ -2226,7 +2226,7 @@ scheduling-clock interrupt be enabled when RCU needs it to be: +-----------------------------------------------------------------------+ | **Answer**: | +-----------------------------------------------------------------------+ -| One approach is to do ``rcu_irq_exit();rcu_irq_enter();`` every so | +| One approach is to do ``ct_irq_exit();ct_irq_enter();`` every so | | often. But given that long-running interrupt handlers can cause other | | problems, not least for response time, shouldn't you work to keep | | your interrupt handler's runtime within reasonable bounds? 
| diff --git a/Documentation/RCU/stallwarn.rst b/Documentation/RCU/stallwarn.rst index b95bda7755fa..ce1f58a9d954 100644 --- a/Documentation/RCU/stallwarn.rst +++ b/Documentation/RCU/stallwarn.rst @@ -98,11 +98,11 @@ warnings: - A low-level kernel issue that either fails to invoke one of the variants of rcu_user_enter(), rcu_user_exit(), ct_idle_enter(), - ct_idle_exit(), rcu_irq_enter(), or rcu_irq_exit() on the one + ct_idle_exit(), ct_irq_enter(), or ct_irq_exit() on the one hand, or that invokes one of them too many times on the other. Historically, the most frequent issue has been an omission of either irq_enter() or irq_exit(), which in turn invoke - rcu_irq_enter() or rcu_irq_exit(), respectively. Building your + ct_irq_enter() or ct_irq_exit(), respectively. Building your kernel with CONFIG_RCU_EQS_DEBUG=y can help track down these types of issues, which sometimes arise in architecture-specific code. diff --git a/arch/Kconfig b/arch/Kconfig index 154b7b78da09..342642be105f 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -782,7 +782,7 @@ config HAVE_CONTEXT_TRACKING_USER Syscalls need to be wrapped inside user_exit()-user_enter(), either optimized behind static key or through the slow path using TIF_NOHZ flag. Exceptions handlers must be wrapped as well. Irqs are already - protected inside rcu_irq_enter/rcu_irq_exit() but preemption or signal + protected inside ct_irq_enter/ct_irq_exit() but preemption or signal handling on irq exit still need to be protected. config HAVE_CONTEXT_TRACKING_USER_OFFSTACK diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 56cefd33eb8e..8dabe9ec10f1 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -41,7 +41,7 @@ static __always_inline void __enter_from_kernel_mode(struct pt_regs *regs) if (!IS_ENABLED(CONFIG_TINY_RCU) && is_idle_task(current)) { lockdep_hardirqs_off(CALLER_ADDR0); - rcu_irq_enter(); + ct_irq_enter(); trace_hardirqs_off_finish(); regs->exit_rcu = true; @@ -76,7 +76,7 @@ static __always_inline void __exit_to_kernel_mode(struct pt_regs *regs) if (regs->exit_rcu) { trace_hardirqs_on_prepare(); lockdep_hardirqs_on_prepare(); - rcu_irq_exit(); + ct_irq_exit(); lockdep_hardirqs_on(CALLER_ADDR0); return; } @@ -84,7 +84,7 @@ static __always_inline void __exit_to_kernel_mode(struct pt_regs *regs) trace_hardirqs_on(); } else { if (regs->exit_rcu) - rcu_irq_exit(); + ct_irq_exit(); } } diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index fad8faa29d04..971977c438fc 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1526,7 +1526,7 @@ DEFINE_IDTENTRY_RAW_ERRORCODE(exc_page_fault) /* * Entry handling for valid #PF from kernel mode is slightly - * different: RCU is already watching and rcu_irq_enter() must not + * different: RCU is already watching and ct_irq_enter() must not * be invoked because a kernel fault on a user space address might * sleep. * diff --git a/drivers/cpuidle/cpuidle-psci.c b/drivers/cpuidle/cpuidle-psci.c index 540105ca0781..57bc3e3ae391 100644 --- a/drivers/cpuidle/cpuidle-psci.c +++ b/drivers/cpuidle/cpuidle-psci.c @@ -69,12 +69,12 @@ static int __psci_enter_domain_idle_state(struct cpuidle_device *dev, return -1; /* Do runtime PM to manage a hierarchical CPU toplogy. 
*/ - rcu_irq_enter_irqson(); + ct_irq_enter_irqson(); if (s2idle) dev_pm_genpd_suspend(pd_dev); else pm_runtime_put_sync_suspend(pd_dev); - rcu_irq_exit_irqson(); + ct_irq_exit_irqson(); state = psci_get_domain_state(); if (!state) @@ -82,12 +82,12 @@ static int __psci_enter_domain_idle_state(struct cpuidle_device *dev, ret = psci_cpu_suspend_enter(state) ? -1 : idx; - rcu_irq_enter_irqson(); + ct_irq_enter_irqson(); if (s2idle) dev_pm_genpd_resume(pd_dev); else pm_runtime_get_sync(pd_dev); - rcu_irq_exit_irqson(); + ct_irq_exit_irqson(); cpu_pm_exit(); diff --git a/drivers/cpuidle/cpuidle-riscv-sbi.c b/drivers/cpuidle/cpuidle-riscv-sbi.c index 1151e5e2ba82..862a2876f1c9 100644 --- a/drivers/cpuidle/cpuidle-riscv-sbi.c +++ b/drivers/cpuidle/cpuidle-riscv-sbi.c @@ -116,12 +116,12 @@ static int __sbi_enter_domain_idle_state(struct cpuidle_device *dev, return -1; /* Do runtime PM to manage a hierarchical CPU toplogy. */ - rcu_irq_enter_irqson(); + ct_irq_enter_irqson(); if (s2idle) dev_pm_genpd_suspend(pd_dev); else pm_runtime_put_sync_suspend(pd_dev); - rcu_irq_exit_irqson(); + ct_irq_exit_irqson(); if (sbi_is_domain_state_available()) state = sbi_get_domain_state(); @@ -130,12 +130,12 @@ static int __sbi_enter_domain_idle_state(struct cpuidle_device *dev, ret = sbi_suspend(state) ? -1 : idx; - rcu_irq_enter_irqson(); + ct_irq_enter_irqson(); if (s2idle) dev_pm_genpd_resume(pd_dev); else pm_runtime_get_sync(pd_dev); - rcu_irq_exit_irqson(); + ct_irq_exit_irqson(); cpu_pm_exit(); diff --git a/include/linux/context_tracking_irq.h b/include/linux/context_tracking_irq.h new file mode 100644 index 000000000000..62f62bbd1a50 --- /dev/null +++ b/include/linux/context_tracking_irq.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_CONTEXT_TRACKING_IRQ_H +#define _LINUX_CONTEXT_TRACKING_IRQ_H + +#ifdef CONFIG_CONTEXT_TRACKING_IDLE +void ct_irq_enter(void); +void ct_irq_exit(void); +void ct_irq_enter_irqson(void); +void ct_irq_exit_irqson(void); +#else +static inline void ct_irq_enter(void) { } +static inline void ct_irq_exit(void) { } +static inline void ct_irq_enter_irqson(void) { } +static inline void ct_irq_exit_irqson(void) { } +#endif + +#endif diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index 2b46afe105a9..9c16a8b2c194 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -4,6 +4,7 @@ #include #include +#include struct context_tracking { /* diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index c92ac75d6556..84a466b176cf 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -357,7 +357,7 @@ void irqentry_exit_to_user_mode(struct pt_regs *regs); /** * struct irqentry_state - Opaque object for exception state storage * @exit_rcu: Used exclusively in the irqentry_*() calls; signals whether the - * exit path has to invoke rcu_irq_exit(). + * exit path has to invoke ct_irq_exit(). * @lockdep: Used exclusively in the irqentry_nmi_*() calls; ensures that * lockdep state is restored correctly on exit from nmi. * @@ -395,12 +395,12 @@ typedef struct irqentry_state { * * For kernel mode entries RCU handling is done conditional. If RCU is * watching then the only RCU requirement is to check whether the tick has - * to be restarted. If RCU is not watching then rcu_irq_enter() has to be - * invoked on entry and rcu_irq_exit() on exit. + * to be restarted. 
If RCU is not watching then ct_irq_enter() has to be + * invoked on entry and ct_irq_exit() on exit. * - * Avoiding the rcu_irq_enter/exit() calls is an optimization but also + * Avoiding the ct_irq_enter/exit() calls is an optimization but also * solves the problem of kernel mode pagefaults which can schedule, which - * is not possible after invoking rcu_irq_enter() without undoing it. + * is not possible after invoking ct_irq_enter() without undoing it. * * For user mode entries irqentry_enter_from_user_mode() is invoked to * establish the proper context for NOHZ_FULL. Otherwise scheduling on exit diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 6ebe754501c3..f1562d91c67d 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -29,6 +29,7 @@ #include #include #include +#include #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) @@ -143,9 +144,9 @@ static inline void rcu_nocb_flush_deferred_wakeup(void) { } */ #define RCU_NONIDLE(a) \ do { \ - rcu_irq_enter_irqson(); \ + ct_irq_enter_irqson(); \ do { a; } while (0); \ - rcu_irq_exit_irqson(); \ + ct_irq_exit_irqson(); \ } while (0) /* diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 28031b15f878..55717a2eda08 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -200,13 +200,13 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) */ \ if (rcuidle) { \ __idx = srcu_read_lock_notrace(&tracepoint_srcu);\ - rcu_irq_enter_irqson(); \ + ct_irq_enter_irqson(); \ } \ \ __DO_TRACE_CALL(name, TP_ARGS(args)); \ \ if (rcuidle) { \ - rcu_irq_exit_irqson(); \ + ct_irq_exit_irqson(); \ srcu_read_unlock_notrace(&tracepoint_srcu, __idx);\ } \ \ diff --git a/kernel/cfi.c b/kernel/cfi.c index 08102d19ec15..2046276ee234 100644 --- a/kernel/cfi.c +++ b/kernel/cfi.c @@ -295,7 +295,7 @@ static inline cfi_check_fn find_check_fn(unsigned long ptr) rcu_idle = !rcu_is_watching(); if (rcu_idle) { local_irq_save(flags); - rcu_irq_enter(); + ct_irq_enter(); } if (IS_ENABLED(CONFIG_CFI_CLANG_SHADOW)) @@ -304,7 +304,7 @@ static inline cfi_check_fn find_check_fn(unsigned long ptr) fn = find_module_check_fn(ptr); if (rcu_idle) { - rcu_irq_exit(); + ct_irq_exit(); local_irq_restore(flags); } diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c index c0b3798d4e94..72bd71a02c44 100644 --- a/kernel/context_tracking.c +++ b/kernel/context_tracking.c @@ -35,6 +35,26 @@ void ct_idle_exit(void) rcu_idle_exit(); } EXPORT_SYMBOL_GPL(ct_idle_exit); + +noinstr void ct_irq_enter(void) +{ + rcu_irq_enter(); +} + +noinstr void ct_irq_exit(void) +{ + rcu_irq_exit(); +} + +void ct_irq_enter_irqson(void) +{ + rcu_irq_enter_irqson(); +} + +void ct_irq_exit_irqson(void) +{ + rcu_irq_exit_irqson(); +} #endif /* #ifdef CONFIG_CONTEXT_TRACKING_IDLE */ #ifdef CONFIG_CONTEXT_TRACKING_USER @@ -90,7 +110,7 @@ void noinstr __ct_user_enter(enum ctx_state state) * At this stage, only low level arch entry code remains and * then we'll run in userspace. We can assume there won't be * any RCU read-side critical section until the next call to - * user_exit() or rcu_irq_enter(). Let's remove RCU's dependency + * user_exit() or ct_irq_enter(). Let's remove RCU's dependency * on the tick. 
*/ if (state == CONTEXT_USER) { @@ -136,7 +156,7 @@ void ct_user_enter(enum ctx_state state) /* * Some contexts may involve an exception occuring in an irq, * leading to that nesting: - * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit() + * ct_irq_enter() rcu_user_exit() rcu_user_exit() ct_irq_exit() * This would mess up the dyntick_nesting count though. And rcu_irq_*() * helpers are enough to protect RCU uses inside the exception. So * just return immediately if we detect we are in an IRQ. diff --git a/kernel/cpu_pm.c b/kernel/cpu_pm.c index 246efc74e3f3..ba4ba71facf9 100644 --- a/kernel/cpu_pm.c +++ b/kernel/cpu_pm.c @@ -35,11 +35,11 @@ static int cpu_pm_notify(enum cpu_pm_event event) * disfunctional in cpu idle. Copy RCU_NONIDLE code to let RCU know * this. */ - rcu_irq_enter_irqson(); + ct_irq_enter_irqson(); rcu_read_lock(); ret = raw_notifier_call_chain(&cpu_pm_notifier.chain, event, NULL); rcu_read_unlock(); - rcu_irq_exit_irqson(); + ct_irq_exit_irqson(); return notifier_to_errno(ret); } @@ -49,11 +49,11 @@ static int cpu_pm_notify_robust(enum cpu_pm_event event_up, enum cpu_pm_event ev unsigned long flags; int ret; - rcu_irq_enter_irqson(); + ct_irq_enter_irqson(); raw_spin_lock_irqsave(&cpu_pm_notifier.lock, flags); ret = raw_notifier_call_chain_robust(&cpu_pm_notifier.chain, event_up, event_down, NULL); raw_spin_unlock_irqrestore(&cpu_pm_notifier.lock, flags); - rcu_irq_exit_irqson(); + ct_irq_exit_irqson(); return notifier_to_errno(ret); } diff --git a/kernel/entry/common.c b/kernel/entry/common.c index 032f164abe7c..667ba5d581ff 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -321,7 +321,7 @@ noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs) } /* - * If this entry hit the idle task invoke rcu_irq_enter() whether + * If this entry hit the idle task invoke ct_irq_enter() whether * RCU is watching or not. * * Interrupts can nest when the first interrupt invokes softirq @@ -332,12 +332,12 @@ noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs) * not nested into another interrupt. * * Checking for rcu_is_watching() here would prevent the nesting - * interrupt to invoke rcu_irq_enter(). If that nested interrupt is + * interrupt to invoke ct_irq_enter(). If that nested interrupt is * the tick then rcu_flavor_sched_clock_irq() would wrongfully * assume that it is the first interrupt and eventually claim * quiescent state and end grace periods prematurely. * - * Unconditionally invoke rcu_irq_enter() so RCU state stays + * Unconditionally invoke ct_irq_enter() so RCU state stays * consistent. * * TINY_RCU does not support EQS, so let the compiler eliminate @@ -350,7 +350,7 @@ noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs) * as in irqentry_enter_from_user_mode(). */ lockdep_hardirqs_off(CALLER_ADDR0); - rcu_irq_enter(); + ct_irq_enter(); instrumentation_begin(); trace_hardirqs_off_finish(); instrumentation_end(); @@ -418,7 +418,7 @@ noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state) trace_hardirqs_on_prepare(); lockdep_hardirqs_on_prepare(); instrumentation_end(); - rcu_irq_exit(); + ct_irq_exit(); lockdep_hardirqs_on(CALLER_ADDR0); return; } @@ -436,7 +436,7 @@ noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state) * was not watching on entry. 
*/ if (state.exit_rcu) - rcu_irq_exit(); + ct_irq_exit(); } } diff --git a/kernel/softirq.c b/kernel/softirq.c index 9f0aef8aa9ff..c8a6913c067d 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -620,7 +620,7 @@ void irq_enter_rcu(void) */ void irq_enter(void) { - rcu_irq_enter(); + ct_irq_enter(); irq_enter_rcu(); } @@ -672,7 +672,7 @@ void irq_exit_rcu(void) void irq_exit(void) { __irq_exit_rcu(); - rcu_irq_exit(); + ct_irq_exit(); /* must be last! */ lockdep_hardirq_exit(); } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2c95992e2c71..fe78a6818126 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3107,15 +3107,15 @@ void __trace_stack(struct trace_array *tr, unsigned int trace_ctx, /* * When an NMI triggers, RCU is enabled via rcu_nmi_enter(), * but if the above rcu_is_watching() failed, then the NMI - * triggered someplace critical, and rcu_irq_enter() should + * triggered someplace critical, and ct_irq_enter() should * not be called from NMI. */ if (unlikely(in_nmi())) return; - rcu_irq_enter_irqson(); + ct_irq_enter_irqson(); __ftrace_trace_stack(buffer, trace_ctx, skip, NULL); - rcu_irq_exit_irqson(); + ct_irq_exit_irqson(); } /** -- cgit v1.2.3 From 564506495ca96a6e66d077d3d5b9f02d4b9b0f45 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 8 Jun 2022 16:40:32 +0200 Subject: rcu/context-tracking: Move deferred nocb resched to context tracking To prepare for migrating the RCU eqs accounting code to context tracking, split the last-resort deferred nocb resched from rcu_user_enter() and move it into a separate call from context tracking. Acked-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Neeraj Upadhyay Cc: Uladzislau Rezki Cc: Joel Fernandes Cc: Boqun Feng Cc: Nicolas Saenz Julienne Cc: Marcelo Tosatti Cc: Xiongfeng Wang Cc: Yu Liao Cc: Phil Auld Cc: Paul Gortmaker Cc: Alex Belits Signed-off-by: Paul E. McKenney Reviewed-by: Nicolas Saenz Julienne Tested-by: Nicolas Saenz Julienne --- include/linux/rcupdate.h | 6 ++++++ kernel/context_tracking.c | 8 ++++++++ kernel/rcu/tree.c | 15 ++------------- 3 files changed, 16 insertions(+), 13 deletions(-) (limited to 'include/linux/rcupdate.h') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index f1562d91c67d..3717cad983a6 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -112,6 +112,12 @@ static inline void rcu_user_enter(void) { } static inline void rcu_user_exit(void) { } #endif /* CONFIG_NO_HZ_FULL */ +#if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_KVM_XFER_TO_GUEST_WORK)) +void rcu_irq_work_resched(void); +#else +static inline void rcu_irq_work_resched(void) { } +#endif + #ifdef CONFIG_RCU_NOCB_CPU void rcu_init_nohz(void); int rcu_nocb_cpu_offload(int cpu); diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c index 7c3033e9a518..8cf59d8a6af6 100644 --- a/kernel/context_tracking.c +++ b/kernel/context_tracking.c @@ -177,6 +177,8 @@ static __always_inline void context_tracking_recursion_exit(void) */ void noinstr __ct_user_enter(enum ctx_state state) { + lockdep_assert_irqs_disabled(); + /* Kernel threads aren't supposed to go to userspace */ WARN_ON_ONCE(!current->mm); @@ -198,6 +200,12 @@ void noinstr __ct_user_enter(enum ctx_state state) vtime_user_enter(current); instrumentation_end(); } + /* + * Other than generic entry implementation, we may be past the last + * rescheduling opportunity in the entry code. 
Trigger a self IPI + * that will fire and reschedule once we resume in user/guest mode. + */ + rcu_irq_work_resched(); rcu_user_enter(); } /* diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 006939b29e82..8c0c3490532e 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -681,7 +681,7 @@ static DEFINE_PER_CPU(struct irq_work, late_wakeup_work) = * last resort is to fire a local irq_work that will trigger a reschedule once IRQs * get re-enabled again. */ -noinstr static void rcu_irq_work_resched(void) +noinstr void rcu_irq_work_resched(void) { struct rcu_data *rdp = this_cpu_ptr(&rcu_data); @@ -697,10 +697,7 @@ noinstr static void rcu_irq_work_resched(void) } instrumentation_end(); } - -#else -static inline void rcu_irq_work_resched(void) { } -#endif +#endif /* #if !defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_KVM_XFER_TO_GUEST_WORK) */ /** * rcu_user_enter - inform RCU that we are resuming userspace. @@ -715,14 +712,6 @@ static inline void rcu_irq_work_resched(void) { } */ noinstr void rcu_user_enter(void) { - lockdep_assert_irqs_disabled(); - - /* - * Other than generic entry implementation, we may be past the last - * rescheduling opportunity in the entry code. Trigger a self IPI - * that will fire and reschedule once we resume in user/guest mode. - */ - rcu_irq_work_resched(); rcu_eqs_enter(true); } -- cgit v1.2.3 From c33ef43a359001415032665dfcd433979c462b71 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 8 Jun 2022 16:40:34 +0200 Subject: rcu/context-tracking: Remove unused and/or unecessary middle functions Some eqs functions are now only used internally by context tracking, so their public declarations can be removed. Also middle functions such as rcu_user_*() and rcu_idle_*() which now directly call to rcu_eqs_enter() and rcu_eqs_exit() can be wiped out as well. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Neeraj Upadhyay Cc: Uladzislau Rezki Cc: Joel Fernandes Cc: Boqun Feng Cc: Nicolas Saenz Julienne Cc: Marcelo Tosatti Cc: Xiongfeng Wang Cc: Yu Liao Cc: Phil Auld Cc: Paul Gortmaker Cc: Alex Belits Signed-off-by: Paul E. McKenney Reviewed-by: Nicolas Saenz Julienne Tested-by: Nicolas Saenz Julienne --- Documentation/RCU/stallwarn.rst | 2 +- include/linux/hardirq.h | 8 ---- include/linux/rcupdate.h | 8 ---- include/linux/rcutiny.h | 2 - include/linux/rcutree.h | 2 - kernel/context_tracking.c | 98 ++++++++++++----------------------------- 6 files changed, 28 insertions(+), 92 deletions(-) (limited to 'include/linux/rcupdate.h') diff --git a/Documentation/RCU/stallwarn.rst b/Documentation/RCU/stallwarn.rst index ce1f58a9d954..e38c587067fc 100644 --- a/Documentation/RCU/stallwarn.rst +++ b/Documentation/RCU/stallwarn.rst @@ -97,7 +97,7 @@ warnings: which will include additional debugging information. - A low-level kernel issue that either fails to invoke one of the - variants of rcu_user_enter(), rcu_user_exit(), ct_idle_enter(), + variants of rcu_eqs_enter(true), rcu_eqs_exit(true), ct_idle_enter(), ct_idle_exit(), ct_irq_enter(), or ct_irq_exit() on the one hand, or that invokes one of them too many times on the other. 
Historically, the most frequent issue has been an omission diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 345cdbe9c1b7..d57cab4d4c06 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -92,14 +92,6 @@ void irq_exit_rcu(void); #define arch_nmi_exit() do { } while (0) #endif -#ifdef CONFIG_TINY_RCU -static inline void rcu_nmi_enter(void) { } -static inline void rcu_nmi_exit(void) { } -#else -extern void rcu_nmi_enter(void); -extern void rcu_nmi_exit(void); -#endif - /* * NMI vs Tracing * -------------- diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 3717cad983a6..434da1eb88cd 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -104,14 +104,6 @@ static inline void rcu_sysrq_start(void) { } static inline void rcu_sysrq_end(void) { } #endif /* #else #ifdef CONFIG_RCU_STALL_COMMON */ -#ifdef CONFIG_NO_HZ_FULL -void rcu_user_enter(void); -void rcu_user_exit(void); -#else -static inline void rcu_user_enter(void) { } -static inline void rcu_user_exit(void) { } -#endif /* CONFIG_NO_HZ_FULL */ - #if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_KVM_XFER_TO_GUEST_WORK)) void rcu_irq_work_resched(void); #else diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 591119413cf1..900ba35c3582 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -76,8 +76,6 @@ static inline int rcu_needs_cpu(void) static inline void rcu_virt_note_context_switch(int cpu) { } static inline void rcu_cpu_stall_reset(void) { } static inline int rcu_jiffies_till_stall_check(void) { return 21 * HZ; } -static inline void rcu_idle_enter(void) { } -static inline void rcu_idle_exit(void) { } static inline void rcu_irq_exit_check_preempt(void) { } #define rcu_is_idle_cpu(cpu) \ (is_idle_task(current) && !in_nmi() && !in_hardirq() && !in_serving_softirq()) diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 24db1e41695c..9cca00ed9bc9 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -45,8 +45,6 @@ unsigned long start_poll_synchronize_rcu(void); bool poll_state_synchronize_rcu(unsigned long oldstate); void cond_synchronize_rcu(unsigned long oldstate); -void rcu_idle_enter(void); -void rcu_idle_exit(void); bool rcu_is_idle_cpu(int cpu); #ifdef CONFIG_PROVE_RCU diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c index 072c4b6044b3..e485b6b01537 100644 --- a/kernel/context_tracking.c +++ b/kernel/context_tracking.c @@ -189,17 +189,17 @@ static void noinstr rcu_eqs_exit(bool user) } /** - * rcu_nmi_exit - inform RCU of exit from NMI context + * ct_nmi_exit - inform RCU of exit from NMI context * * If we are returning from the outermost NMI handler that interrupted an * RCU-idle period, update ct->dynticks and ct->dynticks_nmi_nesting * to let the RCU grace-period handling know that the CPU is back to * being RCU-idle. * - * If you add or remove a call to rcu_nmi_exit(), be sure to test + * If you add or remove a call to ct_nmi_exit(), be sure to test * with CONFIG_RCU_EQS_DEBUG=y. 
*/ -void noinstr rcu_nmi_exit(void) +void noinstr ct_nmi_exit(void) { struct context_tracking *ct = this_cpu_ptr(&context_tracking); @@ -242,7 +242,7 @@ void noinstr rcu_nmi_exit(void) } /** - * rcu_nmi_enter - inform RCU of entry to NMI context + * ct_nmi_enter - inform RCU of entry to NMI context * * If the CPU was idle from RCU's viewpoint, update ct->dynticks and * ct->dynticks_nmi_nesting to let the RCU grace-period handling know @@ -250,10 +250,10 @@ void noinstr rcu_nmi_exit(void) * long as the nesting level does not overflow an int. (You will probably * run out of stack space first.) * - * If you add or remove a call to rcu_nmi_enter(), be sure to test + * If you add or remove a call to ct_nmi_enter(), be sure to test * with CONFIG_RCU_EQS_DEBUG=y. */ -void noinstr rcu_nmi_enter(void) +void noinstr ct_nmi_enter(void) { long incby = 2; struct context_tracking *ct = this_cpu_ptr(&context_tracking); @@ -302,32 +302,33 @@ void noinstr rcu_nmi_enter(void) } /** - * rcu_idle_enter - inform RCU that current CPU is entering idle + * ct_idle_enter - inform RCU that current CPU is entering idle * * Enter idle mode, in other words, -leave- the mode in which RCU * read-side critical sections can occur. (Though RCU read-side * critical sections can occur in irq handlers in idle, a possibility * handled by irq_enter() and irq_exit().) * - * If you add or remove a call to rcu_idle_enter(), be sure to test with + * If you add or remove a call to ct_idle_enter(), be sure to test with * CONFIG_RCU_EQS_DEBUG=y. */ -void noinstr rcu_idle_enter(void) +void noinstr ct_idle_enter(void) { WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !raw_irqs_disabled()); rcu_eqs_enter(false); } +EXPORT_SYMBOL_GPL(ct_idle_enter); /** - * rcu_idle_exit - inform RCU that current CPU is leaving idle + * ct_idle_exit - inform RCU that current CPU is leaving idle * * Exit idle mode, in other words, -enter- the mode in which RCU * read-side critical sections can occur. * - * If you add or remove a call to rcu_idle_exit(), be sure to test with + * If you add or remove a call to ct_idle_exit(), be sure to test with * CONFIG_RCU_EQS_DEBUG=y. */ -void noinstr rcu_idle_exit(void) +void noinstr ct_idle_exit(void) { unsigned long flags; @@ -335,18 +336,6 @@ void noinstr rcu_idle_exit(void) rcu_eqs_exit(false); raw_local_irq_restore(flags); } -EXPORT_SYMBOL_GPL(rcu_idle_exit); - -noinstr void ct_idle_enter(void) -{ - rcu_idle_enter(); -} -EXPORT_SYMBOL_GPL(ct_idle_enter); - -void ct_idle_exit(void) -{ - rcu_idle_exit(); -} EXPORT_SYMBOL_GPL(ct_idle_exit); /** @@ -431,50 +420,11 @@ void ct_irq_exit_irqson(void) ct_irq_exit(); local_irq_restore(flags); } - -noinstr void ct_nmi_enter(void) -{ - rcu_nmi_enter(); -} - -noinstr void ct_nmi_exit(void) -{ - rcu_nmi_exit(); -} +#else +static __always_inline void rcu_eqs_enter(bool user) { } +static __always_inline void rcu_eqs_exit(bool user) { } #endif /* #ifdef CONFIG_CONTEXT_TRACKING_IDLE */ -#ifdef CONFIG_NO_HZ_FULL -/** - * rcu_user_enter - inform RCU that we are resuming userspace. - * - * Enter RCU idle mode right before resuming userspace. No use of RCU - * is permitted between this call and rcu_user_exit(). This way the - * CPU doesn't need to maintain the tick for RCU maintenance purposes - * when the CPU runs in userspace. - * - * If you add or remove a call to rcu_user_enter(), be sure to test with - * CONFIG_RCU_EQS_DEBUG=y. - */ -noinstr void rcu_user_enter(void) -{ - rcu_eqs_enter(true); -} - -/** - * rcu_user_exit - inform RCU that we are exiting userspace. 
- * - * Exit RCU idle mode while entering the kernel because it can - * run a RCU read side critical section anytime. - * - * If you add or remove a call to rcu_user_exit(), be sure to test with - * CONFIG_RCU_EQS_DEBUG=y. - */ -void noinstr rcu_user_exit(void) -{ - rcu_eqs_exit(true); -} -#endif /* #ifdef CONFIG_NO_HZ_FULL */ - #ifdef CONFIG_CONTEXT_TRACKING_USER #define CREATE_TRACE_POINTS @@ -542,7 +492,13 @@ void noinstr __ct_user_enter(enum ctx_state state) * that will fire and reschedule once we resume in user/guest mode. */ rcu_irq_work_resched(); - rcu_user_enter(); + /* + * Enter RCU idle mode right before resuming userspace. No use of RCU + * is permitted between this call and rcu_eqs_exit(). This way the + * CPU doesn't need to maintain the tick for RCU maintenance purposes + * when the CPU runs in userspace. + */ + rcu_eqs_enter(true); } /* * Even if context tracking is disabled on this CPU, because it's outside @@ -579,7 +535,7 @@ void ct_user_enter(enum ctx_state state) /* * Some contexts may involve an exception occuring in an irq, * leading to that nesting: - * ct_irq_enter() rcu_user_exit() rcu_user_exit() ct_irq_exit() + * ct_irq_enter() rcu_eqs_exit(true) rcu_eqs_enter(true) ct_irq_exit() * This would mess up the dyntick_nesting count though. And rcu_irq_*() * helpers are enough to protect RCU uses inside the exception. So * just return immediately if we detect we are in an IRQ. @@ -631,10 +587,10 @@ void noinstr __ct_user_exit(enum ctx_state state) if (__this_cpu_read(context_tracking.state) == state) { if (__this_cpu_read(context_tracking.active)) { /* - * We are going to run code that may use RCU. Inform - * RCU core about that (ie: we may need the tick again). + * Exit RCU idle mode while entering the kernel because it can + * run a RCU read side critical section anytime. */ - rcu_user_exit(); + rcu_eqs_exit(true); if (state == CONTEXT_USER) { instrumentation_begin(); vtime_user_exit(current); -- cgit v1.2.3
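Editorial note: the caller-side pattern after this series is unchanged, only the names move from rcu_* to the context-tracking entrypoints. The sketch below is illustrative only and is not part of the patches above: my_domain_idle_notify() and my_pm_work() are hypothetical placeholder names, while ct_irq_enter_irqson()/ct_irq_exit_irqson() are the entrypoints the series introduces. It mirrors the kernel/cpu_pm.c and drivers/cpuidle hunks, where code running while RCU is not watching must temporarily leave the extended quiescent state before doing anything RCU-protected.

/*
 * Illustrative sketch only -- not taken from the patches above.
 * my_domain_idle_notify() and my_pm_work() are hypothetical names;
 * ct_irq_enter_irqson()/ct_irq_exit_irqson() are the entrypoints
 * added by this series (formerly rcu_irq_enter_irqson() and
 * rcu_irq_exit_irqson()).
 */
#include <linux/context_tracking_irq.h>
#include <linux/rcupdate.h>

static int my_domain_idle_notify(void)
{
	int ret;

	/*
	 * The CPU may already be in an RCU extended quiescent state
	 * (deep idle), so RCU is not watching.  Leave the EQS via the
	 * context-tracking entrypoints before touching anything that
	 * depends on RCU, such as notifier chains, then re-enter it.
	 */
	ct_irq_enter_irqson();
	rcu_read_lock();
	ret = my_pm_work();	/* hypothetical RCU-protected work */
	rcu_read_unlock();
	ct_irq_exit_irqson();

	return ret;
}

Idle and NMI contexts get the analogous treatment with ct_idle_enter()/ct_idle_exit() and ct_nmi_enter()/ct_nmi_exit(), as the later patches in the series show.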