summaryrefslogtreecommitdiff
path: root/kernel/rcu/tree_exp.h
diff options
context:
space:
mode:
authorPaul E. McKenney <paulmck@linux.vnet.ibm.com>2018-06-21 12:50:01 -0700
committerPaul E. McKenney <paulmck@linux.vnet.ibm.com>2018-08-30 16:02:34 -0700
commit3e31009898699dfca823893054748d85048dc7b3 (patch)
treedb39491a0402a8484e3fd8c71116cf80ef4ea275 /kernel/rcu/tree_exp.h
parentcf7614e13c8fcaf290c5ffaa04b2e1b4f704a52a (diff)
downloadlwn-3e31009898699dfca823893054748d85048dc7b3.tar.gz
lwn-3e31009898699dfca823893054748d85048dc7b3.zip
rcu: Defer reporting RCU-preempt quiescent states when disabled
This commit defers reporting of RCU-preempt quiescent states at rcu_read_unlock_special() time when any of interrupts, softirq, or preemption are disabled. These deferred quiescent states are reported at a later RCU_SOFTIRQ, context switch, idle entry, or CPU-hotplug offline operation. Of course, if another RCU read-side critical section has started in the meantime, the reporting of the quiescent state will be further deferred. This also means that disabling preemption, interrupts, and/or softirqs will act as an RCU-preempt read-side critical section. This is enforced by checking preempt_count() as needed. Some special cases must be handled on an ad-hoc basis, for example, context switch is a quiescent state even though both the scheduler and do_exit() disable preemption. In these cases, additional calls to rcu_preempt_deferred_qs() override the preemption disabling. Similar logic overrides disabled interrupts in rcu_preempt_check_callbacks() because in this case the quiescent state happened just before the corresponding scheduling-clock interrupt. In theory, this change lifts a long-standing restriction that required that if interrupts were disabled across a call to rcu_read_unlock() that the matching rcu_read_lock() also be contained within that interrupts-disabled region of code. Because the reporting of the corresponding RCU-preempt quiescent state is now deferred until after interrupts have been enabled, it is no longer possible for this situation to result in deadlocks involving the scheduler's runqueue and priority-inheritance locks. This may allow some code simplification that might reduce interrupt latency a bit. Unfortunately, in practice this would also defer deboosting a low-priority task that had been subjected to RCU priority boosting, so real-time-response considerations might well force this restriction to remain in place. Because RCU-preempt grace periods are now blocked not only by RCU read-side critical sections, but also by disabling of interrupts, preemption, and softirqs, it will be possible to eliminate RCU-bh and RCU-sched in favor of RCU-preempt in CONFIG_PREEMPT=y kernels. This may require some additional plumbing to provide the network denial-of-service guarantees that have been traditionally provided by RCU-bh. Once these are in place, CONFIG_PREEMPT=n kernels will be able to fold RCU-bh into RCU-sched. This would mean that all kernels would have but one flavor of RCU, which would open the door to significant code cleanup. Moving to a single flavor of RCU would also have the beneficial effect of reducing the NOCB kthreads by at least a factor of two. Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> [ paulmck: Apply rcu_read_unlock_special() preempt_count() feedback from Joel Fernandes. ] [ paulmck: Adjust rcu_eqs_enter() call to rcu_preempt_deferred_qs() in response to bug reports from kbuild test robot. ] [ paulmck: Fix bug located by kbuild test robot involving recursion via rcu_preempt_deferred_qs(). ]
Diffstat (limited to 'kernel/rcu/tree_exp.h')
-rw-r--r--kernel/rcu/tree_exp.h71
1 files changed, 55 insertions, 16 deletions
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 0b2c2ad69629..f9d5bbd8adce 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -262,6 +262,7 @@ static void rcu_report_exp_cpu_mult(struct rcu_state *rsp, struct rcu_node *rnp,
static void rcu_report_exp_rdp(struct rcu_state *rsp, struct rcu_data *rdp,
bool wake)
{
+ WRITE_ONCE(rdp->deferred_qs, false);
rcu_report_exp_cpu_mult(rsp, rdp->mynode, rdp->grpmask, wake);
}
@@ -735,32 +736,70 @@ EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
*/
static void sync_rcu_exp_handler(void *info)
{
- struct rcu_data *rdp;
+ unsigned long flags;
struct rcu_state *rsp = info;
+ struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
+ struct rcu_node *rnp = rdp->mynode;
struct task_struct *t = current;
/*
- * Within an RCU read-side critical section, request that the next
- * rcu_read_unlock() report. Unless this RCU read-side critical
- * section has already blocked, in which case it is already set
- * up for the expedited grace period to wait on it.
+ * First, the common case of not being in an RCU read-side
+ * critical section. If also enabled or idle, immediately
+ * report the quiescent state, otherwise defer.
*/
- if (t->rcu_read_lock_nesting > 0 &&
- !t->rcu_read_unlock_special.b.blocked) {
- t->rcu_read_unlock_special.b.exp_need_qs = true;
+ if (!t->rcu_read_lock_nesting) {
+ if (!(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK)) ||
+ rcu_dynticks_curr_cpu_in_eqs()) {
+ rcu_report_exp_rdp(rsp, rdp, true);
+ } else {
+ rdp->deferred_qs = true;
+ resched_cpu(rdp->cpu);
+ }
return;
}
/*
- * We are either exiting an RCU read-side critical section (negative
- * values of t->rcu_read_lock_nesting) or are not in one at all
- * (zero value of t->rcu_read_lock_nesting). Or we are in an RCU
- * read-side critical section that blocked before this expedited
- * grace period started. Either way, we can immediately report
- * the quiescent state.
+ * Second, the less-common case of being in an RCU read-side
+ * critical section. In this case we can count on a future
+ * rcu_read_unlock(). However, this rcu_read_unlock() might
+ * execute on some other CPU, but in that case there will be
+ * a future context switch. Either way, if the expedited
+ * grace period is still waiting on this CPU, set ->deferred_qs
+ * so that the eventual quiescent state will be reported.
+ * Note that there is a large group of race conditions that
+ * can have caused this quiescent state to already have been
+ * reported, so we really do need to check ->expmask.
*/
- rdp = this_cpu_ptr(rsp->rda);
- rcu_report_exp_rdp(rsp, rdp, true);
+ if (t->rcu_read_lock_nesting > 0) {
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
+ if (rnp->expmask & rdp->grpmask)
+ rdp->deferred_qs = true;
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+ }
+
+ /*
+ * The final and least likely case is where the interrupted
+ * code was just about to or just finished exiting the RCU-preempt
+ * read-side critical section, and no, we can't tell which.
+ * So either way, set ->deferred_qs to flag later code that
+ * a quiescent state is required.
+ *
+ * If the CPU is fully enabled (or if some buggy RCU-preempt
+ * read-side critical section is being used from idle), just
+ * invoke rcu_preempt_defer_qs() to immediately report the
+ * quiescent state. We cannot use rcu_read_unlock_special()
+ * because we are in an interrupt handler, which will cause that
+ * function to take an early exit without doing anything.
+ *
+ * Otherwise, use resched_cpu() to force a context switch after
+ * the CPU enables everything.
+ */
+ rdp->deferred_qs = true;
+ if (!(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK)) ||
+ WARN_ON_ONCE(rcu_dynticks_curr_cpu_in_eqs()))
+ rcu_preempt_deferred_qs(t);
+ else
+ resched_cpu(rdp->cpu);
}
/**