author | Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 2018-04-30 10:57:36 -0700
committer | Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 2018-07-12 14:27:55 -0700
commit | 29365e563b1e4e5bfde211280d37dc6127c019ed (patch)
tree | 867de647cb22b8309caa2ba947c16cbedd154f8f /kernel/rcu/tree.c
parent | d43a5d32e125db1e34641922e52baaa4fee3510a (diff)
download | lwn-29365e563b1e4e5bfde211280d37dc6127c019ed.tar.gz lwn-29365e563b1e4e5bfde211280d37dc6127c019ed.zip
rcu: Convert grace-period requests to ->gp_seq
This commit converts the grace-period request code paths from ->completed
and ->gpnum to ->gp_seq. The need_future_gp_element() macro encapsulates
the shift operation required to use ->gp_seq as an index to the
->need_future_gp[] array. The rcu_cbs_completed() function is removed
in favor of the rcu_seq_snap() function. The rcu_start_this_gp()
function gets some temporary consistency checks and uses rcu_seq_done(),
rcu_seq_current(), rcu_seq_state(), and rcu_gp_in_progress() in place
of the earlier open-coded comparisons of ->gpnum and ->completed.
The rcu_future_gp_cleanup() function replaces use of ->completed
with ->gp_seq. The rcu_accelerate_cbs() function replaces a call to
rcu_cbs_completed() with one to rcu_seq_snap(). The rcu_advance_cbs()
function replaces an access to ->completed with one to ->gp_seq and adds
some temporary warnings. The rcu_nocb_wait_gp() function replaces a
call to rcu_cbs_completed() with one to rcu_seq_snap() and an open-coded
comparison with rcu_seq_done().
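As background, here is a minimal stand-alone C sketch of the semantics
of the helpers named above, modeled on their kernel/rcu/rcu.h
definitions as of this commit; it is a user-space illustration only, so
READ_ONCE() and the memory barrier in the kernel's rcu_seq_snap() are
omitted. The low-order RCU_SEQ_CTR_SHIFT bits of ->gp_seq hold
grace-period state and the upper bits count grace periods; this same
counter shift is what need_future_gp_element() relies on to index the
->need_future_gp[] array.

#include <stdio.h>
#include <stdbool.h>

/* Bottom bits of ->gp_seq hold grace-period state; upper bits count GPs. */
#define RCU_SEQ_CTR_SHIFT   2
#define RCU_SEQ_STATE_MASK  ((1UL << RCU_SEQ_CTR_SHIFT) - 1)

static unsigned long rcu_seq_ctr(unsigned long s)
{
        return s >> RCU_SEQ_CTR_SHIFT;
}

static unsigned long rcu_seq_state(unsigned long s)
{
        return s & RCU_SEQ_STATE_MASK;
}

static unsigned long rcu_seq_current(unsigned long *sp)
{
        return *sp;     /* kernel version uses READ_ONCE() */
}

/*
 * Return the ->gp_seq value that will mark the end of a full grace
 * period that has not yet started: round up past any GP in progress.
 */
static unsigned long rcu_seq_snap(unsigned long *sp)
{
        return (*sp + 2 * RCU_SEQ_STATE_MASK + 1) & ~RCU_SEQ_STATE_MASK;
}

/* Has the grace period captured in snapshot s completed? */
static bool rcu_seq_done(unsigned long *sp, unsigned long s)
{
        return (long)(*sp - s) >= 0;    /* wrap-safe, like ULONG_CMP_GE() */
}

int main(void)
{
        unsigned long gp_seq = 0;       /* idle: state 0, zero GPs completed */
        unsigned long c = rcu_seq_snap(&gp_seq);

        printf("snapshot while idle: c = %lu (counter %lu)\n",
               c, rcu_seq_ctr(c));
        gp_seq++;       /* rcu_seq_start(): GP now in progress, state 1 */
        printf("mid-GP: state = %lu, done = %d\n",
               rcu_seq_state(rcu_seq_current(&gp_seq)),
               rcu_seq_done(&gp_seq, c));
        gp_seq = (gp_seq | RCU_SEQ_STATE_MASK) + 1;     /* rcu_seq_end() */
        printf("after GP: gp_seq = %lu, done = %d\n",
               gp_seq, rcu_seq_done(&gp_seq, c));
        return 0;
}

Note that a snapshot taken while a grace period is already in progress
rounds up past it to the end of the following full grace period, which
is the partial-plus-full wait that the removed rcu_cbs_completed()
expressed as its "->completed + 2" return path.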
The temporary warnings will be removed when the various ->gpnum and
->completed fields are removed. Their purpose is to locate code that
might still be using ->gpnum and ->completed. (Much easier that way
than trying to track down the causes of too-short grace periods and
grace-period hangs!)
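To make the double-shift consistency check in the patch below concrete,
here is a hypothetical user-space reduction of it; the helper name
seq_mismatch() is invented for illustration. Shifting ->completed left
and back right truncates its top RCU_SEQ_CTR_SHIFT bits so that it can
be compared against rcu_seq_ctr(->gp_seq), whose counter gave up those
bits to the state field.

#include <stdio.h>

#define RCU_SEQ_CTR_SHIFT 2

/*
 * Hypothetical reduction of the temporary check: does the old-style
 * ->completed count disagree with the counter portion of ->gp_seq?
 */
static int seq_mismatch(unsigned long completed, unsigned long gp_seq)
{
        unsigned long trunc;

        /* Drop ->completed's top bits, which rcu_seq_ctr() cannot hold. */
        trunc = (completed << RCU_SEQ_CTR_SHIFT) >> RCU_SEQ_CTR_SHIFT;
        return trunc != (gp_seq >> RCU_SEQ_CTR_SHIFT);
}

int main(void)
{
        /* In sync: five GPs completed, ->gp_seq counter also five, idle. */
        printf("in sync:  %d\n", seq_mismatch(5, 5UL << RCU_SEQ_CTR_SHIFT));
        /* Stale ->completed lagging an advanced ->gp_seq: would WARN. */
        printf("mismatch: %d\n", seq_mismatch(4, 5UL << RCU_SEQ_CTR_SHIFT));
        return 0;
}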
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Diffstat (limited to 'kernel/rcu/tree.c')
-rw-r--r-- | kernel/rcu/tree.c | 84
1 file changed, 20 insertions(+), 64 deletions(-)
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index da2ca9cc078b..ffa45b6175d9 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1548,52 +1548,6 @@ void rcu_cpu_stall_reset(void)
                 WRITE_ONCE(rsp->jiffies_stall, jiffies + ULONG_MAX / 2);
 }
 
-/*
- * Determine the value that ->completed will have at the end of the
- * next subsequent grace period.  This is used to tag callbacks so that
- * a CPU can invoke callbacks in a timely fashion even if that CPU has
- * been dyntick-idle for an extended period with callbacks under the
- * influence of RCU_FAST_NO_HZ.
- *
- * The caller must hold rnp->lock with interrupts disabled.
- */
-static unsigned long rcu_cbs_completed(struct rcu_state *rsp,
-                                       struct rcu_node *rnp)
-{
-        raw_lockdep_assert_held_rcu_node(rnp);
-
-        /*
-         * If RCU is idle, we just wait for the next grace period.
-         * But we can only be sure that RCU is idle if we are looking
-         * at the root rcu_node structure -- otherwise, a new grace
-         * period might have started, but just not yet gotten around
-         * to initializing the current non-root rcu_node structure.
-         */
-        if (rcu_get_root(rsp) == rnp && rnp->gpnum == rnp->completed)
-                return rnp->completed + 1;
-
-        /*
-         * If the current rcu_node structure believes that RCU is
-         * idle, and if the rcu_state structure does not yet reflect
-         * the start of a new grace period, then the next grace period
-         * will suffice.  The memory barrier is needed to accurately
-         * sample the rsp->gpnum, and pairs with the second lock
-         * acquisition in rcu_gp_init(), which is augmented with
-         * smp_mb__after_unlock_lock() for this purpose.
-         */
-        if (rnp->gpnum == rnp->completed) {
-                smp_mb(); /* See above block comment. */
-                if (READ_ONCE(rsp->gpnum) == rnp->completed)
-                        return rnp->completed + 1;
-        }
-
-        /*
-         * Otherwise, wait for a possible partial grace period and
-         * then the subsequent full grace period.
-         */
-        return rnp->completed + 2;
-}
-
 /* Trace-event wrapper function for trace_rcu_future_grace_period.  */
 static void trace_rcu_this_gp(struct rcu_node *rnp, struct rcu_data *rdp,
                               unsigned long c, const char *s)
@@ -1629,16 +1583,16 @@ static bool rcu_start_this_gp(struct rcu_node *rnp, struct rcu_data *rdp,
          * not be released.
          */
         raw_lockdep_assert_held_rcu_node(rnp);
+        WARN_ON_ONCE(c & 0x2); /* Catch any lingering use of ->gpnum. */
+        WARN_ON_ONCE(((rnp->completed << RCU_SEQ_CTR_SHIFT) >> RCU_SEQ_CTR_SHIFT) != rcu_seq_ctr(rnp->gp_seq)); /* Catch any ->completed/->gp_seq mismatches. */
         trace_rcu_this_gp(rnp, rdp, c, TPS("Startleaf"));
         for (rnp_root = rnp; 1; rnp_root = rnp_root->parent) {
                 if (rnp_root != rnp)
                         raw_spin_lock_rcu_node(rnp_root);
-                WARN_ON_ONCE(ULONG_CMP_LT(rnp_root->gpnum +
-                                          need_future_gp_mask(), c));
                 if (need_future_gp_element(rnp_root, c) ||
-                    ULONG_CMP_GE(rnp_root->gpnum, c) ||
+                    rcu_seq_done(&rnp_root->gp_seq, c) ||
                     (rnp != rnp_root &&
-                     rnp_root->gpnum != rnp_root->completed)) {
+                     rcu_seq_state(rcu_seq_current(&rnp_root->gp_seq)))) {
                         trace_rcu_this_gp(rnp_root, rdp, c, TPS("Prestarted"));
                         goto unlock_out;
                 }
@@ -1650,7 +1604,7 @@ static bool rcu_start_this_gp(struct rcu_node *rnp, struct rcu_data *rdp,
         }
 
         /* If GP already in progress, just leave, otherwise start one. */
-        if (rnp_root->gpnum != rnp_root->completed) {
+        if (rcu_gp_in_progress(rsp)) {
                 trace_rcu_this_gp(rnp_root, rdp, c, TPS("Startedleafroot"));
                 goto unlock_out;
         }
@@ -1675,7 +1629,7 @@ unlock_out:
  */
 static bool rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
 {
-        unsigned long c = rnp->completed;
+        unsigned long c = rnp->gp_seq;
         bool needmore;
         struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
 
@@ -1703,14 +1657,14 @@ static void rcu_gp_kthread_wake(struct rcu_state *rsp)
 }
 
 /*
- * If there is room, assign a ->completed number to any callbacks on
- * this CPU that have not already been assigned.  Also accelerate any
- * callbacks that were previously assigned a ->completed number that has
- * since proven to be too conservative, which can happen if callbacks get
- * assigned a ->completed number while RCU is idle, but with reference to
- * a non-root rcu_node structure.  This function is idempotent, so it does
- * not hurt to call it repeatedly.  Returns an flag saying that we should
- * awaken the RCU grace-period kthread.
+ * If there is room, assign a ->gp_seq number to any callbacks on this
+ * CPU that have not already been assigned.  Also accelerate any callbacks
+ * that were previously assigned a ->gp_seq number that has since proven
+ * to be too conservative, which can happen if callbacks get assigned a
+ * ->gp_seq number while RCU is idle, but with reference to a non-root
+ * rcu_node structure.  This function is idempotent, so it does not hurt
+ * to call it repeatedly.  Returns an flag saying that we should awaken
+ * the RCU grace-period kthread.
  *
  * The caller must hold rnp->lock with interrupts disabled.
  */
@@ -1736,7 +1690,7 @@ static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
          * accelerating callback invocation to an earlier grace-period
          * number.
          */
-        c = rcu_cbs_completed(rsp, rnp);
+        c = rcu_seq_snap(&rsp->gp_seq);
         if (rcu_segcblist_accelerate(&rdp->cblist, c))
                 ret = rcu_start_this_gp(rnp, rdp, c);
 
@@ -1751,7 +1705,7 @@ static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
 /*
  * Move any callbacks whose grace period has completed to the
  * RCU_DONE_TAIL sublist, then compact the remaining sublists and
- * assign ->completed numbers to any callbacks in the RCU_NEXT_TAIL
+ * assign ->gp_seq numbers to any callbacks in the RCU_NEXT_TAIL
  * sublist.  This function is idempotent, so it does not hurt to
  * invoke it repeatedly.  As long as it is not invoked -too- often...
  * Returns true if the RCU grace-period kthread needs to be awakened.
@@ -1768,10 +1722,10 @@ static bool rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
                 return false;
 
         /*
-         * Find all callbacks whose ->completed numbers indicate that they
+         * Find all callbacks whose ->gp_seq numbers indicate that they
          * are ready to invoke, and put them into the RCU_DONE_TAIL sublist.
          */
-        rcu_segcblist_advance(&rdp->cblist, rnp->completed);
+        rcu_segcblist_advance(&rdp->cblist, rnp->gp_seq);
 
         /* Classify any remaining callbacks. */
         return rcu_accelerate_cbs(rsp, rnp, rdp);
@@ -1889,6 +1843,8 @@ static bool rcu_gp_init(struct rcu_state *rsp)
         smp_store_release(&rsp->gpnum, rsp->gpnum + 1);
         smp_mb(); /* Pairs with barriers in stall-warning code. */
         rcu_seq_start(&rsp->gp_seq);
+        if (WARN_ON_ONCE(((rnp->completed << RCU_SEQ_CTR_SHIFT) >> RCU_SEQ_CTR_SHIFT) != rcu_seq_ctr(rnp->gp_seq))) /* Catch any ->completed/->gp_seq mismatches. */
+                pr_info("%s ->completed: %#lx (%#lx) ->gp_seq %#lx (%#lx)\n", __func__, rnp->completed, (rnp->completed << RCU_SEQ_CTR_SHIFT) >> RCU_SEQ_CTR_SHIFT, rnp->gp_seq, rcu_seq_ctr(rnp->gp_seq));
         trace_rcu_grace_period(rsp->name, rsp->gpnum, TPS("start"));
         raw_spin_unlock_irq_rcu_node(rnp);