Diffstat (limited to 'kernel/rcu')
-rw-r--r--  kernel/rcu/Kconfig          37
-rw-r--r--  kernel/rcu/Kconfig.debug    18
-rw-r--r--  kernel/rcu/rcu.h            13
-rw-r--r--  kernel/rcu/rcutorture.c    126
-rw-r--r--  kernel/rcu/refscale.c       32
-rw-r--r--  kernel/rcu/srcutiny.c       20
-rw-r--r--  kernel/rcu/srcutree.c      209
-rw-r--r--  kernel/rcu/tasks.h           7
-rw-r--r--  kernel/rcu/tiny.c           29
-rw-r--r--  kernel/rcu/tree.c           73
-rw-r--r--  kernel/rcu/tree_exp.h        6
-rw-r--r--  kernel/rcu/tree_nocb.h      24
-rw-r--r--  kernel/rcu/tree_plugin.h    22
13 files changed, 424 insertions, 192 deletions
diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
index b9b6bc55185d..4d9b21f69eaa 100644
--- a/kernel/rcu/Kconfig
+++ b/kernel/rcu/Kconfig
@@ -18,7 +18,7 @@ config TREE_RCU
config PREEMPT_RCU
bool
- default y if PREEMPTION
+ default y if (PREEMPT || PREEMPT_RT || PREEMPT_DYNAMIC)
select TREE_RCU
help
This option selects the RCU implementation that is
@@ -65,6 +65,19 @@ config TREE_SRCU
help
This option selects the full-fledged version of SRCU.
+config FORCE_NEED_SRCU_NMI_SAFE
+ bool "Force selection of NEED_SRCU_NMI_SAFE"
+ depends on !TINY_SRCU
+ depends on RCU_EXPERT
+ depends on ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
+ select NEED_SRCU_NMI_SAFE
+ default n
+ help
+ This option forces selection of the NEED_SRCU_NMI_SAFE
+ Kconfig option, allowing testing of srcu_read_lock_nmisafe()
+ and srcu_read_unlock_nmisafe() on architectures (like x86)
+ that select the ARCH_HAS_NMI_SAFE_THIS_CPU_OPS Kconfig option.
+
config NEED_SRCU_NMI_SAFE
def_bool HAVE_NMI && !ARCH_HAS_NMI_SAFE_THIS_CPU_OPS && !TINY_SRCU
@@ -91,7 +104,7 @@ config NEED_TASKS_RCU
config TASKS_RCU
bool
- default NEED_TASKS_RCU && (PREEMPTION || PREEMPT_AUTO)
+ default NEED_TASKS_RCU && PREEMPTION
select IRQ_WORK
config FORCE_TASKS_RUDE_RCU
@@ -323,21 +336,27 @@ config RCU_LAZY
depends on RCU_NOCB_CPU
default n
help
- To save power, batch RCU callbacks and flush after delay, memory
- pressure, or callback list growing too big.
+ To save power, batch RCU callbacks and delay starting the
+ corresponding grace period for multiple seconds. The grace
+ period will be started after this delay, in case of memory
+ pressure, or if the corresponding CPU's callback list grows
+ too large.
- Requires rcu_nocbs=all to be set.
+ These delays happen only on rcu_nocbs CPUs, that is, CPUs
+ whose callbacks have been offloaded.
- Use rcutree.enable_rcu_lazy=0 to turn it off at boot time.
+ Use the rcutree.enable_rcu_lazy=0 kernel-boot parameter to
+ globally disable these delays.
config RCU_LAZY_DEFAULT_OFF
bool "Turn RCU lazy invocation off by default"
depends on RCU_LAZY
default n
help
- Allows building the kernel with CONFIG_RCU_LAZY=y yet keep it default
- off. Boot time param rcutree.enable_rcu_lazy=1 can be used to switch
- it back on.
+ Build the kernel with CONFIG_RCU_LAZY=y, but cause the kernel
+ to boot with these energy-efficiency delays disabled. Use the
+ rcutree.enable_rcu_lazy=1 kernel-boot parameter to override
+ this option at boot time, thus re-enabling these delays.
config RCU_DOUBLE_CHECK_CB_TIME
bool "RCU callback-batch backup time check"
diff --git a/kernel/rcu/Kconfig.debug b/kernel/rcu/Kconfig.debug
index 6af90510a1ca..12e4c64ebae1 100644
--- a/kernel/rcu/Kconfig.debug
+++ b/kernel/rcu/Kconfig.debug
@@ -54,7 +54,7 @@ config RCU_TORTURE_TEST
Say N if you are unsure.
config RCU_TORTURE_TEST_CHK_RDR_STATE
- tristate "Check rcutorture reader state"
+ bool "Check rcutorture reader state"
depends on RCU_TORTURE_TEST
default n
help
@@ -70,7 +70,7 @@ config RCU_TORTURE_TEST_CHK_RDR_STATE
Say N if you are unsure.
config RCU_TORTURE_TEST_LOG_CPU
- tristate "Log CPU for rcutorture failures"
+ bool "Log CPU for rcutorture failures"
depends on RCU_TORTURE_TEST
default n
help
@@ -84,6 +84,20 @@ config RCU_TORTURE_TEST_LOG_CPU
Say Y here if you want CPU IDs logged.
Say N if you are unsure.
+config RCU_TORTURE_TEST_LOG_GP
+ bool "Log grace-period numbers for rcutorture failures"
+ depends on RCU_TORTURE_TEST
+ default n
+ help
+ This option causes rcutorture to decorate each entry of its
+ log of failure/close-call rcutorture reader segments with the
+ corresponding grace-period sequence numbers. This information
+ can be useful, but it does incur additional overhead, overhead
+ that can make both failures and close calls less probable.
+
+ Say Y here if you want grace-period sequence numbers logged.
+ Say N if you are unsure.
+
config RCU_REF_SCALE_TEST
tristate "Scalability tests for read-side synchronization (RCU and others)"
depends on DEBUG_KERNEL
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index feb3ac1dc5d5..eed2951a4962 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -162,7 +162,7 @@ static inline bool rcu_seq_done_exact(unsigned long *sp, unsigned long s)
{
unsigned long cur_s = READ_ONCE(*sp);
- return ULONG_CMP_GE(cur_s, s) || ULONG_CMP_LT(cur_s, s - (2 * RCU_SEQ_STATE_MASK + 1));
+ return ULONG_CMP_GE(cur_s, s) || ULONG_CMP_LT(cur_s, s - (3 * RCU_SEQ_STATE_MASK + 1));
}
/*
@@ -590,6 +590,8 @@ void do_trace_rcu_torture_read(const char *rcutorturename,
#endif
static inline void rcu_gp_set_torture_wait(int duration) { }
#endif
+unsigned long long rcutorture_gather_gp_seqs(void);
+void rcutorture_format_gp_seqs(unsigned long long seqs, char *cp, size_t len);
#ifdef CONFIG_TINY_SRCU
@@ -611,8 +613,6 @@ void srcutorture_get_gp_data(struct srcu_struct *sp, int *flags,
static inline bool rcu_watching_zero_in_eqs(int cpu, int *vp) { return false; }
static inline unsigned long rcu_get_gp_seq(void) { return 0; }
static inline unsigned long rcu_exp_batches_completed(void) { return 0; }
-static inline unsigned long
-srcu_batches_completed(struct srcu_struct *sp) { return 0; }
static inline void rcu_force_quiescent_state(void) { }
static inline bool rcu_check_boost_fail(unsigned long gp_state, int *cpup) { return true; }
static inline void show_rcu_gp_kthreads(void) { }
@@ -624,7 +624,6 @@ static inline void rcu_gp_slow_unregister(atomic_t *rgssp) { }
bool rcu_watching_zero_in_eqs(int cpu, int *vp);
unsigned long rcu_get_gp_seq(void);
unsigned long rcu_exp_batches_completed(void);
-unsigned long srcu_batches_completed(struct srcu_struct *sp);
bool rcu_check_boost_fail(unsigned long gp_state, int *cpup);
void show_rcu_gp_kthreads(void);
int rcu_get_gp_kthreads_prio(void);
@@ -636,6 +635,12 @@ void rcu_gp_slow_register(atomic_t *rgssp);
void rcu_gp_slow_unregister(atomic_t *rgssp);
#endif /* #else #ifdef CONFIG_TINY_RCU */
+#ifdef CONFIG_TINY_SRCU
+static inline unsigned long srcu_batches_completed(struct srcu_struct *sp) { return 0; }
+#else // #ifdef CONFIG_TINY_SRCU
+unsigned long srcu_batches_completed(struct srcu_struct *sp);
+#endif // #else // #ifdef CONFIG_TINY_SRCU
+
#ifdef CONFIG_RCU_NOCB_CPU
void rcu_bind_current_to_nocb(void);
#else
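The rcu.h hunk widens the wrap-around guard band in rcu_seq_done_exact() from two to three grace-period increments. A minimal userspace-style sketch of the check, assuming the usual rcupdate.h definitions of ULONG_CMP_GE()/ULONG_CMP_LT() and RCU_SEQ_STATE_MASK == 0x3 (none of which appear in this diff):

#include <limits.h>
#include <stdbool.h>
#include <stdio.h>

#define RCU_SEQ_STATE_MASK      0x3UL   /* Two low-order state bits. */
#define ULONG_CMP_GE(a, b)      (ULONG_MAX / 2 >= (a) - (b))
#define ULONG_CMP_LT(a, b)      (ULONG_MAX / 2 < (a) - (b))

/* Mirrors the patched rcu_seq_done_exact(), minus READ_ONCE(): done if
 * cur_s has reached s, or if cur_s lags s by more than the guard band,
 * in which case s must date from before a counter wrap. */
static bool rcu_seq_done_exact(const unsigned long *sp, unsigned long s)
{
        unsigned long cur_s = *sp;

        return ULONG_CMP_GE(cur_s, s) ||
               ULONG_CMP_LT(cur_s, s - (3 * RCU_SEQ_STATE_MASK + 1));
}

int main(void)
{
        unsigned long gp_seq = 100;

        printf("%d\n", rcu_seq_done_exact(&gp_seq, 96));  /* 1: already reached. */
        printf("%d\n", rcu_seq_done_exact(&gp_seq, 104)); /* 0: within the guard band. */
        printf("%d\n", rcu_seq_done_exact(&gp_seq, 200)); /* 1: beyond the guard band. */
        return 0;
}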
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index d26fb1d33ed9..4fa7772be183 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -135,6 +135,7 @@ torture_param(int, stat_interval, 60, "Number of seconds between stats printk()s
torture_param(int, stutter, 5, "Number of seconds to run/halt test");
torture_param(int, test_boost, 1, "Test RCU prio boost: 0=no, 1=maybe, 2=yes.");
torture_param(int, test_boost_duration, 4, "Duration of each boost test, seconds.");
+torture_param(int, test_boost_holdoff, 0, "Holdoff time from rcutorture start, seconds.");
torture_param(int, test_boost_interval, 7, "Interval between boost tests, seconds.");
torture_param(int, test_nmis, 0, "End-test NMI tests, 0 to disable.");
torture_param(bool, test_no_idle_hz, true, "Test support for tickless idle CPUs");
@@ -147,6 +148,7 @@ MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, srcu, ...)");
static int nrealnocbers;
static int nrealreaders;
+static int nrealfakewriters;
static struct task_struct *writer_task;
static struct task_struct **fakewriter_tasks;
static struct task_struct **reader_tasks;
@@ -272,6 +274,9 @@ struct rt_read_seg {
bool rt_preempted;
int rt_cpu;
int rt_end_cpu;
+ unsigned long long rt_gp_seq;
+ unsigned long long rt_gp_seq_end;
+ u64 rt_ts;
};
static int err_segs_recorded;
static struct rt_read_seg err_segs[RCUTORTURE_RDR_MAX_SEGS];
@@ -406,6 +411,8 @@ struct rcu_torture_ops {
void (*gp_slow_register)(atomic_t *rgssp);
void (*gp_slow_unregister)(atomic_t *rgssp);
bool (*reader_blocked)(void);
+ unsigned long long (*gather_gp_seqs)(void);
+ void (*format_gp_seqs)(unsigned long long seqs, char *cp, size_t len);
long cbflood_max;
int irq_capable;
int can_boost;
@@ -610,6 +617,8 @@ static struct rcu_torture_ops rcu_ops = {
.reader_blocked = IS_ENABLED(CONFIG_RCU_TORTURE_TEST_LOG_CPU)
? has_rcu_reader_blocked
: NULL,
+ .gather_gp_seqs = rcutorture_gather_gp_seqs,
+ .format_gp_seqs = rcutorture_format_gp_seqs,
.irq_capable = 1,
.can_boost = IS_ENABLED(CONFIG_RCU_BOOST),
.extendables = RCUTORTURE_MAX_EXTEND,
@@ -655,6 +664,8 @@ static struct rcu_torture_ops rcu_busted_ops = {
.sync = synchronize_rcu_busted,
.exp_sync = synchronize_rcu_busted,
.call = call_rcu_busted,
+ .gather_gp_seqs = rcutorture_gather_gp_seqs,
+ .format_gp_seqs = rcutorture_format_gp_seqs,
.irq_capable = 1,
.extendables = RCUTORTURE_MAX_EXTEND,
.name = "busted"
@@ -677,8 +688,11 @@ static void srcu_get_gp_data(int *flags, unsigned long *gp_seq)
static int srcu_torture_read_lock(void)
{
int idx;
+ struct srcu_ctr __percpu *scp;
int ret = 0;
+ WARN_ON_ONCE(reader_flavor & ~SRCU_READ_FLAVOR_ALL);
+
if ((reader_flavor & SRCU_READ_FLAVOR_NORMAL) || !(reader_flavor & SRCU_READ_FLAVOR_ALL)) {
idx = srcu_read_lock(srcu_ctlp);
WARN_ON_ONCE(idx & ~0x1);
@@ -694,6 +708,12 @@ static int srcu_torture_read_lock(void)
WARN_ON_ONCE(idx & ~0x1);
ret += idx << 2;
}
+ if (reader_flavor & SRCU_READ_FLAVOR_FAST) {
+ scp = srcu_read_lock_fast(srcu_ctlp);
+ idx = __srcu_ptr_to_ctr(srcu_ctlp, scp);
+ WARN_ON_ONCE(idx & ~0x1);
+ ret += idx << 3;
+ }
return ret;
}
@@ -719,6 +739,8 @@ srcu_read_delay(struct torture_random_state *rrsp, struct rt_read_seg *rtrsp)
static void srcu_torture_read_unlock(int idx)
{
WARN_ON_ONCE((reader_flavor && (idx & ~reader_flavor)) || (!reader_flavor && (idx & ~0x1)));
+ if (reader_flavor & SRCU_READ_FLAVOR_FAST)
+ srcu_read_unlock_fast(srcu_ctlp, __srcu_ctr_to_ptr(srcu_ctlp, (idx & 0x8) >> 3));
if (reader_flavor & SRCU_READ_FLAVOR_LITE)
srcu_read_unlock_lite(srcu_ctlp, (idx & 0x4) >> 2);
if (reader_flavor & SRCU_READ_FLAVOR_NMI)
@@ -791,6 +813,7 @@ static struct rcu_torture_ops srcu_ops = {
.readunlock = srcu_torture_read_unlock,
.readlock_held = torture_srcu_read_lock_held,
.get_gp_seq = srcu_torture_completed,
+ .gp_diff = rcu_seq_diff,
.deferred_free = srcu_torture_deferred_free,
.sync = srcu_torture_synchronize,
.exp_sync = srcu_torture_synchronize_expedited,
@@ -834,6 +857,7 @@ static struct rcu_torture_ops srcud_ops = {
.readunlock = srcu_torture_read_unlock,
.readlock_held = torture_srcu_read_lock_held,
.get_gp_seq = srcu_torture_completed,
+ .gp_diff = rcu_seq_diff,
.deferred_free = srcu_torture_deferred_free,
.sync = srcu_torture_synchronize,
.exp_sync = srcu_torture_synchronize_expedited,
@@ -1148,8 +1172,19 @@ static int rcu_torture_boost(void *arg)
unsigned long gp_state;
unsigned long gp_state_time;
unsigned long oldstarttime;
+ unsigned long booststarttime = get_torture_init_jiffies() + test_boost_holdoff * HZ;
- VERBOSE_TOROUT_STRING("rcu_torture_boost started");
+ if (test_boost_holdoff <= 0 || time_after(jiffies, booststarttime)) {
+ VERBOSE_TOROUT_STRING("rcu_torture_boost started");
+ } else {
+ VERBOSE_TOROUT_STRING("rcu_torture_boost started holdoff period");
+ while (time_before(jiffies, booststarttime)) {
+ schedule_timeout_idle(HZ);
+ if (kthread_should_stop())
+ goto cleanup;
+ }
+ VERBOSE_TOROUT_STRING("rcu_torture_boost finished holdoff period");
+ }
/* Set real-time priority. */
sched_set_fifo_low(current);
@@ -1225,6 +1260,7 @@ checkwait: if (stutter_wait("rcu_torture_boost"))
sched_set_fifo_low(current);
} while (!torture_must_stop());
+cleanup:
/* Clean up and exit. */
while (!kthread_should_stop()) {
torture_shutdown_absorb("rcu_torture_boost");
@@ -1728,7 +1764,7 @@ rcu_torture_fakewriter(void *arg)
do {
torture_hrtimeout_jiffies(torture_random(&rand) % 10, &rand);
if (cur_ops->cb_barrier != NULL &&
- torture_random(&rand) % (nfakewriters * 8) == 0) {
+ torture_random(&rand) % (nrealfakewriters * 8) == 0) {
cur_ops->cb_barrier();
} else {
switch (synctype[torture_random(&rand) % nsynctypes]) {
@@ -1873,6 +1909,8 @@ static void rcu_torture_reader_do_mbchk(long myid, struct rcu_torture *rtp,
#define ROEC_ARGS "%s %s: Current %#x To add %#x To remove %#x preempt_count() %#x\n", __func__, s, curstate, new, old, preempt_count()
static void rcutorture_one_extend_check(char *s, int curstate, int new, int old, bool insoftirq)
{
+ int mask;
+
if (!IS_ENABLED(CONFIG_RCU_TORTURE_TEST_CHK_RDR_STATE))
return;
@@ -1899,11 +1937,27 @@ static void rcutorture_one_extend_check(char *s, int curstate, int new, int old,
WARN_ONCE(cur_ops->extendables &&
!(curstate & (RCUTORTURE_RDR_BH | RCUTORTURE_RDR_RBH)) &&
(preempt_count() & SOFTIRQ_MASK), ROEC_ARGS);
- WARN_ONCE(cur_ops->extendables &&
- !(curstate & (RCUTORTURE_RDR_PREEMPT | RCUTORTURE_RDR_SCHED)) &&
+
+ /*
+ * non-preemptible RCU in a preemptible kernel uses preempt_disable()
+ * as rcu_read_lock().
+ */
+ mask = RCUTORTURE_RDR_PREEMPT | RCUTORTURE_RDR_SCHED;
+ if (!IS_ENABLED(CONFIG_PREEMPT_RCU))
+ mask |= RCUTORTURE_RDR_RCU_1 | RCUTORTURE_RDR_RCU_2;
+
+ WARN_ONCE(cur_ops->extendables && !(curstate & mask) &&
(preempt_count() & PREEMPT_MASK), ROEC_ARGS);
- WARN_ONCE(cur_ops->readlock_nesting &&
- !(curstate & (RCUTORTURE_RDR_RCU_1 | RCUTORTURE_RDR_RCU_2)) &&
+
+ /*
+ * non-preemptible RCU in a preemptible kernel uses "preempt_count() &
+ * PREEMPT_MASK" as ->readlock_nesting().
+ */
+ mask = RCUTORTURE_RDR_RCU_1 | RCUTORTURE_RDR_RCU_2;
+ if (!IS_ENABLED(CONFIG_PREEMPT_RCU))
+ mask |= RCUTORTURE_RDR_PREEMPT | RCUTORTURE_RDR_SCHED;
+
+ WARN_ONCE(cur_ops->readlock_nesting && !(curstate & mask) &&
cur_ops->readlock_nesting() > 0, ROEC_ARGS);
}
@@ -1965,6 +2019,13 @@ static void rcutorture_one_extend(int *readstate, int newstate, bool insoftirq,
rtrsp[-1].rt_preempted = cur_ops->reader_blocked();
}
}
+ // Sample grace-period sequence number, as good a place as any.
+ if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_LOG_GP) && cur_ops->gather_gp_seqs) {
+ rtrsp->rt_gp_seq = cur_ops->gather_gp_seqs();
+ rtrsp->rt_ts = ktime_get_mono_fast_ns();
+ if (!first)
+ rtrsp[-1].rt_gp_seq_end = rtrsp->rt_gp_seq;
+ }
/*
* Next, remove old protection, in decreasing order of strength
@@ -2263,7 +2324,7 @@ rcu_torture_reader(void *arg)
stutter_wait("rcu_torture_reader");
} while (!torture_must_stop());
if (irqreader && cur_ops->irq_capable) {
- del_timer_sync(&t);
+ timer_delete_sync(&t);
destroy_timer_on_stack(&t);
}
tick_dep_clear_task(current, TICK_DEP_BIT_RCU);
@@ -2512,7 +2573,7 @@ rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, const char *tag)
"shuffle_interval=%d stutter=%d irqreader=%d "
"fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d "
"test_boost=%d/%d test_boost_interval=%d "
- "test_boost_duration=%d shutdown_secs=%d "
+ "test_boost_duration=%d test_boost_holdoff=%d shutdown_secs=%d "
"stall_cpu=%d stall_cpu_holdoff=%d stall_cpu_irqsoff=%d "
"stall_cpu_block=%d stall_cpu_repeat=%d "
"n_barrier_cbs=%d "
@@ -2522,11 +2583,11 @@ rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, const char *tag)
"nocbs_nthreads=%d nocbs_toggle=%d "
"test_nmis=%d "
"preempt_duration=%d preempt_interval=%d\n",
- torture_type, tag, nrealreaders, nfakewriters,
+ torture_type, tag, nrealreaders, nrealfakewriters,
stat_interval, verbose, test_no_idle_hz, shuffle_interval,
stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter,
test_boost, cur_ops->can_boost,
- test_boost_interval, test_boost_duration, shutdown_secs,
+ test_boost_interval, test_boost_duration, test_boost_holdoff, shutdown_secs,
stall_cpu, stall_cpu_holdoff, stall_cpu_irqsoff,
stall_cpu_block, stall_cpu_repeat,
n_barrier_cbs,
@@ -3553,6 +3614,7 @@ rcu_torture_cleanup(void)
int flags = 0;
unsigned long gp_seq = 0;
int i;
+ int j;
if (torture_cleanup_begin()) {
if (cur_ops->cb_barrier != NULL) {
@@ -3597,7 +3659,7 @@ rcu_torture_cleanup(void)
rcu_torture_reader_mbchk = NULL;
if (fakewriter_tasks) {
- for (i = 0; i < nfakewriters; i++)
+ for (i = 0; i < nrealfakewriters; i++)
torture_stop_kthread(rcu_torture_fakewriter,
fakewriter_tasks[i]);
kfree(fakewriter_tasks);
@@ -3635,7 +3697,11 @@ rcu_torture_cleanup(void)
pr_alert("\t: No segments recorded!!!\n");
firsttime = 1;
for (i = 0; i < rt_read_nsegs; i++) {
- pr_alert("\t%d: %#4x", i, err_segs[i].rt_readstate);
+ if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_LOG_GP))
+ pr_alert("\t%lluus ", div64_u64(err_segs[i].rt_ts, 1000ULL));
+ else
+ pr_alert("\t");
+ pr_cont("%d: %#4x", i, err_segs[i].rt_readstate);
if (err_segs[i].rt_delay_jiffies != 0) {
pr_cont("%s%ldjiffies", firsttime ? "" : "+",
err_segs[i].rt_delay_jiffies);
@@ -3648,6 +3714,27 @@ rcu_torture_cleanup(void)
else
pr_cont(" ...");
}
+ if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_LOG_GP) &&
+ cur_ops->gather_gp_seqs && cur_ops->format_gp_seqs) {
+ char buf1[20+1];
+ char buf2[20+1];
+ char sepchar = '-';
+
+ cur_ops->format_gp_seqs(err_segs[i].rt_gp_seq,
+ buf1, ARRAY_SIZE(buf1));
+ cur_ops->format_gp_seqs(err_segs[i].rt_gp_seq_end,
+ buf2, ARRAY_SIZE(buf2));
+ if (err_segs[i].rt_gp_seq == err_segs[i].rt_gp_seq_end) {
+ if (buf2[0]) {
+ for (j = 0; buf2[j]; j++)
+ buf2[j] = '.';
+ if (j)
+ buf2[j - 1] = ' ';
+ }
+ sepchar = ' ';
+ }
+ pr_cont(" %s%c%s", buf1, sepchar, buf2);
+ }
if (err_segs[i].rt_delay_ms != 0) {
pr_cont(" %s%ldms", firsttime ? "" : "+",
err_segs[i].rt_delay_ms);
@@ -3994,6 +4081,14 @@ rcu_torture_init(void)
rcu_torture_init_srcu_lockdep();
+ if (nfakewriters >= 0) {
+ nrealfakewriters = nfakewriters;
+ } else {
+ nrealfakewriters = num_online_cpus() - 2 - nfakewriters;
+ if (nrealfakewriters <= 0)
+ nrealfakewriters = 1;
+ }
+
if (nreaders >= 0) {
nrealreaders = nreaders;
} else {
@@ -4050,8 +4145,9 @@ rcu_torture_init(void)
writer_task);
if (torture_init_error(firsterr))
goto unwind;
- if (nfakewriters > 0) {
- fakewriter_tasks = kcalloc(nfakewriters,
+
+ if (nrealfakewriters > 0) {
+ fakewriter_tasks = kcalloc(nrealfakewriters,
sizeof(fakewriter_tasks[0]),
GFP_KERNEL);
if (fakewriter_tasks == NULL) {
@@ -4060,7 +4156,7 @@ rcu_torture_init(void)
goto unwind;
}
}
- for (i = 0; i < nfakewriters; i++) {
+ for (i = 0; i < nrealfakewriters; i++) {
firsterr = torture_create_kthread(rcu_torture_fakewriter,
NULL, fakewriter_tasks[i]);
if (torture_init_error(firsterr))
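Among the rcutorture changes above, the nfakewriters module parameter gains the same negative-value convention that nreaders already has: a negative value scales the number of fake writers with the number of online CPUs. A small sketch of the computation added to rcu_torture_init() (the helper name is made up for illustration):

/* Non-negative nfakewriters is used as-is; a negative value yields
 * roughly one fake writer per CPU, e.g. nfakewriters=-1 on an 8-CPU
 * system gives 8 - 2 - (-1) = 7, with a floor of one. */
static int compute_nrealfakewriters(int nfakewriters, int ncpus)
{
        int n;

        if (nfakewriters >= 0)
                return nfakewriters;
        n = ncpus - 2 - nfakewriters;
        return n > 0 ? n : 1;
}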
diff --git a/kernel/rcu/refscale.c b/kernel/rcu/refscale.c
index 1b47376acdc4..f11a7c2af778 100644
--- a/kernel/rcu/refscale.c
+++ b/kernel/rcu/refscale.c
@@ -216,6 +216,36 @@ static const struct ref_scale_ops srcu_ops = {
.name = "srcu"
};
+static void srcu_fast_ref_scale_read_section(const int nloops)
+{
+ int i;
+ struct srcu_ctr __percpu *scp;
+
+ for (i = nloops; i >= 0; i--) {
+ scp = srcu_read_lock_fast(srcu_ctlp);
+ srcu_read_unlock_fast(srcu_ctlp, scp);
+ }
+}
+
+static void srcu_fast_ref_scale_delay_section(const int nloops, const int udl, const int ndl)
+{
+ int i;
+ struct srcu_ctr __percpu *scp;
+
+ for (i = nloops; i >= 0; i--) {
+ scp = srcu_read_lock_fast(srcu_ctlp);
+ un_delay(udl, ndl);
+ srcu_read_unlock_fast(srcu_ctlp, scp);
+ }
+}
+
+static const struct ref_scale_ops srcu_fast_ops = {
+ .init = rcu_sync_scale_init,
+ .readsection = srcu_fast_ref_scale_read_section,
+ .delaysection = srcu_fast_ref_scale_delay_section,
+ .name = "srcu-fast"
+};
+
static void srcu_lite_ref_scale_read_section(const int nloops)
{
int i;
@@ -1163,7 +1193,7 @@ ref_scale_init(void)
long i;
int firsterr = 0;
static const struct ref_scale_ops *scale_ops[] = {
- &rcu_ops, &srcu_ops, &srcu_lite_ops, RCU_TRACE_OPS RCU_TASKS_OPS
+ &rcu_ops, &srcu_ops, &srcu_fast_ops, &srcu_lite_ops, RCU_TRACE_OPS RCU_TASKS_OPS
&refcnt_ops, &rwlock_ops, &rwsem_ops, &lock_ops, &lock_irq_ops,
&acqrel_ops, &sched_clock_ops, &clock_ops, &jiffies_ops,
&typesafe_ref_ops, &typesafe_lock_ops, &typesafe_seqlock_ops,
diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c
index 4dcbf8aa80ff..6e9fe2ce1075 100644
--- a/kernel/rcu/srcutiny.c
+++ b/kernel/rcu/srcutiny.c
@@ -20,7 +20,11 @@
#include "rcu_segcblist.h"
#include "rcu.h"
+#ifndef CONFIG_TREE_RCU
int rcu_scheduler_active __read_mostly;
+#else // #ifndef CONFIG_TREE_RCU
+extern int rcu_scheduler_active;
+#endif // #else // #ifndef CONFIG_TREE_RCU
static LIST_HEAD(srcu_boot_list);
static bool srcu_init_done;
@@ -98,7 +102,7 @@ void __srcu_read_unlock(struct srcu_struct *ssp, int idx)
{
int newval;
- preempt_disable(); // Needed for PREEMPT_AUTO
+ preempt_disable(); // Needed for PREEMPT_LAZY
newval = READ_ONCE(ssp->srcu_lock_nesting[idx]) - 1;
WRITE_ONCE(ssp->srcu_lock_nesting[idx], newval);
preempt_enable();
@@ -120,7 +124,7 @@ void srcu_drive_gp(struct work_struct *wp)
struct srcu_struct *ssp;
ssp = container_of(wp, struct srcu_struct, srcu_work);
- preempt_disable(); // Needed for PREEMPT_AUTO
+ preempt_disable(); // Needed for PREEMPT_LAZY
if (ssp->srcu_gp_running || ULONG_CMP_GE(ssp->srcu_idx, READ_ONCE(ssp->srcu_idx_max))) {
preempt_enable();
return; /* Already running or nothing to do. */
@@ -138,7 +142,7 @@ void srcu_drive_gp(struct work_struct *wp)
WRITE_ONCE(ssp->srcu_gp_waiting, true); /* srcu_read_unlock() wakes! */
preempt_enable();
swait_event_exclusive(ssp->srcu_wq, !READ_ONCE(ssp->srcu_lock_nesting[idx]));
- preempt_disable(); // Needed for PREEMPT_AUTO
+ preempt_disable(); // Needed for PREEMPT_LAZY
WRITE_ONCE(ssp->srcu_gp_waiting, false); /* srcu_read_unlock() cheap. */
WRITE_ONCE(ssp->srcu_idx, ssp->srcu_idx + 1);
preempt_enable();
@@ -159,7 +163,7 @@ void srcu_drive_gp(struct work_struct *wp)
* at interrupt level, but the ->srcu_gp_running checks will
* straighten that out.
*/
- preempt_disable(); // Needed for PREEMPT_AUTO
+ preempt_disable(); // Needed for PREEMPT_LAZY
WRITE_ONCE(ssp->srcu_gp_running, false);
idx = ULONG_CMP_LT(ssp->srcu_idx, READ_ONCE(ssp->srcu_idx_max));
preempt_enable();
@@ -172,7 +176,7 @@ static void srcu_gp_start_if_needed(struct srcu_struct *ssp)
{
unsigned long cookie;
- preempt_disable(); // Needed for PREEMPT_AUTO
+ preempt_disable(); // Needed for PREEMPT_LAZY
cookie = get_state_synchronize_srcu(ssp);
if (ULONG_CMP_GE(READ_ONCE(ssp->srcu_idx_max), cookie)) {
preempt_enable();
@@ -199,7 +203,7 @@ void call_srcu(struct srcu_struct *ssp, struct rcu_head *rhp,
rhp->func = func;
rhp->next = NULL;
- preempt_disable(); // Needed for PREEMPT_AUTO
+ preempt_disable(); // Needed for PREEMPT_LAZY
local_irq_save(flags);
*ssp->srcu_cb_tail = rhp;
ssp->srcu_cb_tail = &rhp->next;
@@ -261,7 +265,7 @@ unsigned long start_poll_synchronize_srcu(struct srcu_struct *ssp)
{
unsigned long ret;
- preempt_disable(); // Needed for PREEMPT_AUTO
+ preempt_disable(); // Needed for PREEMPT_LAZY
ret = get_state_synchronize_srcu(ssp);
srcu_gp_start_if_needed(ssp);
preempt_enable();
@@ -282,11 +286,13 @@ bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie)
}
EXPORT_SYMBOL_GPL(poll_state_synchronize_srcu);
+#ifndef CONFIG_TREE_RCU
/* Lockdep diagnostics. */
void __init rcu_scheduler_starting(void)
{
rcu_scheduler_active = RCU_SCHEDULER_RUNNING;
}
+#endif // #ifndef CONFIG_TREE_RCU
/*
* Queue work for srcu_struct structures with early boot callbacks.
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index b83c74c4dcc0..9a59b071501b 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -116,8 +116,9 @@ do { \
/*
* Initialize SRCU per-CPU data. Note that statically allocated
* srcu_struct structures might already have srcu_read_lock() and
- * srcu_read_unlock() running against them. So if the is_static parameter
- * is set, don't initialize ->srcu_lock_count[] and ->srcu_unlock_count[].
+ * srcu_read_unlock() running against them. So if the is_static
+ * parameter is set, don't initialize ->srcu_ctrs[].srcu_locks and
+ * ->srcu_ctrs[].srcu_unlocks.
*/
static void init_srcu_struct_data(struct srcu_struct *ssp)
{
@@ -128,8 +129,6 @@ static void init_srcu_struct_data(struct srcu_struct *ssp)
* Initialize the per-CPU srcu_data array, which feeds into the
* leaves of the srcu_node tree.
*/
- BUILD_BUG_ON(ARRAY_SIZE(sdp->srcu_lock_count) !=
- ARRAY_SIZE(sdp->srcu_unlock_count));
for_each_possible_cpu(cpu) {
sdp = per_cpu_ptr(ssp->sda, cpu);
spin_lock_init(&ACCESS_PRIVATE(sdp, lock));
@@ -247,15 +246,16 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static)
ssp->srcu_sup->node = NULL;
mutex_init(&ssp->srcu_sup->srcu_cb_mutex);
mutex_init(&ssp->srcu_sup->srcu_gp_mutex);
- ssp->srcu_idx = 0;
ssp->srcu_sup->srcu_gp_seq = SRCU_GP_SEQ_INITIAL_VAL;
ssp->srcu_sup->srcu_barrier_seq = 0;
mutex_init(&ssp->srcu_sup->srcu_barrier_mutex);
atomic_set(&ssp->srcu_sup->srcu_barrier_cpu_cnt, 0);
INIT_DELAYED_WORK(&ssp->srcu_sup->work, process_srcu);
ssp->srcu_sup->sda_is_static = is_static;
- if (!is_static)
+ if (!is_static) {
ssp->sda = alloc_percpu(struct srcu_data);
+ ssp->srcu_ctrp = &ssp->sda->srcu_ctrs[0];
+ }
if (!ssp->sda)
goto err_free_sup;
init_srcu_struct_data(ssp);
@@ -429,10 +429,10 @@ static bool srcu_gp_is_expedited(struct srcu_struct *ssp)
}
/*
- * Computes approximate total of the readers' ->srcu_lock_count[] values
- * for the rank of per-CPU counters specified by idx, and returns true if
- * the caller did the proper barrier (gp), and if the count of the locks
- * matches that of the unlocks passed in.
+ * Computes approximate total of the readers' ->srcu_ctrs[].srcu_locks
+ * values for the rank of per-CPU counters specified by idx, and returns
+ * true if the caller did the proper barrier (gp), and if the count of
+ * the locks matches that of the unlocks passed in.
*/
static bool srcu_readers_lock_idx(struct srcu_struct *ssp, int idx, bool gp, unsigned long unlocks)
{
@@ -443,20 +443,20 @@ static bool srcu_readers_lock_idx(struct srcu_struct *ssp, int idx, bool gp, uns
for_each_possible_cpu(cpu) {
struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
- sum += atomic_long_read(&sdp->srcu_lock_count[idx]);
+ sum += atomic_long_read(&sdp->srcu_ctrs[idx].srcu_locks);
if (IS_ENABLED(CONFIG_PROVE_RCU))
mask = mask | READ_ONCE(sdp->srcu_reader_flavor);
}
WARN_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && (mask & (mask - 1)),
"Mixed reader flavors for srcu_struct at %ps.\n", ssp);
- if (mask & SRCU_READ_FLAVOR_LITE && !gp)
+ if (mask & SRCU_READ_FLAVOR_SLOWGP && !gp)
return false;
return sum == unlocks;
}
/*
- * Returns approximate total of the readers' ->srcu_unlock_count[] values
- * for the rank of per-CPU counters specified by idx.
+ * Returns approximate total of the readers' ->srcu_ctrs[].srcu_unlocks
+ * values for the rank of per-CPU counters specified by idx.
*/
static unsigned long srcu_readers_unlock_idx(struct srcu_struct *ssp, int idx, unsigned long *rdm)
{
@@ -467,7 +467,7 @@ static unsigned long srcu_readers_unlock_idx(struct srcu_struct *ssp, int idx, u
for_each_possible_cpu(cpu) {
struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
- sum += atomic_long_read(&sdp->srcu_unlock_count[idx]);
+ sum += atomic_long_read(&sdp->srcu_ctrs[idx].srcu_unlocks);
mask = mask | READ_ONCE(sdp->srcu_reader_flavor);
}
WARN_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && (mask & (mask - 1)),
@@ -487,7 +487,7 @@ static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
unsigned long unlocks;
unlocks = srcu_readers_unlock_idx(ssp, idx, &rdm);
- did_gp = !!(rdm & SRCU_READ_FLAVOR_LITE);
+ did_gp = !!(rdm & SRCU_READ_FLAVOR_SLOWGP);
/*
* Make sure that a lock is always counted if the corresponding
@@ -509,48 +509,49 @@ static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
* If the locks are the same as the unlocks, then there must have
* been no readers on this index at some point in this function.
* But there might be more readers, as a task might have read
- * the current ->srcu_idx but not yet have incremented its CPU's
- * ->srcu_lock_count[idx] counter. In fact, it is possible
+ * the current ->srcu_ctrp but not yet have incremented its CPU's
+ * ->srcu_ctrs[idx].srcu_locks counter. In fact, it is possible
* that most of the tasks have been preempted between fetching
- * ->srcu_idx and incrementing ->srcu_lock_count[idx]. And there
- * could be almost (ULONG_MAX / sizeof(struct task_struct)) tasks
- * in a system whose address space was fully populated with memory.
- * Call this quantity Nt.
+ * ->srcu_ctrp and incrementing ->srcu_ctrs[idx].srcu_locks. And
+ * there could be almost (ULONG_MAX / sizeof(struct task_struct))
+ * tasks in a system whose address space was fully populated
+ * with memory. Call this quantity Nt.
*
- * So suppose that the updater is preempted at this point in the
- * code for a long time. That now-preempted updater has already
- * flipped ->srcu_idx (possibly during the preceding grace period),
- * done an smp_mb() (again, possibly during the preceding grace
- * period), and summed up the ->srcu_unlock_count[idx] counters.
- * How many times can a given one of the aforementioned Nt tasks
- * increment the old ->srcu_idx value's ->srcu_lock_count[idx]
- * counter, in the absence of nesting?
+ * So suppose that the updater is preempted at this
+ * point in the code for a long time. That now-preempted
+ * updater has already flipped ->srcu_ctrp (possibly during
+ * the preceding grace period), done an smp_mb() (again,
+ * possibly during the preceding grace period), and summed up
+ * the ->srcu_ctrs[idx].srcu_unlocks counters. How many times
+ * can a given one of the aforementioned Nt tasks increment the
+ * old ->srcu_ctrp value's ->srcu_ctrs[idx].srcu_locks counter,
+ * in the absence of nesting?
*
* It can clearly do so once, given that it has already fetched
- * the old value of ->srcu_idx and is just about to use that value
- * to index its increment of ->srcu_lock_count[idx]. But as soon as
- * it leaves that SRCU read-side critical section, it will increment
- * ->srcu_unlock_count[idx], which must follow the updater's above
- * read from that same value. Thus, as soon the reading task does
- * an smp_mb() and a later fetch from ->srcu_idx, that task will be
- * guaranteed to get the new index. Except that the increment of
- * ->srcu_unlock_count[idx] in __srcu_read_unlock() is after the
- * smp_mb(), and the fetch from ->srcu_idx in __srcu_read_lock()
- * is before the smp_mb(). Thus, that task might not see the new
- * value of ->srcu_idx until the -second- __srcu_read_lock(),
- * which in turn means that this task might well increment
- * ->srcu_lock_count[idx] for the old value of ->srcu_idx twice,
- * not just once.
+ * the old value of ->srcu_ctrp and is just about to use that
+ * value to index its increment of ->srcu_ctrs[idx].srcu_locks.
+ * But as soon as it leaves that SRCU read-side critical section,
+ * it will increment ->srcu_ctrs[idx].srcu_unlocks, which must
+ * follow the updater's above read from that same value. Thus,
+ * as soon as the reading task does an smp_mb() and a later fetch from
+ * ->srcu_ctrp, that task will be guaranteed to get the new index.
+ * Except that the increment of ->srcu_ctrs[idx].srcu_unlocks
+ * in __srcu_read_unlock() is after the smp_mb(), and the fetch
+ * from ->srcu_ctrp in __srcu_read_lock() is before the smp_mb().
+ * Thus, that task might not see the new value of ->srcu_ctrp until
+ * the -second- __srcu_read_lock(), which in turn means that this
+ * task might well increment ->srcu_ctrs[idx].srcu_locks for the
+ * old value of ->srcu_ctrp twice, not just once.
*
* However, it is important to note that a given smp_mb() takes
* effect not just for the task executing it, but also for any
* later task running on that same CPU.
*
- * That is, there can be almost Nt + Nc further increments of
- * ->srcu_lock_count[idx] for the old index, where Nc is the number
- * of CPUs. But this is OK because the size of the task_struct
- * structure limits the value of Nt and current systems limit Nc
- * to a few thousand.
+ * That is, there can be almost Nt + Nc further increments
+ * of ->srcu_ctrs[idx].srcu_locks for the old index, where Nc
+ * is the number of CPUs. But this is OK because the size of
+ * the task_struct structure limits the value of Nt and current
+ * systems limit Nc to a few thousand.
*
* OK, but what about nesting? This does impose a limit on
* nesting of half of the size of the task_struct structure
@@ -581,10 +582,10 @@ static bool srcu_readers_active(struct srcu_struct *ssp)
for_each_possible_cpu(cpu) {
struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
- sum += atomic_long_read(&sdp->srcu_lock_count[0]);
- sum += atomic_long_read(&sdp->srcu_lock_count[1]);
- sum -= atomic_long_read(&sdp->srcu_unlock_count[0]);
- sum -= atomic_long_read(&sdp->srcu_unlock_count[1]);
+ sum += atomic_long_read(&sdp->srcu_ctrs[0].srcu_locks);
+ sum += atomic_long_read(&sdp->srcu_ctrs[1].srcu_locks);
+ sum -= atomic_long_read(&sdp->srcu_ctrs[0].srcu_unlocks);
+ sum -= atomic_long_read(&sdp->srcu_ctrs[1].srcu_unlocks);
}
return sum;
}
@@ -647,6 +648,7 @@ static unsigned long srcu_get_delay(struct srcu_struct *ssp)
unsigned long jbase = SRCU_INTERVAL;
struct srcu_usage *sup = ssp->srcu_sup;
+ lockdep_assert_held(&ACCESS_PRIVATE(ssp->srcu_sup, lock));
if (srcu_gp_is_expedited(ssp))
jbase = 0;
if (rcu_seq_state(READ_ONCE(sup->srcu_gp_seq))) {
@@ -674,9 +676,13 @@ static unsigned long srcu_get_delay(struct srcu_struct *ssp)
void cleanup_srcu_struct(struct srcu_struct *ssp)
{
int cpu;
+ unsigned long delay;
struct srcu_usage *sup = ssp->srcu_sup;
- if (WARN_ON(!srcu_get_delay(ssp)))
+ spin_lock_irq_rcu_node(ssp->srcu_sup);
+ delay = srcu_get_delay(ssp);
+ spin_unlock_irq_rcu_node(ssp->srcu_sup);
+ if (WARN_ON(!delay))
return; /* Just leak it! */
if (WARN_ON(srcu_readers_active(ssp)))
return; /* Just leak it! */
@@ -684,7 +690,7 @@ void cleanup_srcu_struct(struct srcu_struct *ssp)
for_each_possible_cpu(cpu) {
struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
- del_timer_sync(&sdp->delay_work);
+ timer_delete_sync(&sdp->delay_work);
flush_work(&sdp->work);
if (WARN_ON(rcu_segcblist_n_cbs(&sdp->srcu_cblist)))
return; /* Forgot srcu_barrier(), so just leak it! */
@@ -743,12 +749,11 @@ EXPORT_SYMBOL_GPL(__srcu_check_read_flavor);
*/
int __srcu_read_lock(struct srcu_struct *ssp)
{
- int idx;
+ struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp);
- idx = READ_ONCE(ssp->srcu_idx) & 0x1;
- this_cpu_inc(ssp->sda->srcu_lock_count[idx].counter);
+ this_cpu_inc(scp->srcu_locks.counter);
smp_mb(); /* B */ /* Avoid leaking the critical section. */
- return idx;
+ return __srcu_ptr_to_ctr(ssp, scp);
}
EXPORT_SYMBOL_GPL(__srcu_read_lock);
@@ -760,7 +765,7 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock);
void __srcu_read_unlock(struct srcu_struct *ssp, int idx)
{
smp_mb(); /* C */ /* Avoid leaking the critical section. */
- this_cpu_inc(ssp->sda->srcu_unlock_count[idx].counter);
+ this_cpu_inc(__srcu_ctr_to_ptr(ssp, idx)->srcu_unlocks.counter);
}
EXPORT_SYMBOL_GPL(__srcu_read_unlock);
@@ -773,13 +778,12 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock);
*/
int __srcu_read_lock_nmisafe(struct srcu_struct *ssp)
{
- int idx;
- struct srcu_data *sdp = raw_cpu_ptr(ssp->sda);
+ struct srcu_ctr __percpu *scpp = READ_ONCE(ssp->srcu_ctrp);
+ struct srcu_ctr *scp = raw_cpu_ptr(scpp);
- idx = READ_ONCE(ssp->srcu_idx) & 0x1;
- atomic_long_inc(&sdp->srcu_lock_count[idx]);
+ atomic_long_inc(&scp->srcu_locks);
smp_mb__after_atomic(); /* B */ /* Avoid leaking the critical section. */
- return idx;
+ return __srcu_ptr_to_ctr(ssp, scpp);
}
EXPORT_SYMBOL_GPL(__srcu_read_lock_nmisafe);
@@ -790,10 +794,8 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock_nmisafe);
*/
void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx)
{
- struct srcu_data *sdp = raw_cpu_ptr(ssp->sda);
-
smp_mb__before_atomic(); /* C */ /* Avoid leaking the critical section. */
- atomic_long_inc(&sdp->srcu_unlock_count[idx]);
+ atomic_long_inc(&raw_cpu_ptr(__srcu_ctr_to_ptr(ssp, idx))->srcu_unlocks);
}
EXPORT_SYMBOL_GPL(__srcu_read_unlock_nmisafe);
@@ -1096,13 +1098,15 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp,
/*
* Wait until all readers counted by array index idx complete, but
* loop an additional time if there is an expedited grace period pending.
- * The caller must ensure that ->srcu_idx is not changed while checking.
+ * The caller must ensure that ->srcu_ctrp is not changed while checking.
*/
static bool try_check_zero(struct srcu_struct *ssp, int idx, int trycount)
{
unsigned long curdelay;
+ spin_lock_irq_rcu_node(ssp->srcu_sup);
curdelay = !srcu_get_delay(ssp);
+ spin_unlock_irq_rcu_node(ssp->srcu_sup);
for (;;) {
if (srcu_readers_active_idx_check(ssp, idx))
@@ -1114,30 +1118,30 @@ static bool try_check_zero(struct srcu_struct *ssp, int idx, int trycount)
}
/*
- * Increment the ->srcu_idx counter so that future SRCU readers will
+ * Increment the ->srcu_ctrp counter so that future SRCU readers will
* use the other rank of the ->srcu_(un)lock_count[] arrays. This allows
* us to wait for pre-existing readers in a starvation-free manner.
*/
static void srcu_flip(struct srcu_struct *ssp)
{
/*
- * Because the flip of ->srcu_idx is executed only if the
+ * Because the flip of ->srcu_ctrp is executed only if the
* preceding call to srcu_readers_active_idx_check() found that
- * the ->srcu_unlock_count[] and ->srcu_lock_count[] sums matched
- * and because that summing uses atomic_long_read(), there is
- * ordering due to a control dependency between that summing and
- * the WRITE_ONCE() in this call to srcu_flip(). This ordering
- * ensures that if this updater saw a given reader's increment from
- * __srcu_read_lock(), that reader was using a value of ->srcu_idx
- * from before the previous call to srcu_flip(), which should be
- * quite rare. This ordering thus helps forward progress because
- * the grace period could otherwise be delayed by additional
- * calls to __srcu_read_lock() using that old (soon to be new)
- * value of ->srcu_idx.
+ * the ->srcu_ctrs[].srcu_unlocks and ->srcu_ctrs[].srcu_locks sums
+ * matched and because that summing uses atomic_long_read(),
+ * there is ordering due to a control dependency between that
+ * summing and the WRITE_ONCE() in this call to srcu_flip().
+ * This ordering ensures that if this updater saw a given reader's
+ * increment from __srcu_read_lock(), that reader was using a value
+ * of ->srcu_ctrp from before the previous call to srcu_flip(),
+ * which should be quite rare. This ordering thus helps forward
+ * progress because the grace period could otherwise be delayed
+ * by additional calls to __srcu_read_lock() using that old (soon
+ * to be new) value of ->srcu_ctrp.
*
* This sum-equality check and ordering also ensures that if
* a given call to __srcu_read_lock() uses the new value of
- * ->srcu_idx, this updater's earlier scans cannot have seen
+ * ->srcu_ctrp, this updater's earlier scans cannot have seen
* that reader's increments, which is all to the good, because
* this grace period need not wait on that reader. After all,
* if those earlier scans had seen that reader, there would have
@@ -1152,7 +1156,8 @@ static void srcu_flip(struct srcu_struct *ssp)
*/
smp_mb(); /* E */ /* Pairs with B and C. */
- WRITE_ONCE(ssp->srcu_idx, ssp->srcu_idx + 1); // Flip the counter.
+ WRITE_ONCE(ssp->srcu_ctrp,
+ &ssp->sda->srcu_ctrs[!(ssp->srcu_ctrp - &ssp->sda->srcu_ctrs[0])]);
/*
* Ensure that if the updater misses an __srcu_read_unlock()
@@ -1198,7 +1203,7 @@ static bool srcu_should_expedite(struct srcu_struct *ssp)
check_init_srcu_struct(ssp);
/* If _lite() readers, don't do unsolicited expediting. */
- if (this_cpu_read(ssp->sda->srcu_reader_flavor) & SRCU_READ_FLAVOR_LITE)
+ if (this_cpu_read(ssp->sda->srcu_reader_flavor) & SRCU_READ_FLAVOR_SLOWGP)
return false;
/* If the local srcu_data structure has callbacks, not idle. */
sdp = raw_cpu_ptr(ssp->sda);
@@ -1398,8 +1403,12 @@ static void __call_srcu(struct srcu_struct *ssp, struct rcu_head *rhp,
* read-side critical sections are delimited by srcu_read_lock() and
* srcu_read_unlock(), and may be nested.
*
- * The callback will be invoked from process context, but must nevertheless
- * be fast and must not block.
+ * The callback will be invoked from process context, but with bh
+ * disabled. The callback function must therefore be fast and must
+ * not block.
+ *
+ * See the description of call_rcu() for more detailed information on
+ * memory ordering guarantees.
*/
void call_srcu(struct srcu_struct *ssp, struct rcu_head *rhp,
rcu_callback_t func)
@@ -1465,8 +1474,9 @@ EXPORT_SYMBOL_GPL(synchronize_srcu_expedited);
*
* Wait for the count to drain to zero of both indexes. To avoid the
* possible starvation of synchronize_srcu(), it waits for the count of
- * the index=((->srcu_idx & 1) ^ 1) to drain to zero at first,
- * and then flip the srcu_idx and wait for the count of the other index.
+ * the index=!(ssp->srcu_ctrp - &ssp->sda->srcu_ctrs[0]) to drain to zero
+ * at first, and then flip the ->srcu_ctrp and wait for the count of the
+ * other index.
*
* Can block; must be called from process context.
*
@@ -1675,7 +1685,7 @@ EXPORT_SYMBOL_GPL(srcu_barrier);
*/
unsigned long srcu_batches_completed(struct srcu_struct *ssp)
{
- return READ_ONCE(ssp->srcu_idx);
+ return READ_ONCE(ssp->srcu_sup->srcu_gp_seq);
}
EXPORT_SYMBOL_GPL(srcu_batches_completed);
@@ -1692,7 +1702,7 @@ static void srcu_advance_state(struct srcu_struct *ssp)
/*
* Because readers might be delayed for an extended period after
- * fetching ->srcu_idx for their index, at any point in time there
+ * fetching ->srcu_ctrp for their index, at any point in time there
* might well be readers using both idx=0 and idx=1. We therefore
* need to wait for readers to clear from both index values before
* invoking a callback.
@@ -1720,7 +1730,7 @@ static void srcu_advance_state(struct srcu_struct *ssp)
}
if (rcu_seq_state(READ_ONCE(ssp->srcu_sup->srcu_gp_seq)) == SRCU_STATE_SCAN1) {
- idx = 1 ^ (ssp->srcu_idx & 1);
+ idx = !(ssp->srcu_ctrp - &ssp->sda->srcu_ctrs[0]);
if (!try_check_zero(ssp, idx, 1)) {
mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex);
return; /* readers present, retry later. */
@@ -1738,7 +1748,7 @@ static void srcu_advance_state(struct srcu_struct *ssp)
* SRCU read-side critical sections are normally short,
* so check at least twice in quick succession after a flip.
*/
- idx = 1 ^ (ssp->srcu_idx & 1);
+ idx = !(ssp->srcu_ctrp - &ssp->sda->srcu_ctrs[0]);
if (!try_check_zero(ssp, idx, 2)) {
mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex);
return; /* readers present, retry later. */
@@ -1849,7 +1859,9 @@ static void process_srcu(struct work_struct *work)
ssp = sup->srcu_ssp;
srcu_advance_state(ssp);
+ spin_lock_irq_rcu_node(ssp->srcu_sup);
curdelay = srcu_get_delay(ssp);
+ spin_unlock_irq_rcu_node(ssp->srcu_sup);
if (curdelay) {
WRITE_ONCE(sup->reschedule_count, 0);
} else {
@@ -1896,7 +1908,7 @@ void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf)
int ss_state = READ_ONCE(ssp->srcu_sup->srcu_size_state);
int ss_state_idx = ss_state;
- idx = ssp->srcu_idx & 0x1;
+ idx = ssp->srcu_ctrp - &ssp->sda->srcu_ctrs[0];
if (ss_state < 0 || ss_state >= ARRAY_SIZE(srcu_size_state_name))
ss_state_idx = ARRAY_SIZE(srcu_size_state_name) - 1;
pr_alert("%s%s Tree SRCU g%ld state %d (%s)",
@@ -1914,8 +1926,8 @@ void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf)
struct srcu_data *sdp;
sdp = per_cpu_ptr(ssp->sda, cpu);
- u0 = data_race(atomic_long_read(&sdp->srcu_unlock_count[!idx]));
- u1 = data_race(atomic_long_read(&sdp->srcu_unlock_count[idx]));
+ u0 = data_race(atomic_long_read(&sdp->srcu_ctrs[!idx].srcu_unlocks));
+ u1 = data_race(atomic_long_read(&sdp->srcu_ctrs[idx].srcu_unlocks));
/*
* Make sure that a lock is always counted if the corresponding
@@ -1923,8 +1935,8 @@ void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf)
*/
smp_rmb();
- l0 = data_race(atomic_long_read(&sdp->srcu_lock_count[!idx]));
- l1 = data_race(atomic_long_read(&sdp->srcu_lock_count[idx]));
+ l0 = data_race(atomic_long_read(&sdp->srcu_ctrs[!idx].srcu_locks));
+ l1 = data_race(atomic_long_read(&sdp->srcu_ctrs[idx].srcu_locks));
c0 = l0 - u0;
c1 = l1 - u1;
@@ -2001,6 +2013,7 @@ static int srcu_module_coming(struct module *mod)
ssp->sda = alloc_percpu(struct srcu_data);
if (WARN_ON_ONCE(!ssp->sda))
return -ENOMEM;
+ ssp->srcu_ctrp = &ssp->sda->srcu_ctrs[0];
}
return 0;
}
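The srcutree.c hunks replace the ->srcu_idx integer with a ->srcu_ctrp pointer into the per-CPU ->srcu_ctrs[] array, converting between pointer and index with __srcu_ptr_to_ctr() and __srcu_ctr_to_ptr(). Those helpers live in include/linux/srcutree.h and are not part of this diff; judging from how the hunks use them (for example "ssp->srcu_ctrp - &ssp->sda->srcu_ctrs[0]" as an index), they amount to the following conversions:

/* Sketch of the pointer/index correspondence assumed by the hunks above;
 * see include/linux/srcutree.h for the real definitions. */
static int __srcu_ptr_to_ctr(struct srcu_struct *ssp, struct srcu_ctr __percpu *scpp)
{
        return scpp - &ssp->sda->srcu_ctrs[0];  /* Yields 0 or 1. */
}

static struct srcu_ctr __percpu *__srcu_ctr_to_ptr(struct srcu_struct *ssp, int idx)
{
        return &ssp->sda->srcu_ctrs[idx];
}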
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index 59314da5eb60..c0cc7ae41106 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -1086,7 +1086,7 @@ static void rcu_tasks_postscan(struct list_head *hop)
}
if (!IS_ENABLED(CONFIG_TINY_RCU))
- del_timer_sync(&tasks_rcu_exit_srcu_stall_timer);
+ timer_delete_sync(&tasks_rcu_exit_srcu_stall_timer);
}
/* See if tasks are still holding out, complain if so. */
@@ -2256,7 +2256,7 @@ void __init tasks_cblist_init_generic(void)
#endif
}
-void __init rcu_init_tasks_generic(void)
+static int __init rcu_init_tasks_generic(void)
{
#ifdef CONFIG_TASKS_RCU
rcu_spawn_tasks_kthread();
@@ -2272,7 +2272,10 @@ void __init rcu_init_tasks_generic(void)
// Run the self-tests.
rcu_tasks_initiate_self_tests();
+
+ return 0;
}
+core_initcall(rcu_init_tasks_generic);
#else /* #ifdef CONFIG_TASKS_RCU_GENERIC */
static inline void rcu_tasks_bootup_oddness(void) {}
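The tasks.h change converts rcu_init_tasks_generic() from a function called explicitly during early boot into a core_initcall(). The general shape of that idiom, with a made-up subsystem name:

#include <linux/init.h>

/* Initcall pattern: return int and register with core_initcall() rather
 * than being invoked by hand from init code. */
static int __init my_subsys_init(void)
{
        /* ... spawn kthreads, run self-tests, etc. ... */
        return 0;
}
core_initcall(my_subsys_init);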
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index 4b3f31911465..c1ebfd51768b 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -85,15 +85,8 @@ void rcu_sched_clock_irq(int user)
static inline bool rcu_reclaim_tiny(struct rcu_head *head)
{
rcu_callback_t f;
- unsigned long offset = (unsigned long)head->func;
rcu_lock_acquire(&rcu_callback_map);
- if (__is_kvfree_rcu_offset(offset)) {
- trace_rcu_invoke_kvfree_callback("", head, offset);
- kvfree((void *)head - offset);
- rcu_lock_release(&rcu_callback_map);
- return true;
- }
trace_rcu_invoke_callback("", head);
f = head->func;
@@ -159,10 +152,6 @@ void synchronize_rcu(void)
}
EXPORT_SYMBOL_GPL(synchronize_rcu);
-static void tiny_rcu_leak_callback(struct rcu_head *rhp)
-{
-}
-
/*
* Post an RCU callback to be invoked after the end of an RCU grace
* period. But since we have but one CPU, that would be after any
@@ -178,9 +167,6 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func)
pr_err("%s(): Double-freed CB %p->%pS()!!! ", __func__, head, head->func);
mem_dump_obj(head);
}
-
- if (!__is_kvfree_rcu_offset((unsigned long)head->func))
- WRITE_ONCE(head->func, tiny_rcu_leak_callback);
return;
}
@@ -246,15 +232,18 @@ bool poll_state_synchronize_rcu(unsigned long oldstate)
}
EXPORT_SYMBOL_GPL(poll_state_synchronize_rcu);
-#ifdef CONFIG_KASAN_GENERIC
-void kvfree_call_rcu(struct rcu_head *head, void *ptr)
+#if IS_ENABLED(CONFIG_RCU_TORTURE_TEST)
+unsigned long long rcutorture_gather_gp_seqs(void)
{
- if (head)
- kasan_record_aux_stack(ptr);
+ return READ_ONCE(rcu_ctrlblk.gp_seq) & 0xffffULL;
+}
+EXPORT_SYMBOL_GPL(rcutorture_gather_gp_seqs);
- __kvfree_call_rcu(head, ptr);
+void rcutorture_format_gp_seqs(unsigned long long seqs, char *cp, size_t len)
+{
+ snprintf(cp, len, "g%04llx", seqs & 0xffffULL);
}
-EXPORT_SYMBOL_GPL(kvfree_call_rcu);
+EXPORT_SYMBOL_GPL(rcutorture_format_gp_seqs);
#endif
void __init rcu_init(void)
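Tiny RCU's rcutorture hooks keep only the low 16 bits of rcu_ctrlblk.gp_seq and print them as "gXXXX"; the Tree RCU versions in the next file pack three counters into one 64-bit value. A small userspace-style illustration of the Tiny encoding (the function names are local to this sketch):

#include <stdio.h>

/* Mirrors the Tiny RCU hooks above. */
static unsigned long long gather_gp_seqs(unsigned long gp_seq)
{
        return gp_seq & 0xffffULL;
}

static void format_gp_seqs(unsigned long long seqs, char *cp, size_t len)
{
        snprintf(cp, len, "g%04llx", seqs & 0xffffULL);
}

int main(void)
{
        char buf[8];

        format_gp_seqs(gather_gp_seqs(0x1234fUL), buf, sizeof(buf));
        printf("%s\n", buf);    /* Prints "g234f". */
        return 0;
}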
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 475f31deed14..659f83e71048 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -538,6 +538,26 @@ void rcutorture_get_gp_data(int *flags, unsigned long *gp_seq)
}
EXPORT_SYMBOL_GPL(rcutorture_get_gp_data);
+/* Gather grace-period sequence numbers for rcutorture diagnostics. */
+unsigned long long rcutorture_gather_gp_seqs(void)
+{
+ return ((READ_ONCE(rcu_state.gp_seq) & 0xffffULL) << 40) |
+ ((READ_ONCE(rcu_state.expedited_sequence) & 0xffffffULL) << 16) |
+ (READ_ONCE(rcu_state.gp_seq_polled) & 0xffffULL);
+}
+EXPORT_SYMBOL_GPL(rcutorture_gather_gp_seqs);
+
+/* Format grace-period sequence numbers for rcutorture diagnostics. */
+void rcutorture_format_gp_seqs(unsigned long long seqs, char *cp, size_t len)
+{
+ unsigned int egp = (seqs >> 16) & 0xffffffULL;
+ unsigned int ggp = (seqs >> 40) & 0xffffULL;
+ unsigned int pgp = seqs & 0xffffULL;
+
+ snprintf(cp, len, "g%04x:e%06x:p%04x", ggp, egp, pgp);
+}
+EXPORT_SYMBOL_GPL(rcutorture_format_gp_seqs);
+
#if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_KVM_XFER_TO_GUEST_WORK))
/*
* An empty function that will trigger a reschedule on
@@ -1254,7 +1274,7 @@ static bool __note_gp_changes(struct rcu_node *rnp, struct rcu_data *rdp)
/* Handle the ends of any preceding grace periods first. */
if (rcu_seq_completed_gp(rdp->gp_seq, rnp->gp_seq) ||
- unlikely(READ_ONCE(rdp->gpwrap))) {
+ unlikely(rdp->gpwrap)) {
if (!offloaded)
ret = rcu_advance_cbs(rnp, rdp); /* Advance CBs. */
rdp->core_needs_qs = false;
@@ -1268,7 +1288,7 @@ static bool __note_gp_changes(struct rcu_node *rnp, struct rcu_data *rdp)
/* Now handle the beginnings of any new-to-this-CPU grace periods. */
if (rcu_seq_new_gp(rdp->gp_seq, rnp->gp_seq) ||
- unlikely(READ_ONCE(rdp->gpwrap))) {
+ unlikely(rdp->gpwrap)) {
/*
* If the current grace period is waiting for this CPU,
* set up to detect a quiescent state, otherwise don't
@@ -1283,7 +1303,7 @@ static bool __note_gp_changes(struct rcu_node *rnp, struct rcu_data *rdp)
rdp->gp_seq = rnp->gp_seq; /* Remember new grace-period state. */
if (ULONG_CMP_LT(rdp->gp_seq_needed, rnp->gp_seq_needed) || rdp->gpwrap)
WRITE_ONCE(rdp->gp_seq_needed, rnp->gp_seq_needed);
- if (IS_ENABLED(CONFIG_PROVE_RCU) && READ_ONCE(rdp->gpwrap))
+ if (IS_ENABLED(CONFIG_PROVE_RCU) && rdp->gpwrap)
WRITE_ONCE(rdp->last_sched_clock, jiffies);
WRITE_ONCE(rdp->gpwrap, false);
rcu_gpnum_ovf(rnp, rdp);
@@ -1612,12 +1632,10 @@ static void rcu_sr_normal_complete(struct llist_node *node)
{
struct rcu_synchronize *rs = container_of(
(struct rcu_head *) node, struct rcu_synchronize, head);
- unsigned long oldstate = (unsigned long) rs->head.func;
WARN_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) &&
- !poll_state_synchronize_rcu(oldstate),
- "A full grace period is not passed yet: %lu",
- rcu_seq_diff(get_state_synchronize_rcu(), oldstate));
+ !poll_state_synchronize_rcu_full(&rs->oldstate),
+ "A full grace period is not passed yet!\n");
/* Finally. */
complete(&rs->completion);
@@ -1801,10 +1819,14 @@ static noinline_for_stack bool rcu_gp_init(void)
/* Advance to a new grace period and initialize state. */
record_gp_stall_check_time();
+ /*
+ * A new wait segment must be started before gp_seq is advanced, so
+ * that previous GP waiters won't observe the new gp_seq.
+ */
+ start_new_poll = rcu_sr_normal_gp_init();
/* Record GP times before starting GP, hence rcu_seq_start(). */
rcu_seq_start(&rcu_state.gp_seq);
ASSERT_EXCLUSIVE_WRITER(rcu_state.gp_seq);
- start_new_poll = rcu_sr_normal_gp_init();
trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("start"));
rcu_poll_gp_seq_start(&rcu_state.gp_seq_polled_snap);
raw_spin_unlock_irq_rcu_node(rnp);
@@ -2931,13 +2953,8 @@ static int __init rcu_spawn_core_kthreads(void)
static void rcutree_enqueue(struct rcu_data *rdp, struct rcu_head *head, rcu_callback_t func)
{
rcu_segcblist_enqueue(&rdp->cblist, head);
- if (__is_kvfree_rcu_offset((unsigned long)func))
- trace_rcu_kvfree_callback(rcu_state.name, head,
- (unsigned long)func,
- rcu_segcblist_n_cbs(&rdp->cblist));
- else
- trace_rcu_callback(rcu_state.name, head,
- rcu_segcblist_n_cbs(&rdp->cblist));
+ trace_rcu_callback(rcu_state.name, head,
+ rcu_segcblist_n_cbs(&rdp->cblist));
trace_rcu_segcb_stats(&rdp->cblist, TPS("SegCBQueued"));
}
@@ -3107,7 +3124,7 @@ module_param(enable_rcu_lazy, bool, 0444);
* critical sections have completed.
*
* Use this API instead of call_rcu() if you don't want the callback to be
- * invoked after very long periods of time, which can happen on systems without
+ * delayed for very long periods of time, which can happen on systems without
* memory pressure and on systems which are lightly loaded or mostly idle.
* This function will cause callbacks to be invoked sooner than later at the
* expense of extra power. Other than that, this function is identical to, and
@@ -3138,6 +3155,12 @@ EXPORT_SYMBOL_GPL(call_rcu_hurry);
* might well execute concurrently with RCU read-side critical sections
* that started after call_rcu() was invoked.
*
+ * It is perfectly legal to repost an RCU callback, potentially with
+ * a different callback function, from within its callback function.
+ * The specified function will be invoked after another full grace period
+ * has elapsed. This use case is similar in form to the common practice
+ * of reposting a timer from within its own handler.
+ *
* RCU read-side critical sections are delimited by rcu_read_lock()
* and rcu_read_unlock(), and may be nested. In addition, but only in
* v5.0 and later, regions of code across which interrupts, preemption,
@@ -3166,6 +3189,13 @@ EXPORT_SYMBOL_GPL(call_rcu_hurry);
*
* Implementation of these memory-ordering guarantees is described here:
* Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst.
+ *
+ * Specific to call_rcu() (as opposed to the other call_rcu*() functions),
+ * in kernels built with CONFIG_RCU_LAZY=y, call_rcu() might delay for many
+ * seconds before starting the grace period needed by the corresponding
+ * callback. This delay can significantly improve energy-efficiency
+ * on low-utilization battery-powered devices. To avoid this delay,
+ * in latency-sensitive kernel code, use call_rcu_hurry().
*/
void call_rcu(struct rcu_head *head, rcu_callback_t func)
{
@@ -3214,7 +3244,7 @@ static void synchronize_rcu_normal(void)
* snapshot before adding a request.
*/
if (IS_ENABLED(CONFIG_PROVE_RCU))
- rs.head.func = (void *) get_state_synchronize_rcu();
+ get_state_synchronize_rcu_full(&rs.oldstate);
rcu_sr_normal_add_req(&rs);
@@ -3357,14 +3387,17 @@ EXPORT_SYMBOL_GPL(get_state_synchronize_rcu);
*/
void get_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp)
{
- struct rcu_node *rnp = rcu_get_root();
-
/*
* Any prior manipulation of RCU-protected data must happen
* before the loads from ->gp_seq and ->expedited_sequence.
*/
smp_mb(); /* ^^^ */
- rgosp->rgos_norm = rcu_seq_snap(&rnp->gp_seq);
+
+ // Yes, rcu_state.gp_seq, not rnp_root->gp_seq, the latter's use
+ // in poll_state_synchronize_rcu_full() notwithstanding. Use of
+ // the latter here would result in too-short grace periods due to
+ // interactions with newly onlined CPUs.
+ rgosp->rgos_norm = rcu_seq_snap(&rcu_state.gp_seq);
rgosp->rgos_exp = rcu_seq_snap(&rcu_state.expedited_sequence);
}
EXPORT_SYMBOL_GPL(get_state_synchronize_rcu_full);
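The expanded call_rcu() kernel-doc above documents two points worth a concrete example: a callback may legally repost itself, much like a self-rearming timer, and under CONFIG_RCU_LAZY the grace period behind a call_rcu() may be delayed for many seconds, so latency-sensitive paths should use call_rcu_hurry(). A minimal sketch of the reposting pattern, with invented structure and function names:

#include <linux/rcupdate.h>
#include <linux/atomic.h>
#include <linux/container_of.h>

/* Hypothetical object whose callback re-arms itself each grace period. */
struct repost_obj {
        struct rcu_head rh;
        atomic_t stop;
};

static void repost_cb(struct rcu_head *rhp)
{
        struct repost_obj *rop = container_of(rhp, struct repost_obj, rh);

        if (atomic_read(&rop->stop))
                return;         /* Final invocation: do not repost. */

        /* ... per-grace-period work goes here ... */

        /* The callback runs again after another full grace period.  Use
         * call_rcu_hurry() instead if the CONFIG_RCU_LAZY delay matters. */
        call_rcu(&rop->rh, repost_cb);
}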
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 77efed89c79e..8d4895c854c5 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -230,17 +230,19 @@ static void __maybe_unused rcu_report_exp_rnp(struct rcu_node *rnp, bool wake)
* specified leaf rcu_node structure, which is acquired by the caller.
*/
static void rcu_report_exp_cpu_mult(struct rcu_node *rnp, unsigned long flags,
- unsigned long mask, bool wake)
+ unsigned long mask_in, bool wake)
__releases(rnp->lock)
{
int cpu;
+ unsigned long mask;
struct rcu_data *rdp;
raw_lockdep_assert_held_rcu_node(rnp);
- if (!(rnp->expmask & mask)) {
+ if (!(rnp->expmask & mask_in)) {
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
return;
}
+ mask = mask_in & rnp->expmask;
WRITE_ONCE(rnp->expmask, rnp->expmask & ~mask);
for_each_leaf_node_cpu_mask(rnp, cpu, mask) {
rdp = per_cpu_ptr(&rcu_data, cpu);
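The tree_exp.h fix filters the caller-supplied mask through the leaf's ->expmask so that CPUs whose expedited quiescent state was already reported are not processed a second time. A tiny standalone illustration of that filtering:

#include <stdio.h>

/* Illustrates the mask_in filtering added to rcu_report_exp_cpu_mult(). */
int main(void)
{
        unsigned long expmask = 0xa;    /* CPUs 1 and 3 still pending.     */
        unsigned long mask_in = 0xe;    /* Report covers CPUs 1, 2, and 3. */
        unsigned long mask = mask_in & expmask;

        printf("CPUs acted upon: %#lx\n", mask);        /* 0xa: CPU 2 skipped. */
        return 0;
}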
diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index 2605dd234a13..fa269d34167a 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -206,7 +206,7 @@ static bool __wake_nocb_gp(struct rcu_data *rdp_gp,
if (rdp_gp->nocb_defer_wakeup > RCU_NOCB_WAKE_NOT) {
WRITE_ONCE(rdp_gp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
- del_timer(&rdp_gp->nocb_timer);
+ timer_delete(&rdp_gp->nocb_timer);
}
if (force || READ_ONCE(rdp_gp->nocb_gp_sleep)) {
@@ -822,7 +822,7 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
if (my_rdp->nocb_defer_wakeup > RCU_NOCB_WAKE_NOT) {
WRITE_ONCE(my_rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
- del_timer(&my_rdp->nocb_timer);
+ timer_delete(&my_rdp->nocb_timer);
}
WRITE_ONCE(my_rdp->nocb_gp_sleep, true);
raw_spin_unlock_irqrestore(&my_rdp->nocb_gp_lock, flags);
@@ -1557,8 +1557,11 @@ static void show_rcu_nocb_gp_state(struct rcu_data *rdp)
/* Dump out nocb kthread state for the specified rcu_data structure. */
static void show_rcu_nocb_state(struct rcu_data *rdp)
{
- char bufw[20];
- char bufr[20];
+ char bufd[22];
+ char bufw[45];
+ char bufr[45];
+ char bufn[22];
+ char bufb[22];
struct rcu_data *nocb_next_rdp;
struct rcu_segcblist *rsclp = &rdp->cblist;
bool waslocked;
@@ -1572,9 +1575,13 @@ static void show_rcu_nocb_state(struct rcu_data *rdp)
typeof(*rdp),
nocb_entry_rdp);
- sprintf(bufw, "%ld", rsclp->gp_seq[RCU_WAIT_TAIL]);
- sprintf(bufr, "%ld", rsclp->gp_seq[RCU_NEXT_READY_TAIL]);
- pr_info(" CB %d^%d->%d %c%c%c%c%c F%ld L%ld C%d %c%c%s%c%s%c%c q%ld %c CPU %d%s\n",
+ sprintf(bufd, "%ld", rsclp->seglen[RCU_DONE_TAIL]);
+ sprintf(bufw, "%ld(%ld)", rsclp->seglen[RCU_WAIT_TAIL], rsclp->gp_seq[RCU_WAIT_TAIL]);
+ sprintf(bufr, "%ld(%ld)", rsclp->seglen[RCU_NEXT_READY_TAIL],
+ rsclp->gp_seq[RCU_NEXT_READY_TAIL]);
+ sprintf(bufn, "%ld", rsclp->seglen[RCU_NEXT_TAIL]);
+ sprintf(bufb, "%ld", rcu_cblist_n_cbs(&rdp->nocb_bypass));
+ pr_info(" CB %d^%d->%d %c%c%c%c%c F%ld L%ld C%d %c%s%c%s%c%s%c%s%c%s q%ld %c CPU %d%s\n",
rdp->cpu, rdp->nocb_gp_rdp->cpu,
nocb_next_rdp ? nocb_next_rdp->cpu : -1,
"kK"[!!rdp->nocb_cb_kthread],
@@ -1586,12 +1593,15 @@ static void show_rcu_nocb_state(struct rcu_data *rdp)
jiffies - rdp->nocb_nobypass_last,
rdp->nocb_nobypass_count,
".D"[rcu_segcblist_ready_cbs(rsclp)],
+ rcu_segcblist_segempty(rsclp, RCU_DONE_TAIL) ? "" : bufd,
".W"[!rcu_segcblist_segempty(rsclp, RCU_WAIT_TAIL)],
rcu_segcblist_segempty(rsclp, RCU_WAIT_TAIL) ? "" : bufw,
".R"[!rcu_segcblist_segempty(rsclp, RCU_NEXT_READY_TAIL)],
rcu_segcblist_segempty(rsclp, RCU_NEXT_READY_TAIL) ? "" : bufr,
".N"[!rcu_segcblist_segempty(rsclp, RCU_NEXT_TAIL)],
+ rcu_segcblist_segempty(rsclp, RCU_NEXT_TAIL) ? "" : bufn,
".B"[!!rcu_cblist_n_cbs(&rdp->nocb_bypass)],
+ !rcu_cblist_n_cbs(&rdp->nocb_bypass) ? "" : bufb,
rcu_segcblist_n_cbs(&rdp->cblist),
rdp->nocb_cb_kthread ? task_state_to_char(rdp->nocb_cb_kthread) : '.',
rdp->nocb_cb_kthread ? (int)task_cpu(rdp->nocb_cb_kthread) : -1,
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 3600152b858e..3c0bbbbb686f 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -833,8 +833,17 @@ void rcu_read_unlock_strict(void)
{
struct rcu_data *rdp;
- if (irqs_disabled() || preempt_count() || !rcu_state.gp_kthread)
+ if (irqs_disabled() || in_atomic_preempt_off() || !rcu_state.gp_kthread)
return;
+
+ /*
+ * rcu_report_qs_rdp() can only be invoked with a stable rdp and
+ * from the local CPU.
+ *
+ * The in_atomic_preempt_off() check ensures that we come here holding
+ * the last preempt_count (which will get dropped once we return to
+ * __rcu_read_unlock()).
+ */
rdp = this_cpu_ptr(&rcu_data);
rdp->cpu_no_qs.b.norm = false;
rcu_report_qs_rdp(rdp);
@@ -975,13 +984,16 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
*/
static void rcu_flavor_sched_clock_irq(int user)
{
- if (user || rcu_is_cpu_rrupt_from_idle()) {
+ if (user || rcu_is_cpu_rrupt_from_idle() ||
+ (IS_ENABLED(CONFIG_PREEMPT_COUNT) &&
+ (preempt_count() == HARDIRQ_OFFSET))) {
/*
* Get here if this CPU took its interrupt from user
- * mode or from the idle loop, and if this is not a
- * nested interrupt. In this case, the CPU is in
- * a quiescent state, so note it.
+ * mode, from the idle loop without this being a nested
+ * interrupt, or while not holding the task preempt count
+ * (with PREEMPT_COUNT=y). In this case, the CPU is in a
+ * quiescent state, so note it.
*
* No memory barrier is required here because rcu_qs()
* references only CPU-local variables that other CPUs