From 3705b88db0d7cc4a097c32d9e554054103d3f807 Mon Sep 17 00:00:00 2001
From: Antti P Miettinen
Date: Fri, 5 Oct 2012 09:59:15 +0300
Subject: rcu: Add a module parameter to force use of expedited RCU primitives

There have been some embedded applications that would benefit from
use of expedited grace-period primitives. In some ways, this is
similar to synchronize_net() doing either a normal or an expedited
grace period depending on lock state, but with control outside of
the kernel.

This commit therefore adds rcu_expedited boot and sysfs parameters
that cause the kernel to substitute expedited primitives for the
normal grace-period primitives.

[ paulmck: Add trace/event/rcu.h to kernel/srcu.c to avoid build error.
  Get rid of infinite loop through contention path.]

Signed-off-by: Antti P Miettinen
Signed-off-by: Paul E. McKenney
---
 kernel/rcutree_plugin.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'kernel/rcutree_plugin.h')

diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index f92115488187..c177ba0cce9a 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -679,7 +679,10 @@ void synchronize_rcu(void)
 			   "Illegal synchronize_rcu() in RCU read-side critical section");
 	if (!rcu_scheduler_active)
 		return;
-	wait_rcu_gp(call_rcu);
+	if (rcu_expedited)
+		synchronize_rcu_expedited();
+	else
+		wait_rcu_gp(call_rcu);
 }
 EXPORT_SYMBOL_GPL(synchronize_rcu);
 
@@ -831,7 +834,7 @@ void synchronize_rcu_expedited(void)
 			udelay(trycount * num_online_cpus());
 		} else {
 			put_online_cpus();
-			synchronize_rcu();
+			wait_rcu_gp(call_rcu);
 			return;
 		}
 	}
--
cgit v1.2.3
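The shape of the change above is easy to model outside the kernel. Below is a
minimal user-space sketch (illustration only; every name in it is invented) of
the two ideas in this patch: a boolean knob that redirects the normal
grace-period API to its expedited counterpart, and a contention fallback inside
the expedited path that calls the raw normal primitive rather than the
knob-aware wrapper, which is how the patch avoids the infinite loop mentioned
in the changelog. Here force_expedited stands in for rcu_expedited,
gp_normal_raw() for wait_rcu_gp(call_rcu), gp_expedited() for
synchronize_rcu_expedited(), and my_synchronize() for synchronize_rcu().

/*
 * Illustration only: user-space analogue of the rcu_expedited switch.
 */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static bool force_expedited;		/* analogue of the rcu_expedited parameter */

static void gp_normal_raw(void)		/* analogue of wait_rcu_gp(call_rcu) */
{
	puts("normal grace period");
}

static void gp_expedited(void)		/* analogue of synchronize_rcu_expedited() */
{
	bool too_much_contention = false;	/* stands in for the trylock loop */

	if (too_much_contention) {
		/*
		 * Fall back to the raw normal primitive, not my_synchronize():
		 * with force_expedited set, the wrapper would send us straight
		 * back here, which is the loop the changelog mentions.
		 */
		gp_normal_raw();
		return;
	}
	puts("expedited grace period");
}

static void my_synchronize(void)	/* analogue of synchronize_rcu() */
{
	if (force_expedited)
		gp_expedited();
	else
		gp_normal_raw();
}

int main(int argc, char **argv)
{
	force_expedited = argc > 1 && !strcmp(argv[1], "expedited");
	my_synchronize();
	return 0;
}

Run with no arguments it takes the normal path; run with the argument
"expedited" it behaves the way the kernel does once the rcu_expedited
parameter is set.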
From 7b2e6011f150c42235c4a541d20cf6891afe878a Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Mon, 8 Oct 2012 10:54:03 -0700
Subject: rcu: Rename ->onofflock to ->orphan_lock

The ->onofflock field in the rcu_state structure at one time synchronized
CPU-hotplug operations for RCU. However, its scope has decreased over time
so that it now only protects the lists of orphaned RCU callbacks. This
commit therefore renames it to ->orphan_lock to reflect its current use.

Signed-off-by: Paul E. McKenney
Signed-off-by: Paul E. McKenney
---
 kernel/rcutree.c        | 12 ++++++------
 kernel/rcutree.h        |  7 +++----
 kernel/rcutree_plugin.h |  3 ++-
 3 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'kernel/rcutree_plugin.h')

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index ac8aed8ee417..89148860e2ba 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -70,7 +70,7 @@ static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
 	.fqs_state = RCU_GP_IDLE, \
 	.gpnum = -300, \
 	.completed = -300, \
-	.onofflock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.onofflock), \
+	.orphan_lock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.orphan_lock), \
 	.orphan_nxttail = &sname##_state.orphan_nxtlist, \
 	.orphan_donetail = &sname##_state.orphan_donelist, \
 	.barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
@@ -1573,7 +1573,7 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
 /*
  * Send the specified CPU's RCU callbacks to the orphanage. The
  * specified CPU must be offline, and the caller must hold the
- * ->onofflock.
+ * ->orphan_lock.
  */
 static void
 rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
@@ -1623,7 +1623,7 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
 
 /*
  * Adopt the RCU callbacks from the specified rcu_state structure's
- * orphanage. The caller must hold the ->onofflock.
+ * orphanage. The caller must hold the ->orphan_lock.
  */
 static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
 {
@@ -1702,7 +1702,7 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
 
 	/* Exclude any attempts to start a new grace period. */
 	mutex_lock(&rsp->onoff_mutex);
-	raw_spin_lock_irqsave(&rsp->onofflock, flags);
+	raw_spin_lock_irqsave(&rsp->orphan_lock, flags);
 
 	/* Orphan the dead CPU's callbacks, and adopt them if appropriate. */
 	rcu_send_cbs_to_orphanage(cpu, rsp, rnp, rdp);
@@ -1729,10 +1729,10 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
 	/*
 	 * We still hold the leaf rcu_node structure lock here, and
 	 * irqs are still disabled. The reason for this subterfuge is
-	 * because invoking rcu_report_unblock_qs_rnp() with ->onofflock
+	 * because invoking rcu_report_unblock_qs_rnp() with ->orphan_lock
 	 * held leads to deadlock.
 	 */
-	raw_spin_unlock(&rsp->onofflock); /* irqs remain disabled. */
+	raw_spin_unlock(&rsp->orphan_lock); /* irqs remain disabled. */
 	rnp = rdp->mynode;
 	if (need_report & RCU_OFL_TASKS_NORM_GP)
 		rcu_report_unblock_qs_rnp(rnp, flags);
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index a240f032848e..a7c945d149cf 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -383,9 +383,8 @@ struct rcu_state {
 
 	/* End of fields guarded by root rcu_node's lock. */
 
-	raw_spinlock_t onofflock ____cacheline_internodealigned_in_smp;
-						/* exclude on/offline and */
-						/* starting new GP. */
+	raw_spinlock_t orphan_lock ____cacheline_internodealigned_in_smp;
+						/* Protect following fields. */
 	struct rcu_head *orphan_nxtlist;	/* Orphaned callbacks that */
 						/*  need a grace period. */
 	struct rcu_head **orphan_nxttail;	/* Tail of above. */
@@ -394,7 +393,7 @@ struct rcu_state {
 	struct rcu_head **orphan_donetail;	/* Tail of above. */
 	long qlen_lazy;				/* Number of lazy callbacks. */
 	long qlen;				/* Total number of callbacks. */
-	/* End of fields guarded by onofflock. */
+	/* End of fields guarded by orphan_lock. */
 
 	struct mutex onoff_mutex;		/* Coordinate hotplug & GPs. */
 
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index f92115488187..2b281cf0b6f2 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -757,7 +757,8 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
  * grace period for the specified rcu_node structure. If there are no such
  * tasks, report it up the rcu_node hierarchy.
  *
- * Caller must hold sync_rcu_preempt_exp_mutex and rsp->onofflock.
+ * Caller must hold sync_rcu_preempt_exp_mutex and must exclude
+ * CPU hotplug operations.
  */
 static void
 sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
--
cgit v1.2.3
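Since the rename narrows the documented role of this lock to the
orphaned-callback lists, a small user-space model may help make that role
concrete. The sketch below is illustration only: struct cb, the orphanage
structure, and both functions are invented, and it needs -pthread to build. It
mimics the head/tail-pointer splicing that ->orphan_lock now protects: a
departing CPU donates its whole callback list under the lock, and a surviving
CPU later adopts the accumulated list under the same lock.

/*
 * Illustration only: a plain-pthreads model of the state guarded by
 * ->orphan_lock (analogues of orphan_nxtlist/orphan_nxttail).
 */
#include <pthread.h>
#include <stdio.h>

struct cb {
	struct cb *next;
	int id;
};

static struct {
	pthread_mutex_t lock;	/* analogue of rsp->orphan_lock    */
	struct cb *list;	/* analogue of rsp->orphan_nxtlist */
	struct cb **tail;	/* analogue of rsp->orphan_nxttail */
} orphanage = {
	.lock = PTHREAD_MUTEX_INITIALIZER,
	.list = NULL,
	.tail = &orphanage.list,	/* empty list: tail points at the head */
};

/* Splice a departing CPU's whole callback list into the orphanage. */
static void send_cbs_to_orphanage(struct cb *head, struct cb **tail)
{
	pthread_mutex_lock(&orphanage.lock);
	*orphanage.tail = head;		/* append at the current tail...  */
	orphanage.tail = tail;		/* ...and adopt the donor's tail. */
	pthread_mutex_unlock(&orphanage.lock);
}

/* Adopt everything currently sitting in the orphanage. */
static struct cb *adopt_orphan_cbs(void)
{
	struct cb *head;

	pthread_mutex_lock(&orphanage.lock);
	head = orphanage.list;
	orphanage.list = NULL;
	orphanage.tail = &orphanage.list;
	pthread_mutex_unlock(&orphanage.lock);
	return head;
}

int main(void)
{
	struct cb a = { .next = NULL, .id = 1 };
	struct cb b = { .next = NULL, .id = 2 };
	struct cb *p;

	a.next = &b;				/* a "dying CPU's" two callbacks */
	send_cbs_to_orphanage(&a, &b.next);
	for (p = adopt_orphan_cbs(); p; p = p->next)
		printf("adopted callback %d\n", p->id);
	return 0;
}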
From f0a0e6f282c72247e7c8ec17c68d528c1bb4d49e Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Tue, 23 Oct 2012 13:47:01 -0700
Subject: rcu: Clarify memory-ordering properties of grace-period primitives

This commit explicitly states the memory-ordering properties of the
RCU grace-period primitives. Although these properties were in some
sense implied by the fundamental property of RCU ("a grace period must
wait for all pre-existing RCU read-side critical sections to complete"),
stating them explicitly will be a great labor-saving device.

Reported-by: Oleg Nesterov
Signed-off-by: Paul E. McKenney
Reviewed-by: Oleg Nesterov
---
 include/linux/rcupdate.h | 25 +++++++++++++++++++++++++
 kernel/rcutree.c         | 29 +++++++++++++++++++++++++----
 kernel/rcutree_plugin.h  |  8 ++++++++
 3 files changed, 58 insertions(+), 4 deletions(-)

(limited to 'kernel/rcutree_plugin.h')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 7c968e4f929e..6256759fb81e 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -90,6 +90,25 @@ extern void do_trace_rcu_torture_read(char *rcutorturename,
  * that started after call_rcu() was invoked. RCU read-side critical
  * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
  * and may be nested.
+ *
+ * Note that all CPUs must agree that the grace period extended beyond
+ * all pre-existing RCU read-side critical section. On systems with more
+ * than one CPU, this means that when "func()" is invoked, each CPU is
+ * guaranteed to have executed a full memory barrier since the end of its
+ * last RCU read-side critical section whose beginning preceded the call
+ * to call_rcu(). It also means that each CPU executing an RCU read-side
+ * critical section that continues beyond the start of "func()" must have
+ * executed a memory barrier after the call_rcu() but before the beginning
+ * of that RCU read-side critical section. Note that these guarantees
+ * include CPUs that are offline, idle, or executing in user mode, as
+ * well as CPUs that are executing in the kernel.
+ *
+ * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
+ * resulting RCU callback function "func()", then both CPU A and CPU B are
+ * guaranteed to execute a full memory barrier during the time interval
+ * between the call to call_rcu() and the invocation of "func()" -- even
+ * if CPU A and CPU B are the same CPU (but again only if the system has
+ * more than one CPU).
  */
 extern void call_rcu(struct rcu_head *head,
 		     void (*func)(struct rcu_head *head));
@@ -118,6 +137,9 @@ extern void call_rcu(struct rcu_head *head,
  * OR
  * - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context.
  * These may be nested.
+ *
+ * See the description of call_rcu() for more detailed information on
+ * memory ordering guarantees.
  */
 extern void call_rcu_bh(struct rcu_head *head,
 			void (*func)(struct rcu_head *head));
@@ -137,6 +159,9 @@ extern void call_rcu_bh(struct rcu_head *head,
  * OR
  * anything that disables preemption.
  * These may be nested.
+ *
+ * See the description of call_rcu() for more detailed information on
+ * memory ordering guarantees.
  */
 extern void call_rcu_sched(struct rcu_head *head,
 			   void (*func)(struct rcu_head *rcu));
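The guarantees added to the call_rcu() documentation above are what make the
usual remove-then-free pattern work without explicit memory barriers in the
updater, the readers, or the callback. The fragment below is a kernel-style
sketch only, not code from this patch: struct foo, foo_read(), foo_update(),
and foo_reclaim() are invented, and a single updater is assumed, so no
update-side locking is shown.

/*
 * Illustration only: canonical publish/retire pattern relying on the
 * call_rcu() ordering guarantees documented above.
 */
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {
	int value;
	struct rcu_head rh;
};

static struct foo __rcu *global_foo;

static void foo_reclaim(struct rcu_head *rh)
{
	struct foo *fp = container_of(rh, struct foo, rh);

	/*
	 * No barriers needed here: the full barriers implied by the
	 * call_rcu() guarantees make the updater's stores preceding
	 * call_rcu() visible, and no pre-existing reader still holds fp.
	 */
	kfree(fp);
}

/* Reader: may run on any CPU, concurrently with foo_update(). */
static int foo_read(void)
{
	struct foo *fp;
	int val = -1;

	rcu_read_lock();
	fp = rcu_dereference(global_foo);
	if (fp)
		val = fp->value;
	rcu_read_unlock();
	return val;
}

/* Updater: publish a new element, retire the old one asynchronously. */
static int foo_update(int value)
{
	struct foo *newp, *oldp;

	newp = kmalloc(sizeof(*newp), GFP_KERNEL);
	if (!newp)
		return -ENOMEM;
	newp->value = value;			/* initialized before publication */

	oldp = rcu_dereference_protected(global_foo, 1);  /* single updater */
	rcu_assign_pointer(global_foo, newp);	/* readers now see newp or oldp */
	if (oldp)
		call_rcu(&oldp->rh, foo_reclaim);	/* free after a grace period */
	return 0;
}

A reader that fetched oldp before the rcu_assign_pointer() keeps a valid
pointer until it leaves its read-side critical section; foo_reclaim() is
guaranteed not to run until every such reader has done so.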
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index e4c2192b47c8..15a2beec320f 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -2228,10 +2228,28 @@ static inline int rcu_blocking_is_gp(void)
  * rcu_read_lock_sched().
  *
  * This means that all preempt_disable code sequences, including NMI and
- * hardware-interrupt handlers, in progress on entry will have completed
- * before this primitive returns. However, this does not guarantee that
- * softirq handlers will have completed, since in some kernels, these
- * handlers can run in process context, and can block.
+ * non-threaded hardware-interrupt handlers, in progress on entry will
+ * have completed before this primitive returns. However, this does not
+ * guarantee that softirq handlers will have completed, since in some
+ * kernels, these handlers can run in process context, and can block.
+ *
+ * Note that this guarantee implies further memory-ordering guarantees.
+ * On systems with more than one CPU, when synchronize_sched() returns,
+ * each CPU is guaranteed to have executed a full memory barrier since the
+ * end of its last RCU-sched read-side critical section whose beginning
+ * preceded the call to synchronize_sched(). In addition, each CPU having
+ * an RCU read-side critical section that extends beyond the return from
+ * synchronize_sched() is guaranteed to have executed a full memory barrier
+ * after the beginning of synchronize_sched() and before the beginning of
+ * that RCU read-side critical section. Note that these guarantees include
+ * CPUs that are offline, idle, or executing in user mode, as well as CPUs
+ * that are executing in the kernel.
+ *
+ * Furthermore, if CPU A invoked synchronize_sched(), which returned
+ * to its caller on CPU B, then both CPU A and CPU B are guaranteed
+ * to have executed a full memory barrier during the execution of
+ * synchronize_sched() -- even if CPU A and CPU B are the same CPU (but
+ * again only if the system has more than one CPU).
  *
  * This primitive provides the guarantees made by the (now removed)
  * synchronize_kernel() API. In contrast, synchronize_rcu() only
@@ -2259,6 +2277,9 @@ EXPORT_SYMBOL_GPL(synchronize_sched);
  * read-side critical sections have completed. RCU read-side critical
  * sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(),
  * and may be nested.
+ *
+ * See the description of synchronize_sched() for more detailed information
+ * on memory ordering guarantees.
  */
 void synchronize_rcu_bh(void)
 {
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index f92115488187..57e0ef8ed721 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -670,6 +670,9 @@ EXPORT_SYMBOL_GPL(kfree_call_rcu);
  * concurrently with new RCU read-side critical sections that began while
  * synchronize_rcu() was waiting. RCU read-side critical sections are
  * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested.
+ *
+ * See the description of synchronize_sched() for more detailed information
+ * on memory ordering guarantees.
  */
 void synchronize_rcu(void)
 {
@@ -875,6 +878,11 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
 
 /**
  * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
+ *
+ * Note that this primitive does not necessarily wait for an RCU grace period
+ * to complete. For example, if there are no RCU callbacks queued anywhere
+ * in the system, then rcu_barrier() is within its rights to return
+ * immediately, without waiting for anything, much less an RCU grace period.
  */
 void rcu_barrier(void)
 {
--
cgit v1.2.3
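The rcu_barrier() note above matters most in its classic use case: tearing
down something that posts call_rcu() callbacks, for example at module unload.
The sketch below is hypothetical (foo_init(), foo_exit(), and
stop_new_callbacks() are invented names, and the module body that actually
queues callbacks is omitted). The point is the division of labor:
synchronize_rcu() waits for pre-existing readers, while rcu_barrier() waits
only for callbacks that are already queued, and may return immediately if
there are none.

/*
 * Illustration only: hypothetical exit path for a module whose updaters
 * post callbacks with call_rcu().
 */
#include <linux/module.h>
#include <linux/rcupdate.h>

static void stop_new_callbacks(void)	/* hypothetical helper */
{
	/* Unhook updaters so that no further call_rcu() invocations occur. */
}

static int __init foo_init(void)
{
	return 0;
}

static void __exit foo_exit(void)
{
	/* 1. Make sure no new callbacks can be queued from here on. */
	stop_new_callbacks();

	/* 2. Wait for readers that might still reference published data. */
	synchronize_rcu();

	/*
	 * 3. Wait for every already-queued callback to be invoked, so that
	 *    none can run after the module text is freed. As noted above,
	 *    if nothing is queued this may return at once; it is not a
	 *    substitute for the synchronize_rcu() in step 2.
	 */
	rcu_barrier();
}

module_init(foo_init);
module_exit(foo_exit);
MODULE_LICENSE("GPL");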