From 84483ea42cd4f2781d6e97a83ab3ebd0ff19fb10 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 16 Jun 2010 16:48:13 -0700 Subject: rcu: add shiny new debug assists to Documentation/RCU/checklist.txt Add a section describing PROVE_RCU, DEBUG_OBJECTS_RCU_HEAD, and the __rcu sparse checking to the RCU checklist. Suggested-by: David Miller Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- Documentation/RCU/checklist.txt | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'Documentation') diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt index 790d1a812376..c7c6788956f4 100644 --- a/Documentation/RCU/checklist.txt +++ b/Documentation/RCU/checklist.txt @@ -365,3 +365,26 @@ over a rather long period of time, but improvements are always welcome! and the compiler to freely reorder code into and out of RCU read-side critical sections. It is the responsibility of the RCU update-side primitives to deal with this. + +17. Use CONFIG_PROVE_RCU, CONFIG_DEBUG_OBJECTS_RCU_HEAD, and + the __rcu sparse checks to validate your RCU code. These + can help find problems as follows: + + CONFIG_PROVE_RCU: check that accesses to RCU-protected data + structures are carried out under the proper RCU + read-side critical section, while holding the right + combination of locks, or whatever other conditions + are appropriate. + + CONFIG_DEBUG_OBJECTS_RCU_HEAD: check that you don't pass the + same object to call_rcu() (or friends) before an RCU + grace period has elapsed since the last time that you + passed that same object to call_rcu() (or friends). + + __rcu sparse checks: tag the pointer to the RCU-protected data + structure with __rcu, and sparse will warn you if you + access that pointer without the services of one of the + variants of rcu_dereference(). + + These debugging aids can help you find problems that are + otherwise extremely difficult to spot. -- cgit v1.2.3 From 65e423f8ee5843e1ea3f2d94adf4ba3560a17f7b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 19 May 2010 10:42:16 -0700 Subject: Update documentation to note the passage of INIT_RCU_HEAD() Signed-off-by: Alexey Dobriyan Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- Documentation/DocBook/kernel-locking.tmpl | 8 -------- 1 file changed, 8 deletions(-) (limited to 'Documentation') diff --git a/Documentation/DocBook/kernel-locking.tmpl b/Documentation/DocBook/kernel-locking.tmpl index 084f6ad7b7a0..e6cc57460212 100644 --- a/Documentation/DocBook/kernel-locking.tmpl +++ b/Documentation/DocBook/kernel-locking.tmpl @@ -1725,14 +1725,6 @@ the amount of locking which needs to be done. if (++cache_num > MAX_CACHE_SIZE) { struct object *i, *outcast = NULL; list_for_each_entry(i, &cache, list) { -@@ -85,6 +94,7 @@ - obj->popularity = 0; - atomic_set(&obj->refcnt, 1); /* The cache holds a reference */ - spin_lock_init(&obj->lock); -+ INIT_RCU_HEAD(&obj->rcu); - - spin_lock_irqsave(&cache_lock, flags); - __cache_add(obj); @@ -104,12 +114,11 @@ struct object *cache_find(int id) { -- cgit v1.2.3 From ded5e5ed2f3348ba2f9a319c6497e46c22850e97 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 19 May 2010 10:46:55 -0700 Subject: Update call_rcu() usage, add synchronize_rcu() Reported-by: Kyle Hubert Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- Documentation/DocBook/kernel-locking.tmpl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/DocBook/kernel-locking.tmpl b/Documentation/DocBook/kernel-locking.tmpl index e6cc57460212..ed64d220baf2 100644 --- a/Documentation/DocBook/kernel-locking.tmpl +++ b/Documentation/DocBook/kernel-locking.tmpl @@ -1645,7 +1645,9 @@ the amount of locking which needs to be done. all the readers who were traversing the list when we deleted the element are finished. We use call_rcu() to register a callback which will actually destroy the object once - the readers are finished. + all pre-existing readers are finished. Alternatively, + synchronize_rcu() may be used to block until + all pre-existing are finished. But how does Read Copy Update know when the readers are @@ -1714,7 +1716,7 @@ the amount of locking which needs to be done. - object_put(obj); + list_del_rcu(&obj->list); cache_num--; -+ call_rcu(&obj->rcu, cache_delete_rcu, obj); ++ call_rcu(&obj->rcu, cache_delete_rcu); } /* Must be holding cache_lock */ -- cgit v1.2.3 From 5cc6517abdeccb6690b344a43b5ce8eaee82da3c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 13 Aug 2010 16:34:22 -0700 Subject: rcu: document ways of stalling updates in low-memory situations Signed-off-by: Paul E. McKenney --- Documentation/RCU/checklist.txt | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) (limited to 'Documentation') diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt index c7c6788956f4..0c134f8afc6f 100644 --- a/Documentation/RCU/checklist.txt +++ b/Documentation/RCU/checklist.txt @@ -218,13 +218,22 @@ over a rather long period of time, but improvements are always welcome! include: a. Keeping a count of the number of data-structure elements - used by the RCU-protected data structure, including those - waiting for a grace period to elapse. Enforce a limit - on this number, stalling updates as needed to allow - previously deferred frees to complete. - - Alternatively, limit only the number awaiting deferred - free rather than the total number of elements. + used by the RCU-protected data structure, including + those waiting for a grace period to elapse. Enforce a + limit on this number, stalling updates as needed to allow + previously deferred frees to complete. Alternatively, + limit only the number awaiting deferred free rather than + the total number of elements. + + One way to stall the updates is to acquire the update-side + mutex. (Don't try this with a spinlock -- other CPUs + spinning on the lock could prevent the grace period + from ever ending.) Another way to stall the updates + is for the updates to use a wrapper function around + the memory allocator, so that this wrapper function + simulates OOM when there is too much memory awaiting an + RCU grace period. There are of course many other + variations on this theme. b. Limiting update rate. For example, if updates occur only once per hour, then no explicit rate limiting is required, -- cgit v1.2.3 From 2c96c7751d2bb822542b03ddfaca70933f5aaf02 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 23 Aug 2010 16:34:02 -0700 Subject: rcu: upgrade stallwarn.txt documentation for CPU-bound RT processes CPU-bound real-time processes can cause RCU CPU stall warnings, and much other trouble as well. Document the fact that they can cause RCU CPU stall warnings. Suggested-by: Darren Hart Signed-off-by: Paul E. McKenney --- Documentation/RCU/stallwarn.txt | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'Documentation') diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt index 44c6dcc93d6d..862c08ef1fde 100644 --- a/Documentation/RCU/stallwarn.txt +++ b/Documentation/RCU/stallwarn.txt @@ -80,6 +80,24 @@ o A CPU looping with bottom halves disabled. This condition can o For !CONFIG_PREEMPT kernels, a CPU looping anywhere in the kernel without invoking schedule(). +o A CPU-bound real-time task in a CONFIG_PREEMPT kernel, which might + happen to preempt a low-priority task in the middle of an RCU + read-side critical section. This is especially damaging if + that low-priority task is not permitted to run on any other CPU, + in which case the next RCU grace period can never complete, which + will eventually cause the system to run out of memory and hang. + While the system is in the process of running itself out of + memory, you might see stall-warning messages. + +o A CPU-bound real-time task in a CONFIG_PREEMPT_RT kernel that + is running at a higher priority than the RCU softirq threads. + This will prevent RCU callbacks from ever being invoked, + and in a CONFIG_TREE_PREEMPT_RCU kernel will further prevent + RCU grace periods from ever completing. Either way, the + system will eventually run out of memory and hang. In the + CONFIG_TREE_PREEMPT_RCU case, you might see stall-warning + messages. + o A bug in the RCU implementation. o A hardware failure. This is quite unlikely, but has occurred -- cgit v1.2.3 From 269dcc1c2ec25864308ee03a3fa26ea819d9f5d0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 7 Sep 2010 14:23:09 -0700 Subject: rcu: Add tracing data to support queueing models The current tracing data is not sufficient to deduce the average time that a callback spends waiting for a grace period to end. Add three per-CPU counters recording the number of callbacks invoked (ci), the number of callbacks orphaned (co), and the number of callbacks adopted (ca). Given the existing callback queue length (ql), the average wait time in absence of CPU hotplug operations is ql/ci. The units of wait time will be in terms of the duration over which ci was measured. In the presence of CPU hotplug operations, there is room for argument, but ql/(ci-co+ca) won't steer you too far wrong. Also fixes a typo called out by Lucas De Marchi . Signed-off-by: Paul E. McKenney --- Documentation/RCU/trace.txt | 13 ++++++++++++- kernel/rcutree.c | 3 +++ kernel/rcutree.h | 3 +++ kernel/rcutree_trace.c | 10 +++++++--- 4 files changed, 25 insertions(+), 4 deletions(-) (limited to 'Documentation') diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt index efd8cc95c06b..a851118775d8 100644 --- a/Documentation/RCU/trace.txt +++ b/Documentation/RCU/trace.txt @@ -125,6 +125,17 @@ o "b" is the batch limit for this CPU. If more than this number of RCU callbacks is ready to invoke, then the remainder will be deferred. +o "ci" is the number of RCU callbacks that have been invoked for + this CPU. Note that ci+ql is the number of callbacks that have + been registered in absence of CPU-hotplug activity. + +o "co" is the number of RCU callbacks that have been orphaned due to + this CPU going offline. + +o "ca" is the number of RCU callbacks that have been adopted due to + other CPUs going offline. Note that ci+co-ca+ql is the number of + RCU callbacks registered on this CPU. + There is also an rcu/rcudata.csv file with the same information in comma-separated-variable spreadsheet format. @@ -180,7 +191,7 @@ o "s" is the "signaled" state that drives force_quiescent_state()'s o "jfq" is the number of jiffies remaining for this grace period before force_quiescent_state() is invoked to help push things - along. Note that CPUs in dyntick-idle mode thoughout the grace + along. Note that CPUs in dyntick-idle mode throughout the grace period will not report on their own, but rather must be check by some other CPU via force_quiescent_state(). diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 42140a860bb9..e75073504a31 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1004,6 +1004,7 @@ static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp) for (i = 0; i < RCU_NEXT_SIZE; i++) rdp->nxttail[i] = &rdp->nxtlist; rsp->orphan_qlen += rdp->qlen; + rdp->n_cbs_orphaned += rdp->qlen; rdp->qlen = 0; raw_spin_unlock(&rsp->onofflock); /* irqs remain disabled. */ } @@ -1025,6 +1026,7 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp) *rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_list; rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_tail; rdp->qlen += rsp->orphan_qlen; + rdp->n_cbs_adopted += rsp->orphan_qlen; rsp->orphan_cbs_list = NULL; rsp->orphan_cbs_tail = &rsp->orphan_cbs_list; rsp->orphan_qlen = 0; @@ -1156,6 +1158,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) /* Update count, and requeue any remaining callbacks. */ rdp->qlen -= count; + rdp->n_cbs_invoked += count; if (list != NULL) { *tail = rdp->nxtlist; rdp->nxtlist = list; diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 7918ba61873f..91d4170c5c13 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -202,6 +202,9 @@ struct rcu_data { long qlen; /* # of queued callbacks */ long qlen_last_fqs_check; /* qlen at last check for QS forcing */ + unsigned long n_cbs_invoked; /* count of RCU cbs invoked. */ + unsigned long n_cbs_orphaned; /* RCU cbs sent to orphanage. */ + unsigned long n_cbs_adopted; /* RCU cbs adopted from orphanage. */ unsigned long n_force_qs_snap; /* did other CPU force QS recently? */ long blimit; /* Upper limit on a processed batch */ diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index 458e032a3a30..d15430b9d122 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c @@ -64,7 +64,9 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) rdp->dynticks_fqs); #endif /* #ifdef CONFIG_NO_HZ */ seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi); - seq_printf(m, " ql=%ld b=%ld\n", rdp->qlen, rdp->blimit); + seq_printf(m, " ql=%ld b=%ld", rdp->qlen, rdp->blimit); + seq_printf(m, " ci=%lu co=%lu ca=%lu\n", + rdp->n_cbs_invoked, rdp->n_cbs_orphaned, rdp->n_cbs_adopted); } #define PRINT_RCU_DATA(name, func, m) \ @@ -119,7 +121,9 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp) rdp->dynticks_fqs); #endif /* #ifdef CONFIG_NO_HZ */ seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi); - seq_printf(m, ",%ld,%ld\n", rdp->qlen, rdp->blimit); + seq_printf(m, ",%ld,%ld", rdp->qlen, rdp->blimit); + seq_printf(m, ",%lu,%lu,%lu\n", + rdp->n_cbs_invoked, rdp->n_cbs_orphaned, rdp->n_cbs_adopted); } static int show_rcudata_csv(struct seq_file *m, void *unused) @@ -128,7 +132,7 @@ static int show_rcudata_csv(struct seq_file *m, void *unused) #ifdef CONFIG_NO_HZ seq_puts(m, "\"dt\",\"dt nesting\",\"dn\",\"df\","); #endif /* #ifdef CONFIG_NO_HZ */ - seq_puts(m, "\"of\",\"ri\",\"ql\",\"b\"\n"); + seq_puts(m, "\"of\",\"ri\",\"ql\",\"b\",\"ci\",\"co\",\"ca\"\n"); #ifdef CONFIG_TREE_PREEMPT_RCU seq_puts(m, "\"rcu_preempt:\"\n"); PRINT_RCU_DATA(rcu_preempt_data, print_one_rcu_data_csv, m); -- cgit v1.2.3